From 64e916dea413f16e1a49ef3241f9a3435ca713d5 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Thu, 11 May 2017 12:21:19 -0500 Subject: [PATCH 0001/1398] ARM: dts: LogicPD Torpedo: Fix camera pin mux commit 56322e123235370f1449c7444e311cce857d12f5 upstream. Fix commit 05c4ffc3a266 ("ARM: dts: LogicPD Torpedo: Add MT9P031 Support") In the previous commit, I indicated that the only testing was done by showing the camera showed up when probing. This patch fixes an incorrect pin muxing on cam_d0, cam_d1 and cam_d2. Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts index 08cce17a25a0..43e9364083de 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts @@ -249,9 +249,9 @@ OMAP3_CORE1_IOPAD(0x2110, PIN_INPUT | MUX_MODE0) /* cam_xclka.cam_xclka */ OMAP3_CORE1_IOPAD(0x2112, PIN_INPUT | MUX_MODE0) /* cam_pclk.cam_pclk */ - OMAP3_CORE1_IOPAD(0x2114, PIN_INPUT | MUX_MODE0) /* cam_d0.cam_d0 */ - OMAP3_CORE1_IOPAD(0x2116, PIN_INPUT | MUX_MODE0) /* cam_d1.cam_d1 */ - OMAP3_CORE1_IOPAD(0x2118, PIN_INPUT | MUX_MODE0) /* cam_d2.cam_d2 */ + OMAP3_CORE1_IOPAD(0x2116, PIN_INPUT | MUX_MODE0) /* cam_d0.cam_d0 */ + OMAP3_CORE1_IOPAD(0x2118, PIN_INPUT | MUX_MODE0) /* cam_d1.cam_d1 */ + OMAP3_CORE1_IOPAD(0x211a, PIN_INPUT | MUX_MODE0) /* cam_d2.cam_d2 */ OMAP3_CORE1_IOPAD(0x211c, PIN_INPUT | MUX_MODE0) /* cam_d3.cam_d3 */ OMAP3_CORE1_IOPAD(0x211e, PIN_INPUT | MUX_MODE0) /* cam_d4.cam_d4 */ OMAP3_CORE1_IOPAD(0x2120, PIN_INPUT | MUX_MODE0) /* cam_d5.cam_d5 */ -- GitLab From 29bd7003134a3220119e7a75c4b06ac567f966e7 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Thu, 17 Aug 2017 06:01:28 -0500 Subject: [PATCH 0002/1398] ARM: dts: omap3: logicpd-torpedo-37xx-devkit: Fix MMC1 cd-gpio commit b7ace5ed8867ca54503727988adec6b20af54eeb upstream. Fixes commit 687c27676151 ("ARM: dts: Add minimal support for LogicPD Torpedo DM3730 devkit") This patch corrects an issue where the cd-gpios was improperly setup using IRQ_TYPE_LEVEL_LOW instead of GPIO_ACTIVE_LOW. Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts index 43e9364083de..b4575bbaf085 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts @@ -192,7 +192,7 @@ interrupts-extended = <&intc 83 &omap3_pmx_core 0x11a>; pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins &mmc1_cd>; - cd-gpios = <&gpio4 31 IRQ_TYPE_LEVEL_LOW>; /* gpio127 */ + cd-gpios = <&gpio4 31 GPIO_ACTIVE_LOW>; /* gpio127 */ vmmc-supply = <&vmmc1>; bus-width = <4>; cap-power-off-card; -- GitLab From 7031ae2ab37d3df53c4a4e9903329a5d38c745ec Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 27 Nov 2017 06:21:25 +0300 Subject: [PATCH 0003/1398] mm, thp: Do not make page table dirty unconditionally in touch_p[mu]d() commit a8f97366452ed491d13cf1e44241bc0b5740b1f0 upstream. Currently, we unconditionally make page table dirty in touch_pmd(). It may result in false-positive can_follow_write_pmd(). We may avoid the situation, if we would only make the page table entry dirty if caller asks for write access -- FOLL_WRITE. The patch also changes touch_pud() in the same way. Signed-off-by: Kirill A. Shutemov Cc: Michal Hocko Cc: Hugh Dickins Signed-off-by: Linus Torvalds [Salvatore Bonaccorso: backport for 4.9: - Adjust context - Drop specific part for PUD-sized transparent hugepages. Support for PUD-sized transparent hugepages was added in v4.11-rc1 ] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8258e9eee806..3cae1dcf069c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -745,20 +745,15 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd) + pmd_t *pmd, int flags) { pmd_t _pmd; - /* - * We should set the dirty bit only for FOLL_WRITE but for now - * the dirty bit in the pmd is meaningless. And if the dirty - * bit will become meaningful and we'll only set it with - * FOLL_WRITE, an atomic set_bit will be required on the pmd to - * set the young bit, instead of the current set_pmd_at. - */ - _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); + _pmd = pmd_mkyoung(*pmd); + if (flags & FOLL_WRITE) + _pmd = pmd_mkdirty(_pmd); if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, - pmd, _pmd, 1)) + pmd, _pmd, flags & FOLL_WRITE)) update_mmu_cache_pmd(vma, addr, pmd); } @@ -787,7 +782,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, return NULL; if (flags & FOLL_TOUCH) - touch_pmd(vma, addr, pmd); + touch_pmd(vma, addr, pmd, flags); /* * device mapped pages can only be returned if the @@ -1158,7 +1153,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, page = pmd_page(*pmd); VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); if (flags & FOLL_TOUCH) - touch_pmd(vma, addr, pmd); + touch_pmd(vma, addr, pmd, flags); if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { /* * We don't mlock() pte-mapped THPs. This way we can avoid -- GitLab From 436f19a2e49e57b39e1d46ee018eb5f64ec1031f Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 29 Nov 2017 16:10:01 -0800 Subject: [PATCH 0004/1398] mm/cma: fix alloc_contig_range ret code/potential leak commit 63cd448908b5eb51d84c52f02b31b9b4ccd1cb5a upstream. If the call __alloc_contig_migrate_range() in alloc_contig_range returns -EBUSY, processing continues so that test_pages_isolated() is called where there is a tracepoint to identify the busy pages. However, it is possible for busy pages to become available between the calls to these two routines. In this case, the range of pages may be allocated. Unfortunately, the original return code (ret == -EBUSY) is still set and returned to the caller. Therefore, the caller believes the pages were not allocated and they are leaked. Update the comment to indicate that allocation is still possible even if __alloc_contig_migrate_range returns -EBUSY. Also, clear return code in this case so that it is not accidentally used or returned to caller. Link: http://lkml.kernel.org/r/20171122185214.25285-1-mike.kravetz@oracle.com Fixes: 8ef5849fa8a2 ("mm/cma: always check which page caused allocation failure") Signed-off-by: Mike Kravetz Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Acked-by: Joonsoo Kim Cc: Michal Nazarewicz Cc: Laura Abbott Cc: Michal Hocko Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4a044134ce84..ef5ee56095e8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7309,11 +7309,18 @@ int alloc_contig_range(unsigned long start, unsigned long end, /* * In case of -EBUSY, we'd like to know which page causes problem. - * So, just fall through. We will check it in test_pages_isolated(). + * So, just fall through. test_pages_isolated() has a tracepoint + * which will report the busy page. + * + * It is possible that busy pages could become available before + * the call to test_pages_isolated, and the range will actually be + * allocated. So, if we fall through be sure to clear ret so that + * -EBUSY is not accidentally used or returned to caller. */ ret = __alloc_contig_migrate_range(&cc, start, end); if (ret && ret != -EBUSY) goto done; + ret =0; /* * Pages from [start, end) are within a MAX_ORDER_NR_PAGES -- GitLab From cebe139e5712d6925a9b70f3769df6818b6c14dd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:28 -0800 Subject: [PATCH 0005/1398] mm, hugetlbfs: introduce ->split() to vm_operations_struct commit 31383c6865a578834dd953d9dbc88e6b19fe3997 upstream. Patch series "device-dax: fix unaligned munmap handling" When device-dax is operating in huge-page mode we want it to behave like hugetlbfs and fail attempts to split vmas into unaligned ranges. It would be messy to teach the munmap path about device-dax alignment constraints in the same (hstate) way that hugetlbfs communicates this constraint. Instead, these patches introduce a new ->split() vm operation. This patch (of 2): The device-dax interface has similar constraints as hugetlbfs in that it requires the munmap path to unmap in huge page aligned units. Rather than add more custom vma handling code in __split_vma() introduce a new vm operation to perform this vma specific check. Link: http://lkml.kernel.org/r/151130418135.4029.6783191281930729710.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Signed-off-by: Dan Williams Cc: Jeff Moyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h | 1 + mm/hugetlb.c | 8 ++++++++ mm/mmap.c | 8 +++++--- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 6c9e1ad12831..2217e2f18247 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -347,6 +347,7 @@ struct fault_env { struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); + int (*split)(struct vm_area_struct * area, unsigned long addr); int (*mremap)(struct vm_area_struct * area); int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); int (*pmd_fault)(struct vm_area_struct *, unsigned long address, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 65c36acf8a6b..6ff65c405243 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3135,6 +3135,13 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) } } +static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) +{ + if (addr & ~(huge_page_mask(hstate_vma(vma)))) + return -EINVAL; + return 0; +} + /* * We cannot handle pagefaults against hugetlb pages at all. They cause * handle_mm_fault() to try to instantiate regular-sized pages in the @@ -3151,6 +3158,7 @@ const struct vm_operations_struct hugetlb_vm_ops = { .fault = hugetlb_vm_op_fault, .open = hugetlb_vm_op_open, .close = hugetlb_vm_op_close, + .split = hugetlb_vm_op_split, }; static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, diff --git a/mm/mmap.c b/mm/mmap.c index 75d263bd8739..5b48adb4aa56 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2538,9 +2538,11 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *new; int err; - if (is_vm_hugetlb_page(vma) && (addr & - ~(huge_page_mask(hstate_vma(vma))))) - return -EINVAL; + if (vma->vm_ops && vma->vm_ops->split) { + err = vma->vm_ops->split(vma, addr); + if (err) + return err; + } new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!new) -- GitLab From ba32d7dce43f14ef1a1cb0540959431526cf7fe0 Mon Sep 17 00:00:00 2001 From: chenjie Date: Wed, 29 Nov 2017 16:10:54 -0800 Subject: [PATCH 0006/1398] mm/madvise.c: fix madvise() infinite loop under special circumstances commit 6ea8d958a2c95a1d514015d4e29ba21a8c0a1a91 upstream. MADVISE_WILLNEED has always been a noop for DAX (formerly XIP) mappings. Unfortunately madvise_willneed() doesn't communicate this information properly to the generic madvise syscall implementation. The calling convention is quite subtle there. madvise_vma() is supposed to either return an error or update &prev otherwise the main loop will never advance to the next vma and it will keep looping for ever without a way to get out of the kernel. It seems this has been broken since introduction. Nobody has noticed because nobody seems to be using MADVISE_WILLNEED on these DAX mappings. [mhocko@suse.com: rewrite changelog] Link: http://lkml.kernel.org/r/20171127115318.911-1-guoxuenan@huawei.com Fixes: fe77ba6f4f97 ("[PATCH] xip: madvice/fadvice: execute in place") Signed-off-by: chenjie Signed-off-by: guoxuenan Acked-by: Michal Hocko Cc: Minchan Kim Cc: zhangyi (F) Cc: Miao Xie Cc: Mike Rapoport Cc: Shaohua Li Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Kirill A. Shutemov Cc: David Rientjes Cc: Anshuman Khandual Cc: Rik van Riel Cc: Carsten Otte Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/madvise.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 55f30ec32e5b..a49afe08698b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -228,15 +228,14 @@ static long madvise_willneed(struct vm_area_struct *vma, { struct file *file = vma->vm_file; + *prev = vma; #ifdef CONFIG_SWAP if (!file) { - *prev = vma; force_swapin_readahead(vma, start, end); return 0; } if (shmem_mapping(file->f_mapping)) { - *prev = vma; force_shm_swapin_readahead(vma, start, end, file->f_mapping); return 0; @@ -251,7 +250,6 @@ static long madvise_willneed(struct vm_area_struct *vma, return 0; } - *prev = vma; start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; if (end > vma->vm_end) end = vma->vm_end; -- GitLab From c251267c8826027af32fe7195650d29797539847 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 17 Nov 2017 14:50:46 -0500 Subject: [PATCH 0007/1398] btrfs: clear space cache inode generation always commit 8e138e0d92c6c9d3d481674fb14e3439b495be37 upstream. We discovered a box that had double allocations, and suspected the space cache may be to blame. While auditing the write out path I noticed that if we've already setup the space cache we will just carry on. This means that any error we hit after cache_save_setup before we go to actually write the cache out we won't reset the inode generation, so whatever was already written will be considered correct, except it'll be stale. Fix this by _always_ resetting the generation on the block group inode, this way we only ever have valid or invalid cache. With this patch I was no longer able to reproduce cache corruption with dm-log-writes and my bpf error injection tool. Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 705bb5f5a87f..c4cff5cc9c93 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3397,13 +3397,6 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, goto again; } - /* We've already setup this transaction, go ahead and exit */ - if (block_group->cache_generation == trans->transid && - i_size_read(inode)) { - dcs = BTRFS_DC_SETUP; - goto out_put; - } - /* * We want to set the generation to 0, that way if anything goes wrong * from here on out we know not to trust this cache when we load up next @@ -3427,6 +3420,13 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, } WARN_ON(ret); + /* We've already setup this transaction, go ahead and exit */ + if (block_group->cache_generation == trans->transid && + i_size_read(inode)) { + dcs = BTRFS_DC_SETUP; + goto out_put; + } + if (i_size_read(inode) > 0) { ret = btrfs_check_trunc_cache_free_space(root, &root->fs_info->global_block_rsv); -- GitLab From 1c8ea4145097f8a69a8230b0e40c083ffc1b53e4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:10 -0400 Subject: [PATCH 0008/1398] nfsd: Fix stateid races between OPEN and CLOSE commit 15ca08d3299682dc49bad73251677b2c5017ef08 upstream. Open file stateids can linger on the nfs4_file list of stateids even after they have been closed. In order to avoid reusing such a stateid, and confusing the client, we need to recheck the nfs4_stid's type after taking the mutex. Otherwise, we risk reusing an old stateid that was already closed, which will confuse clients that expect new stateids to conform to RFC7530 Sections 9.1.4.2 and 16.2.5 or RFC5661 Sections 8.2.2 and 18.2.4. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 67 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ec2a69dac536..8fae634457b3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3513,7 +3513,9 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) /* ignore lock owners */ if (local->st_stateowner->so_is_open_owner == 0) continue; - if (local->st_stateowner == &oo->oo_owner) { + if (local->st_stateowner != &oo->oo_owner) + continue; + if (local->st_stid.sc_type == NFS4_OPEN_STID) { ret = local; atomic_inc(&ret->st_stid.sc_count); break; @@ -3522,6 +3524,52 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) return ret; } +static __be32 +nfsd4_verify_open_stid(struct nfs4_stid *s) +{ + __be32 ret = nfs_ok; + + switch (s->sc_type) { + default: + break; + case NFS4_CLOSED_STID: + case NFS4_CLOSED_DELEG_STID: + ret = nfserr_bad_stateid; + break; + case NFS4_REVOKED_DELEG_STID: + ret = nfserr_deleg_revoked; + } + return ret; +} + +/* Lock the stateid st_mutex, and deal with races with CLOSE */ +static __be32 +nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp) +{ + __be32 ret; + + mutex_lock(&stp->st_mutex); + ret = nfsd4_verify_open_stid(&stp->st_stid); + if (ret != nfs_ok) + mutex_unlock(&stp->st_mutex); + return ret; +} + +static struct nfs4_ol_stateid * +nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) +{ + struct nfs4_ol_stateid *stp; + for (;;) { + spin_lock(&fp->fi_lock); + stp = nfsd4_find_existing_open(fp, open); + spin_unlock(&fp->fi_lock); + if (!stp || nfsd4_lock_ol_stateid(stp) == nfs_ok) + break; + nfs4_put_stid(&stp->st_stid); + } + return stp; +} + static struct nfs4_openowner * alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) @@ -3566,6 +3614,7 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) mutex_init(&stp->st_mutex); mutex_lock(&stp->st_mutex); +retry: spin_lock(&oo->oo_owner.so_client->cl_lock); spin_lock(&fp->fi_lock); @@ -3590,7 +3639,11 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); if (retstp) { - mutex_lock(&retstp->st_mutex); + /* Handle races with CLOSE */ + if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { + nfs4_put_stid(&retstp->st_stid); + goto retry; + } /* To keep mutex tracking happy */ mutex_unlock(&stp->st_mutex); stp = retstp; @@ -4411,9 +4464,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; - spin_lock(&fp->fi_lock); - stp = nfsd4_find_existing_open(fp, open); - spin_unlock(&fp->fi_lock); + stp = nfsd4_find_and_lock_existing_open(fp, open); } else { open->op_file = NULL; status = nfserr_bad_stateid; @@ -4427,7 +4478,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ - mutex_lock(&stp->st_mutex); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { mutex_unlock(&stp->st_mutex); @@ -5314,7 +5364,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) bool unhashed; LIST_HEAD(reaplist); - s->st_stid.sc_type = NFS4_CLOSED_STID; spin_lock(&clp->cl_lock); unhashed = unhash_open_stateid(s, &reaplist); @@ -5353,10 +5402,12 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; + + stp->st_stid.sc_type = NFS4_CLOSED_STID; nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); - mutex_unlock(&stp->st_mutex); nfsd4_close_open_stateid(stp); + mutex_unlock(&stp->st_mutex); /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); -- GitLab From 1c404259164405f9a61379285f6e1fcb57ea43aa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:11 -0400 Subject: [PATCH 0009/1398] nfsd: Fix another OPEN stateid race commit d8a1a000555ecd1b824ac1ed6df8fe364dfbbbb0 upstream. If nfsd4_process_open2() is initialising a new stateid, and yet the call to nfs4_get_vfs_file() fails for some reason, then we must declare the stateid closed, and unhash it before dropping the mutex. Right now, we unhash the stateid after dropping the mutex, and without changing the stateid type, meaning that another OPEN could theoretically look it up and attempt to use it. Reported-by: Andrew W Elble Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8fae634457b3..7161913cdaeb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4453,6 +4453,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_ol_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; + bool new_stp = false; /* * Lookup file; if found, lookup stateid and check open request, @@ -4472,11 +4473,19 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } + if (!stp) { + stp = init_open_stateid(fp, open); + if (!open->op_stp) + new_stp = true; + } + /* * OPEN the file, or upgrade an existing OPEN. * If truncate fails, the OPEN fails. + * + * stp is already locked. */ - if (stp) { + if (!new_stp) { /* Stateid was found, this is an OPEN upgrade */ status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { @@ -4484,22 +4493,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } } else { - /* stp is returned locked. */ - stp = init_open_stateid(fp, open); - /* See if we lost the race to some other thread */ - if (stp->st_access_bmap != 0) { - status = nfs4_upgrade_open(rqstp, fp, current_fh, - stp, open); - if (status) { - mutex_unlock(&stp->st_mutex); - goto out; - } - goto upgrade_out; - } status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { - mutex_unlock(&stp->st_mutex); + stp->st_stid.sc_type = NFS4_CLOSED_STID; release_open_stateid(stp); + mutex_unlock(&stp->st_mutex); goto out; } @@ -4508,7 +4506,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (stp->st_clnt_odstate == open->op_odstate) open->op_odstate = NULL; } -upgrade_out: + nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); mutex_unlock(&stp->st_mutex); -- GitLab From aa1a6cf1586bb0aa7c6585ebaffd7b5e3ae4f040 Mon Sep 17 00:00:00 2001 From: Naofumi Honda Date: Thu, 9 Nov 2017 10:57:16 -0500 Subject: [PATCH 0010/1398] nfsd: fix panic in posix_unblock_lock called from nfs4_laundromat commit 64ebe12494fd5d193f014ce38e1fd83cc57883c8 upstream. From kernel 4.9, my two nfsv4 servers sometimes suffer from "panic: unable to handle kernel page request" in posix_unblock_lock() called from nfs4_laundromat(). These panics diseappear if we revert the commit "nfsd: add a LRU list for blocked locks". The cause appears to be a typo in nfs4_laundromat(), which is also present in nfs4_state_shutdown_net(). Fixes: 7919d0a27f1e "nfsd: add a LRU list for blocked locks" Cc: jlayton@redhat.com Reveiwed-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7161913cdaeb..9ebb2d7c8182 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4732,7 +4732,7 @@ nfs4_laundromat(struct nfsd_net *nn) spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { - nbl = list_first_entry(&nn->blocked_locks_lru, + nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); posix_unblock_lock(&nbl->nbl_lock); @@ -7143,7 +7143,7 @@ nfs4_state_shutdown_net(struct net *net) spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { - nbl = list_first_entry(&nn->blocked_locks_lru, + nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); posix_unblock_lock(&nbl->nbl_lock); -- GitLab From dc554abdaee496a8dd0dbafec7abb83328a575ae Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 29 Jan 2017 06:40:15 -0600 Subject: [PATCH 0011/1398] mfd: twl4030-power: Fix pmic for boards that need vmmc1 on reboot commit ad48ed0c5763dc08931407e455dff5acdbe96e81 upstream. At least two different omap3630/3730 boards booting from MMC1 fail to reboot if the "ti,twl4030-power-idle-osc-off" or "ti,twl4030-power-idle" compatible flags are set. This patch will keep the vmmc1 powered up during reboot allowing the bootloader to load. Signed-off-by: Adam Ford Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/twl4030-power.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c index 1beb722f6080..e1e69a480c56 100644 --- a/drivers/mfd/twl4030-power.c +++ b/drivers/mfd/twl4030-power.c @@ -701,6 +701,7 @@ static struct twl4030_ins omap3_wrst_seq[] = { TWL_RESOURCE_RESET(RES_MAIN_REF), TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R2), TWL_RESOURCE_RESET(RES_VUSB_3V1), + TWL_RESOURCE_RESET(RES_VMMC1), TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R1), TWL_RESOURCE_GROUP_RESET(RES_GRP_RC, RES_TYPE_ALL, RES_TYPE2_R0), TWL_RESOURCE_ON(RES_RESET), -- GitLab From e18a963b36a001b0067f9091b4ea23aa152af341 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 3 Jan 2017 11:37:48 -0600 Subject: [PATCH 0012/1398] ARM: OMAP2+: Fix WL1283 Bluetooth Baud Rate commit a3ac350793d90d1da631c8beeee9352387974ed5 upstream. Commit 485fa1261f78 ("ARM: OMAP2+: LogicPD Torpedo + Wireless: Add Bluetooth") set the wrong baud rate for the UART. The Baud rate was 300,000 and it should be 3,000,000 for WL1283. Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/pdata-quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 770216baa737..da310bb779b9 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -162,7 +162,7 @@ static struct ti_st_plat_data wilink7_pdata = { .nshutdown_gpio = 162, .dev_name = "/dev/ttyO1", .flow_cntrl = 1, - .baud_rate = 300000, + .baud_rate = 3000000, }; static struct platform_device wl128x_device = { -- GitLab From f5477da6c3b0f8df578a55dfdeebe18458303e4d Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:11:30 +0200 Subject: [PATCH 0013/1398] KVM: x86: pvclock: Handle first-time write to pvclock-page contains random junk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 51c4b8bba674cfd2260d173602c4dac08e4c3a99 upstream. When guest passes KVM it's pvclock-page GPA via WRMSR to MSR_KVM_SYSTEM_TIME / MSR_KVM_SYSTEM_TIME_NEW, KVM don't initialize pvclock-page to some start-values. It just requests a clock-update which will happen before entering to guest. The clock-update logic will call kvm_setup_pvclock_page() to update the pvclock-page with info. However, kvm_setup_pvclock_page() *wrongly* assumes that the version-field is initialized to an even number. This is wrong because at first-time write, field could be any-value. Fix simply makes sure that if first-time version-field is odd, increment it once more to make it even and only then start standard logic. This follows same logic as done in other pvclock shared-pages (See kvm_write_wall_clock() and record_steal_time()). Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 595f8149c0d9..a4294fd1caa7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1797,6 +1797,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) */ BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); + if (guest_hv_clock.version & 1) + ++guest_hv_clock.version; /* first time write, random junk */ + vcpu->hv_clock.version = guest_hv_clock.version + 1; kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, -- GitLab From c0a4c22aad0f80a88b2c7cae652a29f385a12e9d Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:32 +0200 Subject: [PATCH 0014/1398] KVM: x86: Exit to user-mode on #UD intercept when emulator requires MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 61cb57c9ed631c95b54f8e9090c89d18b3695b3c upstream. Instruction emulation after trapping a #UD exception can result in an MMIO access, for example when emulating a MOVBE on a processor that doesn't support the instruction. In this case, the #UD vmexit handler must exit to user mode, but there wasn't any code to do so. Add it for both VMX and SVM. Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 2 ++ arch/x86/kvm/vmx.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4fbf0c94f2d1..23f1a6bd7a0d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2149,6 +2149,8 @@ static int ud_interception(struct vcpu_svm *svm) int er; er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0f0b27d96f27..f0d3de153e29 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5502,6 +5502,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) return 1; } er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); return 1; -- GitLab From 67945527ee240ff96daee0ef33d997a2fe43a419 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 10 Nov 2017 10:49:38 +0100 Subject: [PATCH 0015/1398] KVM: x86: inject exceptions produced by x86_decode_insn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6ea6e84309ca7e0e850b3083e6b09344ee15c290 upstream. Sometimes, a processor might execute an instruction while another processor is updating the page tables for that instruction's code page, but before the TLB shootdown completes. The interesting case happens if the page is in the TLB. In general, the processor will succeed in executing the instruction and nothing bad happens. However, what if the instruction is an MMIO access? If *that* happens, KVM invokes the emulator, and the emulator gets the updated page tables. If the update side had marked the code page as non present, the page table walk then will fail and so will x86_decode_insn. Unfortunately, even though kvm_fetch_guest_virt is correctly returning X86EMUL_PROPAGATE_FAULT, x86_decode_insn's caller treats the failure as a fatal error if the instruction cannot simply be reexecuted (as is the case for MMIO). And this in fact happened sometimes when rebooting Windows 2012r2 guests. Just checking ctxt->have_exception and injecting the exception if true is enough to fix the case. Thanks to Eduardo Habkost for helping in the debugging of this issue. Reported-by: Yanan Fu Cc: Eduardo Habkost Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a4294fd1caa7..02d45296a97c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5579,6 +5579,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) return EMULATE_DONE; + if (ctxt->have_exception && inject_emulated_exception(vcpu)) + return EMULATE_DONE; if (emulation_type & EMULTYPE_SKIP) return EMULATE_FAIL; return handle_emulation_failure(vcpu); -- GitLab From e02746e22d24233dca60302ddad1c071603394d7 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 17 Nov 2017 11:52:49 +0000 Subject: [PATCH 0016/1398] KVM: lapic: Split out x2apic ldr calculation commit e872fa94662d0644057c7c80b3071bdb9249e5ab upstream. Split out the ldr calculation from kvm_apic_set_x2apic_id since we're about to reuse it in the following patch. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3f05c044720b..be21de711a86 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -246,9 +246,14 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) recalculate_apic_map(apic->vcpu->kvm); } +static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) +{ + return ((id >> 4) << 16) | (1 << (id & 0xf)); +} + static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) { - u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); + u32 ldr = kvm_apic_calc_x2apic_ldr(id); kvm_lapic_set_reg(apic, APIC_ID, id); kvm_lapic_set_reg(apic, APIC_LDR, ldr); -- GitLab From 9aad75786e3d986499934fdaec7f911949a2f3e3 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 17 Nov 2017 11:52:50 +0000 Subject: [PATCH 0017/1398] KVM: lapic: Fixup LDR on load in x2apic commit 12806ba937382fdfdbad62a399aa2dce65c10fcd upstream. In x2apic mode the LDR is fixed based on the ID rather than separately loadable like it was before x2. When kvm_apic_set_state is called, the base is set, and if it has the X2APIC_ENABLE flag set then the LDR is calculated; however that value gets overwritten by the memcpy a few lines below overwriting it with the value that came from userland. The symptom is a lack of EOI after loading the state (e.g. after a QEMU migration) and is due to the EOI bitmap being wrong due to the incorrect LDR. This was seen with a Win2016 guest under Qemu with irqchip=split whose USB mouse didn't work after a VM migration. This corresponds to RH bug: https://bugzilla.redhat.com/show_bug.cgi?id=1502591 Reported-by: Yiqian Wei Signed-off-by: Dr. David Alan Gilbert [Applied fixup from Liran Alon. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index be21de711a86..b24b3c6d686e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2034,6 +2034,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, { if (apic_x2apic_mode(vcpu->arch.apic)) { u32 *id = (u32 *)(s->regs + APIC_ID); + u32 *ldr = (u32 *)(s->regs + APIC_LDR); if (vcpu->kvm->arch.x2apic_format) { if (*id != vcpu->vcpu_id) @@ -2044,6 +2045,10 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, else *id <<= 24; } + + /* In x2APIC mode, the LDR is fixed and based on the id */ + if (set) + *ldr = kvm_apic_calc_x2apic_ldr(*id); } return 0; -- GitLab From 0bf4d0517667877c2b0955586070fd7198943896 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 15:42:29 +0200 Subject: [PATCH 0018/1398] mmc: core: Do not leave the block driver in a suspended state commit ebe7dd45cf49e3b49cacbaace17f9f878f21fbea upstream. The block driver must be resumed if the mmc bus fails to suspend the card. Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index c64266f5a399..60ebe5b4500b 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -155,6 +155,9 @@ static int mmc_bus_suspend(struct device *dev) return ret; ret = host->bus_ops->suspend(host); + if (ret) + pm_generic_resume(dev); + return ret; } -- GitLab From c814d6701b08fe58d6eb95641a089bde51afe951 Mon Sep 17 00:00:00 2001 From: Bastian Stender Date: Tue, 28 Nov 2017 09:24:07 +0100 Subject: [PATCH 0019/1398] mmc: core: prepend 0x to OCR entry in sysfs commit c892b0d81705c566f575e489efc3c50762db1bde upstream. The sysfs entry "ocr" was missing the 0x prefix to identify it as hex formatted. Fixes: 5fb06af7a33b ("mmc: core: Extend sysfs with OCR register") Signed-off-by: Bastian Stender [Ulf: Amended change to also cover SD-cards] Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc.c | 2 +- drivers/mmc/core/sd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 4f4a627f6b20..0c6de9f12ee8 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -752,7 +752,7 @@ MMC_DEV_ATTR(enhanced_area_offset, "%llu\n", MMC_DEV_ATTR(enhanced_area_size, "%u\n", card->ext_csd.enhanced_area_size); MMC_DEV_ATTR(raw_rpmb_size_mult, "%#x\n", card->ext_csd.raw_rpmb_size_mult); MMC_DEV_ATTR(rel_sectors, "%#x\n", card->ext_csd.rel_sectors); -MMC_DEV_ATTR(ocr, "%08x\n", card->ocr); +MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr); static ssize_t mmc_fwrev_show(struct device *dev, struct device_attribute *attr, diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index f6f40a1673ae..f09148a4ab55 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -683,7 +683,7 @@ MMC_DEV_ATTR(manfid, "0x%06x\n", card->cid.manfid); MMC_DEV_ATTR(name, "%s\n", card->cid.prod_name); MMC_DEV_ATTR(oemid, "0x%04x\n", card->cid.oemid); MMC_DEV_ATTR(serial, "0x%08x\n", card->cid.serial); -MMC_DEV_ATTR(ocr, "%08x\n", card->ocr); +MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr); static ssize_t mmc_dsr_show(struct device *dev, -- GitLab From 388d8c9fa37194a782f7a7d7c21b7590fd08893d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 27 Nov 2017 20:46:22 +0100 Subject: [PATCH 0020/1398] eeprom: at24: fix reading from 24MAC402/24MAC602 commit 644a1f19c6c8393d0c4168a5adf79056da6822eb upstream. Chip datasheet mentions that word addresses other than the actual start position of the MAC delivers undefined results. So fix this. Current implementation doesn't work due to this wrong offset. Fixes: 0b813658c115 ("eeprom: at24: add support for at24mac series") Signed-off-by: Heiner Kallweit Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/at24.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 051b14766ef9..f7de115b2388 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -365,7 +365,8 @@ static ssize_t at24_eeprom_read_mac(struct at24_data *at24, char *buf, memset(msg, 0, sizeof(msg)); msg[0].addr = client->addr; msg[0].buf = addrbuf; - addrbuf[0] = 0x90 + offset; + /* EUI-48 starts from 0x9a, EUI-64 from 0x98 */ + addrbuf[0] = 0xa0 - at24->chip.byte_len + offset; msg[0].len = 1; msg[1].addr = client->addr; msg[1].flags = I2C_M_RD; -- GitLab From d3da2efa6d9db2be41615fbc514cd056a06f5deb Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 27 Nov 2017 22:06:13 +0100 Subject: [PATCH 0021/1398] eeprom: at24: correctly set the size for at24mac402 commit 5478e478eee3b096b8d998d4ed445da30da2dfbc upstream. There's an ilog2() expansion in AT24_DEVICE_MAGIC() which rounds down the actual size of EUI-48 byte array in at24mac402 eeproms to 4 from 6, making it impossible to read it all. Fix it by manually adjusting the value in probe(). This patch contains a temporary fix that is suitable for stable branches. Eventually we'll probably remove the call to ilog2() while converting the magic values to actual structs. Fixes: 0b813658c115 ("eeprom: at24: add support for at24mac series") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/at24.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index f7de115b2388..3ddd57a55acf 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -639,6 +639,16 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_warn(&client->dev, "page_size looks suspicious (no power of 2)!\n"); + /* + * REVISIT: the size of the EUI-48 byte array is 6 in at24mac402, while + * the call to ilog2() in AT24_DEVICE_MAGIC() rounds it down to 4. + * + * Eventually we'll get rid of the magic values altoghether in favor of + * real structs, but for now just manually set the right size. + */ + if (chip.flags & AT24_FLAG_MAC && chip.byte_len == 4) + chip.byte_len = 6; + /* Use I2C operations unless we're stuck with SMBus extensions. */ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { if (chip.flags & AT24_FLAG_ADDR16) -- GitLab From 19699abea32154abcf2135c71f2eb0f260f3ffa2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 24 Nov 2017 07:47:50 +0100 Subject: [PATCH 0022/1398] eeprom: at24: check at24_read/write arguments commit d9bcd462daf34aebb8de9ad7f76de0198bb5a0f0 upstream. So far we completely rely on the caller to provide valid arguments. To be on the safe side perform an own sanity check. Signed-off-by: Heiner Kallweit Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/at24.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 3ddd57a55acf..19c10dc56513 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -507,6 +507,9 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) if (unlikely(!count)) return count; + if (off + count > at24->chip.byte_len) + return -EINVAL; + /* * Read data from chip, protecting against concurrent updates * from this host, but not from other I2C masters. @@ -539,6 +542,9 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count) if (unlikely(!count)) return -EINVAL; + if (off + count > at24->chip.byte_len) + return -EINVAL; + /* * Write data to chip, protecting against concurrent updates * from this host, but not from other I2C masters. -- GitLab From 53cf83b3c73446a3f047e6abb805521a83baa133 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 22 Nov 2017 12:28:17 +0100 Subject: [PATCH 0023/1398] i2c: i801: Fix Failed to allocate irq -2147483648 error commit 6e0c9507bf51e1517a80ad0ac171e5402528fcef upstream. On Apollo Lake devices the BIOS does not set up IRQ routing for the i801 SMBUS controller IRQ, so we end up with dev->irq set to IRQ_NOTCONNECTED. Detect this and do not try to use the irq in this case silencing: i801_smbus 0000:00:1f.1: Failed to allocate irq -2147483648: -107 BugLink: https://communities.intel.com/thread/114759 Signed-off-by: Hans de Goede Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-i801.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index eb3627f35d12..e6fe21a6135b 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1592,6 +1592,9 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) /* Default timeout in interrupt mode: 200 ms */ priv->adapter.timeout = HZ / 5; + if (dev->irq == IRQ_NOTCONNECTED) + priv->features &= ~FEATURE_IRQ; + if (priv->features & FEATURE_IRQ) { u16 pcictl, pcists; -- GitLab From 8588eb0ce6a639be06110d7bbc8f59d8468ed9b7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 24 Nov 2017 15:14:25 -0800 Subject: [PATCH 0024/1398] bcache: Fix building error on MIPS commit cf33c1ee5254c6a430bc1538232b49c3ea13e613 upstream. This patch try to fix the building error on MIPS. The reason is MIPS has already defined the PTR macro, which conflicts with the PTR macro in include/uapi/linux/bcache.h. [fixed by mlyle: corrected a line-length issue] Signed-off-by: Huacai Chen Reviewed-by: Michael Lyle Signed-off-by: Michael Lyle Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/alloc.c | 2 +- drivers/md/bcache/extents.c | 2 +- drivers/md/bcache/journal.c | 2 +- include/uapi/linux/bcache.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 3fba31cea66e..537903bf9add 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -477,7 +477,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve, if (b == -1) goto err; - k->ptr[i] = PTR(ca->buckets[b].gen, + k->ptr[i] = MAKE_PTR(ca->buckets[b].gen, bucket_to_sector(c, b), ca->sb.nr_this_dev); diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 243de0bf15cd..4bf15182c4da 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -584,7 +584,7 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey return false; for (i = 0; i < KEY_PTRS(l); i++) - if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || + if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i)) return false; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 6925023e12d4..08f20b7cd199 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -508,7 +508,7 @@ static void journal_reclaim(struct cache_set *c) continue; ja->cur_idx = next; - k->ptr[n++] = PTR(0, + k->ptr[n++] = MAKE_PTR(0, bucket_to_sector(c, ca->sb.d[ja->cur_idx]), ca->sb.nr_this_dev); } diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 22b6ad31c706..8562b1cb776b 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -90,7 +90,7 @@ PTR_FIELD(PTR_GEN, 0, 8) #define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) -#define PTR(gen, offset, dev) \ +#define MAKE_PTR(gen, offset, dev) \ ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) /* Bkey utility code */ -- GitLab From eafbee43b7ec603c002e34597ab5f8fc170d7ab4 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 27 Nov 2017 17:31:00 +0100 Subject: [PATCH 0025/1398] hwmon: (jc42) optionally try to disable the SMBUS timeout commit 68615eb01f82256c19e41967bfb3eef902f77033 upstream. With a nxp,se97 chip on an atmel sama5d31 board, the I2C adapter driver is not always capable of avoiding the 25-35 ms timeout as specified by the SMBUS protocol. This may cause silent corruption of the last bit of any transfer, e.g. a one is read instead of a zero if the sensor chip times out. This also affects the eeprom half of the nxp-se97 chip, where this silent corruption was originally noticed. Other I2C adapters probably suffer similar issues, e.g. bit-banging comes to mind as risky... The SMBUS register in the nxp chip is not a standard Jedec register, but it is not special to the nxp chips either, at least the atmel chips have the same mechanism. Therefore, do not special case this on the manufacturer, it is opt-in via the device property anyway. Signed-off-by: Peter Rosin Acked-by: Rob Herring Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/hwmon/jc42.txt | 4 ++++ drivers/hwmon/jc42.c | 21 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/Documentation/devicetree/bindings/hwmon/jc42.txt b/Documentation/devicetree/bindings/hwmon/jc42.txt index 07a250498fbb..f569db58f64a 100644 --- a/Documentation/devicetree/bindings/hwmon/jc42.txt +++ b/Documentation/devicetree/bindings/hwmon/jc42.txt @@ -34,6 +34,10 @@ Required properties: - reg: I2C address +Optional properties: +- smbus-timeout-disable: When set, the smbus timeout function will be disabled. + This is not supported on all chips. + Example: temp-sensor@1a { diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c index 1bf22eff0b08..0f1f6421845f 100644 --- a/drivers/hwmon/jc42.c +++ b/drivers/hwmon/jc42.c @@ -22,6 +22,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include #include #include @@ -45,6 +46,7 @@ static const unsigned short normal_i2c[] = { #define JC42_REG_TEMP 0x05 #define JC42_REG_MANID 0x06 #define JC42_REG_DEVICEID 0x07 +#define JC42_REG_SMBUS 0x22 /* NXP and Atmel, possibly others? */ /* Status bits in temperature register */ #define JC42_ALARM_CRIT_BIT 15 @@ -73,6 +75,9 @@ static const unsigned short normal_i2c[] = { #define ONS_MANID 0x1b09 /* ON Semiconductor */ #define STM_MANID 0x104a /* ST Microelectronics */ +/* SMBUS register */ +#define SMBUS_STMOUT BIT(7) /* SMBus time-out, active low */ + /* Supported chips */ /* Analog Devices */ @@ -476,6 +481,22 @@ static int jc42_probe(struct i2c_client *client, const struct i2c_device_id *id) data->extended = !!(cap & JC42_CAP_RANGE); + if (device_property_read_bool(dev, "smbus-timeout-disable")) { + int smbus; + + /* + * Not all chips support this register, but from a + * quick read of various datasheets no chip appears + * incompatible with the below attempt to disable + * the timeout. And the whole thing is opt-in... + */ + smbus = i2c_smbus_read_word_swapped(client, JC42_REG_SMBUS); + if (smbus < 0) + return smbus; + i2c_smbus_write_word_swapped(client, JC42_REG_SMBUS, + smbus | SMBUS_STMOUT); + } + config = i2c_smbus_read_word_swapped(client, JC42_REG_CONFIG); if (config < 0) return config; -- GitLab From f425b050254ed8ccd996bb0a943b5d7d8259ac72 Mon Sep 17 00:00:00 2001 From: Jeff Lien Date: Tue, 21 Nov 2017 10:44:37 -0600 Subject: [PATCH 0026/1398] nvme-pci: add quirk for delay before CHK RDY for WDC SN200 commit 8c97eeccf0ad8783c057830119467b877bdfced7 upstream. And increase the existing delay to cover this device as well. Signed-off-by: Jeff Lien Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/host/pci.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 8edafd8cb8ce..5c52a6182765 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -84,7 +84,7 @@ enum nvme_quirks { * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was * found empirically. */ -#define NVME_QUIRK_DELAY_AMOUNT 2000 +#define NVME_QUIRK_DELAY_AMOUNT 2300 enum nvme_ctrl_state { NVME_CTRL_NEW, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 54ea90f89b70..e48ecb9303ca 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2109,6 +2109,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, -- GitLab From b0f63389dab9c7fe819e898f69b63c686e491f28 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Nov 2017 17:19:29 -0500 Subject: [PATCH 0027/1398] Revert "drm/radeon: dont switch vt on suspend" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 18c437caa5b18a235dd65cec224eab54bebcee65 upstream. Fixes distorted colors on some cards on resume from suspend. This reverts commit b9729b17a414f99c61f4db9ac9f9ed987fa0cbfe. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=98832 Bug: https://bugs.freedesktop.org/show_bug.cgi?id=99163 Bug: https://bugzilla.kernel.org/show_bug.cgi?id=107001 Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_fb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 0daad446d2c7..af84705b82ed 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -252,7 +252,6 @@ static int radeonfb_create(struct drm_fb_helper *helper, } info->par = rfbdev; - info->skip_vt_switch = true; ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj); if (ret) { -- GitLab From d316675a9da4c2bb28a247ad5e1044e5bd0daaf7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 30 Sep 2017 11:13:28 +0300 Subject: [PATCH 0028/1398] drm/amdgpu: potential uninitialized variable in amdgpu_vce_ring_parse_cs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 40a9960b046290939b56ce8e51f365258f27f264 upstream. We shifted some code around in commit 9cca0b8e5df0 ("drm/amdgpu: move amdgpu_cs_sysvm_access_required into find_mapping") and now my static checker complains that "r" might not be initialized at the end of the function. I've reviewed the code, and that seems possible, but it's also possible I may have missed something. Reviewed-by: Christian König Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 743a12df6971..3bb2b9b5ef9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -648,7 +648,7 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) uint32_t allocated = 0; uint32_t tmp, handle = 0; uint32_t *size = &tmp; - int i, r, idx = 0; + int i, r = 0, idx = 0; r = amdgpu_cs_sysvm_access_required(p); if (r) -- GitLab From 9cd48ba5fc9d1699fce31b625c32fd8bc77a28c0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 30 Sep 2017 11:14:13 +0300 Subject: [PATCH 0029/1398] drm/amdgpu: Potential uninitialized variable in amdgpu_vm_update_directories() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 78aa02c713fcf19e9bc8511ab61a5fd6c877cc01 upstream. After commit ea09729c9302 ("drm/amdgpu: rework page directory filling v2") then it becomes a lot harder to verify that "r" is initialized. My static checker complains and so I've reviewed the code. It does look like it might be buggy... Anyway, it doesn't hurt to set "r" to zero at the start. Reviewed-by: Christian König Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 968c4260d7a7..47503759906b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -744,7 +744,7 @@ static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - int r; + int r = 0; r = amdgpu_vm_update_pd_or_shadow(adev, vm, true); if (r) -- GitLab From 3405805db8bbe72e831059f93775e7048d5f742d Mon Sep 17 00:00:00 2001 From: Roman Kapl Date: Mon, 30 Oct 2017 11:56:13 +0100 Subject: [PATCH 0030/1398] drm/radeon: fix atombios on big endian commit 4f626a4ac8f57ddabf06d03870adab91e463217f upstream. The function for byteswapping the data send to/from atombios was buggy for num_bytes not divisible by four. The function must be aware of the fact that after byte-swapping the u32 units, valid bytes might end up after the num_bytes boundary. This patch was tested on kernel 3.12 and allowed us to sucesfully use DisplayPort on and Radeon SI card. Namely it fixed the link training and EDID readout. The function is patched both in radeon and amd drivers, since the functions and the fixes are identical. Signed-off-by: Roman Kapl Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 38 ++++++++++---------- drivers/gpu/drm/radeon/atombios_dp.c | 38 ++++++++++---------- 2 files changed, 36 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index f8fdbd1378a7..26afdffab5a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1788,34 +1788,32 @@ void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev) WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]); } -/* Atom needs data in little endian format - * so swap as appropriate when copying data to - * or from atom. Note that atom operates on - * dw units. +/* Atom needs data in little endian format so swap as appropriate when copying + * data to or from atom. Note that atom operates on dw units. + * + * Use to_le=true when sending data to atom and provide at least + * ALIGN(num_bytes,4) bytes in the dst buffer. + * + * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4) + * byes in the src buffer. */ void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) { #ifdef __BIG_ENDIAN - u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ - u32 *dst32, *src32; + u32 src_tmp[5], dst_tmp[5]; int i; + u8 align_num_bytes = ALIGN(num_bytes, 4); - memcpy(src_tmp, src, num_bytes); - src32 = (u32 *)src_tmp; - dst32 = (u32 *)dst_tmp; if (to_le) { - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = cpu_to_le32(src32[i]); - memcpy(dst, dst_tmp, num_bytes); + memcpy(src_tmp, src, num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = cpu_to_le32(src_tmp[i]); + memcpy(dst, dst_tmp, align_num_bytes); } else { - u8 dws = num_bytes & ~3; - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = le32_to_cpu(src32[i]); - memcpy(dst, dst_tmp, dws); - if (num_bytes % 4) { - for (i = 0; i < (num_bytes % 4); i++) - dst[dws+i] = dst_tmp[dws+i]; - } + memcpy(src_tmp, src, align_num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = le32_to_cpu(src_tmp[i]); + memcpy(dst, dst_tmp, num_bytes); } #else memcpy(dst, src, num_bytes); diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 432cb46f6a34..fd7682bf335d 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -45,34 +45,32 @@ static char *pre_emph_names[] = { /***** radeon AUX functions *****/ -/* Atom needs data in little endian format - * so swap as appropriate when copying data to - * or from atom. Note that atom operates on - * dw units. +/* Atom needs data in little endian format so swap as appropriate when copying + * data to or from atom. Note that atom operates on dw units. + * + * Use to_le=true when sending data to atom and provide at least + * ALIGN(num_bytes,4) bytes in the dst buffer. + * + * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4) + * byes in the src buffer. */ void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) { #ifdef __BIG_ENDIAN - u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ - u32 *dst32, *src32; + u32 src_tmp[5], dst_tmp[5]; int i; + u8 align_num_bytes = ALIGN(num_bytes, 4); - memcpy(src_tmp, src, num_bytes); - src32 = (u32 *)src_tmp; - dst32 = (u32 *)dst_tmp; if (to_le) { - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = cpu_to_le32(src32[i]); - memcpy(dst, dst_tmp, num_bytes); + memcpy(src_tmp, src, num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = cpu_to_le32(src_tmp[i]); + memcpy(dst, dst_tmp, align_num_bytes); } else { - u8 dws = num_bytes & ~3; - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = le32_to_cpu(src32[i]); - memcpy(dst, dst_tmp, dws); - if (num_bytes % 4) { - for (i = 0; i < (num_bytes % 4); i++) - dst[dws+i] = dst_tmp[dws+i]; - } + memcpy(src_tmp, src, align_num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = le32_to_cpu(src_tmp[i]); + memcpy(dst, dst_tmp, num_bytes); } #else memcpy(dst, src, num_bytes); -- GitLab From 25abe3a9585eeadbbb65851c2082f232f1b6ee40 Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Mon, 7 Aug 2017 21:55:45 +1000 Subject: [PATCH 0031/1398] drm/panel: simple: Add missing panel_simple_unprepare() calls commit f3621a8eb59a913612c8e6e37d81f16b649f8b6c upstream. During panel removal or system shutdown panel_simple_disable() is called which disables the panel backlight but the panel is still powered due to missing calls to panel_simple_unprepare(). Fixes: d02fd93e2cd8 ("drm/panel: simple - Disable panel on shutdown") Signed-off-by: Jonathan Liu Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20170807115545.27747-1-net147@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/panel/panel-simple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 27cb42467b20..6f65846b1783 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -369,6 +369,7 @@ static int panel_simple_remove(struct device *dev) drm_panel_remove(&panel->base); panel_simple_disable(&panel->base); + panel_simple_unprepare(&panel->base); if (panel->ddc) put_device(&panel->ddc->dev); @@ -384,6 +385,7 @@ static void panel_simple_shutdown(struct device *dev) struct panel_simple *panel = dev_get_drvdata(dev); panel_simple_disable(&panel->base); + panel_simple_unprepare(&panel->base); } static const struct drm_display_mode ampire_am800480r3tmqwa1h_mode = { -- GitLab From 44df87e6dfadc423407f2b7a546396b86a37d462 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Tue, 15 Aug 2017 15:14:25 +0100 Subject: [PATCH 0032/1398] drm/hisilicon: Ensure LDI regs are properly configured. commit a2f042430784d86eb2b7a6d2a869f552da30edba upstream. This patch fixes the following soft lockup: BUG: soft lockup - CPU#0 stuck for 23s! [weston:307] On weston idle-timeout the IP is powered down and reset asserted. On weston resume we get a massive vblank IRQ storm due to the LDI registers having lost some state. This state loss is caused by ade_crtc_atomic_begin() not calling ade_ldi_set_mode(). With this patch applied resuming from Weston idle-timeout works well. Signed-off-by: Peter Griffin Tested-by: John Stultz Reviewed-by: Xinliang Liu Signed-off-by: Xinliang Liu Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c index 7e7a4d43d6b6..0f563c954520 100644 --- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c @@ -521,9 +521,12 @@ static void ade_crtc_atomic_begin(struct drm_crtc *crtc, { struct ade_crtc *acrtc = to_ade_crtc(crtc); struct ade_hw_ctx *ctx = acrtc->ctx; + struct drm_display_mode *mode = &crtc->state->mode; + struct drm_display_mode *adj_mode = &crtc->state->adjusted_mode; if (!ctx->power_on) (void)ade_power_up(ctx); + ade_ldi_set_mode(acrtc, mode, adj_mode); } static void ade_crtc_atomic_flush(struct drm_crtc *crtc, -- GitLab From 5b1c8c96dfab3a15fafcbb573e98d17a12fbfcbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 30 Oct 2017 14:57:43 +0100 Subject: [PATCH 0033/1398] drm/ttm: once more fix ttm_buffer_object_transfer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4d98e5ee6084f6d7bc578c5d5f86de7156aaa4cb upstream. When the mutex is locked just in the moment we copy it we end up with a warning that we release a locked mutex. Fix this by properly reinitializing the mutex. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_bo_util.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index bf6e21655c57..7d22f9874d5f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -473,6 +473,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, INIT_LIST_HEAD(&fbo->lru); INIT_LIST_HEAD(&fbo->swap); INIT_LIST_HEAD(&fbo->io_reserve_lru); + mutex_init(&fbo->wu_mutex); fbo->moving = NULL; drm_vma_node_reset(&fbo->vma_node); atomic_set(&fbo->cpu_writers, 0); -- GitLab From fb541279bbe07a2e14ee9324c336984ae23e23c3 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 17 Nov 2017 16:41:16 +0800 Subject: [PATCH 0034/1398] drm/amd/pp: fix typecast error in powerplay. commit 8d8258bdab735d9f3c4b78e091ecfbb2b2b1f2ca upstream. resulted in unexpected data truncation Reviewed-by: Alex Deucher Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c index 4477c55a58e3..a8b59b3decd8 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c @@ -850,9 +850,9 @@ static int init_over_drive_limits( const ATOM_Tonga_POWERPLAYTABLE *powerplay_table) { hwmgr->platform_descriptor.overdriveLimit.engineClock = - le16_to_cpu(powerplay_table->ulMaxODEngineClock); + le32_to_cpu(powerplay_table->ulMaxODEngineClock); hwmgr->platform_descriptor.overdriveLimit.memoryClock = - le16_to_cpu(powerplay_table->ulMaxODMemoryClock); + le32_to_cpu(powerplay_table->ulMaxODMemoryClock); hwmgr->platform_descriptor.minOverdriveVDDC = 0; hwmgr->platform_descriptor.maxOverdriveVDDC = 0; -- GitLab From a11ca51bf70c81240c2c17af66813e549abb691a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 4 Dec 2017 12:59:57 +0100 Subject: [PATCH 0035/1398] Revert "x86/entry/64: Add missing irqflags tracing to native_load_gs_index()" This reverts commit 0d794d0d018f23fb09c50f6ae26868bd6ae343d6 which is commit 0d794d0d018f23fb09c50f6ae26868bd6ae343d6 upstream. Andy writes: I think the thing to do is to revert the patch from -stable. The bug it fixes is very minor, and the regression is that it made a pre-existing bug in some nearly-undebuggable core resume code much easier to hit. I don't feel comfortable with a backport of the latter fix until it has a good long soak in Linus' tree. Reported-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index be9df513141e..e7b0e7ff4c58 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -54,19 +54,15 @@ ENTRY(native_usergs_sysret64) ENDPROC(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ -.macro TRACE_IRQS_FLAGS flags:req +.macro TRACE_IRQS_IRETQ #ifdef CONFIG_TRACE_IRQFLAGS - bt $9, \flags /* interrupts off? */ + bt $9, EFLAGS(%rsp) /* interrupts off? */ jnc 1f TRACE_IRQS_ON 1: #endif .endm -.macro TRACE_IRQS_IRETQ - TRACE_IRQS_FLAGS EFLAGS(%rsp) -.endm - /* * When dynamic function tracer is enabled it will add a breakpoint * to all locations that it is about to modify, sync CPUs, update @@ -872,13 +868,11 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 ENTRY(native_load_gs_index) pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) - TRACE_IRQS_OFF SWAPGS .Lgs_change: movl %edi, %gs 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE SWAPGS - TRACE_IRQS_FLAGS (%rsp) popfq ret END(native_load_gs_index) -- GitLab From daf3a68d52826e697eb20c1f290aa198d43dfad5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 25 Aug 2017 17:34:41 +1000 Subject: [PATCH 0036/1398] NFS: revalidate "." etc correctly on "open". commit b688741cb06695312f18b730653d6611e1bad28d upstream. For correct close-to-open semantics, NFS must validate the change attribute of a directory (or file) on open. Since commit ecf3d1f1aa74 ("vfs: kill FS_REVAL_DOT by adding a d_weak_revalidate dentry op"), open() of "." or a path ending ".." is not revalidated reliably (except when that direct is a mount point). Prior to that commit, "." was revalidated using nfs_lookup_revalidate() which checks the LOOKUP_OPEN flag and forces revalidation if the flag is set. Since that commit, nfs_weak_revalidate() is used for NFSv3 (which ignores the flags) and nothing is used for NFSv4. This is fixed by using nfs_lookup_verify_inode() in nfs_weak_revalidate(). This does the revalidation exactly when needed. Also, add a definition of .d_weak_revalidate for NFSv4. The incorrect behavior is easily demonstrated by running "echo *" in some non-mountpoint NFS directory while watching network traffic. Without this patch, "echo *" sometimes doesn't produce any traffic. With the patch it always does. Fixes: ecf3d1f1aa74 ("vfs: kill FS_REVAL_DOT by adding a d_weak_revalidate dentry op") cc: stable@vger.kernel.org (3.9+) Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d04ec3814779..65566d5fcf39 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1292,7 +1292,7 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) return 0; } - error = nfs_revalidate_inode(NFS_SERVER(inode), inode); + error = nfs_lookup_verify_inode(inode, flags); dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n", __func__, inode->i_ino, error ? "invalid" : "valid"); return !error; @@ -1443,6 +1443,7 @@ static int nfs4_lookup_revalidate(struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, + .d_weak_revalidate = nfs_weak_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, -- GitLab From 838cdb26a538aa83326639f53ce4f8112f23dd96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Nov 2017 21:41:56 +0200 Subject: [PATCH 0037/1398] drm/i915: Don't try indexed reads to alternate slave addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ae5c631e605a452a5a0e73205a92810c01ed954b upstream. We can only specify the one slave address to indexed reads/writes. Make sure the messages we check are destined to the same slave address before deciding to do an indexed transfer. Cc: Daniel Kurtz Cc: Chris Wilson Cc: Daniel Vetter Cc: Sean Paul Fixes: 56f9eac05489 ("drm/i915/intel_i2c: use INDEX cycles for i2c read transactions") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171123194157.25367-2-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit c4deb62d7821672265b87952bcd1c808f3bf3e8f) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 79aab9ad6faa..f8b406b88c77 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -430,6 +430,7 @@ static bool gmbus_is_index_read(struct i2c_msg *msgs, int i, int num) { return (i + 1 < num && + msgs[i].addr == msgs[i + 1].addr && !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 && (msgs[i + 1].flags & I2C_M_RD)); } -- GitLab From f990312aaa74faf14d00a1b050c55a66dfe3153a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Nov 2017 21:41:57 +0200 Subject: [PATCH 0038/1398] drm/i915: Prevent zero length "index" write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 56350fb8978bbf4aafe08f21234e161dd128b417 upstream. The hardware always writes one or two bytes in the index portion of an indexed transfer. Make sure the message we send as the index doesn't have a zero length. Cc: Daniel Kurtz Cc: Chris Wilson Cc: Daniel Vetter Cc: Sean Paul Fixes: 56f9eac05489 ("drm/i915/intel_i2c: use INDEX cycles for i2c read transactions") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171123194157.25367-3-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit bb9e0d4bca50f429152e74a459160b41f3d60fb2) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_i2c.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index f8b406b88c77..6769aa1b6922 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -431,7 +431,8 @@ gmbus_is_index_read(struct i2c_msg *msgs, int i, int num) { return (i + 1 < num && msgs[i].addr == msgs[i + 1].addr && - !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 && + !(msgs[i].flags & I2C_M_RD) && + (msgs[i].len == 1 || msgs[i].len == 2) && (msgs[i + 1].flags & I2C_M_RD)); } -- GitLab From 284bbc782445283e9a5124666dda8010f379f179 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 5 Dec 2017 11:24:35 +0100 Subject: [PATCH 0039/1398] Linux 4.9.67 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8e62f9e2a08c..70546af61a0a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 66 +SUBLEVEL = 67 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 56256bbc423fe67075b0bd2f284b4a12c1a7798b Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Thu, 21 May 2015 07:47:09 +0200 Subject: [PATCH 0040/1398] BACKPORT: dt/bindings: add bindings for optee Introduces linaro prefix and adds bindings for ARM TrustZone based OP-TEE implementation. Change-Id: I8992ce7476d3cb232600f1691ef698f7403cdc62 Acked-by: Rob Herring Signed-off-by: Jens Wiklander (cherry picked from commit c8bfafb1594435889b571b79325011e8b7fd087b) Signed-off-by: Victor Chong --- .../bindings/arm/firmware/linaro,optee-tz.txt | 31 +++++++++++++++++++ .../devicetree/bindings/vendor-prefixes.txt | 1 + 2 files changed, 32 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt diff --git a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt new file mode 100644 index 000000000000..d38834c67dff --- /dev/null +++ b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt @@ -0,0 +1,31 @@ +OP-TEE Device Tree Bindings + +OP-TEE is a piece of software using hardware features to provide a Trusted +Execution Environment. The security can be provided with ARM TrustZone, but +also by virtualization or a separate chip. + +We're using "linaro" as the first part of the compatible property for +the reference implementation maintained by Linaro. + +* OP-TEE based on ARM TrustZone required properties: + +- compatible : should contain "linaro,optee-tz" + +- method : The method of calling the OP-TEE Trusted OS. Permitted + values are: + + "smc" : SMC #0, with the register assignments specified + in drivers/tee/optee/optee_smc.h + + "hvc" : HVC #0, with the register assignments specified + in drivers/tee/optee/optee_smc.h + + + +Example: + firmware { + optee { + compatible = "linaro,optee-tz"; + method = "smc"; + }; + }; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index bceffffb7502..d0526e05aea3 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -156,6 +156,7 @@ lacie LaCie lantiq Lantiq Semiconductor lenovo Lenovo Group Ltd. lg LG Corporation +linaro Linaro Limited linux Linux-specific binding lltc Linear Technology Corporation lsi LSI Corp. (LSI Logic) -- GitLab From fb938966c7d784b4c3b952cf3287e91d7c7dabdc Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Wed, 11 Mar 2015 14:39:39 +0100 Subject: [PATCH 0041/1398] BACKPORT: tee: generic TEE subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initial patch for generic TEE subsystem. This subsystem provides: * Registration/un-registration of TEE drivers. * Shared memory between normal world and secure world. * Ioctl interface for interaction with user space. * Sysfs implementation_id of TEE driver A TEE (Trusted Execution Environment) driver is a driver that interfaces with a trusted OS running in some secure environment, for example, TrustZone on ARM cpus, or a separate secure co-processor etc. The TEE subsystem can serve a TEE driver for a Global Platform compliant TEE, but it's not limited to only Global Platform TEEs. This patch builds on other similar implementations trying to solve the same problem: * "optee_linuxdriver" by among others Jean-michel DELORME and Emmanuel MICHEL * "Generic TrustZone Driver" by Javier González Change-Id: I2336be6189bafb63ca0f49e0a1b16ec75b4514ac Acked-by: Andreas Dannenberg Tested-by: Jerome Forissier (HiKey) Tested-by: Volodymyr Babchuk (RCAR H3) Tested-by: Scott Branden Reviewed-by: Javier González Signed-off-by: Jens Wiklander (cherry picked from commit 967c9cca2cc50569efc65945325c173cecba83bd) Signed-off-by: Victor Chong --- Documentation/ioctl/ioctl-number.txt | 1 + MAINTAINERS | 7 + drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/tee/Kconfig | 8 + drivers/tee/Makefile | 4 + drivers/tee/tee_core.c | 893 +++++++++++++++++++++++++++ drivers/tee/tee_private.h | 129 ++++ drivers/tee/tee_shm.c | 358 +++++++++++ drivers/tee/tee_shm_pool.c | 156 +++++ include/linux/tee_drv.h | 277 +++++++++ include/uapi/linux/tee.h | 346 +++++++++++ 12 files changed, 2182 insertions(+) create mode 100644 drivers/tee/Kconfig create mode 100644 drivers/tee/Makefile create mode 100644 drivers/tee/tee_core.c create mode 100644 drivers/tee/tee_private.h create mode 100644 drivers/tee/tee_shm.c create mode 100644 drivers/tee/tee_shm_pool.c create mode 100644 include/linux/tee_drv.h create mode 100644 include/uapi/linux/tee.h diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 81c7f2bb7daf..efb38da700c8 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -308,6 +308,7 @@ Code Seq#(hex) Include File Comments 0xA3 80-8F Port ACL in development: 0xA3 90-9F linux/dtlk.h +0xA4 00-1F uapi/linux/tee.h Generic TEE subsystem 0xAA 00-3F linux/uapi/linux/userfaultfd.h 0xAB 00-1F linux/nbd.h 0xAC 00-1F linux/raw.h diff --git a/MAINTAINERS b/MAINTAINERS index 63cefa62324c..13daf6829fda 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10655,6 +10655,13 @@ F: drivers/hwtracing/stm/ F: include/linux/stm.h F: include/uapi/linux/stm.h +TEE SUBSYSTEM +M: Jens Wiklander +S: Maintained +F: include/linux/tee_drv.h +F: include/uapi/linux/tee.h +F: drivers/tee/ + THUNDERBOLT DRIVER M: Andreas Noever S: Maintained diff --git a/drivers/Kconfig b/drivers/Kconfig index e1e2066cecdb..de581c13ec9a 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -202,4 +202,6 @@ source "drivers/hwtracing/intel_th/Kconfig" source "drivers/fpga/Kconfig" +source "drivers/tee/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 733bf0b2613f..9a90575c3f38 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -174,3 +174,4 @@ obj-$(CONFIG_STM) += hwtracing/stm/ obj-$(CONFIG_ANDROID) += android/ obj-$(CONFIG_NVMEM) += nvmem/ obj-$(CONFIG_FPGA) += fpga/ +obj-$(CONFIG_TEE) += tee/ diff --git a/drivers/tee/Kconfig b/drivers/tee/Kconfig new file mode 100644 index 000000000000..50c244ead46d --- /dev/null +++ b/drivers/tee/Kconfig @@ -0,0 +1,8 @@ +# Generic Trusted Execution Environment Configuration +config TEE + tristate "Trusted Execution Environment support" + select DMA_SHARED_BUFFER + select GENERIC_ALLOCATOR + help + This implements a generic interface towards a Trusted Execution + Environment (TEE). diff --git a/drivers/tee/Makefile b/drivers/tee/Makefile new file mode 100644 index 000000000000..ec64047a86e2 --- /dev/null +++ b/drivers/tee/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_TEE) += tee.o +tee-objs += tee_core.o +tee-objs += tee_shm.o +tee-objs += tee_shm_pool.o diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c new file mode 100644 index 000000000000..5c60bf4423e6 --- /dev/null +++ b/drivers/tee/tee_core.c @@ -0,0 +1,893 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "tee_private.h" + +#define TEE_NUM_DEVICES 32 + +#define TEE_IOCTL_PARAM_SIZE(x) (sizeof(struct tee_param) * (x)) + +/* + * Unprivileged devices in the lower half range and privileged devices in + * the upper half range. + */ +static DECLARE_BITMAP(dev_mask, TEE_NUM_DEVICES); +static DEFINE_SPINLOCK(driver_lock); + +static struct class *tee_class; +static dev_t tee_devt; + +static int tee_open(struct inode *inode, struct file *filp) +{ + int rc; + struct tee_device *teedev; + struct tee_context *ctx; + + teedev = container_of(inode->i_cdev, struct tee_device, cdev); + if (!tee_device_get(teedev)) + return -EINVAL; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + rc = -ENOMEM; + goto err; + } + + ctx->teedev = teedev; + INIT_LIST_HEAD(&ctx->list_shm); + filp->private_data = ctx; + rc = teedev->desc->ops->open(ctx); + if (rc) + goto err; + + return 0; +err: + kfree(ctx); + tee_device_put(teedev); + return rc; +} + +static int tee_release(struct inode *inode, struct file *filp) +{ + struct tee_context *ctx = filp->private_data; + struct tee_device *teedev = ctx->teedev; + struct tee_shm *shm; + + ctx->teedev->desc->ops->release(ctx); + mutex_lock(&ctx->teedev->mutex); + list_for_each_entry(shm, &ctx->list_shm, link) + shm->ctx = NULL; + mutex_unlock(&ctx->teedev->mutex); + kfree(ctx); + tee_device_put(teedev); + return 0; +} + +static int tee_ioctl_version(struct tee_context *ctx, + struct tee_ioctl_version_data __user *uvers) +{ + struct tee_ioctl_version_data vers; + + ctx->teedev->desc->ops->get_version(ctx->teedev, &vers); + if (copy_to_user(uvers, &vers, sizeof(vers))) + return -EFAULT; + return 0; +} + +static int tee_ioctl_shm_alloc(struct tee_context *ctx, + struct tee_ioctl_shm_alloc_data __user *udata) +{ + long ret; + struct tee_ioctl_shm_alloc_data data; + struct tee_shm *shm; + + if (copy_from_user(&data, udata, sizeof(data))) + return -EFAULT; + + /* Currently no input flags are supported */ + if (data.flags) + return -EINVAL; + + data.id = -1; + + shm = tee_shm_alloc(ctx, data.size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); + if (IS_ERR(shm)) + return PTR_ERR(shm); + + data.id = shm->id; + data.flags = shm->flags; + data.size = shm->size; + + if (copy_to_user(udata, &data, sizeof(data))) + ret = -EFAULT; + else + ret = tee_shm_get_fd(shm); + + /* + * When user space closes the file descriptor the shared memory + * should be freed or if tee_shm_get_fd() failed then it will + * be freed immediately. + */ + tee_shm_put(shm); + return ret; +} + +static int params_from_user(struct tee_context *ctx, struct tee_param *params, + size_t num_params, + struct tee_ioctl_param __user *uparams) +{ + size_t n; + + for (n = 0; n < num_params; n++) { + struct tee_shm *shm; + struct tee_ioctl_param ip; + + if (copy_from_user(&ip, uparams + n, sizeof(ip))) + return -EFAULT; + + /* All unused attribute bits has to be zero */ + if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_TYPE_MASK) + return -EINVAL; + + params[n].attr = ip.attr; + switch (ip.attr) { + case TEE_IOCTL_PARAM_ATTR_TYPE_NONE: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + params[n].u.value.a = ip.a; + params[n].u.value.b = ip.b; + params[n].u.value.c = ip.c; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + /* + * If we fail to get a pointer to a shared memory + * object (and increase the ref count) from an + * identifier we return an error. All pointers that + * has been added in params have an increased ref + * count. It's the callers responibility to do + * tee_shm_put() on all resolved pointers. + */ + shm = tee_shm_get_from_id(ctx, ip.c); + if (IS_ERR(shm)) + return PTR_ERR(shm); + + params[n].u.memref.shm_offs = ip.a; + params[n].u.memref.size = ip.b; + params[n].u.memref.shm = shm; + break; + default: + /* Unknown attribute */ + return -EINVAL; + } + } + return 0; +} + +static int params_to_user(struct tee_ioctl_param __user *uparams, + size_t num_params, struct tee_param *params) +{ + size_t n; + + for (n = 0; n < num_params; n++) { + struct tee_ioctl_param __user *up = uparams + n; + struct tee_param *p = params + n; + + switch (p->attr) { + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + if (put_user(p->u.value.a, &up->a) || + put_user(p->u.value.b, &up->b) || + put_user(p->u.value.c, &up->c)) + return -EFAULT; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + if (put_user((u64)p->u.memref.size, &up->b)) + return -EFAULT; + default: + break; + } + } + return 0; +} + +static bool param_is_memref(struct tee_param *param) +{ + switch (param->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + return true; + default: + return false; + } +} + +static int tee_ioctl_open_session(struct tee_context *ctx, + struct tee_ioctl_buf_data __user *ubuf) +{ + int rc; + size_t n; + struct tee_ioctl_buf_data buf; + struct tee_ioctl_open_session_arg __user *uarg; + struct tee_ioctl_open_session_arg arg; + struct tee_ioctl_param __user *uparams = NULL; + struct tee_param *params = NULL; + bool have_session = false; + + if (!ctx->teedev->desc->ops->open_session) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + + if (buf.buf_len > TEE_MAX_ARG_SIZE || + buf.buf_len < sizeof(struct tee_ioctl_open_session_arg)) + return -EINVAL; + + uarg = u64_to_user_ptr(buf.buf_ptr); + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + if (sizeof(arg) + TEE_IOCTL_PARAM_SIZE(arg.num_params) != buf.buf_len) + return -EINVAL; + + if (arg.num_params) { + params = kcalloc(arg.num_params, sizeof(struct tee_param), + GFP_KERNEL); + if (!params) + return -ENOMEM; + uparams = uarg->params; + rc = params_from_user(ctx, params, arg.num_params, uparams); + if (rc) + goto out; + } + + rc = ctx->teedev->desc->ops->open_session(ctx, &arg, params); + if (rc) + goto out; + have_session = true; + + if (put_user(arg.session, &uarg->session) || + put_user(arg.ret, &uarg->ret) || + put_user(arg.ret_origin, &uarg->ret_origin)) { + rc = -EFAULT; + goto out; + } + rc = params_to_user(uparams, arg.num_params, params); +out: + /* + * If we've succeeded to open the session but failed to communicate + * it back to user space, close the session again to avoid leakage. + */ + if (rc && have_session && ctx->teedev->desc->ops->close_session) + ctx->teedev->desc->ops->close_session(ctx, arg.session); + + if (params) { + /* Decrease ref count for all valid shared memory pointers */ + for (n = 0; n < arg.num_params; n++) + if (param_is_memref(params + n) && + params[n].u.memref.shm) + tee_shm_put(params[n].u.memref.shm); + kfree(params); + } + + return rc; +} + +static int tee_ioctl_invoke(struct tee_context *ctx, + struct tee_ioctl_buf_data __user *ubuf) +{ + int rc; + size_t n; + struct tee_ioctl_buf_data buf; + struct tee_ioctl_invoke_arg __user *uarg; + struct tee_ioctl_invoke_arg arg; + struct tee_ioctl_param __user *uparams = NULL; + struct tee_param *params = NULL; + + if (!ctx->teedev->desc->ops->invoke_func) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + + if (buf.buf_len > TEE_MAX_ARG_SIZE || + buf.buf_len < sizeof(struct tee_ioctl_invoke_arg)) + return -EINVAL; + + uarg = u64_to_user_ptr(buf.buf_ptr); + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + if (sizeof(arg) + TEE_IOCTL_PARAM_SIZE(arg.num_params) != buf.buf_len) + return -EINVAL; + + if (arg.num_params) { + params = kcalloc(arg.num_params, sizeof(struct tee_param), + GFP_KERNEL); + if (!params) + return -ENOMEM; + uparams = uarg->params; + rc = params_from_user(ctx, params, arg.num_params, uparams); + if (rc) + goto out; + } + + rc = ctx->teedev->desc->ops->invoke_func(ctx, &arg, params); + if (rc) + goto out; + + if (put_user(arg.ret, &uarg->ret) || + put_user(arg.ret_origin, &uarg->ret_origin)) { + rc = -EFAULT; + goto out; + } + rc = params_to_user(uparams, arg.num_params, params); +out: + if (params) { + /* Decrease ref count for all valid shared memory pointers */ + for (n = 0; n < arg.num_params; n++) + if (param_is_memref(params + n) && + params[n].u.memref.shm) + tee_shm_put(params[n].u.memref.shm); + kfree(params); + } + return rc; +} + +static int tee_ioctl_cancel(struct tee_context *ctx, + struct tee_ioctl_cancel_arg __user *uarg) +{ + struct tee_ioctl_cancel_arg arg; + + if (!ctx->teedev->desc->ops->cancel_req) + return -EINVAL; + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + return ctx->teedev->desc->ops->cancel_req(ctx, arg.cancel_id, + arg.session); +} + +static int +tee_ioctl_close_session(struct tee_context *ctx, + struct tee_ioctl_close_session_arg __user *uarg) +{ + struct tee_ioctl_close_session_arg arg; + + if (!ctx->teedev->desc->ops->close_session) + return -EINVAL; + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + return ctx->teedev->desc->ops->close_session(ctx, arg.session); +} + +static int params_to_supp(struct tee_context *ctx, + struct tee_ioctl_param __user *uparams, + size_t num_params, struct tee_param *params) +{ + size_t n; + + for (n = 0; n < num_params; n++) { + struct tee_ioctl_param ip; + struct tee_param *p = params + n; + + ip.attr = p->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK; + switch (p->attr) { + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + ip.a = p->u.value.a; + ip.b = p->u.value.b; + ip.c = p->u.value.c; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + ip.b = p->u.memref.size; + if (!p->u.memref.shm) { + ip.a = 0; + ip.c = (u64)-1; /* invalid shm id */ + break; + } + ip.a = p->u.memref.shm_offs; + ip.c = p->u.memref.shm->id; + break; + default: + ip.a = 0; + ip.b = 0; + ip.c = 0; + break; + } + + if (copy_to_user(uparams + n, &ip, sizeof(ip))) + return -EFAULT; + } + + return 0; +} + +static int tee_ioctl_supp_recv(struct tee_context *ctx, + struct tee_ioctl_buf_data __user *ubuf) +{ + int rc; + struct tee_ioctl_buf_data buf; + struct tee_iocl_supp_recv_arg __user *uarg; + struct tee_param *params; + u32 num_params; + u32 func; + + if (!ctx->teedev->desc->ops->supp_recv) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + + if (buf.buf_len > TEE_MAX_ARG_SIZE || + buf.buf_len < sizeof(struct tee_iocl_supp_recv_arg)) + return -EINVAL; + + uarg = u64_to_user_ptr(buf.buf_ptr); + if (get_user(num_params, &uarg->num_params)) + return -EFAULT; + + if (sizeof(*uarg) + TEE_IOCTL_PARAM_SIZE(num_params) != buf.buf_len) + return -EINVAL; + + params = kcalloc(num_params, sizeof(struct tee_param), GFP_KERNEL); + if (!params) + return -ENOMEM; + + rc = ctx->teedev->desc->ops->supp_recv(ctx, &func, &num_params, params); + if (rc) + goto out; + + if (put_user(func, &uarg->func) || + put_user(num_params, &uarg->num_params)) { + rc = -EFAULT; + goto out; + } + + rc = params_to_supp(ctx, uarg->params, num_params, params); +out: + kfree(params); + return rc; +} + +static int params_from_supp(struct tee_param *params, size_t num_params, + struct tee_ioctl_param __user *uparams) +{ + size_t n; + + for (n = 0; n < num_params; n++) { + struct tee_param *p = params + n; + struct tee_ioctl_param ip; + + if (copy_from_user(&ip, uparams + n, sizeof(ip))) + return -EFAULT; + + /* All unused attribute bits has to be zero */ + if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_TYPE_MASK) + return -EINVAL; + + p->attr = ip.attr; + switch (ip.attr) { + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + /* Only out and in/out values can be updated */ + p->u.value.a = ip.a; + p->u.value.b = ip.b; + p->u.value.c = ip.c; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + /* + * Only the size of the memref can be updated. + * Since we don't have access to the original + * parameters here, only store the supplied size. + * The driver will copy the updated size into the + * original parameters. + */ + p->u.memref.shm = NULL; + p->u.memref.shm_offs = 0; + p->u.memref.size = ip.b; + break; + default: + memset(&p->u, 0, sizeof(p->u)); + break; + } + } + return 0; +} + +static int tee_ioctl_supp_send(struct tee_context *ctx, + struct tee_ioctl_buf_data __user *ubuf) +{ + long rc; + struct tee_ioctl_buf_data buf; + struct tee_iocl_supp_send_arg __user *uarg; + struct tee_param *params; + u32 num_params; + u32 ret; + + /* Not valid for this driver */ + if (!ctx->teedev->desc->ops->supp_send) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + + if (buf.buf_len > TEE_MAX_ARG_SIZE || + buf.buf_len < sizeof(struct tee_iocl_supp_send_arg)) + return -EINVAL; + + uarg = u64_to_user_ptr(buf.buf_ptr); + if (get_user(ret, &uarg->ret) || + get_user(num_params, &uarg->num_params)) + return -EFAULT; + + if (sizeof(*uarg) + TEE_IOCTL_PARAM_SIZE(num_params) > buf.buf_len) + return -EINVAL; + + params = kcalloc(num_params, sizeof(struct tee_param), GFP_KERNEL); + if (!params) + return -ENOMEM; + + rc = params_from_supp(params, num_params, uarg->params); + if (rc) + goto out; + + rc = ctx->teedev->desc->ops->supp_send(ctx, ret, num_params, params); +out: + kfree(params); + return rc; +} + +static long tee_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct tee_context *ctx = filp->private_data; + void __user *uarg = (void __user *)arg; + + switch (cmd) { + case TEE_IOC_VERSION: + return tee_ioctl_version(ctx, uarg); + case TEE_IOC_SHM_ALLOC: + return tee_ioctl_shm_alloc(ctx, uarg); + case TEE_IOC_OPEN_SESSION: + return tee_ioctl_open_session(ctx, uarg); + case TEE_IOC_INVOKE: + return tee_ioctl_invoke(ctx, uarg); + case TEE_IOC_CANCEL: + return tee_ioctl_cancel(ctx, uarg); + case TEE_IOC_CLOSE_SESSION: + return tee_ioctl_close_session(ctx, uarg); + case TEE_IOC_SUPPL_RECV: + return tee_ioctl_supp_recv(ctx, uarg); + case TEE_IOC_SUPPL_SEND: + return tee_ioctl_supp_send(ctx, uarg); + default: + return -EINVAL; + } +} + +static const struct file_operations tee_fops = { + .owner = THIS_MODULE, + .open = tee_open, + .release = tee_release, + .unlocked_ioctl = tee_ioctl, + .compat_ioctl = tee_ioctl, +}; + +static void tee_release_device(struct device *dev) +{ + struct tee_device *teedev = container_of(dev, struct tee_device, dev); + + spin_lock(&driver_lock); + clear_bit(teedev->id, dev_mask); + spin_unlock(&driver_lock); + mutex_destroy(&teedev->mutex); + idr_destroy(&teedev->idr); + kfree(teedev); +} + +/** + * tee_device_alloc() - Allocate a new struct tee_device instance + * @teedesc: Descriptor for this driver + * @dev: Parent device for this device + * @pool: Shared memory pool, NULL if not used + * @driver_data: Private driver data for this device + * + * Allocates a new struct tee_device instance. The device is + * removed by tee_device_unregister(). + * + * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure + */ +struct tee_device *tee_device_alloc(const struct tee_desc *teedesc, + struct device *dev, + struct tee_shm_pool *pool, + void *driver_data) +{ + struct tee_device *teedev; + void *ret; + int rc; + int offs = 0; + + if (!teedesc || !teedesc->name || !teedesc->ops || + !teedesc->ops->get_version || !teedesc->ops->open || + !teedesc->ops->release || !pool) + return ERR_PTR(-EINVAL); + + teedev = kzalloc(sizeof(*teedev), GFP_KERNEL); + if (!teedev) { + ret = ERR_PTR(-ENOMEM); + goto err; + } + + if (teedesc->flags & TEE_DESC_PRIVILEGED) + offs = TEE_NUM_DEVICES / 2; + + spin_lock(&driver_lock); + teedev->id = find_next_zero_bit(dev_mask, TEE_NUM_DEVICES, offs); + if (teedev->id < TEE_NUM_DEVICES) + set_bit(teedev->id, dev_mask); + spin_unlock(&driver_lock); + + if (teedev->id >= TEE_NUM_DEVICES) { + ret = ERR_PTR(-ENOMEM); + goto err; + } + + snprintf(teedev->name, sizeof(teedev->name), "tee%s%d", + teedesc->flags & TEE_DESC_PRIVILEGED ? "priv" : "", + teedev->id - offs); + + teedev->dev.class = tee_class; + teedev->dev.release = tee_release_device; + teedev->dev.parent = dev; + + teedev->dev.devt = MKDEV(MAJOR(tee_devt), teedev->id); + + rc = dev_set_name(&teedev->dev, "%s", teedev->name); + if (rc) { + ret = ERR_PTR(rc); + goto err_devt; + } + + cdev_init(&teedev->cdev, &tee_fops); + teedev->cdev.owner = teedesc->owner; + teedev->cdev.kobj.parent = &teedev->dev.kobj; + + dev_set_drvdata(&teedev->dev, driver_data); + device_initialize(&teedev->dev); + + /* 1 as tee_device_unregister() does one final tee_device_put() */ + teedev->num_users = 1; + init_completion(&teedev->c_no_users); + mutex_init(&teedev->mutex); + idr_init(&teedev->idr); + + teedev->desc = teedesc; + teedev->pool = pool; + + return teedev; +err_devt: + unregister_chrdev_region(teedev->dev.devt, 1); +err: + pr_err("could not register %s driver\n", + teedesc->flags & TEE_DESC_PRIVILEGED ? "privileged" : "client"); + if (teedev && teedev->id < TEE_NUM_DEVICES) { + spin_lock(&driver_lock); + clear_bit(teedev->id, dev_mask); + spin_unlock(&driver_lock); + } + kfree(teedev); + return ret; +} +EXPORT_SYMBOL_GPL(tee_device_alloc); + +static ssize_t implementation_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tee_device *teedev = container_of(dev, struct tee_device, dev); + struct tee_ioctl_version_data vers; + + teedev->desc->ops->get_version(teedev, &vers); + return scnprintf(buf, PAGE_SIZE, "%d\n", vers.impl_id); +} +static DEVICE_ATTR_RO(implementation_id); + +static struct attribute *tee_dev_attrs[] = { + &dev_attr_implementation_id.attr, + NULL +}; + +static const struct attribute_group tee_dev_group = { + .attrs = tee_dev_attrs, +}; + +/** + * tee_device_register() - Registers a TEE device + * @teedev: Device to register + * + * tee_device_unregister() need to be called to remove the @teedev if + * this function fails. + * + * @returns < 0 on failure + */ +int tee_device_register(struct tee_device *teedev) +{ + int rc; + + if (teedev->flags & TEE_DEVICE_FLAG_REGISTERED) { + dev_err(&teedev->dev, "attempt to register twice\n"); + return -EINVAL; + } + + rc = cdev_add(&teedev->cdev, teedev->dev.devt, 1); + if (rc) { + dev_err(&teedev->dev, + "unable to cdev_add() %s, major %d, minor %d, err=%d\n", + teedev->name, MAJOR(teedev->dev.devt), + MINOR(teedev->dev.devt), rc); + return rc; + } + + rc = device_add(&teedev->dev); + if (rc) { + dev_err(&teedev->dev, + "unable to device_add() %s, major %d, minor %d, err=%d\n", + teedev->name, MAJOR(teedev->dev.devt), + MINOR(teedev->dev.devt), rc); + goto err_device_add; + } + + rc = sysfs_create_group(&teedev->dev.kobj, &tee_dev_group); + if (rc) { + dev_err(&teedev->dev, + "failed to create sysfs attributes, err=%d\n", rc); + goto err_sysfs_create_group; + } + + teedev->flags |= TEE_DEVICE_FLAG_REGISTERED; + return 0; + +err_sysfs_create_group: + device_del(&teedev->dev); +err_device_add: + cdev_del(&teedev->cdev); + return rc; +} +EXPORT_SYMBOL_GPL(tee_device_register); + +void tee_device_put(struct tee_device *teedev) +{ + mutex_lock(&teedev->mutex); + /* Shouldn't put in this state */ + if (!WARN_ON(!teedev->desc)) { + teedev->num_users--; + if (!teedev->num_users) { + teedev->desc = NULL; + complete(&teedev->c_no_users); + } + } + mutex_unlock(&teedev->mutex); +} + +bool tee_device_get(struct tee_device *teedev) +{ + mutex_lock(&teedev->mutex); + if (!teedev->desc) { + mutex_unlock(&teedev->mutex); + return false; + } + teedev->num_users++; + mutex_unlock(&teedev->mutex); + return true; +} + +/** + * tee_device_unregister() - Removes a TEE device + * @teedev: Device to unregister + * + * This function should be called to remove the @teedev even if + * tee_device_register() hasn't been called yet. Does nothing if + * @teedev is NULL. + */ +void tee_device_unregister(struct tee_device *teedev) +{ + if (!teedev) + return; + + if (teedev->flags & TEE_DEVICE_FLAG_REGISTERED) { + sysfs_remove_group(&teedev->dev.kobj, &tee_dev_group); + cdev_del(&teedev->cdev); + device_del(&teedev->dev); + } + + tee_device_put(teedev); + wait_for_completion(&teedev->c_no_users); + + /* + * No need to take a mutex any longer now since teedev->desc was + * set to NULL before teedev->c_no_users was completed. + */ + + teedev->pool = NULL; + + put_device(&teedev->dev); +} +EXPORT_SYMBOL_GPL(tee_device_unregister); + +/** + * tee_get_drvdata() - Return driver_data pointer + * @teedev: Device containing the driver_data pointer + * @returns the driver_data pointer supplied to tee_register(). + */ +void *tee_get_drvdata(struct tee_device *teedev) +{ + return dev_get_drvdata(&teedev->dev); +} +EXPORT_SYMBOL_GPL(tee_get_drvdata); + +static int __init tee_init(void) +{ + int rc; + + tee_class = class_create(THIS_MODULE, "tee"); + if (IS_ERR(tee_class)) { + pr_err("couldn't create class\n"); + return PTR_ERR(tee_class); + } + + rc = alloc_chrdev_region(&tee_devt, 0, TEE_NUM_DEVICES, "tee"); + if (rc) { + pr_err("failed to allocate char dev region\n"); + class_destroy(tee_class); + tee_class = NULL; + } + + return rc; +} + +static void __exit tee_exit(void) +{ + class_destroy(tee_class); + tee_class = NULL; + unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES); +} + +subsys_initcall(tee_init); +module_exit(tee_exit); + +MODULE_AUTHOR("Linaro"); +MODULE_DESCRIPTION("TEE Driver"); +MODULE_VERSION("1.0"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/tee/tee_private.h b/drivers/tee/tee_private.h new file mode 100644 index 000000000000..21cb6be8bce9 --- /dev/null +++ b/drivers/tee/tee_private.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef TEE_PRIVATE_H +#define TEE_PRIVATE_H + +#include +#include +#include +#include +#include +#include + +struct tee_device; + +/** + * struct tee_shm - shared memory object + * @teedev: device used to allocate the object + * @ctx: context using the object, if NULL the context is gone + * @link link element + * @paddr: physical address of the shared memory + * @kaddr: virtual address of the shared memory + * @size: size of shared memory + * @dmabuf: dmabuf used to for exporting to user space + * @flags: defined by TEE_SHM_* in tee_drv.h + * @id: unique id of a shared memory object on this device + */ +struct tee_shm { + struct tee_device *teedev; + struct tee_context *ctx; + struct list_head link; + phys_addr_t paddr; + void *kaddr; + size_t size; + struct dma_buf *dmabuf; + u32 flags; + int id; +}; + +struct tee_shm_pool_mgr; + +/** + * struct tee_shm_pool_mgr_ops - shared memory pool manager operations + * @alloc: called when allocating shared memory + * @free: called when freeing shared memory + */ +struct tee_shm_pool_mgr_ops { + int (*alloc)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm, + size_t size); + void (*free)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm); +}; + +/** + * struct tee_shm_pool_mgr - shared memory manager + * @ops: operations + * @private_data: private data for the shared memory manager + */ +struct tee_shm_pool_mgr { + const struct tee_shm_pool_mgr_ops *ops; + void *private_data; +}; + +/** + * struct tee_shm_pool - shared memory pool + * @private_mgr: pool manager for shared memory only between kernel + * and secure world + * @dma_buf_mgr: pool manager for shared memory exported to user space + * @destroy: called when destroying the pool + * @private_data: private data for the pool + */ +struct tee_shm_pool { + struct tee_shm_pool_mgr private_mgr; + struct tee_shm_pool_mgr dma_buf_mgr; + void (*destroy)(struct tee_shm_pool *pool); + void *private_data; +}; + +#define TEE_DEVICE_FLAG_REGISTERED 0x1 +#define TEE_MAX_DEV_NAME_LEN 32 + +/** + * struct tee_device - TEE Device representation + * @name: name of device + * @desc: description of device + * @id: unique id of device + * @flags: represented by TEE_DEVICE_FLAG_REGISTERED above + * @dev: embedded basic device structure + * @cdev: embedded cdev + * @num_users: number of active users of this device + * @c_no_user: completion used when unregistering the device + * @mutex: mutex protecting @num_users and @idr + * @idr: register of shared memory object allocated on this device + * @pool: shared memory pool + */ +struct tee_device { + char name[TEE_MAX_DEV_NAME_LEN]; + const struct tee_desc *desc; + int id; + unsigned int flags; + + struct device dev; + struct cdev cdev; + + size_t num_users; + struct completion c_no_users; + struct mutex mutex; /* protects num_users and idr */ + + struct idr idr; + struct tee_shm_pool *pool; +}; + +int tee_shm_init(void); + +int tee_shm_get_fd(struct tee_shm *shm); + +bool tee_device_get(struct tee_device *teedev); +void tee_device_put(struct tee_device *teedev); + +#endif /*TEE_PRIVATE_H*/ diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c new file mode 100644 index 000000000000..0be1e3e93bee --- /dev/null +++ b/drivers/tee/tee_shm.c @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include "tee_private.h" + +static void tee_shm_release(struct tee_shm *shm) +{ + struct tee_device *teedev = shm->teedev; + struct tee_shm_pool_mgr *poolm; + + mutex_lock(&teedev->mutex); + idr_remove(&teedev->idr, shm->id); + if (shm->ctx) + list_del(&shm->link); + mutex_unlock(&teedev->mutex); + + if (shm->flags & TEE_SHM_DMA_BUF) + poolm = &teedev->pool->dma_buf_mgr; + else + poolm = &teedev->pool->private_mgr; + + poolm->ops->free(poolm, shm); + kfree(shm); + + tee_device_put(teedev); +} + +static struct sg_table *tee_shm_op_map_dma_buf(struct dma_buf_attachment + *attach, enum dma_data_direction dir) +{ + return NULL; +} + +static void tee_shm_op_unmap_dma_buf(struct dma_buf_attachment *attach, + struct sg_table *table, + enum dma_data_direction dir) +{ +} + +static void tee_shm_op_release(struct dma_buf *dmabuf) +{ + struct tee_shm *shm = dmabuf->priv; + + tee_shm_release(shm); +} + +static void *tee_shm_op_kmap_atomic(struct dma_buf *dmabuf, unsigned long pgnum) +{ + return NULL; +} + +static void *tee_shm_op_kmap(struct dma_buf *dmabuf, unsigned long pgnum) +{ + return NULL; +} + +static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) +{ + struct tee_shm *shm = dmabuf->priv; + size_t size = vma->vm_end - vma->vm_start; + + return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT, + size, vma->vm_page_prot); +} + +static struct dma_buf_ops tee_shm_dma_buf_ops = { + .map_dma_buf = tee_shm_op_map_dma_buf, + .unmap_dma_buf = tee_shm_op_unmap_dma_buf, + .release = tee_shm_op_release, + .kmap_atomic = tee_shm_op_kmap_atomic, + .kmap = tee_shm_op_kmap, + .mmap = tee_shm_op_mmap, +}; + +/** + * tee_shm_alloc() - Allocate shared memory + * @ctx: Context that allocates the shared memory + * @size: Requested size of shared memory + * @flags: Flags setting properties for the requested shared memory. + * + * Memory allocated as global shared memory is automatically freed when the + * TEE file pointer is closed. The @flags field uses the bits defined by + * TEE_SHM_* in . TEE_SHM_MAPPED must currently always be + * set. If TEE_SHM_DMA_BUF global shared memory will be allocated and + * associated with a dma-buf handle, else driver private memory. + */ +struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) +{ + struct tee_device *teedev = ctx->teedev; + struct tee_shm_pool_mgr *poolm = NULL; + struct tee_shm *shm; + void *ret; + int rc; + + if (!(flags & TEE_SHM_MAPPED)) { + dev_err(teedev->dev.parent, + "only mapped allocations supported\n"); + return ERR_PTR(-EINVAL); + } + + if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF))) { + dev_err(teedev->dev.parent, "invalid shm flags 0x%x", flags); + return ERR_PTR(-EINVAL); + } + + if (!tee_device_get(teedev)) + return ERR_PTR(-EINVAL); + + if (!teedev->pool) { + /* teedev has been detached from driver */ + ret = ERR_PTR(-EINVAL); + goto err_dev_put; + } + + shm = kzalloc(sizeof(*shm), GFP_KERNEL); + if (!shm) { + ret = ERR_PTR(-ENOMEM); + goto err_dev_put; + } + + shm->flags = flags; + shm->teedev = teedev; + shm->ctx = ctx; + if (flags & TEE_SHM_DMA_BUF) + poolm = &teedev->pool->dma_buf_mgr; + else + poolm = &teedev->pool->private_mgr; + + rc = poolm->ops->alloc(poolm, shm, size); + if (rc) { + ret = ERR_PTR(rc); + goto err_kfree; + } + + mutex_lock(&teedev->mutex); + shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL); + mutex_unlock(&teedev->mutex); + if (shm->id < 0) { + ret = ERR_PTR(shm->id); + goto err_pool_free; + } + + if (flags & TEE_SHM_DMA_BUF) { + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + exp_info.ops = &tee_shm_dma_buf_ops; + exp_info.size = shm->size; + exp_info.flags = O_RDWR; + exp_info.priv = shm; + + shm->dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(shm->dmabuf)) { + ret = ERR_CAST(shm->dmabuf); + goto err_rem; + } + } + mutex_lock(&teedev->mutex); + list_add_tail(&shm->link, &ctx->list_shm); + mutex_unlock(&teedev->mutex); + + return shm; +err_rem: + mutex_lock(&teedev->mutex); + idr_remove(&teedev->idr, shm->id); + mutex_unlock(&teedev->mutex); +err_pool_free: + poolm->ops->free(poolm, shm); +err_kfree: + kfree(shm); +err_dev_put: + tee_device_put(teedev); + return ret; +} +EXPORT_SYMBOL_GPL(tee_shm_alloc); + +/** + * tee_shm_get_fd() - Increase reference count and return file descriptor + * @shm: Shared memory handle + * @returns user space file descriptor to shared memory + */ +int tee_shm_get_fd(struct tee_shm *shm) +{ + u32 req_flags = TEE_SHM_MAPPED | TEE_SHM_DMA_BUF; + int fd; + + if ((shm->flags & req_flags) != req_flags) + return -EINVAL; + + fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC); + if (fd >= 0) + get_dma_buf(shm->dmabuf); + return fd; +} + +/** + * tee_shm_free() - Free shared memory + * @shm: Handle to shared memory to free + */ +void tee_shm_free(struct tee_shm *shm) +{ + /* + * dma_buf_put() decreases the dmabuf reference counter and will + * call tee_shm_release() when the last reference is gone. + * + * In the case of driver private memory we call tee_shm_release + * directly instead as it doesn't have a reference counter. + */ + if (shm->flags & TEE_SHM_DMA_BUF) + dma_buf_put(shm->dmabuf); + else + tee_shm_release(shm); +} +EXPORT_SYMBOL_GPL(tee_shm_free); + +/** + * tee_shm_va2pa() - Get physical address of a virtual address + * @shm: Shared memory handle + * @va: Virtual address to tranlsate + * @pa: Returned physical address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa) +{ + /* Check that we're in the range of the shm */ + if ((char *)va < (char *)shm->kaddr) + return -EINVAL; + if ((char *)va >= ((char *)shm->kaddr + shm->size)) + return -EINVAL; + + return tee_shm_get_pa( + shm, (unsigned long)va - (unsigned long)shm->kaddr, pa); +} +EXPORT_SYMBOL_GPL(tee_shm_va2pa); + +/** + * tee_shm_pa2va() - Get virtual address of a physical address + * @shm: Shared memory handle + * @pa: Physical address to tranlsate + * @va: Returned virtual address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va) +{ + /* Check that we're in the range of the shm */ + if (pa < shm->paddr) + return -EINVAL; + if (pa >= (shm->paddr + shm->size)) + return -EINVAL; + + if (va) { + void *v = tee_shm_get_va(shm, pa - shm->paddr); + + if (IS_ERR(v)) + return PTR_ERR(v); + *va = v; + } + return 0; +} +EXPORT_SYMBOL_GPL(tee_shm_pa2va); + +/** + * tee_shm_get_va() - Get virtual address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory + * @returns virtual address of the shared memory + offs if offs is within + * the bounds of this shared memory, else an ERR_PTR + */ +void *tee_shm_get_va(struct tee_shm *shm, size_t offs) +{ + if (offs >= shm->size) + return ERR_PTR(-EINVAL); + return (char *)shm->kaddr + offs; +} +EXPORT_SYMBOL_GPL(tee_shm_get_va); + +/** + * tee_shm_get_pa() - Get physical address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory + * @pa: Physical address to return + * @returns 0 if offs is within the bounds of this shared memory, else an + * error code. + */ +int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa) +{ + if (offs >= shm->size) + return -EINVAL; + if (pa) + *pa = shm->paddr + offs; + return 0; +} +EXPORT_SYMBOL_GPL(tee_shm_get_pa); + +/** + * tee_shm_get_from_id() - Find shared memory object and increase reference + * count + * @ctx: Context owning the shared memory + * @id: Id of shared memory object + * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure + */ +struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id) +{ + struct tee_device *teedev; + struct tee_shm *shm; + + if (!ctx) + return ERR_PTR(-EINVAL); + + teedev = ctx->teedev; + mutex_lock(&teedev->mutex); + shm = idr_find(&teedev->idr, id); + if (!shm || shm->ctx != ctx) + shm = ERR_PTR(-EINVAL); + else if (shm->flags & TEE_SHM_DMA_BUF) + get_dma_buf(shm->dmabuf); + mutex_unlock(&teedev->mutex); + return shm; +} +EXPORT_SYMBOL_GPL(tee_shm_get_from_id); + +/** + * tee_shm_get_id() - Get id of a shared memory object + * @shm: Shared memory handle + * @returns id + */ +int tee_shm_get_id(struct tee_shm *shm) +{ + return shm->id; +} +EXPORT_SYMBOL_GPL(tee_shm_get_id); + +/** + * tee_shm_put() - Decrease reference count on a shared memory handle + * @shm: Shared memory handle + */ +void tee_shm_put(struct tee_shm *shm) +{ + if (shm->flags & TEE_SHM_DMA_BUF) + dma_buf_put(shm->dmabuf); +} +EXPORT_SYMBOL_GPL(tee_shm_put); diff --git a/drivers/tee/tee_shm_pool.c b/drivers/tee/tee_shm_pool.c new file mode 100644 index 000000000000..fb4f8522a526 --- /dev/null +++ b/drivers/tee/tee_shm_pool.c @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include +#include +#include +#include +#include +#include "tee_private.h" + +static int pool_op_gen_alloc(struct tee_shm_pool_mgr *poolm, + struct tee_shm *shm, size_t size) +{ + unsigned long va; + struct gen_pool *genpool = poolm->private_data; + size_t s = roundup(size, 1 << genpool->min_alloc_order); + + va = gen_pool_alloc(genpool, s); + if (!va) + return -ENOMEM; + + memset((void *)va, 0, s); + shm->kaddr = (void *)va; + shm->paddr = gen_pool_virt_to_phys(genpool, va); + shm->size = s; + return 0; +} + +static void pool_op_gen_free(struct tee_shm_pool_mgr *poolm, + struct tee_shm *shm) +{ + gen_pool_free(poolm->private_data, (unsigned long)shm->kaddr, + shm->size); + shm->kaddr = NULL; +} + +static const struct tee_shm_pool_mgr_ops pool_ops_generic = { + .alloc = pool_op_gen_alloc, + .free = pool_op_gen_free, +}; + +static void pool_res_mem_destroy(struct tee_shm_pool *pool) +{ + gen_pool_destroy(pool->private_mgr.private_data); + gen_pool_destroy(pool->dma_buf_mgr.private_data); +} + +static int pool_res_mem_mgr_init(struct tee_shm_pool_mgr *mgr, + struct tee_shm_pool_mem_info *info, + int min_alloc_order) +{ + size_t page_mask = PAGE_SIZE - 1; + struct gen_pool *genpool = NULL; + int rc; + + /* + * Start and end must be page aligned + */ + if ((info->vaddr & page_mask) || (info->paddr & page_mask) || + (info->size & page_mask)) + return -EINVAL; + + genpool = gen_pool_create(min_alloc_order, -1); + if (!genpool) + return -ENOMEM; + + gen_pool_set_algo(genpool, gen_pool_best_fit, NULL); + rc = gen_pool_add_virt(genpool, info->vaddr, info->paddr, info->size, + -1); + if (rc) { + gen_pool_destroy(genpool); + return rc; + } + + mgr->private_data = genpool; + mgr->ops = &pool_ops_generic; + return 0; +} + +/** + * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved + * memory range + * @priv_info: Information for driver private shared memory pool + * @dmabuf_info: Information for dma-buf shared memory pool + * + * Start and end of pools will must be page aligned. + * + * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied + * in @dmabuf, others will use the range provided by @priv. + * + * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. + */ +struct tee_shm_pool * +tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info, + struct tee_shm_pool_mem_info *dmabuf_info) +{ + struct tee_shm_pool *pool = NULL; + int ret; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) { + ret = -ENOMEM; + goto err; + } + + /* + * Create the pool for driver private shared memory + */ + ret = pool_res_mem_mgr_init(&pool->private_mgr, priv_info, + 3 /* 8 byte aligned */); + if (ret) + goto err; + + /* + * Create the pool for dma_buf shared memory + */ + ret = pool_res_mem_mgr_init(&pool->dma_buf_mgr, dmabuf_info, + PAGE_SHIFT); + if (ret) + goto err; + + pool->destroy = pool_res_mem_destroy; + return pool; +err: + if (ret == -ENOMEM) + pr_err("%s: can't allocate memory for res_mem shared memory pool\n", __func__); + if (pool && pool->private_mgr.private_data) + gen_pool_destroy(pool->private_mgr.private_data); + kfree(pool); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(tee_shm_pool_alloc_res_mem); + +/** + * tee_shm_pool_free() - Free a shared memory pool + * @pool: The shared memory pool to free + * + * There must be no remaining shared memory allocated from this pool when + * this function is called. + */ +void tee_shm_pool_free(struct tee_shm_pool *pool) +{ + pool->destroy(pool); + kfree(pool); +} +EXPORT_SYMBOL_GPL(tee_shm_pool_free); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h new file mode 100644 index 000000000000..0f175b8f6456 --- /dev/null +++ b/include/linux/tee_drv.h @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __TEE_DRV_H +#define __TEE_DRV_H + +#include +#include +#include +#include + +/* + * The file describes the API provided by the generic TEE driver to the + * specific TEE driver. + */ + +#define TEE_SHM_MAPPED 0x1 /* Memory mapped by the kernel */ +#define TEE_SHM_DMA_BUF 0x2 /* Memory with dma-buf handle */ + +struct tee_device; +struct tee_shm; +struct tee_shm_pool; + +/** + * struct tee_context - driver specific context on file pointer data + * @teedev: pointer to this drivers struct tee_device + * @list_shm: List of shared memory object owned by this context + * @data: driver specific context data, managed by the driver + */ +struct tee_context { + struct tee_device *teedev; + struct list_head list_shm; + void *data; +}; + +struct tee_param_memref { + size_t shm_offs; + size_t size; + struct tee_shm *shm; +}; + +struct tee_param_value { + u64 a; + u64 b; + u64 c; +}; + +struct tee_param { + u64 attr; + union { + struct tee_param_memref memref; + struct tee_param_value value; + } u; +}; + +/** + * struct tee_driver_ops - driver operations vtable + * @get_version: returns version of driver + * @open: called when the device file is opened + * @release: release this open file + * @open_session: open a new session + * @close_session: close a session + * @invoke_func: invoke a trusted function + * @cancel_req: request cancel of an ongoing invoke or open + * @supp_revc: called for supplicant to get a command + * @supp_send: called for supplicant to send a response + */ +struct tee_driver_ops { + void (*get_version)(struct tee_device *teedev, + struct tee_ioctl_version_data *vers); + int (*open)(struct tee_context *ctx); + void (*release)(struct tee_context *ctx); + int (*open_session)(struct tee_context *ctx, + struct tee_ioctl_open_session_arg *arg, + struct tee_param *param); + int (*close_session)(struct tee_context *ctx, u32 session); + int (*invoke_func)(struct tee_context *ctx, + struct tee_ioctl_invoke_arg *arg, + struct tee_param *param); + int (*cancel_req)(struct tee_context *ctx, u32 cancel_id, u32 session); + int (*supp_recv)(struct tee_context *ctx, u32 *func, u32 *num_params, + struct tee_param *param); + int (*supp_send)(struct tee_context *ctx, u32 ret, u32 num_params, + struct tee_param *param); +}; + +/** + * struct tee_desc - Describes the TEE driver to the subsystem + * @name: name of driver + * @ops: driver operations vtable + * @owner: module providing the driver + * @flags: Extra properties of driver, defined by TEE_DESC_* below + */ +#define TEE_DESC_PRIVILEGED 0x1 +struct tee_desc { + const char *name; + const struct tee_driver_ops *ops; + struct module *owner; + u32 flags; +}; + +/** + * tee_device_alloc() - Allocate a new struct tee_device instance + * @teedesc: Descriptor for this driver + * @dev: Parent device for this device + * @pool: Shared memory pool, NULL if not used + * @driver_data: Private driver data for this device + * + * Allocates a new struct tee_device instance. The device is + * removed by tee_device_unregister(). + * + * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure + */ +struct tee_device *tee_device_alloc(const struct tee_desc *teedesc, + struct device *dev, + struct tee_shm_pool *pool, + void *driver_data); + +/** + * tee_device_register() - Registers a TEE device + * @teedev: Device to register + * + * tee_device_unregister() need to be called to remove the @teedev if + * this function fails. + * + * @returns < 0 on failure + */ +int tee_device_register(struct tee_device *teedev); + +/** + * tee_device_unregister() - Removes a TEE device + * @teedev: Device to unregister + * + * This function should be called to remove the @teedev even if + * tee_device_register() hasn't been called yet. Does nothing if + * @teedev is NULL. + */ +void tee_device_unregister(struct tee_device *teedev); + +/** + * struct tee_shm_pool_mem_info - holds information needed to create a shared + * memory pool + * @vaddr: Virtual address of start of pool + * @paddr: Physical address of start of pool + * @size: Size in bytes of the pool + */ +struct tee_shm_pool_mem_info { + unsigned long vaddr; + phys_addr_t paddr; + size_t size; +}; + +/** + * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved + * memory range + * @priv_info: Information for driver private shared memory pool + * @dmabuf_info: Information for dma-buf shared memory pool + * + * Start and end of pools will must be page aligned. + * + * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied + * in @dmabuf, others will use the range provided by @priv. + * + * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. + */ +struct tee_shm_pool * +tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info, + struct tee_shm_pool_mem_info *dmabuf_info); + +/** + * tee_shm_pool_free() - Free a shared memory pool + * @pool: The shared memory pool to free + * + * The must be no remaining shared memory allocated from this pool when + * this function is called. + */ +void tee_shm_pool_free(struct tee_shm_pool *pool); + +/** + * tee_get_drvdata() - Return driver_data pointer + * @returns the driver_data pointer supplied to tee_register(). + */ +void *tee_get_drvdata(struct tee_device *teedev); + +/** + * tee_shm_alloc() - Allocate shared memory + * @ctx: Context that allocates the shared memory + * @size: Requested size of shared memory + * @flags: Flags setting properties for the requested shared memory. + * + * Memory allocated as global shared memory is automatically freed when the + * TEE file pointer is closed. The @flags field uses the bits defined by + * TEE_SHM_* above. TEE_SHM_MAPPED must currently always be set. If + * TEE_SHM_DMA_BUF global shared memory will be allocated and associated + * with a dma-buf handle, else driver private memory. + * + * @returns a pointer to 'struct tee_shm' + */ +struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags); + +/** + * tee_shm_free() - Free shared memory + * @shm: Handle to shared memory to free + */ +void tee_shm_free(struct tee_shm *shm); + +/** + * tee_shm_put() - Decrease reference count on a shared memory handle + * @shm: Shared memory handle + */ +void tee_shm_put(struct tee_shm *shm); + +/** + * tee_shm_va2pa() - Get physical address of a virtual address + * @shm: Shared memory handle + * @va: Virtual address to tranlsate + * @pa: Returned physical address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa); + +/** + * tee_shm_pa2va() - Get virtual address of a physical address + * @shm: Shared memory handle + * @pa: Physical address to tranlsate + * @va: Returned virtual address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va); + +/** + * tee_shm_get_va() - Get virtual address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory + * @returns virtual address of the shared memory + offs if offs is within + * the bounds of this shared memory, else an ERR_PTR + */ +void *tee_shm_get_va(struct tee_shm *shm, size_t offs); + +/** + * tee_shm_get_pa() - Get physical address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory + * @pa: Physical address to return + * @returns 0 if offs is within the bounds of this shared memory, else an + * error code. + */ +int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa); + +/** + * tee_shm_get_id() - Get id of a shared memory object + * @shm: Shared memory handle + * @returns id + */ +int tee_shm_get_id(struct tee_shm *shm); + +/** + * tee_shm_get_from_id() - Find shared memory object and increase reference + * count + * @ctx: Context owning the shared memory + * @id: Id of shared memory object + * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure + */ +struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id); + +#endif /*__TEE_DRV_H*/ diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h new file mode 100644 index 000000000000..370d8845ab21 --- /dev/null +++ b/include/uapi/linux/tee.h @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __TEE_H +#define __TEE_H + +#include +#include + +/* + * This file describes the API provided by a TEE driver to user space. + * + * Each TEE driver defines a TEE specific protocol which is used for the + * data passed back and forth using TEE_IOC_CMD. + */ + +/* Helpers to make the ioctl defines */ +#define TEE_IOC_MAGIC 0xa4 +#define TEE_IOC_BASE 0 + +/* Flags relating to shared memory */ +#define TEE_IOCTL_SHM_MAPPED 0x1 /* memory mapped in normal world */ +#define TEE_IOCTL_SHM_DMA_BUF 0x2 /* dma-buf handle on shared memory */ + +#define TEE_MAX_ARG_SIZE 1024 + +#define TEE_GEN_CAP_GP (1 << 0)/* GlobalPlatform compliant TEE */ + +/* + * TEE Implementation ID + */ +#define TEE_IMPL_ID_OPTEE 1 + +/* + * OP-TEE specific capabilities + */ +#define TEE_OPTEE_CAP_TZ (1 << 0) + +/** + * struct tee_ioctl_version_data - TEE version + * @impl_id: [out] TEE implementation id + * @impl_caps: [out] Implementation specific capabilities + * @gen_caps: [out] Generic capabilities, defined by TEE_GEN_CAPS_* above + * + * Identifies the TEE implementation, @impl_id is one of TEE_IMPL_ID_* above. + * @impl_caps is implementation specific, for example TEE_OPTEE_CAP_* + * is valid when @impl_id == TEE_IMPL_ID_OPTEE. + */ +struct tee_ioctl_version_data { + __u32 impl_id; + __u32 impl_caps; + __u32 gen_caps; +}; + +/** + * TEE_IOC_VERSION - query version of TEE + * + * Takes a tee_ioctl_version_data struct and returns with the TEE version + * data filled in. + */ +#define TEE_IOC_VERSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 0, \ + struct tee_ioctl_version_data) + +/** + * struct tee_ioctl_shm_alloc_data - Shared memory allocate argument + * @size: [in/out] Size of shared memory to allocate + * @flags: [in/out] Flags to/from allocation. + * @id: [out] Identifier of the shared memory + * + * The flags field should currently be zero as input. Updated by the call + * with actual flags as defined by TEE_IOCTL_SHM_* above. + * This structure is used as argument for TEE_IOC_SHM_ALLOC below. + */ +struct tee_ioctl_shm_alloc_data { + __u64 size; + __u32 flags; + __s32 id; +}; + +/** + * TEE_IOC_SHM_ALLOC - allocate shared memory + * + * Allocates shared memory between the user space process and secure OS. + * + * Returns a file descriptor on success or < 0 on failure + * + * The returned file descriptor is used to map the shared memory into user + * space. The shared memory is freed when the descriptor is closed and the + * memory is unmapped. + */ +#define TEE_IOC_SHM_ALLOC _IOWR(TEE_IOC_MAGIC, TEE_IOC_BASE + 1, \ + struct tee_ioctl_shm_alloc_data) + +/** + * struct tee_ioctl_buf_data - Variable sized buffer + * @buf_ptr: [in] A __user pointer to a buffer + * @buf_len: [in] Length of the buffer above + * + * Used as argument for TEE_IOC_OPEN_SESSION, TEE_IOC_INVOKE, + * TEE_IOC_SUPPL_RECV, and TEE_IOC_SUPPL_SEND below. + */ +struct tee_ioctl_buf_data { + __u64 buf_ptr; + __u64 buf_len; +}; + +/* + * Attributes for struct tee_ioctl_param, selects field in the union + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_NONE 0 /* parameter not used */ + +/* + * These defines value parameters (struct tee_ioctl_param_value) + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT 1 +#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT 2 +#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT 3 /* input and output */ + +/* + * These defines shared memory reference parameters (struct + * tee_ioctl_param_memref) + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT 5 +#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT 6 +#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT 7 /* input and output */ + +/* + * Mask for the type part of the attribute, leaves room for more types + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_MASK 0xff + +/* + * Matches TEEC_LOGIN_* in GP TEE Client API + * Are only defined for GP compliant TEEs + */ +#define TEE_IOCTL_LOGIN_PUBLIC 0 +#define TEE_IOCTL_LOGIN_USER 1 +#define TEE_IOCTL_LOGIN_GROUP 2 +#define TEE_IOCTL_LOGIN_APPLICATION 4 +#define TEE_IOCTL_LOGIN_USER_APPLICATION 5 +#define TEE_IOCTL_LOGIN_GROUP_APPLICATION 6 + +/** + * struct tee_ioctl_param - parameter + * @attr: attributes + * @a: if a memref, offset into the shared memory object, else a value parameter + * @b: if a memref, size of the buffer, else a value parameter + * @c: if a memref, shared memory identifier, else a value parameter + * + * @attr & TEE_PARAM_ATTR_TYPE_MASK indicates if memref or value is used in + * the union. TEE_PARAM_ATTR_TYPE_VALUE_* indicates value and + * TEE_PARAM_ATTR_TYPE_MEMREF_* indicates memref. TEE_PARAM_ATTR_TYPE_NONE + * indicates that none of the members are used. + * + * Shared memory is allocated with TEE_IOC_SHM_ALLOC which returns an + * identifier representing the shared memory object. A memref can reference + * a part of a shared memory by specifying an offset (@a) and size (@b) of + * the object. To supply the entire shared memory object set the offset + * (@a) to 0 and size (@b) to the previously returned size of the object. + */ +struct tee_ioctl_param { + __u64 attr; + __u64 a; + __u64 b; + __u64 c; +}; + +#define TEE_IOCTL_UUID_LEN 16 + +/** + * struct tee_ioctl_open_session_arg - Open session argument + * @uuid: [in] UUID of the Trusted Application + * @clnt_uuid: [in] UUID of client + * @clnt_login: [in] Login class of client, TEE_IOCTL_LOGIN_* above + * @cancel_id: [in] Cancellation id, a unique value to identify this request + * @session: [out] Session id + * @ret: [out] return value + * @ret_origin [out] origin of the return value + * @num_params [in] number of parameters following this struct + */ +struct tee_ioctl_open_session_arg { + __u8 uuid[TEE_IOCTL_UUID_LEN]; + __u8 clnt_uuid[TEE_IOCTL_UUID_LEN]; + __u32 clnt_login; + __u32 cancel_id; + __u32 session; + __u32 ret; + __u32 ret_origin; + __u32 num_params; + /* num_params tells the actual number of element in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_OPEN_SESSION - opens a session to a Trusted Application + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_ioctl_open_session_arg followed by any array of struct + * tee_ioctl_param + */ +#define TEE_IOC_OPEN_SESSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 2, \ + struct tee_ioctl_buf_data) + +/** + * struct tee_ioctl_invoke_func_arg - Invokes a function in a Trusted + * Application + * @func: [in] Trusted Application function, specific to the TA + * @session: [in] Session id + * @cancel_id: [in] Cancellation id, a unique value to identify this request + * @ret: [out] return value + * @ret_origin [out] origin of the return value + * @num_params [in] number of parameters following this struct + */ +struct tee_ioctl_invoke_arg { + __u32 func; + __u32 session; + __u32 cancel_id; + __u32 ret; + __u32 ret_origin; + __u32 num_params; + /* num_params tells the actual number of element in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_INVOKE - Invokes a function in a Trusted Application + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_invoke_func_arg followed by any array of struct tee_param + */ +#define TEE_IOC_INVOKE _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 3, \ + struct tee_ioctl_buf_data) + +/** + * struct tee_ioctl_cancel_arg - Cancels an open session or invoke ioctl + * @cancel_id: [in] Cancellation id, a unique value to identify this request + * @session: [in] Session id, if the session is opened, else set to 0 + */ +struct tee_ioctl_cancel_arg { + __u32 cancel_id; + __u32 session; +}; + +/** + * TEE_IOC_CANCEL - Cancels an open session or invoke + */ +#define TEE_IOC_CANCEL _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 4, \ + struct tee_ioctl_cancel_arg) + +/** + * struct tee_ioctl_close_session_arg - Closes an open session + * @session: [in] Session id + */ +struct tee_ioctl_close_session_arg { + __u32 session; +}; + +/** + * TEE_IOC_CLOSE_SESSION - Closes a session + */ +#define TEE_IOC_CLOSE_SESSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 5, \ + struct tee_ioctl_close_session_arg) + +/** + * struct tee_iocl_supp_recv_arg - Receive a request for a supplicant function + * @func: [in] supplicant function + * @num_params [in/out] number of parameters following this struct + * + * @num_params is the number of params that tee-supplicant has room to + * receive when input, @num_params is the number of actual params + * tee-supplicant receives when output. + */ +struct tee_iocl_supp_recv_arg { + __u32 func; + __u32 num_params; + /* num_params tells the actual number of element in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_SUPPL_RECV - Receive a request for a supplicant function + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_iocl_supp_recv_arg followed by any array of struct tee_param + */ +#define TEE_IOC_SUPPL_RECV _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 6, \ + struct tee_ioctl_buf_data) + +/** + * struct tee_iocl_supp_send_arg - Send a response to a received request + * @ret: [out] return value + * @num_params [in] number of parameters following this struct + */ +struct tee_iocl_supp_send_arg { + __u32 ret; + __u32 num_params; + /* num_params tells the actual number of element in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_SUPPL_SEND - Receive a request for a supplicant function + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_iocl_supp_send_arg followed by any array of struct tee_param + */ +#define TEE_IOC_SUPPL_SEND _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 7, \ + struct tee_ioctl_buf_data) + +/* + * Five syscalls are used when communicating with the TEE driver. + * open(): opens the device associated with the driver + * ioctl(): as described above operating on the file descriptor from open() + * close(): two cases + * - closes the device file descriptor + * - closes a file descriptor connected to allocated shared memory + * mmap(): maps shared memory into user space using information from struct + * tee_ioctl_shm_alloc_data + * munmap(): unmaps previously shared memory + */ + +#endif /*__TEE_H*/ -- GitLab From 18ebb2fac706f7a454642d963e27915ddcbb3f57 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Tue, 14 Apr 2015 14:33:20 +0200 Subject: [PATCH 0042/1398] BACKPORT: tee: add OP-TEE driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a OP-TEE driver which also can be compiled as a loadable module. * Targets ARM and ARM64 * Supports using reserved memory from OP-TEE as shared memory * Probes OP-TEE version using SMCs * Accepts requests on privileged and unprivileged device * Uses OPTEE message protocol version 2 to communicate with secure world Change-Id: Id9fc5fda759496a28855c6968fe09f4f4767a63d Acked-by: Andreas Dannenberg Tested-by: Jerome Forissier (HiKey) Tested-by: Volodymyr Babchuk (RCAR H3) Tested-by: Scott Branden Reviewed-by: Javier González Signed-off-by: Jens Wiklander (cherry picked from commit 4fb0a5eb364d239722e745c02aef0dbd4e0f1ad2) Signed-off-by: Victor Chong --- MAINTAINERS | 5 + drivers/tee/Kconfig | 10 + drivers/tee/Makefile | 1 + drivers/tee/optee/Kconfig | 7 + drivers/tee/optee/Makefile | 5 + drivers/tee/optee/call.c | 444 +++++++++++++++++++++ drivers/tee/optee/core.c | 622 ++++++++++++++++++++++++++++++ drivers/tee/optee/optee_msg.h | 418 ++++++++++++++++++++ drivers/tee/optee/optee_private.h | 183 +++++++++ drivers/tee/optee/optee_smc.h | 450 +++++++++++++++++++++ drivers/tee/optee/rpc.c | 396 +++++++++++++++++++ drivers/tee/optee/supp.c | 273 +++++++++++++ 12 files changed, 2814 insertions(+) create mode 100644 drivers/tee/optee/Kconfig create mode 100644 drivers/tee/optee/Makefile create mode 100644 drivers/tee/optee/call.c create mode 100644 drivers/tee/optee/core.c create mode 100644 drivers/tee/optee/optee_msg.h create mode 100644 drivers/tee/optee/optee_private.h create mode 100644 drivers/tee/optee/optee_smc.h create mode 100644 drivers/tee/optee/rpc.c create mode 100644 drivers/tee/optee/supp.c diff --git a/MAINTAINERS b/MAINTAINERS index 13daf6829fda..bf6e78f2fd64 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9010,6 +9010,11 @@ F: arch/*/oprofile/ F: drivers/oprofile/ F: include/linux/oprofile.h +OP-TEE DRIVER +M: Jens Wiklander +S: Maintained +F: drivers/tee/optee/ + ORACLE CLUSTER FILESYSTEM 2 (OCFS2) M: Mark Fasheh M: Joel Becker diff --git a/drivers/tee/Kconfig b/drivers/tee/Kconfig index 50c244ead46d..2330a4eb4e8b 100644 --- a/drivers/tee/Kconfig +++ b/drivers/tee/Kconfig @@ -6,3 +6,13 @@ config TEE help This implements a generic interface towards a Trusted Execution Environment (TEE). + +if TEE + +menu "TEE drivers" + +source "drivers/tee/optee/Kconfig" + +endmenu + +endif diff --git a/drivers/tee/Makefile b/drivers/tee/Makefile index ec64047a86e2..7a4e4a1ac39c 100644 --- a/drivers/tee/Makefile +++ b/drivers/tee/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_TEE) += tee.o tee-objs += tee_core.o tee-objs += tee_shm.o tee-objs += tee_shm_pool.o +obj-$(CONFIG_OPTEE) += optee/ diff --git a/drivers/tee/optee/Kconfig b/drivers/tee/optee/Kconfig new file mode 100644 index 000000000000..0126de898036 --- /dev/null +++ b/drivers/tee/optee/Kconfig @@ -0,0 +1,7 @@ +# OP-TEE Trusted Execution Environment Configuration +config OPTEE + tristate "OP-TEE" + depends on HAVE_ARM_SMCCC + help + This implements the OP-TEE Trusted Execution Environment (TEE) + driver. diff --git a/drivers/tee/optee/Makefile b/drivers/tee/optee/Makefile new file mode 100644 index 000000000000..92fe5789bcce --- /dev/null +++ b/drivers/tee/optee/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_OPTEE) += optee.o +optee-objs += core.o +optee-objs += call.o +optee-objs += rpc.o +optee-objs += supp.o diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c new file mode 100644 index 000000000000..f7b7b404c990 --- /dev/null +++ b/drivers/tee/optee/call.c @@ -0,0 +1,444 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "optee_private.h" +#include "optee_smc.h" + +struct optee_call_waiter { + struct list_head list_node; + struct completion c; +}; + +static void optee_cq_wait_init(struct optee_call_queue *cq, + struct optee_call_waiter *w) +{ + /* + * We're preparing to make a call to secure world. In case we can't + * allocate a thread in secure world we'll end up waiting in + * optee_cq_wait_for_completion(). + * + * Normally if there's no contention in secure world the call will + * complete and we can cleanup directly with optee_cq_wait_final(). + */ + mutex_lock(&cq->mutex); + + /* + * We add ourselves to the queue, but we don't wait. This + * guarantees that we don't lose a completion if secure world + * returns busy and another thread just exited and try to complete + * someone. + */ + init_completion(&w->c); + list_add_tail(&w->list_node, &cq->waiters); + + mutex_unlock(&cq->mutex); +} + +static void optee_cq_wait_for_completion(struct optee_call_queue *cq, + struct optee_call_waiter *w) +{ + wait_for_completion(&w->c); + + mutex_lock(&cq->mutex); + + /* Move to end of list to get out of the way for other waiters */ + list_del(&w->list_node); + reinit_completion(&w->c); + list_add_tail(&w->list_node, &cq->waiters); + + mutex_unlock(&cq->mutex); +} + +static void optee_cq_complete_one(struct optee_call_queue *cq) +{ + struct optee_call_waiter *w; + + list_for_each_entry(w, &cq->waiters, list_node) { + if (!completion_done(&w->c)) { + complete(&w->c); + break; + } + } +} + +static void optee_cq_wait_final(struct optee_call_queue *cq, + struct optee_call_waiter *w) +{ + /* + * We're done with the call to secure world. The thread in secure + * world that was used for this call is now available for some + * other task to use. + */ + mutex_lock(&cq->mutex); + + /* Get out of the list */ + list_del(&w->list_node); + + /* Wake up one eventual waiting task */ + optee_cq_complete_one(cq); + + /* + * If we're completed we've got a completion from another task that + * was just done with its call to secure world. Since yet another + * thread now is available in secure world wake up another eventual + * waiting task. + */ + if (completion_done(&w->c)) + optee_cq_complete_one(cq); + + mutex_unlock(&cq->mutex); +} + +/* Requires the filpstate mutex to be held */ +static struct optee_session *find_session(struct optee_context_data *ctxdata, + u32 session_id) +{ + struct optee_session *sess; + + list_for_each_entry(sess, &ctxdata->sess_list, list_node) + if (sess->session_id == session_id) + return sess; + + return NULL; +} + +/** + * optee_do_call_with_arg() - Do an SMC to OP-TEE in secure world + * @ctx: calling context + * @parg: physical address of message to pass to secure world + * + * Does and SMC to OP-TEE in secure world and handles eventual resulting + * Remote Procedure Calls (RPC) from OP-TEE. + * + * Returns return code from secure world, 0 is OK + */ +u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg) +{ + struct optee *optee = tee_get_drvdata(ctx->teedev); + struct optee_call_waiter w; + struct optee_rpc_param param = { }; + u32 ret; + + param.a0 = OPTEE_SMC_CALL_WITH_ARG; + reg_pair_from_64(¶m.a1, ¶m.a2, parg); + /* Initialize waiter */ + optee_cq_wait_init(&optee->call_queue, &w); + while (true) { + struct arm_smccc_res res; + + optee->invoke_fn(param.a0, param.a1, param.a2, param.a3, + param.a4, param.a5, param.a6, param.a7, + &res); + + if (res.a0 == OPTEE_SMC_RETURN_ETHREAD_LIMIT) { + /* + * Out of threads in secure world, wait for a thread + * become available. + */ + optee_cq_wait_for_completion(&optee->call_queue, &w); + } else if (OPTEE_SMC_RETURN_IS_RPC(res.a0)) { + param.a0 = res.a0; + param.a1 = res.a1; + param.a2 = res.a2; + param.a3 = res.a3; + optee_handle_rpc(ctx, ¶m); + } else { + ret = res.a0; + break; + } + } + + /* + * We're done with our thread in secure world, if there's any + * thread waiters wake up one. + */ + optee_cq_wait_final(&optee->call_queue, &w); + + return ret; +} + +static struct tee_shm *get_msg_arg(struct tee_context *ctx, size_t num_params, + struct optee_msg_arg **msg_arg, + phys_addr_t *msg_parg) +{ + int rc; + struct tee_shm *shm; + struct optee_msg_arg *ma; + + shm = tee_shm_alloc(ctx, OPTEE_MSG_GET_ARG_SIZE(num_params), + TEE_SHM_MAPPED); + if (IS_ERR(shm)) + return shm; + + ma = tee_shm_get_va(shm, 0); + if (IS_ERR(ma)) { + rc = PTR_ERR(ma); + goto out; + } + + rc = tee_shm_get_pa(shm, 0, msg_parg); + if (rc) + goto out; + + memset(ma, 0, OPTEE_MSG_GET_ARG_SIZE(num_params)); + ma->num_params = num_params; + *msg_arg = ma; +out: + if (rc) { + tee_shm_free(shm); + return ERR_PTR(rc); + } + + return shm; +} + +int optee_open_session(struct tee_context *ctx, + struct tee_ioctl_open_session_arg *arg, + struct tee_param *param) +{ + struct optee_context_data *ctxdata = ctx->data; + int rc; + struct tee_shm *shm; + struct optee_msg_arg *msg_arg; + phys_addr_t msg_parg; + struct optee_session *sess = NULL; + + /* +2 for the meta parameters added below */ + shm = get_msg_arg(ctx, arg->num_params + 2, &msg_arg, &msg_parg); + if (IS_ERR(shm)) + return PTR_ERR(shm); + + msg_arg->cmd = OPTEE_MSG_CMD_OPEN_SESSION; + msg_arg->cancel_id = arg->cancel_id; + + /* + * Initialize and add the meta parameters needed when opening a + * session. + */ + msg_arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT | + OPTEE_MSG_ATTR_META; + msg_arg->params[1].attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT | + OPTEE_MSG_ATTR_META; + memcpy(&msg_arg->params[0].u.value, arg->uuid, sizeof(arg->uuid)); + memcpy(&msg_arg->params[1].u.value, arg->uuid, sizeof(arg->clnt_uuid)); + msg_arg->params[1].u.value.c = arg->clnt_login; + + rc = optee_to_msg_param(msg_arg->params + 2, arg->num_params, param); + if (rc) + goto out; + + sess = kzalloc(sizeof(*sess), GFP_KERNEL); + if (!sess) { + rc = -ENOMEM; + goto out; + } + + if (optee_do_call_with_arg(ctx, msg_parg)) { + msg_arg->ret = TEEC_ERROR_COMMUNICATION; + msg_arg->ret_origin = TEEC_ORIGIN_COMMS; + } + + if (msg_arg->ret == TEEC_SUCCESS) { + /* A new session has been created, add it to the list. */ + sess->session_id = msg_arg->session; + mutex_lock(&ctxdata->mutex); + list_add(&sess->list_node, &ctxdata->sess_list); + mutex_unlock(&ctxdata->mutex); + } else { + kfree(sess); + } + + if (optee_from_msg_param(param, arg->num_params, msg_arg->params + 2)) { + arg->ret = TEEC_ERROR_COMMUNICATION; + arg->ret_origin = TEEC_ORIGIN_COMMS; + /* Close session again to avoid leakage */ + optee_close_session(ctx, msg_arg->session); + } else { + arg->session = msg_arg->session; + arg->ret = msg_arg->ret; + arg->ret_origin = msg_arg->ret_origin; + } +out: + tee_shm_free(shm); + + return rc; +} + +int optee_close_session(struct tee_context *ctx, u32 session) +{ + struct optee_context_data *ctxdata = ctx->data; + struct tee_shm *shm; + struct optee_msg_arg *msg_arg; + phys_addr_t msg_parg; + struct optee_session *sess; + + /* Check that the session is valid and remove it from the list */ + mutex_lock(&ctxdata->mutex); + sess = find_session(ctxdata, session); + if (sess) + list_del(&sess->list_node); + mutex_unlock(&ctxdata->mutex); + if (!sess) + return -EINVAL; + kfree(sess); + + shm = get_msg_arg(ctx, 0, &msg_arg, &msg_parg); + if (IS_ERR(shm)) + return PTR_ERR(shm); + + msg_arg->cmd = OPTEE_MSG_CMD_CLOSE_SESSION; + msg_arg->session = session; + optee_do_call_with_arg(ctx, msg_parg); + + tee_shm_free(shm); + return 0; +} + +int optee_invoke_func(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg, + struct tee_param *param) +{ + struct optee_context_data *ctxdata = ctx->data; + struct tee_shm *shm; + struct optee_msg_arg *msg_arg; + phys_addr_t msg_parg; + struct optee_session *sess; + int rc; + + /* Check that the session is valid */ + mutex_lock(&ctxdata->mutex); + sess = find_session(ctxdata, arg->session); + mutex_unlock(&ctxdata->mutex); + if (!sess) + return -EINVAL; + + shm = get_msg_arg(ctx, arg->num_params, &msg_arg, &msg_parg); + if (IS_ERR(shm)) + return PTR_ERR(shm); + msg_arg->cmd = OPTEE_MSG_CMD_INVOKE_COMMAND; + msg_arg->func = arg->func; + msg_arg->session = arg->session; + msg_arg->cancel_id = arg->cancel_id; + + rc = optee_to_msg_param(msg_arg->params, arg->num_params, param); + if (rc) + goto out; + + if (optee_do_call_with_arg(ctx, msg_parg)) { + msg_arg->ret = TEEC_ERROR_COMMUNICATION; + msg_arg->ret_origin = TEEC_ORIGIN_COMMS; + } + + if (optee_from_msg_param(param, arg->num_params, msg_arg->params)) { + msg_arg->ret = TEEC_ERROR_COMMUNICATION; + msg_arg->ret_origin = TEEC_ORIGIN_COMMS; + } + + arg->ret = msg_arg->ret; + arg->ret_origin = msg_arg->ret_origin; +out: + tee_shm_free(shm); + return rc; +} + +int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session) +{ + struct optee_context_data *ctxdata = ctx->data; + struct tee_shm *shm; + struct optee_msg_arg *msg_arg; + phys_addr_t msg_parg; + struct optee_session *sess; + + /* Check that the session is valid */ + mutex_lock(&ctxdata->mutex); + sess = find_session(ctxdata, session); + mutex_unlock(&ctxdata->mutex); + if (!sess) + return -EINVAL; + + shm = get_msg_arg(ctx, 0, &msg_arg, &msg_parg); + if (IS_ERR(shm)) + return PTR_ERR(shm); + + msg_arg->cmd = OPTEE_MSG_CMD_CANCEL; + msg_arg->session = session; + msg_arg->cancel_id = cancel_id; + optee_do_call_with_arg(ctx, msg_parg); + + tee_shm_free(shm); + return 0; +} + +/** + * optee_enable_shm_cache() - Enables caching of some shared memory allocation + * in OP-TEE + * @optee: main service struct + */ +void optee_enable_shm_cache(struct optee *optee) +{ + struct optee_call_waiter w; + + /* We need to retry until secure world isn't busy. */ + optee_cq_wait_init(&optee->call_queue, &w); + while (true) { + struct arm_smccc_res res; + + optee->invoke_fn(OPTEE_SMC_ENABLE_SHM_CACHE, 0, 0, 0, 0, 0, 0, + 0, &res); + if (res.a0 == OPTEE_SMC_RETURN_OK) + break; + optee_cq_wait_for_completion(&optee->call_queue, &w); + } + optee_cq_wait_final(&optee->call_queue, &w); +} + +/** + * optee_disable_shm_cache() - Disables caching of some shared memory allocation + * in OP-TEE + * @optee: main service struct + */ +void optee_disable_shm_cache(struct optee *optee) +{ + struct optee_call_waiter w; + + /* We need to retry until secure world isn't busy. */ + optee_cq_wait_init(&optee->call_queue, &w); + while (true) { + union { + struct arm_smccc_res smccc; + struct optee_smc_disable_shm_cache_result result; + } res; + + optee->invoke_fn(OPTEE_SMC_DISABLE_SHM_CACHE, 0, 0, 0, 0, 0, 0, + 0, &res.smccc); + if (res.result.status == OPTEE_SMC_RETURN_ENOTAVAIL) + break; /* All shm's freed */ + if (res.result.status == OPTEE_SMC_RETURN_OK) { + struct tee_shm *shm; + + shm = reg_pair_to_ptr(res.result.shm_upper32, + res.result.shm_lower32); + tee_shm_free(shm); + } else { + optee_cq_wait_for_completion(&optee->call_queue, &w); + } + } + optee_cq_wait_final(&optee->call_queue, &w); +} diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c new file mode 100644 index 000000000000..58169e519422 --- /dev/null +++ b/drivers/tee/optee/core.c @@ -0,0 +1,622 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "optee_private.h" +#include "optee_smc.h" + +#define DRIVER_NAME "optee" + +#define OPTEE_SHM_NUM_PRIV_PAGES 1 + +/** + * optee_from_msg_param() - convert from OPTEE_MSG parameters to + * struct tee_param + * @params: subsystem internal parameter representation + * @num_params: number of elements in the parameter arrays + * @msg_params: OPTEE_MSG parameters + * Returns 0 on success or <0 on failure + */ +int optee_from_msg_param(struct tee_param *params, size_t num_params, + const struct optee_msg_param *msg_params) +{ + int rc; + size_t n; + struct tee_shm *shm; + phys_addr_t pa; + + for (n = 0; n < num_params; n++) { + struct tee_param *p = params + n; + const struct optee_msg_param *mp = msg_params + n; + u32 attr = mp->attr & OPTEE_MSG_ATTR_TYPE_MASK; + + switch (attr) { + case OPTEE_MSG_ATTR_TYPE_NONE: + p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_NONE; + memset(&p->u, 0, sizeof(p->u)); + break; + case OPTEE_MSG_ATTR_TYPE_VALUE_INPUT: + case OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT: + case OPTEE_MSG_ATTR_TYPE_VALUE_INOUT: + p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT + + attr - OPTEE_MSG_ATTR_TYPE_VALUE_INPUT; + p->u.value.a = mp->u.value.a; + p->u.value.b = mp->u.value.b; + p->u.value.c = mp->u.value.c; + break; + case OPTEE_MSG_ATTR_TYPE_TMEM_INPUT: + case OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT: + case OPTEE_MSG_ATTR_TYPE_TMEM_INOUT: + p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT + + attr - OPTEE_MSG_ATTR_TYPE_TMEM_INPUT; + p->u.memref.size = mp->u.tmem.size; + shm = (struct tee_shm *)(unsigned long) + mp->u.tmem.shm_ref; + if (!shm) { + p->u.memref.shm_offs = 0; + p->u.memref.shm = NULL; + break; + } + rc = tee_shm_get_pa(shm, 0, &pa); + if (rc) + return rc; + p->u.memref.shm_offs = mp->u.tmem.buf_ptr - pa; + p->u.memref.shm = shm; + + /* Check that the memref is covered by the shm object */ + if (p->u.memref.size) { + size_t o = p->u.memref.shm_offs + + p->u.memref.size - 1; + + rc = tee_shm_get_pa(shm, o, NULL); + if (rc) + return rc; + } + break; + default: + return -EINVAL; + } + } + return 0; +} + +/** + * optee_to_msg_param() - convert from struct tee_params to OPTEE_MSG parameters + * @msg_params: OPTEE_MSG parameters + * @num_params: number of elements in the parameter arrays + * @params: subsystem itnernal parameter representation + * Returns 0 on success or <0 on failure + */ +int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params, + const struct tee_param *params) +{ + int rc; + size_t n; + phys_addr_t pa; + + for (n = 0; n < num_params; n++) { + const struct tee_param *p = params + n; + struct optee_msg_param *mp = msg_params + n; + + switch (p->attr) { + case TEE_IOCTL_PARAM_ATTR_TYPE_NONE: + mp->attr = TEE_IOCTL_PARAM_ATTR_TYPE_NONE; + memset(&mp->u, 0, sizeof(mp->u)); + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + mp->attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT + p->attr - + TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT; + mp->u.value.a = p->u.value.a; + mp->u.value.b = p->u.value.b; + mp->u.value.c = p->u.value.c; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + mp->attr = OPTEE_MSG_ATTR_TYPE_TMEM_INPUT + + p->attr - + TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT; + mp->u.tmem.shm_ref = (unsigned long)p->u.memref.shm; + mp->u.tmem.size = p->u.memref.size; + if (!p->u.memref.shm) { + mp->u.tmem.buf_ptr = 0; + break; + } + rc = tee_shm_get_pa(p->u.memref.shm, + p->u.memref.shm_offs, &pa); + if (rc) + return rc; + mp->u.tmem.buf_ptr = pa; + mp->attr |= OPTEE_MSG_ATTR_CACHE_PREDEFINED << + OPTEE_MSG_ATTR_CACHE_SHIFT; + break; + default: + return -EINVAL; + } + } + return 0; +} + +static void optee_get_version(struct tee_device *teedev, + struct tee_ioctl_version_data *vers) +{ + struct tee_ioctl_version_data v = { + .impl_id = TEE_IMPL_ID_OPTEE, + .impl_caps = TEE_OPTEE_CAP_TZ, + .gen_caps = TEE_GEN_CAP_GP, + }; + *vers = v; +} + +static int optee_open(struct tee_context *ctx) +{ + struct optee_context_data *ctxdata; + struct tee_device *teedev = ctx->teedev; + struct optee *optee = tee_get_drvdata(teedev); + + ctxdata = kzalloc(sizeof(*ctxdata), GFP_KERNEL); + if (!ctxdata) + return -ENOMEM; + + if (teedev == optee->supp_teedev) { + bool busy = true; + + mutex_lock(&optee->supp.ctx_mutex); + if (!optee->supp.ctx) { + busy = false; + optee->supp.ctx = ctx; + } + mutex_unlock(&optee->supp.ctx_mutex); + if (busy) { + kfree(ctxdata); + return -EBUSY; + } + } + + mutex_init(&ctxdata->mutex); + INIT_LIST_HEAD(&ctxdata->sess_list); + + ctx->data = ctxdata; + return 0; +} + +static void optee_release(struct tee_context *ctx) +{ + struct optee_context_data *ctxdata = ctx->data; + struct tee_device *teedev = ctx->teedev; + struct optee *optee = tee_get_drvdata(teedev); + struct tee_shm *shm; + struct optee_msg_arg *arg = NULL; + phys_addr_t parg; + struct optee_session *sess; + struct optee_session *sess_tmp; + + if (!ctxdata) + return; + + shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg), TEE_SHM_MAPPED); + if (!IS_ERR(shm)) { + arg = tee_shm_get_va(shm, 0); + /* + * If va2pa fails for some reason, we can't call + * optee_close_session(), only free the memory. Secure OS + * will leak sessions and finally refuse more sessions, but + * we will at least let normal world reclaim its memory. + */ + if (!IS_ERR(arg)) + tee_shm_va2pa(shm, arg, &parg); + } + + list_for_each_entry_safe(sess, sess_tmp, &ctxdata->sess_list, + list_node) { + list_del(&sess->list_node); + if (!IS_ERR_OR_NULL(arg)) { + memset(arg, 0, sizeof(*arg)); + arg->cmd = OPTEE_MSG_CMD_CLOSE_SESSION; + arg->session = sess->session_id; + optee_do_call_with_arg(ctx, parg); + } + kfree(sess); + } + kfree(ctxdata); + + if (!IS_ERR(shm)) + tee_shm_free(shm); + + ctx->data = NULL; + + if (teedev == optee->supp_teedev) { + mutex_lock(&optee->supp.ctx_mutex); + optee->supp.ctx = NULL; + mutex_unlock(&optee->supp.ctx_mutex); + } +} + +static struct tee_driver_ops optee_ops = { + .get_version = optee_get_version, + .open = optee_open, + .release = optee_release, + .open_session = optee_open_session, + .close_session = optee_close_session, + .invoke_func = optee_invoke_func, + .cancel_req = optee_cancel_req, +}; + +static struct tee_desc optee_desc = { + .name = DRIVER_NAME "-clnt", + .ops = &optee_ops, + .owner = THIS_MODULE, +}; + +static struct tee_driver_ops optee_supp_ops = { + .get_version = optee_get_version, + .open = optee_open, + .release = optee_release, + .supp_recv = optee_supp_recv, + .supp_send = optee_supp_send, +}; + +static struct tee_desc optee_supp_desc = { + .name = DRIVER_NAME "-supp", + .ops = &optee_supp_ops, + .owner = THIS_MODULE, + .flags = TEE_DESC_PRIVILEGED, +}; + +static bool optee_msg_api_uid_is_optee_api(optee_invoke_fn *invoke_fn) +{ + struct arm_smccc_res res; + + invoke_fn(OPTEE_SMC_CALLS_UID, 0, 0, 0, 0, 0, 0, 0, &res); + + if (res.a0 == OPTEE_MSG_UID_0 && res.a1 == OPTEE_MSG_UID_1 && + res.a2 == OPTEE_MSG_UID_2 && res.a3 == OPTEE_MSG_UID_3) + return true; + return false; +} + +static bool optee_msg_api_revision_is_compatible(optee_invoke_fn *invoke_fn) +{ + union { + struct arm_smccc_res smccc; + struct optee_smc_calls_revision_result result; + } res; + + invoke_fn(OPTEE_SMC_CALLS_REVISION, 0, 0, 0, 0, 0, 0, 0, &res.smccc); + + if (res.result.major == OPTEE_MSG_REVISION_MAJOR && + (int)res.result.minor >= OPTEE_MSG_REVISION_MINOR) + return true; + return false; +} + +static bool optee_msg_exchange_capabilities(optee_invoke_fn *invoke_fn, + u32 *sec_caps) +{ + union { + struct arm_smccc_res smccc; + struct optee_smc_exchange_capabilities_result result; + } res; + u32 a1 = 0; + + /* + * TODO This isn't enough to tell if it's UP system (from kernel + * point of view) or not, is_smp() returns the the information + * needed, but can't be called directly from here. + */ + if (!IS_ENABLED(CONFIG_SMP) || nr_cpu_ids == 1) + a1 |= OPTEE_SMC_NSEC_CAP_UNIPROCESSOR; + + invoke_fn(OPTEE_SMC_EXCHANGE_CAPABILITIES, a1, 0, 0, 0, 0, 0, 0, + &res.smccc); + + if (res.result.status != OPTEE_SMC_RETURN_OK) + return false; + + *sec_caps = res.result.capabilities; + return true; +} + +static struct tee_shm_pool * +optee_config_shm_memremap(optee_invoke_fn *invoke_fn, void **memremaped_shm) +{ + union { + struct arm_smccc_res smccc; + struct optee_smc_get_shm_config_result result; + } res; + struct tee_shm_pool *pool; + unsigned long vaddr; + phys_addr_t paddr; + size_t size; + phys_addr_t begin; + phys_addr_t end; + void *va; + struct tee_shm_pool_mem_info priv_info; + struct tee_shm_pool_mem_info dmabuf_info; + + invoke_fn(OPTEE_SMC_GET_SHM_CONFIG, 0, 0, 0, 0, 0, 0, 0, &res.smccc); + if (res.result.status != OPTEE_SMC_RETURN_OK) { + pr_info("shm service not available\n"); + return ERR_PTR(-ENOENT); + } + + if (res.result.settings != OPTEE_SMC_SHM_CACHED) { + pr_err("only normal cached shared memory supported\n"); + return ERR_PTR(-EINVAL); + } + + begin = roundup(res.result.start, PAGE_SIZE); + end = rounddown(res.result.start + res.result.size, PAGE_SIZE); + paddr = begin; + size = end - begin; + + if (size < 2 * OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE) { + pr_err("too small shared memory area\n"); + return ERR_PTR(-EINVAL); + } + + va = memremap(paddr, size, MEMREMAP_WB); + if (!va) { + pr_err("shared memory ioremap failed\n"); + return ERR_PTR(-EINVAL); + } + vaddr = (unsigned long)va; + + priv_info.vaddr = vaddr; + priv_info.paddr = paddr; + priv_info.size = OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE; + dmabuf_info.vaddr = vaddr + OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE; + dmabuf_info.paddr = paddr + OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE; + dmabuf_info.size = size - OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE; + + pool = tee_shm_pool_alloc_res_mem(&priv_info, &dmabuf_info); + if (IS_ERR(pool)) { + memunmap(va); + goto out; + } + + *memremaped_shm = va; +out: + return pool; +} + +/* Simple wrapper functions to be able to use a function pointer */ +static void optee_smccc_smc(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + unsigned long a4, unsigned long a5, + unsigned long a6, unsigned long a7, + struct arm_smccc_res *res) +{ + arm_smccc_smc(a0, a1, a2, a3, a4, a5, a6, a7, res); +} + +static void optee_smccc_hvc(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + unsigned long a4, unsigned long a5, + unsigned long a6, unsigned long a7, + struct arm_smccc_res *res) +{ + arm_smccc_hvc(a0, a1, a2, a3, a4, a5, a6, a7, res); +} + +static optee_invoke_fn *get_invoke_func(struct device_node *np) +{ + const char *method; + + pr_info("probing for conduit method from DT.\n"); + + if (of_property_read_string(np, "method", &method)) { + pr_warn("missing \"method\" property\n"); + return ERR_PTR(-ENXIO); + } + + if (!strcmp("hvc", method)) + return optee_smccc_hvc; + else if (!strcmp("smc", method)) + return optee_smccc_smc; + + pr_warn("invalid \"method\" property: %s\n", method); + return ERR_PTR(-EINVAL); +} + +static struct optee *optee_probe(struct device_node *np) +{ + optee_invoke_fn *invoke_fn; + struct tee_shm_pool *pool; + struct optee *optee = NULL; + void *memremaped_shm = NULL; + struct tee_device *teedev; + u32 sec_caps; + int rc; + + invoke_fn = get_invoke_func(np); + if (IS_ERR(invoke_fn)) + return (void *)invoke_fn; + + if (!optee_msg_api_uid_is_optee_api(invoke_fn)) { + pr_warn("api uid mismatch\n"); + return ERR_PTR(-EINVAL); + } + + if (!optee_msg_api_revision_is_compatible(invoke_fn)) { + pr_warn("api revision mismatch\n"); + return ERR_PTR(-EINVAL); + } + + if (!optee_msg_exchange_capabilities(invoke_fn, &sec_caps)) { + pr_warn("capabilities mismatch\n"); + return ERR_PTR(-EINVAL); + } + + /* + * We have no other option for shared memory, if secure world + * doesn't have any reserved memory we can use we can't continue. + */ + if (!(sec_caps & OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM)) + return ERR_PTR(-EINVAL); + + pool = optee_config_shm_memremap(invoke_fn, &memremaped_shm); + if (IS_ERR(pool)) + return (void *)pool; + + optee = kzalloc(sizeof(*optee), GFP_KERNEL); + if (!optee) { + rc = -ENOMEM; + goto err; + } + + optee->invoke_fn = invoke_fn; + + teedev = tee_device_alloc(&optee_desc, NULL, pool, optee); + if (IS_ERR(teedev)) { + rc = PTR_ERR(teedev); + goto err; + } + optee->teedev = teedev; + + teedev = tee_device_alloc(&optee_supp_desc, NULL, pool, optee); + if (IS_ERR(teedev)) { + rc = PTR_ERR(teedev); + goto err; + } + optee->supp_teedev = teedev; + + rc = tee_device_register(optee->teedev); + if (rc) + goto err; + + rc = tee_device_register(optee->supp_teedev); + if (rc) + goto err; + + mutex_init(&optee->call_queue.mutex); + INIT_LIST_HEAD(&optee->call_queue.waiters); + optee_wait_queue_init(&optee->wait_queue); + optee_supp_init(&optee->supp); + optee->memremaped_shm = memremaped_shm; + optee->pool = pool; + + optee_enable_shm_cache(optee); + + pr_info("initialized driver\n"); + return optee; +err: + if (optee) { + /* + * tee_device_unregister() is safe to call even if the + * devices hasn't been registered with + * tee_device_register() yet. + */ + tee_device_unregister(optee->supp_teedev); + tee_device_unregister(optee->teedev); + kfree(optee); + } + if (pool) + tee_shm_pool_free(pool); + if (memremaped_shm) + memunmap(memremaped_shm); + return ERR_PTR(rc); +} + +static void optee_remove(struct optee *optee) +{ + /* + * Ask OP-TEE to free all cached shared memory objects to decrease + * reference counters and also avoid wild pointers in secure world + * into the old shared memory range. + */ + optee_disable_shm_cache(optee); + + /* + * The two devices has to be unregistered before we can free the + * other resources. + */ + tee_device_unregister(optee->supp_teedev); + tee_device_unregister(optee->teedev); + + tee_shm_pool_free(optee->pool); + if (optee->memremaped_shm) + memunmap(optee->memremaped_shm); + optee_wait_queue_exit(&optee->wait_queue); + optee_supp_uninit(&optee->supp); + mutex_destroy(&optee->call_queue.mutex); + + kfree(optee); +} + +static const struct of_device_id optee_match[] = { + { .compatible = "linaro,optee-tz" }, + {}, +}; + +static struct optee *optee_svc; + +static int __init optee_driver_init(void) +{ + struct device_node *fw_np; + struct device_node *np; + struct optee *optee; + + /* Node is supposed to be below /firmware */ + fw_np = of_find_node_by_name(NULL, "firmware"); + if (!fw_np) + return -ENODEV; + + np = of_find_matching_node(fw_np, optee_match); + of_node_put(fw_np); + if (!np) + return -ENODEV; + + optee = optee_probe(np); + of_node_put(np); + + if (IS_ERR(optee)) + return PTR_ERR(optee); + + optee_svc = optee; + + return 0; +} +module_init(optee_driver_init); + +static void __exit optee_driver_exit(void) +{ + struct optee *optee = optee_svc; + + optee_svc = NULL; + if (optee) + optee_remove(optee); +} +module_exit(optee_driver_exit); + +MODULE_AUTHOR("Linaro"); +MODULE_DESCRIPTION("OP-TEE driver"); +MODULE_SUPPORTED_DEVICE(""); +MODULE_VERSION("1.0"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/tee/optee/optee_msg.h b/drivers/tee/optee/optee_msg.h new file mode 100644 index 000000000000..dd7a06ee0462 --- /dev/null +++ b/drivers/tee/optee/optee_msg.h @@ -0,0 +1,418 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _OPTEE_MSG_H +#define _OPTEE_MSG_H + +#include +#include + +/* + * This file defines the OP-TEE message protocol used to communicate + * with an instance of OP-TEE running in secure world. + * + * This file is divided into three sections. + * 1. Formatting of messages. + * 2. Requests from normal world + * 3. Requests from secure world, Remote Procedure Call (RPC), handled by + * tee-supplicant. + */ + +/***************************************************************************** + * Part 1 - formatting of messages + *****************************************************************************/ + +#define OPTEE_MSG_ATTR_TYPE_NONE 0x0 +#define OPTEE_MSG_ATTR_TYPE_VALUE_INPUT 0x1 +#define OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT 0x2 +#define OPTEE_MSG_ATTR_TYPE_VALUE_INOUT 0x3 +#define OPTEE_MSG_ATTR_TYPE_RMEM_INPUT 0x5 +#define OPTEE_MSG_ATTR_TYPE_RMEM_OUTPUT 0x6 +#define OPTEE_MSG_ATTR_TYPE_RMEM_INOUT 0x7 +#define OPTEE_MSG_ATTR_TYPE_TMEM_INPUT 0x9 +#define OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT 0xa +#define OPTEE_MSG_ATTR_TYPE_TMEM_INOUT 0xb + +#define OPTEE_MSG_ATTR_TYPE_MASK GENMASK(7, 0) + +/* + * Meta parameter to be absorbed by the Secure OS and not passed + * to the Trusted Application. + * + * Currently only used with OPTEE_MSG_CMD_OPEN_SESSION. + */ +#define OPTEE_MSG_ATTR_META BIT(8) + +/* + * The temporary shared memory object is not physically contigous and this + * temp memref is followed by another fragment until the last temp memref + * that doesn't have this bit set. + */ +#define OPTEE_MSG_ATTR_FRAGMENT BIT(9) + +/* + * Memory attributes for caching passed with temp memrefs. The actual value + * used is defined outside the message protocol with the exception of + * OPTEE_MSG_ATTR_CACHE_PREDEFINED which means the attributes already + * defined for the memory range should be used. If optee_smc.h is used as + * bearer of this protocol OPTEE_SMC_SHM_* is used for values. + */ +#define OPTEE_MSG_ATTR_CACHE_SHIFT 16 +#define OPTEE_MSG_ATTR_CACHE_MASK GENMASK(2, 0) +#define OPTEE_MSG_ATTR_CACHE_PREDEFINED 0 + +/* + * Same values as TEE_LOGIN_* from TEE Internal API + */ +#define OPTEE_MSG_LOGIN_PUBLIC 0x00000000 +#define OPTEE_MSG_LOGIN_USER 0x00000001 +#define OPTEE_MSG_LOGIN_GROUP 0x00000002 +#define OPTEE_MSG_LOGIN_APPLICATION 0x00000004 +#define OPTEE_MSG_LOGIN_APPLICATION_USER 0x00000005 +#define OPTEE_MSG_LOGIN_APPLICATION_GROUP 0x00000006 + +/** + * struct optee_msg_param_tmem - temporary memory reference parameter + * @buf_ptr: Address of the buffer + * @size: Size of the buffer + * @shm_ref: Temporary shared memory reference, pointer to a struct tee_shm + * + * Secure and normal world communicates pointers as physical address + * instead of the virtual address. This is because secure and normal world + * have completely independent memory mapping. Normal world can even have a + * hypervisor which need to translate the guest physical address (AKA IPA + * in ARM documentation) to a real physical address before passing the + * structure to secure world. + */ +struct optee_msg_param_tmem { + u64 buf_ptr; + u64 size; + u64 shm_ref; +}; + +/** + * struct optee_msg_param_rmem - registered memory reference parameter + * @offs: Offset into shared memory reference + * @size: Size of the buffer + * @shm_ref: Shared memory reference, pointer to a struct tee_shm + */ +struct optee_msg_param_rmem { + u64 offs; + u64 size; + u64 shm_ref; +}; + +/** + * struct optee_msg_param_value - opaque value parameter + * + * Value parameters are passed unchecked between normal and secure world. + */ +struct optee_msg_param_value { + u64 a; + u64 b; + u64 c; +}; + +/** + * struct optee_msg_param - parameter used together with struct optee_msg_arg + * @attr: attributes + * @tmem: parameter by temporary memory reference + * @rmem: parameter by registered memory reference + * @value: parameter by opaque value + * + * @attr & OPTEE_MSG_ATTR_TYPE_MASK indicates if tmem, rmem or value is used in + * the union. OPTEE_MSG_ATTR_TYPE_VALUE_* indicates value, + * OPTEE_MSG_ATTR_TYPE_TMEM_* indicates tmem and + * OPTEE_MSG_ATTR_TYPE_RMEM_* indicates rmem. + * OPTEE_MSG_ATTR_TYPE_NONE indicates that none of the members are used. + */ +struct optee_msg_param { + u64 attr; + union { + struct optee_msg_param_tmem tmem; + struct optee_msg_param_rmem rmem; + struct optee_msg_param_value value; + } u; +}; + +/** + * struct optee_msg_arg - call argument + * @cmd: Command, one of OPTEE_MSG_CMD_* or OPTEE_MSG_RPC_CMD_* + * @func: Trusted Application function, specific to the Trusted Application, + * used if cmd == OPTEE_MSG_CMD_INVOKE_COMMAND + * @session: In parameter for all OPTEE_MSG_CMD_* except + * OPTEE_MSG_CMD_OPEN_SESSION where it's an output parameter instead + * @cancel_id: Cancellation id, a unique value to identify this request + * @ret: return value + * @ret_origin: origin of the return value + * @num_params: number of parameters supplied to the OS Command + * @params: the parameters supplied to the OS Command + * + * All normal calls to Trusted OS uses this struct. If cmd requires further + * information than what these field holds it can be passed as a parameter + * tagged as meta (setting the OPTEE_MSG_ATTR_META bit in corresponding + * attrs field). All parameters tagged as meta has to come first. + * + * Temp memref parameters can be fragmented if supported by the Trusted OS + * (when optee_smc.h is bearer of this protocol this is indicated with + * OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM). If a logical memref parameter is + * fragmented then has all but the last fragment the + * OPTEE_MSG_ATTR_FRAGMENT bit set in attrs. Even if a memref is fragmented + * it will still be presented as a single logical memref to the Trusted + * Application. + */ +struct optee_msg_arg { + u32 cmd; + u32 func; + u32 session; + u32 cancel_id; + u32 pad; + u32 ret; + u32 ret_origin; + u32 num_params; + + /* num_params tells the actual number of element in params */ + struct optee_msg_param params[0]; +}; + +/** + * OPTEE_MSG_GET_ARG_SIZE - return size of struct optee_msg_arg + * + * @num_params: Number of parameters embedded in the struct optee_msg_arg + * + * Returns the size of the struct optee_msg_arg together with the number + * of embedded parameters. + */ +#define OPTEE_MSG_GET_ARG_SIZE(num_params) \ + (sizeof(struct optee_msg_arg) + \ + sizeof(struct optee_msg_param) * (num_params)) + +/***************************************************************************** + * Part 2 - requests from normal world + *****************************************************************************/ + +/* + * Return the following UID if using API specified in this file without + * further extensions: + * 384fb3e0-e7f8-11e3-af63-0002a5d5c51b. + * Represented in 4 32-bit words in OPTEE_MSG_UID_0, OPTEE_MSG_UID_1, + * OPTEE_MSG_UID_2, OPTEE_MSG_UID_3. + */ +#define OPTEE_MSG_UID_0 0x384fb3e0 +#define OPTEE_MSG_UID_1 0xe7f811e3 +#define OPTEE_MSG_UID_2 0xaf630002 +#define OPTEE_MSG_UID_3 0xa5d5c51b +#define OPTEE_MSG_FUNCID_CALLS_UID 0xFF01 + +/* + * Returns 2.0 if using API specified in this file without further + * extensions. Represented in 2 32-bit words in OPTEE_MSG_REVISION_MAJOR + * and OPTEE_MSG_REVISION_MINOR + */ +#define OPTEE_MSG_REVISION_MAJOR 2 +#define OPTEE_MSG_REVISION_MINOR 0 +#define OPTEE_MSG_FUNCID_CALLS_REVISION 0xFF03 + +/* + * Get UUID of Trusted OS. + * + * Used by non-secure world to figure out which Trusted OS is installed. + * Note that returned UUID is the UUID of the Trusted OS, not of the API. + * + * Returns UUID in 4 32-bit words in the same way as + * OPTEE_MSG_FUNCID_CALLS_UID described above. + */ +#define OPTEE_MSG_OS_OPTEE_UUID_0 0x486178e0 +#define OPTEE_MSG_OS_OPTEE_UUID_1 0xe7f811e3 +#define OPTEE_MSG_OS_OPTEE_UUID_2 0xbc5e0002 +#define OPTEE_MSG_OS_OPTEE_UUID_3 0xa5d5c51b +#define OPTEE_MSG_FUNCID_GET_OS_UUID 0x0000 + +/* + * Get revision of Trusted OS. + * + * Used by non-secure world to figure out which version of the Trusted OS + * is installed. Note that the returned revision is the revision of the + * Trusted OS, not of the API. + * + * Returns revision in 2 32-bit words in the same way as + * OPTEE_MSG_CALLS_REVISION described above. + */ +#define OPTEE_MSG_FUNCID_GET_OS_REVISION 0x0001 + +/* + * Do a secure call with struct optee_msg_arg as argument + * The OPTEE_MSG_CMD_* below defines what goes in struct optee_msg_arg::cmd + * + * OPTEE_MSG_CMD_OPEN_SESSION opens a session to a Trusted Application. + * The first two parameters are tagged as meta, holding two value + * parameters to pass the following information: + * param[0].u.value.a-b uuid of Trusted Application + * param[1].u.value.a-b uuid of Client + * param[1].u.value.c Login class of client OPTEE_MSG_LOGIN_* + * + * OPTEE_MSG_CMD_INVOKE_COMMAND invokes a command a previously opened + * session to a Trusted Application. struct optee_msg_arg::func is Trusted + * Application function, specific to the Trusted Application. + * + * OPTEE_MSG_CMD_CLOSE_SESSION closes a previously opened session to + * Trusted Application. + * + * OPTEE_MSG_CMD_CANCEL cancels a currently invoked command. + * + * OPTEE_MSG_CMD_REGISTER_SHM registers a shared memory reference. The + * information is passed as: + * [in] param[0].attr OPTEE_MSG_ATTR_TYPE_TMEM_INPUT + * [| OPTEE_MSG_ATTR_FRAGMENT] + * [in] param[0].u.tmem.buf_ptr physical address (of first fragment) + * [in] param[0].u.tmem.size size (of first fragment) + * [in] param[0].u.tmem.shm_ref holds shared memory reference + * ... + * The shared memory can optionally be fragmented, temp memrefs can follow + * each other with all but the last with the OPTEE_MSG_ATTR_FRAGMENT bit set. + * + * OPTEE_MSG_CMD_UNREGISTER_SHM unregisteres a previously registered shared + * memory reference. The information is passed as: + * [in] param[0].attr OPTEE_MSG_ATTR_TYPE_RMEM_INPUT + * [in] param[0].u.rmem.shm_ref holds shared memory reference + * [in] param[0].u.rmem.offs 0 + * [in] param[0].u.rmem.size 0 + */ +#define OPTEE_MSG_CMD_OPEN_SESSION 0 +#define OPTEE_MSG_CMD_INVOKE_COMMAND 1 +#define OPTEE_MSG_CMD_CLOSE_SESSION 2 +#define OPTEE_MSG_CMD_CANCEL 3 +#define OPTEE_MSG_CMD_REGISTER_SHM 4 +#define OPTEE_MSG_CMD_UNREGISTER_SHM 5 +#define OPTEE_MSG_FUNCID_CALL_WITH_ARG 0x0004 + +/***************************************************************************** + * Part 3 - Requests from secure world, RPC + *****************************************************************************/ + +/* + * All RPC is done with a struct optee_msg_arg as bearer of information, + * struct optee_msg_arg::arg holds values defined by OPTEE_MSG_RPC_CMD_* below + * + * RPC communication with tee-supplicant is reversed compared to normal + * client communication desribed above. The supplicant receives requests + * and sends responses. + */ + +/* + * Load a TA into memory, defined in tee-supplicant + */ +#define OPTEE_MSG_RPC_CMD_LOAD_TA 0 + +/* + * Reserved + */ +#define OPTEE_MSG_RPC_CMD_RPMB 1 + +/* + * File system access, defined in tee-supplicant + */ +#define OPTEE_MSG_RPC_CMD_FS 2 + +/* + * Get time + * + * Returns number of seconds and nano seconds since the Epoch, + * 1970-01-01 00:00:00 +0000 (UTC). + * + * [out] param[0].u.value.a Number of seconds + * [out] param[0].u.value.b Number of nano seconds. + */ +#define OPTEE_MSG_RPC_CMD_GET_TIME 3 + +/* + * Wait queue primitive, helper for secure world to implement a wait queue. + * + * If secure world need to wait for a secure world mutex it issues a sleep + * request instead of spinning in secure world. Conversely is a wakeup + * request issued when a secure world mutex with a thread waiting thread is + * unlocked. + * + * Waiting on a key + * [in] param[0].u.value.a OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP + * [in] param[0].u.value.b wait key + * + * Waking up a key + * [in] param[0].u.value.a OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP + * [in] param[0].u.value.b wakeup key + */ +#define OPTEE_MSG_RPC_CMD_WAIT_QUEUE 4 +#define OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP 0 +#define OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP 1 + +/* + * Suspend execution + * + * [in] param[0].value .a number of milliseconds to suspend + */ +#define OPTEE_MSG_RPC_CMD_SUSPEND 5 + +/* + * Allocate a piece of shared memory + * + * Shared memory can optionally be fragmented, to support that additional + * spare param entries are allocated to make room for eventual fragments. + * The spare param entries has .attr = OPTEE_MSG_ATTR_TYPE_NONE when + * unused. All returned temp memrefs except the last should have the + * OPTEE_MSG_ATTR_FRAGMENT bit set in the attr field. + * + * [in] param[0].u.value.a type of memory one of + * OPTEE_MSG_RPC_SHM_TYPE_* below + * [in] param[0].u.value.b requested size + * [in] param[0].u.value.c required alignment + * + * [out] param[0].u.tmem.buf_ptr physical address (of first fragment) + * [out] param[0].u.tmem.size size (of first fragment) + * [out] param[0].u.tmem.shm_ref shared memory reference + * ... + * [out] param[n].u.tmem.buf_ptr physical address + * [out] param[n].u.tmem.size size + * [out] param[n].u.tmem.shm_ref shared memory reference (same value + * as in param[n-1].u.tmem.shm_ref) + */ +#define OPTEE_MSG_RPC_CMD_SHM_ALLOC 6 +/* Memory that can be shared with a non-secure user space application */ +#define OPTEE_MSG_RPC_SHM_TYPE_APPL 0 +/* Memory only shared with non-secure kernel */ +#define OPTEE_MSG_RPC_SHM_TYPE_KERNEL 1 + +/* + * Free shared memory previously allocated with OPTEE_MSG_RPC_CMD_SHM_ALLOC + * + * [in] param[0].u.value.a type of memory one of + * OPTEE_MSG_RPC_SHM_TYPE_* above + * [in] param[0].u.value.b value of shared memory reference + * returned in param[0].u.tmem.shm_ref + * above + */ +#define OPTEE_MSG_RPC_CMD_SHM_FREE 7 + +#endif /* _OPTEE_MSG_H */ diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h new file mode 100644 index 000000000000..c374cd594314 --- /dev/null +++ b/drivers/tee/optee/optee_private.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef OPTEE_PRIVATE_H +#define OPTEE_PRIVATE_H + +#include +#include +#include +#include +#include "optee_msg.h" + +#define OPTEE_MAX_ARG_SIZE 1024 + +/* Some Global Platform error codes used in this driver */ +#define TEEC_SUCCESS 0x00000000 +#define TEEC_ERROR_BAD_PARAMETERS 0xFFFF0006 +#define TEEC_ERROR_COMMUNICATION 0xFFFF000E +#define TEEC_ERROR_OUT_OF_MEMORY 0xFFFF000C + +#define TEEC_ORIGIN_COMMS 0x00000002 + +typedef void (optee_invoke_fn)(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, + struct arm_smccc_res *); + +struct optee_call_queue { + /* Serializes access to this struct */ + struct mutex mutex; + struct list_head waiters; +}; + +struct optee_wait_queue { + /* Serializes access to this struct */ + struct mutex mu; + struct list_head db; +}; + +/** + * struct optee_supp - supplicant synchronization struct + * @ctx the context of current connected supplicant. + * if !NULL the supplicant device is available for use, + * else busy + * @ctx_mutex: held while accessing @ctx + * @func: supplicant function id to call + * @ret: call return value + * @num_params: number of elements in @param + * @param: parameters for @func + * @req_posted: if true, a request has been posted to the supplicant + * @supp_next_send: if true, next step is for supplicant to send response + * @thrd_mutex: held by the thread doing a request to supplicant + * @supp_mutex: held by supplicant while operating on this struct + * @data_to_supp: supplicant is waiting on this for next request + * @data_from_supp: requesting thread is waiting on this to get the result + */ +struct optee_supp { + struct tee_context *ctx; + /* Serializes access of ctx */ + struct mutex ctx_mutex; + + u32 func; + u32 ret; + size_t num_params; + struct tee_param *param; + + bool req_posted; + bool supp_next_send; + /* Serializes access to this struct for requesting thread */ + struct mutex thrd_mutex; + /* Serializes access to this struct for supplicant threads */ + struct mutex supp_mutex; + struct completion data_to_supp; + struct completion data_from_supp; +}; + +/** + * struct optee - main service struct + * @supp_teedev: supplicant device + * @teedev: client device + * @invoke_fn: function to issue smc or hvc + * @call_queue: queue of threads waiting to call @invoke_fn + * @wait_queue: queue of threads from secure world waiting for a + * secure world sync object + * @supp: supplicant synchronization struct for RPC to supplicant + * @pool: shared memory pool + * @memremaped_shm virtual address of memory in shared memory pool + */ +struct optee { + struct tee_device *supp_teedev; + struct tee_device *teedev; + optee_invoke_fn *invoke_fn; + struct optee_call_queue call_queue; + struct optee_wait_queue wait_queue; + struct optee_supp supp; + struct tee_shm_pool *pool; + void *memremaped_shm; +}; + +struct optee_session { + struct list_head list_node; + u32 session_id; +}; + +struct optee_context_data { + /* Serializes access to this struct */ + struct mutex mutex; + struct list_head sess_list; +}; + +struct optee_rpc_param { + u32 a0; + u32 a1; + u32 a2; + u32 a3; + u32 a4; + u32 a5; + u32 a6; + u32 a7; +}; + +void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param); + +void optee_wait_queue_init(struct optee_wait_queue *wq); +void optee_wait_queue_exit(struct optee_wait_queue *wq); + +u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, + struct tee_param *param); + +int optee_supp_read(struct tee_context *ctx, void __user *buf, size_t len); +int optee_supp_write(struct tee_context *ctx, void __user *buf, size_t len); +void optee_supp_init(struct optee_supp *supp); +void optee_supp_uninit(struct optee_supp *supp); + +int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params, + struct tee_param *param); +int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params, + struct tee_param *param); + +u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg); +int optee_open_session(struct tee_context *ctx, + struct tee_ioctl_open_session_arg *arg, + struct tee_param *param); +int optee_close_session(struct tee_context *ctx, u32 session); +int optee_invoke_func(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg, + struct tee_param *param); +int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session); + +void optee_enable_shm_cache(struct optee *optee); +void optee_disable_shm_cache(struct optee *optee); + +int optee_from_msg_param(struct tee_param *params, size_t num_params, + const struct optee_msg_param *msg_params); +int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params, + const struct tee_param *params); + +/* + * Small helpers + */ + +static inline void *reg_pair_to_ptr(u32 reg0, u32 reg1) +{ + return (void *)(unsigned long)(((u64)reg0 << 32) | reg1); +} + +static inline void reg_pair_from_64(u32 *reg0, u32 *reg1, u64 val) +{ + *reg0 = val >> 32; + *reg1 = val; +} + +#endif /*OPTEE_PRIVATE_H*/ diff --git a/drivers/tee/optee/optee_smc.h b/drivers/tee/optee/optee_smc.h new file mode 100644 index 000000000000..13b7c98cdf25 --- /dev/null +++ b/drivers/tee/optee/optee_smc.h @@ -0,0 +1,450 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef OPTEE_SMC_H +#define OPTEE_SMC_H + +#include +#include + +#define OPTEE_SMC_STD_CALL_VAL(func_num) \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL, ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_TRUSTED_OS, (func_num)) +#define OPTEE_SMC_FAST_CALL_VAL(func_num) \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_TRUSTED_OS, (func_num)) + +/* + * Function specified by SMC Calling convention. + */ +#define OPTEE_SMC_FUNCID_CALLS_COUNT 0xFF00 +#define OPTEE_SMC_CALLS_COUNT \ + ARM_SMCCC_CALL_VAL(OPTEE_SMC_FAST_CALL, SMCCC_SMC_32, \ + SMCCC_OWNER_TRUSTED_OS_END, \ + OPTEE_SMC_FUNCID_CALLS_COUNT) + +/* + * Normal cached memory (write-back), shareable for SMP systems and not + * shareable for UP systems. + */ +#define OPTEE_SMC_SHM_CACHED 1 + +/* + * a0..a7 is used as register names in the descriptions below, on arm32 + * that translates to r0..r7 and on arm64 to w0..w7. In both cases it's + * 32-bit registers. + */ + +/* + * Function specified by SMC Calling convention + * + * Return one of the following UIDs if using API specified in this file + * without further extentions: + * 65cb6b93-af0c-4617-8ed6-644a8d1140f8 + * see also OPTEE_SMC_UID_* in optee_msg.h + */ +#define OPTEE_SMC_FUNCID_CALLS_UID OPTEE_MSG_FUNCID_CALLS_UID +#define OPTEE_SMC_CALLS_UID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_TRUSTED_OS_END, \ + OPTEE_SMC_FUNCID_CALLS_UID) + +/* + * Function specified by SMC Calling convention + * + * Returns 2.0 if using API specified in this file without further extentions. + * see also OPTEE_MSG_REVISION_* in optee_msg.h + */ +#define OPTEE_SMC_FUNCID_CALLS_REVISION OPTEE_MSG_FUNCID_CALLS_REVISION +#define OPTEE_SMC_CALLS_REVISION \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_TRUSTED_OS_END, \ + OPTEE_SMC_FUNCID_CALLS_REVISION) + +struct optee_smc_calls_revision_result { + unsigned long major; + unsigned long minor; + unsigned long reserved0; + unsigned long reserved1; +}; + +/* + * Get UUID of Trusted OS. + * + * Used by non-secure world to figure out which Trusted OS is installed. + * Note that returned UUID is the UUID of the Trusted OS, not of the API. + * + * Returns UUID in a0-4 in the same way as OPTEE_SMC_CALLS_UID + * described above. + */ +#define OPTEE_SMC_FUNCID_GET_OS_UUID OPTEE_MSG_FUNCID_GET_OS_UUID +#define OPTEE_SMC_CALL_GET_OS_UUID \ + OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_OS_UUID) + +/* + * Get revision of Trusted OS. + * + * Used by non-secure world to figure out which version of the Trusted OS + * is installed. Note that the returned revision is the revision of the + * Trusted OS, not of the API. + * + * Returns revision in a0-1 in the same way as OPTEE_SMC_CALLS_REVISION + * described above. + */ +#define OPTEE_SMC_FUNCID_GET_OS_REVISION OPTEE_MSG_FUNCID_GET_OS_REVISION +#define OPTEE_SMC_CALL_GET_OS_REVISION \ + OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_OS_REVISION) + +/* + * Call with struct optee_msg_arg as argument + * + * Call register usage: + * a0 SMC Function ID, OPTEE_SMC*CALL_WITH_ARG + * a1 Upper 32bit of a 64bit physical pointer to a struct optee_msg_arg + * a2 Lower 32bit of a 64bit physical pointer to a struct optee_msg_arg + * a3 Cache settings, not used if physical pointer is in a predefined shared + * memory area else per OPTEE_SMC_SHM_* + * a4-6 Not used + * a7 Hypervisor Client ID register + * + * Normal return register usage: + * a0 Return value, OPTEE_SMC_RETURN_* + * a1-3 Not used + * a4-7 Preserved + * + * OPTEE_SMC_RETURN_ETHREAD_LIMIT return register usage: + * a0 Return value, OPTEE_SMC_RETURN_ETHREAD_LIMIT + * a1-3 Preserved + * a4-7 Preserved + * + * RPC return register usage: + * a0 Return value, OPTEE_SMC_RETURN_IS_RPC(val) + * a1-2 RPC parameters + * a3-7 Resume information, must be preserved + * + * Possible return values: + * OPTEE_SMC_RETURN_UNKNOWN_FUNCTION Trusted OS does not recognize this + * function. + * OPTEE_SMC_RETURN_OK Call completed, result updated in + * the previously supplied struct + * optee_msg_arg. + * OPTEE_SMC_RETURN_ETHREAD_LIMIT Number of Trusted OS threads exceeded, + * try again later. + * OPTEE_SMC_RETURN_EBADADDR Bad physcial pointer to struct + * optee_msg_arg. + * OPTEE_SMC_RETURN_EBADCMD Bad/unknown cmd in struct optee_msg_arg + * OPTEE_SMC_RETURN_IS_RPC() Call suspended by RPC call to normal + * world. + */ +#define OPTEE_SMC_FUNCID_CALL_WITH_ARG OPTEE_MSG_FUNCID_CALL_WITH_ARG +#define OPTEE_SMC_CALL_WITH_ARG \ + OPTEE_SMC_STD_CALL_VAL(OPTEE_SMC_FUNCID_CALL_WITH_ARG) + +/* + * Get Shared Memory Config + * + * Returns the Secure/Non-secure shared memory config. + * + * Call register usage: + * a0 SMC Function ID, OPTEE_SMC_GET_SHM_CONFIG + * a1-6 Not used + * a7 Hypervisor Client ID register + * + * Have config return register usage: + * a0 OPTEE_SMC_RETURN_OK + * a1 Physical address of start of SHM + * a2 Size of of SHM + * a3 Cache settings of memory, as defined by the + * OPTEE_SMC_SHM_* values above + * a4-7 Preserved + * + * Not available register usage: + * a0 OPTEE_SMC_RETURN_ENOTAVAIL + * a1-3 Not used + * a4-7 Preserved + */ +#define OPTEE_SMC_FUNCID_GET_SHM_CONFIG 7 +#define OPTEE_SMC_GET_SHM_CONFIG \ + OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_SHM_CONFIG) + +struct optee_smc_get_shm_config_result { + unsigned long status; + unsigned long start; + unsigned long size; + unsigned long settings; +}; + +/* + * Exchanges capabilities between normal world and secure world + * + * Call register usage: + * a0 SMC Function ID, OPTEE_SMC_EXCHANGE_CAPABILITIES + * a1 bitfield of normal world capabilities OPTEE_SMC_NSEC_CAP_* + * a2-6 Not used + * a7 Hypervisor Client ID register + * + * Normal return register usage: + * a0 OPTEE_SMC_RETURN_OK + * a1 bitfield of secure world capabilities OPTEE_SMC_SEC_CAP_* + * a2-7 Preserved + * + * Error return register usage: + * a0 OPTEE_SMC_RETURN_ENOTAVAIL, can't use the capabilities from normal world + * a1 bitfield of secure world capabilities OPTEE_SMC_SEC_CAP_* + * a2-7 Preserved + */ +/* Normal world works as a uniprocessor system */ +#define OPTEE_SMC_NSEC_CAP_UNIPROCESSOR BIT(0) +/* Secure world has reserved shared memory for normal world to use */ +#define OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM BIT(0) +/* Secure world can communicate via previously unregistered shared memory */ +#define OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM BIT(1) +#define OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES 9 +#define OPTEE_SMC_EXCHANGE_CAPABILITIES \ + OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES) + +struct optee_smc_exchange_capabilities_result { + unsigned long status; + unsigned long capabilities; + unsigned long reserved0; + unsigned long reserved1; +}; + +/* + * Disable and empties cache of shared memory objects + * + * Secure world can cache frequently used shared memory objects, for + * example objects used as RPC arguments. When secure world is idle this + * function returns one shared memory reference to free. To disable the + * cache and free all cached objects this function has to be called until + * it returns OPTEE_SMC_RETURN_ENOTAVAIL. + * + * Call register usage: + * a0 SMC Function ID, OPTEE_SMC_DISABLE_SHM_CACHE + * a1-6 Not used + * a7 Hypervisor Client ID register + * + * Normal return register usage: + * a0 OPTEE_SMC_RETURN_OK + * a1 Upper 32bit of a 64bit Shared memory cookie + * a2 Lower 32bit of a 64bit Shared memory cookie + * a3-7 Preserved + * + * Cache empty return register usage: + * a0 OPTEE_SMC_RETURN_ENOTAVAIL + * a1-7 Preserved + * + * Not idle return register usage: + * a0 OPTEE_SMC_RETURN_EBUSY + * a1-7 Preserved + */ +#define OPTEE_SMC_FUNCID_DISABLE_SHM_CACHE 10 +#define OPTEE_SMC_DISABLE_SHM_CACHE \ + OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_DISABLE_SHM_CACHE) + +struct optee_smc_disable_shm_cache_result { + unsigned long status; + unsigned long shm_upper32; + unsigned long shm_lower32; + unsigned long reserved0; +}; + +/* + * Enable cache of shared memory objects + * + * Secure world can cache frequently used shared memory objects, for + * example objects used as RPC arguments. When secure world is idle this + * function returns OPTEE_SMC_RETURN_OK and the cache is enabled. If + * secure world isn't idle OPTEE_SMC_RETURN_EBUSY is returned. + * + * Call register usage: + * a0 SMC Function ID, OPTEE_SMC_ENABLE_SHM_CACHE + * a1-6 Not used + * a7 Hypervisor Client ID register + * + * Normal return register usage: + * a0 OPTEE_SMC_RETURN_OK + * a1-7 Preserved + * + * Not idle return register usage: + * a0 OPTEE_SMC_RETURN_EBUSY + * a1-7 Preserved + */ +#define OPTEE_SMC_FUNCID_ENABLE_SHM_CACHE 11 +#define OPTEE_SMC_ENABLE_SHM_CACHE \ + OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_ENABLE_SHM_CACHE) + +/* + * Resume from RPC (for example after processing an IRQ) + * + * Call register usage: + * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC + * a1-3 Value of a1-3 when OPTEE_SMC_CALL_WITH_ARG returned + * OPTEE_SMC_RETURN_RPC in a0 + * + * Return register usage is the same as for OPTEE_SMC_*CALL_WITH_ARG above. + * + * Possible return values + * OPTEE_SMC_RETURN_UNKNOWN_FUNCTION Trusted OS does not recognize this + * function. + * OPTEE_SMC_RETURN_OK Original call completed, result + * updated in the previously supplied. + * struct optee_msg_arg + * OPTEE_SMC_RETURN_RPC Call suspended by RPC call to normal + * world. + * OPTEE_SMC_RETURN_ERESUME Resume failed, the opaque resume + * information was corrupt. + */ +#define OPTEE_SMC_FUNCID_RETURN_FROM_RPC 3 +#define OPTEE_SMC_CALL_RETURN_FROM_RPC \ + OPTEE_SMC_STD_CALL_VAL(OPTEE_SMC_FUNCID_RETURN_FROM_RPC) + +#define OPTEE_SMC_RETURN_RPC_PREFIX_MASK 0xFFFF0000 +#define OPTEE_SMC_RETURN_RPC_PREFIX 0xFFFF0000 +#define OPTEE_SMC_RETURN_RPC_FUNC_MASK 0x0000FFFF + +#define OPTEE_SMC_RETURN_GET_RPC_FUNC(ret) \ + ((ret) & OPTEE_SMC_RETURN_RPC_FUNC_MASK) + +#define OPTEE_SMC_RPC_VAL(func) ((func) | OPTEE_SMC_RETURN_RPC_PREFIX) + +/* + * Allocate memory for RPC parameter passing. The memory is used to hold a + * struct optee_msg_arg. + * + * "Call" register usage: + * a0 This value, OPTEE_SMC_RETURN_RPC_ALLOC + * a1 Size in bytes of required argument memory + * a2 Not used + * a3 Resume information, must be preserved + * a4-5 Not used + * a6-7 Resume information, must be preserved + * + * "Return" register usage: + * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC. + * a1 Upper 32bits of 64bit physical pointer to allocated + * memory, (a1 == 0 && a2 == 0) if size was 0 or if memory can't + * be allocated. + * a2 Lower 32bits of 64bit physical pointer to allocated + * memory, (a1 == 0 && a2 == 0) if size was 0 or if memory can't + * be allocated + * a3 Preserved + * a4 Upper 32bits of 64bit Shared memory cookie used when freeing + * the memory or doing an RPC + * a5 Lower 32bits of 64bit Shared memory cookie used when freeing + * the memory or doing an RPC + * a6-7 Preserved + */ +#define OPTEE_SMC_RPC_FUNC_ALLOC 0 +#define OPTEE_SMC_RETURN_RPC_ALLOC \ + OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_ALLOC) + +/* + * Free memory previously allocated by OPTEE_SMC_RETURN_RPC_ALLOC + * + * "Call" register usage: + * a0 This value, OPTEE_SMC_RETURN_RPC_FREE + * a1 Upper 32bits of 64bit shared memory cookie belonging to this + * argument memory + * a2 Lower 32bits of 64bit shared memory cookie belonging to this + * argument memory + * a3-7 Resume information, must be preserved + * + * "Return" register usage: + * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC. + * a1-2 Not used + * a3-7 Preserved + */ +#define OPTEE_SMC_RPC_FUNC_FREE 2 +#define OPTEE_SMC_RETURN_RPC_FREE \ + OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_FREE) + +/* + * Deliver an IRQ in normal world. + * + * "Call" register usage: + * a0 OPTEE_SMC_RETURN_RPC_IRQ + * a1-7 Resume information, must be preserved + * + * "Return" register usage: + * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC. + * a1-7 Preserved + */ +#define OPTEE_SMC_RPC_FUNC_IRQ 4 +#define OPTEE_SMC_RETURN_RPC_IRQ \ + OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_IRQ) + +/* + * Do an RPC request. The supplied struct optee_msg_arg tells which + * request to do and the parameters for the request. The following fields + * are used (the rest are unused): + * - cmd the Request ID + * - ret return value of the request, filled in by normal world + * - num_params number of parameters for the request + * - params the parameters + * - param_attrs attributes of the parameters + * + * "Call" register usage: + * a0 OPTEE_SMC_RETURN_RPC_CMD + * a1 Upper 32bit of a 64bit Shared memory cookie holding a + * struct optee_msg_arg, must be preserved, only the data should + * be updated + * a2 Lower 32bit of a 64bit Shared memory cookie holding a + * struct optee_msg_arg, must be preserved, only the data should + * be updated + * a3-7 Resume information, must be preserved + * + * "Return" register usage: + * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC. + * a1-2 Not used + * a3-7 Preserved + */ +#define OPTEE_SMC_RPC_FUNC_CMD 5 +#define OPTEE_SMC_RETURN_RPC_CMD \ + OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_CMD) + +/* Returned in a0 */ +#define OPTEE_SMC_RETURN_UNKNOWN_FUNCTION 0xFFFFFFFF + +/* Returned in a0 only from Trusted OS functions */ +#define OPTEE_SMC_RETURN_OK 0x0 +#define OPTEE_SMC_RETURN_ETHREAD_LIMIT 0x1 +#define OPTEE_SMC_RETURN_EBUSY 0x2 +#define OPTEE_SMC_RETURN_ERESUME 0x3 +#define OPTEE_SMC_RETURN_EBADADDR 0x4 +#define OPTEE_SMC_RETURN_EBADCMD 0x5 +#define OPTEE_SMC_RETURN_ENOMEM 0x6 +#define OPTEE_SMC_RETURN_ENOTAVAIL 0x7 +#define OPTEE_SMC_RETURN_IS_RPC(ret) __optee_smc_return_is_rpc((ret)) + +static inline bool __optee_smc_return_is_rpc(u32 ret) +{ + return ret != OPTEE_SMC_RETURN_UNKNOWN_FUNCTION && + (ret & OPTEE_SMC_RETURN_RPC_PREFIX_MASK) == + OPTEE_SMC_RETURN_RPC_PREFIX; +} + +#endif /* OPTEE_SMC_H */ diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c new file mode 100644 index 000000000000..8814eca06021 --- /dev/null +++ b/drivers/tee/optee/rpc.c @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include "optee_private.h" +#include "optee_smc.h" + +struct wq_entry { + struct list_head link; + struct completion c; + u32 key; +}; + +void optee_wait_queue_init(struct optee_wait_queue *priv) +{ + mutex_init(&priv->mu); + INIT_LIST_HEAD(&priv->db); +} + +void optee_wait_queue_exit(struct optee_wait_queue *priv) +{ + mutex_destroy(&priv->mu); +} + +static void handle_rpc_func_cmd_get_time(struct optee_msg_arg *arg) +{ + struct timespec64 ts; + + if (arg->num_params != 1) + goto bad; + if ((arg->params[0].attr & OPTEE_MSG_ATTR_TYPE_MASK) != + OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT) + goto bad; + + getnstimeofday64(&ts); + arg->params[0].u.value.a = ts.tv_sec; + arg->params[0].u.value.b = ts.tv_nsec; + + arg->ret = TEEC_SUCCESS; + return; +bad: + arg->ret = TEEC_ERROR_BAD_PARAMETERS; +} + +static struct wq_entry *wq_entry_get(struct optee_wait_queue *wq, u32 key) +{ + struct wq_entry *w; + + mutex_lock(&wq->mu); + + list_for_each_entry(w, &wq->db, link) + if (w->key == key) + goto out; + + w = kmalloc(sizeof(*w), GFP_KERNEL); + if (w) { + init_completion(&w->c); + w->key = key; + list_add_tail(&w->link, &wq->db); + } +out: + mutex_unlock(&wq->mu); + return w; +} + +static void wq_sleep(struct optee_wait_queue *wq, u32 key) +{ + struct wq_entry *w = wq_entry_get(wq, key); + + if (w) { + wait_for_completion(&w->c); + mutex_lock(&wq->mu); + list_del(&w->link); + mutex_unlock(&wq->mu); + kfree(w); + } +} + +static void wq_wakeup(struct optee_wait_queue *wq, u32 key) +{ + struct wq_entry *w = wq_entry_get(wq, key); + + if (w) + complete(&w->c); +} + +static void handle_rpc_func_cmd_wq(struct optee *optee, + struct optee_msg_arg *arg) +{ + if (arg->num_params != 1) + goto bad; + + if ((arg->params[0].attr & OPTEE_MSG_ATTR_TYPE_MASK) != + OPTEE_MSG_ATTR_TYPE_VALUE_INPUT) + goto bad; + + switch (arg->params[0].u.value.a) { + case OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP: + wq_sleep(&optee->wait_queue, arg->params[0].u.value.b); + break; + case OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP: + wq_wakeup(&optee->wait_queue, arg->params[0].u.value.b); + break; + default: + goto bad; + } + + arg->ret = TEEC_SUCCESS; + return; +bad: + arg->ret = TEEC_ERROR_BAD_PARAMETERS; +} + +static void handle_rpc_func_cmd_wait(struct optee_msg_arg *arg) +{ + u32 msec_to_wait; + + if (arg->num_params != 1) + goto bad; + + if ((arg->params[0].attr & OPTEE_MSG_ATTR_TYPE_MASK) != + OPTEE_MSG_ATTR_TYPE_VALUE_INPUT) + goto bad; + + msec_to_wait = arg->params[0].u.value.a; + + /* set task's state to interruptible sleep */ + set_current_state(TASK_INTERRUPTIBLE); + + /* take a nap */ + msleep(msec_to_wait); + + arg->ret = TEEC_SUCCESS; + return; +bad: + arg->ret = TEEC_ERROR_BAD_PARAMETERS; +} + +static void handle_rpc_supp_cmd(struct tee_context *ctx, + struct optee_msg_arg *arg) +{ + struct tee_param *params; + + arg->ret_origin = TEEC_ORIGIN_COMMS; + + params = kmalloc_array(arg->num_params, sizeof(struct tee_param), + GFP_KERNEL); + if (!params) { + arg->ret = TEEC_ERROR_OUT_OF_MEMORY; + return; + } + + if (optee_from_msg_param(params, arg->num_params, arg->params)) { + arg->ret = TEEC_ERROR_BAD_PARAMETERS; + goto out; + } + + arg->ret = optee_supp_thrd_req(ctx, arg->cmd, arg->num_params, params); + + if (optee_to_msg_param(arg->params, arg->num_params, params)) + arg->ret = TEEC_ERROR_BAD_PARAMETERS; +out: + kfree(params); +} + +static struct tee_shm *cmd_alloc_suppl(struct tee_context *ctx, size_t sz) +{ + u32 ret; + struct tee_param param; + struct optee *optee = tee_get_drvdata(ctx->teedev); + struct tee_shm *shm; + + param.attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT; + param.u.value.a = OPTEE_MSG_RPC_SHM_TYPE_APPL; + param.u.value.b = sz; + param.u.value.c = 0; + + ret = optee_supp_thrd_req(ctx, OPTEE_MSG_RPC_CMD_SHM_ALLOC, 1, ¶m); + if (ret) + return ERR_PTR(-ENOMEM); + + mutex_lock(&optee->supp.ctx_mutex); + /* Increases count as secure world doesn't have a reference */ + shm = tee_shm_get_from_id(optee->supp.ctx, param.u.value.c); + mutex_unlock(&optee->supp.ctx_mutex); + return shm; +} + +static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx, + struct optee_msg_arg *arg) +{ + phys_addr_t pa; + struct tee_shm *shm; + size_t sz; + size_t n; + + arg->ret_origin = TEEC_ORIGIN_COMMS; + + if (!arg->num_params || + arg->params[0].attr != OPTEE_MSG_ATTR_TYPE_VALUE_INPUT) { + arg->ret = TEEC_ERROR_BAD_PARAMETERS; + return; + } + + for (n = 1; n < arg->num_params; n++) { + if (arg->params[n].attr != OPTEE_MSG_ATTR_TYPE_NONE) { + arg->ret = TEEC_ERROR_BAD_PARAMETERS; + return; + } + } + + sz = arg->params[0].u.value.b; + switch (arg->params[0].u.value.a) { + case OPTEE_MSG_RPC_SHM_TYPE_APPL: + shm = cmd_alloc_suppl(ctx, sz); + break; + case OPTEE_MSG_RPC_SHM_TYPE_KERNEL: + shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED); + break; + default: + arg->ret = TEEC_ERROR_BAD_PARAMETERS; + return; + } + + if (IS_ERR(shm)) { + arg->ret = TEEC_ERROR_OUT_OF_MEMORY; + return; + } + + if (tee_shm_get_pa(shm, 0, &pa)) { + arg->ret = TEEC_ERROR_BAD_PARAMETERS; + goto bad; + } + + arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT; + arg->params[0].u.tmem.buf_ptr = pa; + arg->params[0].u.tmem.size = sz; + arg->params[0].u.tmem.shm_ref = (unsigned long)shm; + arg->ret = TEEC_SUCCESS; + return; +bad: + tee_shm_free(shm); +} + +static void cmd_free_suppl(struct tee_context *ctx, struct tee_shm *shm) +{ + struct tee_param param; + + param.attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT; + param.u.value.a = OPTEE_MSG_RPC_SHM_TYPE_APPL; + param.u.value.b = tee_shm_get_id(shm); + param.u.value.c = 0; + + /* + * Match the tee_shm_get_from_id() in cmd_alloc_suppl() as secure + * world has released its reference. + * + * It's better to do this before sending the request to supplicant + * as we'd like to let the process doing the initial allocation to + * do release the last reference too in order to avoid stacking + * many pending fput() on the client process. This could otherwise + * happen if secure world does many allocate and free in a single + * invoke. + */ + tee_shm_put(shm); + + optee_supp_thrd_req(ctx, OPTEE_MSG_RPC_CMD_SHM_FREE, 1, ¶m); +} + +static void handle_rpc_func_cmd_shm_free(struct tee_context *ctx, + struct optee_msg_arg *arg) +{ + struct tee_shm *shm; + + arg->ret_origin = TEEC_ORIGIN_COMMS; + + if (arg->num_params != 1 || + arg->params[0].attr != OPTEE_MSG_ATTR_TYPE_VALUE_INPUT) { + arg->ret = TEEC_ERROR_BAD_PARAMETERS; + return; + } + + shm = (struct tee_shm *)(unsigned long)arg->params[0].u.value.b; + switch (arg->params[0].u.value.a) { + case OPTEE_MSG_RPC_SHM_TYPE_APPL: + cmd_free_suppl(ctx, shm); + break; + case OPTEE_MSG_RPC_SHM_TYPE_KERNEL: + tee_shm_free(shm); + break; + default: + arg->ret = TEEC_ERROR_BAD_PARAMETERS; + } + arg->ret = TEEC_SUCCESS; +} + +static void handle_rpc_func_cmd(struct tee_context *ctx, struct optee *optee, + struct tee_shm *shm) +{ + struct optee_msg_arg *arg; + + arg = tee_shm_get_va(shm, 0); + if (IS_ERR(arg)) { + pr_err("%s: tee_shm_get_va %p failed\n", __func__, shm); + return; + } + + switch (arg->cmd) { + case OPTEE_MSG_RPC_CMD_GET_TIME: + handle_rpc_func_cmd_get_time(arg); + break; + case OPTEE_MSG_RPC_CMD_WAIT_QUEUE: + handle_rpc_func_cmd_wq(optee, arg); + break; + case OPTEE_MSG_RPC_CMD_SUSPEND: + handle_rpc_func_cmd_wait(arg); + break; + case OPTEE_MSG_RPC_CMD_SHM_ALLOC: + handle_rpc_func_cmd_shm_alloc(ctx, arg); + break; + case OPTEE_MSG_RPC_CMD_SHM_FREE: + handle_rpc_func_cmd_shm_free(ctx, arg); + break; + default: + handle_rpc_supp_cmd(ctx, arg); + } +} + +/** + * optee_handle_rpc() - handle RPC from secure world + * @ctx: context doing the RPC + * @param: value of registers for the RPC + * + * Result of RPC is written back into @param. + */ +void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param) +{ + struct tee_device *teedev = ctx->teedev; + struct optee *optee = tee_get_drvdata(teedev); + struct tee_shm *shm; + phys_addr_t pa; + + switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) { + case OPTEE_SMC_RPC_FUNC_ALLOC: + shm = tee_shm_alloc(ctx, param->a1, TEE_SHM_MAPPED); + if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) { + reg_pair_from_64(¶m->a1, ¶m->a2, pa); + reg_pair_from_64(¶m->a4, ¶m->a5, + (unsigned long)shm); + } else { + param->a1 = 0; + param->a2 = 0; + param->a4 = 0; + param->a5 = 0; + } + break; + case OPTEE_SMC_RPC_FUNC_FREE: + shm = reg_pair_to_ptr(param->a1, param->a2); + tee_shm_free(shm); + break; + case OPTEE_SMC_RPC_FUNC_IRQ: + /* + * An IRQ was raised while secure world was executing, + * since all IRQs are handled in Linux a dummy RPC is + * performed to let Linux take the IRQ through the normal + * vector. + */ + break; + case OPTEE_SMC_RPC_FUNC_CMD: + shm = reg_pair_to_ptr(param->a1, param->a2); + handle_rpc_func_cmd(ctx, optee, shm); + break; + default: + pr_warn("Unknown RPC func 0x%x\n", + (u32)OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)); + break; + } + + param->a0 = OPTEE_SMC_CALL_RETURN_FROM_RPC; +} diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c new file mode 100644 index 000000000000..b4ea0678a436 --- /dev/null +++ b/drivers/tee/optee/supp.c @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include +#include +#include +#include "optee_private.h" + +void optee_supp_init(struct optee_supp *supp) +{ + memset(supp, 0, sizeof(*supp)); + mutex_init(&supp->ctx_mutex); + mutex_init(&supp->thrd_mutex); + mutex_init(&supp->supp_mutex); + init_completion(&supp->data_to_supp); + init_completion(&supp->data_from_supp); +} + +void optee_supp_uninit(struct optee_supp *supp) +{ + mutex_destroy(&supp->ctx_mutex); + mutex_destroy(&supp->thrd_mutex); + mutex_destroy(&supp->supp_mutex); +} + +/** + * optee_supp_thrd_req() - request service from supplicant + * @ctx: context doing the request + * @func: function requested + * @num_params: number of elements in @param array + * @param: parameters for function + * + * Returns result of operation to be passed to secure world + */ +u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, + struct tee_param *param) +{ + bool interruptable; + struct optee *optee = tee_get_drvdata(ctx->teedev); + struct optee_supp *supp = &optee->supp; + u32 ret; + + /* + * Other threads blocks here until we've copied our answer from + * supplicant. + */ + while (mutex_lock_interruptible(&supp->thrd_mutex)) { + /* See comment below on when the RPC can be interrupted. */ + mutex_lock(&supp->ctx_mutex); + interruptable = !supp->ctx; + mutex_unlock(&supp->ctx_mutex); + if (interruptable) + return TEEC_ERROR_COMMUNICATION; + } + + /* + * We have exclusive access now since the supplicant at this + * point is either doing a + * wait_for_completion_interruptible(&supp->data_to_supp) or is in + * userspace still about to do the ioctl() to enter + * optee_supp_recv() below. + */ + + supp->func = func; + supp->num_params = num_params; + supp->param = param; + supp->req_posted = true; + + /* Let supplicant get the data */ + complete(&supp->data_to_supp); + + /* + * Wait for supplicant to process and return result, once we've + * returned from wait_for_completion(data_from_supp) we have + * exclusive access again. + */ + while (wait_for_completion_interruptible(&supp->data_from_supp)) { + mutex_lock(&supp->ctx_mutex); + interruptable = !supp->ctx; + if (interruptable) { + /* + * There's no supplicant available and since the + * supp->ctx_mutex currently is held none can + * become available until the mutex released + * again. + * + * Interrupting an RPC to supplicant is only + * allowed as a way of slightly improving the user + * experience in case the supplicant hasn't been + * started yet. During normal operation the supplicant + * will serve all requests in a timely manner and + * interrupting then wouldn't make sense. + */ + supp->ret = TEEC_ERROR_COMMUNICATION; + init_completion(&supp->data_to_supp); + } + mutex_unlock(&supp->ctx_mutex); + if (interruptable) + break; + } + + ret = supp->ret; + supp->param = NULL; + supp->req_posted = false; + + /* We're done, let someone else talk to the supplicant now. */ + mutex_unlock(&supp->thrd_mutex); + + return ret; +} + +/** + * optee_supp_recv() - receive request for supplicant + * @ctx: context receiving the request + * @func: requested function in supplicant + * @num_params: number of elements allocated in @param, updated with number + * used elements + * @param: space for parameters for @func + * + * Returns 0 on success or <0 on failure + */ +int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params, + struct tee_param *param) +{ + struct tee_device *teedev = ctx->teedev; + struct optee *optee = tee_get_drvdata(teedev); + struct optee_supp *supp = &optee->supp; + int rc; + + /* + * In case two threads in one supplicant is calling this function + * simultaneously we need to protect the data with a mutex which + * we'll release before returning. + */ + mutex_lock(&supp->supp_mutex); + + if (supp->supp_next_send) { + /* + * optee_supp_recv() has been called again without + * a optee_supp_send() in between. Supplicant has + * probably been restarted before it was able to + * write back last result. Abort last request and + * wait for a new. + */ + if (supp->req_posted) { + supp->ret = TEEC_ERROR_COMMUNICATION; + supp->supp_next_send = false; + complete(&supp->data_from_supp); + } + } + + /* + * This is where supplicant will be hanging most of the + * time, let's make this interruptable so we can easily + * restart supplicant if needed. + */ + if (wait_for_completion_interruptible(&supp->data_to_supp)) { + rc = -ERESTARTSYS; + goto out; + } + + /* We have exlusive access to the data */ + + if (*num_params < supp->num_params) { + /* + * Not enough room for parameters, tell supplicant + * it failed and abort last request. + */ + supp->ret = TEEC_ERROR_COMMUNICATION; + rc = -EINVAL; + complete(&supp->data_from_supp); + goto out; + } + + *func = supp->func; + *num_params = supp->num_params; + memcpy(param, supp->param, + sizeof(struct tee_param) * supp->num_params); + + /* Allow optee_supp_send() below to do its work */ + supp->supp_next_send = true; + + rc = 0; +out: + mutex_unlock(&supp->supp_mutex); + return rc; +} + +/** + * optee_supp_send() - send result of request from supplicant + * @ctx: context sending result + * @ret: return value of request + * @num_params: number of parameters returned + * @param: returned parameters + * + * Returns 0 on success or <0 on failure. + */ +int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params, + struct tee_param *param) +{ + struct tee_device *teedev = ctx->teedev; + struct optee *optee = tee_get_drvdata(teedev); + struct optee_supp *supp = &optee->supp; + size_t n; + int rc = 0; + + /* + * We still have exclusive access to the data since that's how we + * left it when returning from optee_supp_read(). + */ + + /* See comment on mutex in optee_supp_read() above */ + mutex_lock(&supp->supp_mutex); + + if (!supp->supp_next_send) { + /* + * Something strange is going on, supplicant shouldn't + * enter optee_supp_send() in this state + */ + rc = -ENOENT; + goto out; + } + + if (num_params != supp->num_params) { + /* + * Something is wrong, let supplicant restart. Next call to + * optee_supp_recv() will give an error to the requesting + * thread and release it. + */ + rc = -EINVAL; + goto out; + } + + /* Update out and in/out parameters */ + for (n = 0; n < num_params; n++) { + struct tee_param *p = supp->param + n; + + switch (p->attr) { + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + p->u.value.a = param[n].u.value.a; + p->u.value.b = param[n].u.value.b; + p->u.value.c = param[n].u.value.c; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + p->u.memref.size = param[n].u.memref.size; + break; + default: + break; + } + } + supp->ret = ret; + + /* Allow optee_supp_recv() above to do its work */ + supp->supp_next_send = false; + + /* Let the requesting thread continue */ + complete(&supp->data_from_supp); +out: + mutex_unlock(&supp->supp_mutex); + return rc; +} -- GitLab From ca3e4eb14a140080e3eb35976a796c3a11b17497 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Mon, 1 Jun 2015 16:15:25 +0200 Subject: [PATCH 0043/1398] BACKPORT: Documentation: tee subsystem and op-tee driver Change-Id: Ic6c0af1b40e62132474ef8da2c7ddf226764ea56 Acked-by: Andreas Dannenberg Signed-off-by: Jens Wiklander (cherry picked from commit 6a6e77006fcdba89708214556c6d560323e850fc) Signed-off-by: Victor Chong --- Documentation/00-INDEX | 2 + Documentation/tee.txt | 118 +++++++++++++++++++++++++++++++++++++++++ MAINTAINERS | 1 + 3 files changed, 121 insertions(+) create mode 100644 Documentation/tee.txt diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 3acc4f1a6f84..4a5a887181c3 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -436,6 +436,8 @@ sysrq.txt - info on the magic SysRq key. target/ - directory with info on generating TCM v4 fabric .ko modules +tee.txt + - info on the TEE subsystem and drivers this_cpu_ops.txt - List rationale behind and the way to use this_cpu operations. thermal/ diff --git a/Documentation/tee.txt b/Documentation/tee.txt new file mode 100644 index 000000000000..718599357596 --- /dev/null +++ b/Documentation/tee.txt @@ -0,0 +1,118 @@ +TEE subsystem +This document describes the TEE subsystem in Linux. + +A TEE (Trusted Execution Environment) is a trusted OS running in some +secure environment, for example, TrustZone on ARM CPUs, or a separate +secure co-processor etc. A TEE driver handles the details needed to +communicate with the TEE. + +This subsystem deals with: + +- Registration of TEE drivers + +- Managing shared memory between Linux and the TEE + +- Providing a generic API to the TEE + +The TEE interface +================= + +include/uapi/linux/tee.h defines the generic interface to a TEE. + +User space (the client) connects to the driver by opening /dev/tee[0-9]* or +/dev/teepriv[0-9]*. + +- TEE_IOC_SHM_ALLOC allocates shared memory and returns a file descriptor + which user space can mmap. When user space doesn't need the file + descriptor any more, it should be closed. When shared memory isn't needed + any longer it should be unmapped with munmap() to allow the reuse of + memory. + +- TEE_IOC_VERSION lets user space know which TEE this driver handles and + the its capabilities. + +- TEE_IOC_OPEN_SESSION opens a new session to a Trusted Application. + +- TEE_IOC_INVOKE invokes a function in a Trusted Application. + +- TEE_IOC_CANCEL may cancel an ongoing TEE_IOC_OPEN_SESSION or TEE_IOC_INVOKE. + +- TEE_IOC_CLOSE_SESSION closes a session to a Trusted Application. + +There are two classes of clients, normal clients and supplicants. The latter is +a helper process for the TEE to access resources in Linux, for example file +system access. A normal client opens /dev/tee[0-9]* and a supplicant opens +/dev/teepriv[0-9]. + +Much of the communication between clients and the TEE is opaque to the +driver. The main job for the driver is to receive requests from the +clients, forward them to the TEE and send back the results. In the case of +supplicants the communication goes in the other direction, the TEE sends +requests to the supplicant which then sends back the result. + +OP-TEE driver +============= + +The OP-TEE driver handles OP-TEE [1] based TEEs. Currently it is only the ARM +TrustZone based OP-TEE solution that is supported. + +Lowest level of communication with OP-TEE builds on ARM SMC Calling +Convention (SMCCC) [2], which is the foundation for OP-TEE's SMC interface +[3] used internally by the driver. Stacked on top of that is OP-TEE Message +Protocol [4]. + +OP-TEE SMC interface provides the basic functions required by SMCCC and some +additional functions specific for OP-TEE. The most interesting functions are: + +- OPTEE_SMC_FUNCID_CALLS_UID (part of SMCCC) returns the version information + which is then returned by TEE_IOC_VERSION + +- OPTEE_SMC_CALL_GET_OS_UUID returns the particular OP-TEE implementation, used + to tell, for instance, a TrustZone OP-TEE apart from an OP-TEE running on a + separate secure co-processor. + +- OPTEE_SMC_CALL_WITH_ARG drives the OP-TEE message protocol + +- OPTEE_SMC_GET_SHM_CONFIG lets the driver and OP-TEE agree on which memory + range to used for shared memory between Linux and OP-TEE. + +The GlobalPlatform TEE Client API [5] is implemented on top of the generic +TEE API. + +Picture of the relationship between the different components in the +OP-TEE architecture. + + User space Kernel Secure world + ~~~~~~~~~~ ~~~~~~ ~~~~~~~~~~~~ + +--------+ +-------------+ + | Client | | Trusted | + +--------+ | Application | + /\ +-------------+ + || +----------+ /\ + || |tee- | || + || |supplicant| \/ + || +----------+ +-------------+ + \/ /\ | TEE Internal| + +-------+ || | API | + + TEE | || +--------+--------+ +-------------+ + | Client| || | TEE | OP-TEE | | OP-TEE | + | API | \/ | subsys | driver | | Trusted OS | + +-------+----------------+----+-------+----+-----------+-------------+ + | Generic TEE API | | OP-TEE MSG | + | IOCTL (TEE_IOC_*) | | SMCCC (OPTEE_SMC_CALL_*) | + +-----------------------------+ +------------------------------+ + +RPC (Remote Procedure Call) are requests from secure world to kernel driver +or tee-supplicant. An RPC is identified by a special range of SMCCC return +values from OPTEE_SMC_CALL_WITH_ARG. RPC messages which are intended for the +kernel are handled by the kernel driver. Other RPC messages will be forwarded to +tee-supplicant without further involvement of the driver, except switching +shared memory buffer representation. + +References: +[1] https://github.com/OP-TEE/optee_os +[2] http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html +[3] drivers/tee/optee/optee_smc.h +[4] drivers/tee/optee/optee_msg.h +[5] http://www.globalplatform.org/specificationsdevice.asp look for + "TEE Client API Specification v1.0" and click download. diff --git a/MAINTAINERS b/MAINTAINERS index bf6e78f2fd64..a419303662bf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10666,6 +10666,7 @@ S: Maintained F: include/linux/tee_drv.h F: include/uapi/linux/tee.h F: drivers/tee/ +F: Documentation/tee.txt THUNDERBOLT DRIVER M: Andreas Noever -- GitLab From f26d3c76d3764c2d3b542eb8f0aa9ee7dc47233c Mon Sep 17 00:00:00 2001 From: Nick Bray Date: Thu, 30 Nov 2017 15:49:54 -0800 Subject: [PATCH 0044/1398] ANDROID: initramfs: call free_initrd() when skipping init Memory allocated for initrd would not be reclaimed if initializing ramfs was skipped. Bug: 69901741 Test: "grep MemTotal /proc/meminfo" increases by a few MB on an Android device with a/b boot. Change-Id: Ifbe094d303ed12cfd6de6aa004a8a19137a2f58a Signed-off-by: Nick Bray --- init/initramfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/init/initramfs.c b/init/initramfs.c index d0b53f49c98a..bf3af10c500a 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -622,8 +622,11 @@ static int __init populate_rootfs(void) { char *err; - if (do_skip_initramfs) + if (do_skip_initramfs) { + if (initrd_start) + free_initrd(); return default_rootfs(); + } err = unpack_to_rootfs(__initramfs_start, __initramfs_size); if (err) -- GitLab From 322e659a03dcec9e19a2bbd116ee8f1c978a7030 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 30 Oct 2017 14:46:31 -0700 Subject: [PATCH 0045/1398] bcache: only permit to recovery read error when cache device is clean commit d59b23795933678c9638fd20c942d2b4f3cd6185 upstream. When bcache does read I/Os, for example in writeback or writethrough mode, if a read request on cache device is failed, bcache will try to recovery the request by reading from cached device. If the data on cached device is not synced with cache device, then requester will get a stale data. For critical storage system like database, providing stale data from recovery may result an application level data corruption, which is unacceptible. With this patch, for a failed read request in writeback or writethrough mode, recovery a recoverable read request only happens when cache device is clean. That is to say, all data on cached device is up to update. For other cache modes in bcache, read request will never hit cached_dev_read_error(), they don't need this patch. Please note, because cache mode can be switched arbitrarily in run time, a writethrough mode might be switched from a writeback mode. Therefore checking dc->has_data in writethrough mode still makes sense. Changelog: V4: Fix parens error pointed by Michael Lyle. v3: By response from Kent Oversteet, he thinks recovering stale data is a bug to fix, and option to permit it is unnecessary. So this version the sysfs file is removed. v2: rename sysfs entry from allow_stale_data_on_failure to allow_stale_data_on_failure, and fix the confusing commit log. v1: initial patch posted. [small change to patch comment spelling by mlyle] Signed-off-by: Coly Li Signed-off-by: Michael Lyle Reported-by: Arne Wolf Reviewed-by: Michael Lyle Cc: Kent Overstreet Cc: Nix Cc: Kai Krakow Cc: Eric Wheeler Cc: Junhui Tang Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/request.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index e0f1c6d534fe..dfad204f6245 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -702,8 +702,16 @@ static void cached_dev_read_error(struct closure *cl) { struct search *s = container_of(cl, struct search, cl); struct bio *bio = &s->bio.bio; + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); - if (s->recoverable) { + /* + * If cache device is dirty (dc->has_dirty is non-zero), then + * recovery a failed read request from cached device may get a + * stale data back. So read failure recovery is only permitted + * when cache device is clean. + */ + if (s->recoverable && + (dc && !atomic_read(&dc->has_dirty))) { /* Retry from the backing device: */ trace_bcache_read_retry(s->orig_bio); -- GitLab From 9db9b5f2b1b691c78b239a71a4b6a8d950323a78 Mon Sep 17 00:00:00 2001 From: Rui Hua Date: Fri, 24 Nov 2017 15:14:26 -0800 Subject: [PATCH 0046/1398] bcache: recover data from backing when data is clean commit e393aa2446150536929140739f09c6ecbcbea7f0 upstream. When we send a read request and hit the clean data in cache device, there is a situation called cache read race in bcache(see the commit in the tail of cache_look_up(), the following explaination just copy from there): The bucket we're reading from might be reused while our bio is in flight, and we could then end up reading the wrong data. We guard against this by checking (in bch_cache_read_endio()) if the pointer is stale again; if so, we treat it as an error (s->iop.error = -EINTR) and reread from the backing device (but we don't pass that error up anywhere) It should be noted that cache read race happened under normal circumstances, not the circumstance when SSD failed, it was counted and shown in /sys/fs/bcache/XXX/internal/cache_read_races. Without this patch, when we use writeback mode, we will never reread from the backing device when cache read race happened, until the whole cache device is clean, because the condition (s->recoverable && (dc && !atomic_read(&dc->has_dirty))) is false in cached_dev_read_error(). In this situation, the s->iop.error(= -EINTR) will be passed up, at last, user will receive -EINTR when it's bio end, this is not suitable, and wield to up-application. In this patch, we use s->read_dirty_data to judge whether the read request hit dirty data in cache device, it is safe to reread data from the backing device when the read request hit clean data. This can not only handle cache read race, but also recover data when failed read request from cache device. [edited by mlyle to fix up whitespace, commit log title, comment spelling] Fixes: d59b23795933 ("bcache: only permit to recovery read error when cache device is clean") Signed-off-by: Hua Rui Reviewed-by: Michael Lyle Reviewed-by: Coly Li Signed-off-by: Michael Lyle Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/request.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index dfad204f6245..ab8a1b36af21 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -702,16 +702,15 @@ static void cached_dev_read_error(struct closure *cl) { struct search *s = container_of(cl, struct search, cl); struct bio *bio = &s->bio.bio; - struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); /* - * If cache device is dirty (dc->has_dirty is non-zero), then - * recovery a failed read request from cached device may get a - * stale data back. So read failure recovery is only permitted - * when cache device is clean. + * If read request hit dirty data (s->read_dirty_data is true), + * then recovery a failed read request from cached device may + * get a stale data back. So read failure recovery is only + * permitted when read request hit clean data in cache device, + * or when cache read race happened. */ - if (s->recoverable && - (dc && !atomic_read(&dc->has_dirty))) { + if (s->recoverable && !s->read_dirty_data) { /* Retry from the backing device: */ trace_bcache_read_retry(s->orig_bio); -- GitLab From 48f4d1f7fe48627a4c5c9332bcdf0f2a906e2f20 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 9 Nov 2017 15:39:56 +0100 Subject: [PATCH 0047/1398] drm/fsl-dcu: avoid disabling pixel clock twice on suspend commit 9306e996574f7f57136a62e49cd0075f85713623 upstream. With commit 0a70c998d0c5 ("drm/fsl-dcu: enable pixel clock when enabling CRTC") the pixel clock is controlled by the CRTC code. Disabling the pixel clock in suspend leads to a warning due to the second clk_disable_unprepare call: WARNING: CPU: 0 PID: 359 at drivers/clk/clk.c:594 clk_core_disable+0x8c/0x90 Remove clk_disable_unprepare call for pixel clock to avoid unbalanced clock disable on suspend. Fixes: 0a70c998d0c5 ("drm/fsl-dcu: enable pixel clock when enabling CRTC") Signed-off-by: Stefan Agner Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index cc2fde2ae5ef..7fbfb31f09b8 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -243,7 +243,6 @@ static int fsl_dcu_drm_pm_suspend(struct device *dev) return PTR_ERR(fsl_dev->state); } - clk_disable_unprepare(fsl_dev->pix_clk); clk_disable_unprepare(fsl_dev->clk); return 0; -- GitLab From cffc01d24d2591cd2184c13fba32f50abfd20e68 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 10 Nov 2017 10:15:28 +0100 Subject: [PATCH 0048/1398] drm/fsl-dcu: enable IRQ before drm_atomic_helper_resume() commit 9fd99f4f3f5e13ce959900ae57d64b1bdb51d823 upstream. The resume helpers wait for a vblank to occurre hence IRQ need to be enabled. This avoids a warning as follows during resume: WARNING: CPU: 0 PID: 314 at drivers/gpu/drm/drm_atomic_helper.c:1249 drm_atomic_helper_wait_for_vblanks.part.1+0x284/0x288 [CRTC:28:crtc-0] vblank wait timed out Signed-off-by: Stefan Agner Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index 7fbfb31f09b8..c9eef0f51d31 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -265,6 +265,7 @@ static int fsl_dcu_drm_pm_resume(struct device *dev) if (fsl_dev->tcon) fsl_tcon_bypass_enable(fsl_dev->tcon); fsl_dcu_drm_init_planes(fsl_dev->drm); + enable_irq(fsl_dev->irq); drm_atomic_helper_resume(fsl_dev->drm, fsl_dev->state); console_lock(); @@ -272,7 +273,6 @@ static int fsl_dcu_drm_pm_resume(struct device *dev) console_unlock(); drm_kms_helper_poll_enable(fsl_dev->drm); - enable_irq(fsl_dev->irq); return 0; } -- GitLab From 0de12a778b923807017d133ba5c0ef2d2a8444e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Wed, 9 Nov 2016 10:46:21 +0200 Subject: [PATCH 0049/1398] Revert "crypto: caam - get rid of tasklet" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2b163b5bce04546da72617bfb6c8bf07a45c4b17 upstream. This reverts commit 66d2e2028091a074aa1290d2eeda5ddb1a6c329c. Quoting from Russell's findings: https://www.mail-archive.com/linux-crypto@vger.kernel.org/msg21136.html [quote] Okay, I've re-tested, using a different way of measuring, because using openssl speed is impractical for off-loaded engines. I've decided to use this way to measure the performance: dd if=/dev/zero bs=1048576 count=128 | /usr/bin/time openssl dgst -md5 For the threaded IRQs case gives: 0.05user 2.74system 0:05.30elapsed 52%CPU (0avgtext+0avgdata 2400maxresident)k 0.06user 2.52system 0:05.18elapsed 49%CPU (0avgtext+0avgdata 2404maxresident)k 0.12user 2.60system 0:05.61elapsed 48%CPU (0avgtext+0avgdata 2460maxresident)k => 5.36s => 25.0MB/s and the tasklet case: 0.08user 2.53system 0:04.83elapsed 54%CPU (0avgtext+0avgdata 2468maxresident)k 0.09user 2.47system 0:05.16elapsed 49%CPU (0avgtext+0avgdata 2368maxresident)k 0.10user 2.51system 0:04.87elapsed 53%CPU (0avgtext+0avgdata 2460maxresident)k => 4.95 => 27.1MB/s which corresponds to an 8% slowdown for the threaded IRQ case. So, tasklets are indeed faster than threaded IRQs. [...] I think I've proven from the above that this patch needs to be reverted due to the performance regression, and that there _is_ most definitely a deterimental effect of switching from tasklets to threaded IRQs. [/quote] Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/intern.h | 1 + drivers/crypto/caam/jr.c | 25 ++++++++++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index 5d4c05074a5c..e2bcacc1a921 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -41,6 +41,7 @@ struct caam_drv_private_jr { struct device *dev; int ridx; struct caam_job_ring __iomem *rregs; /* JobR's register space */ + struct tasklet_struct irqtask; int irq; /* One per queue */ /* Number of scatterlist crypt transforms active on the JobR */ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 757c27f9953d..9e7f28122bb7 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -73,6 +73,8 @@ static int caam_jr_shutdown(struct device *dev) ret = caam_reset_hw_jr(dev); + tasklet_kill(&jrp->irqtask); + /* Release interrupt */ free_irq(jrp->irq, dev); @@ -128,7 +130,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) /* * Check the output ring for ready responses, kick - * the threaded irq if jobs done. + * tasklet if jobs done. */ irqstate = rd_reg32(&jrp->rregs->jrintstatus); if (!irqstate) @@ -150,13 +152,18 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) /* Have valid interrupt at this point, just ACK and trigger */ wr_reg32(&jrp->rregs->jrintstatus, irqstate); - return IRQ_WAKE_THREAD; + preempt_disable(); + tasklet_schedule(&jrp->irqtask); + preempt_enable(); + + return IRQ_HANDLED; } -static irqreturn_t caam_jr_threadirq(int irq, void *st_dev) +/* Deferred service handler, run as interrupt-fired tasklet */ +static void caam_jr_dequeue(unsigned long devarg) { int hw_idx, sw_idx, i, head, tail; - struct device *dev = st_dev; + struct device *dev = (struct device *)devarg; struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg); u32 *userdesc, userstatus; @@ -230,8 +237,6 @@ static irqreturn_t caam_jr_threadirq(int irq, void *st_dev) /* reenable / unmask IRQs */ clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); - - return IRQ_HANDLED; } /** @@ -389,10 +394,11 @@ static int caam_jr_init(struct device *dev) jrp = dev_get_drvdata(dev); + tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev); + /* Connect job ring interrupt handler. */ - error = request_threaded_irq(jrp->irq, caam_jr_interrupt, - caam_jr_threadirq, IRQF_SHARED, - dev_name(dev), dev); + error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED, + dev_name(dev), dev); if (error) { dev_err(dev, "can't connect JobR %d interrupt (%d)\n", jrp->ridx, jrp->irq); @@ -454,6 +460,7 @@ static int caam_jr_init(struct device *dev) out_free_irq: free_irq(jrp->irq, dev); out_kill_deq: + tasklet_kill(&jrp->irqtask); return error; } -- GitLab From ee23ae915fa74956503507c1e55cbb2102f349ec Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 29 Nov 2017 16:09:58 -0800 Subject: [PATCH 0050/1398] mm, oom_reaper: gather each vma to prevent leaking TLB entry commit 687cb0884a714ff484d038e9190edc874edcf146 upstream. tlb_gather_mmu(&tlb, mm, 0, -1) means gathering the whole virtual memory space. In this case, tlb->fullmm is true. Some archs like arm64 doesn't flush TLB when tlb->fullmm is true: commit 5a7862e83000 ("arm64: tlbflush: avoid flushing when fullmm == 1"). Which causes leaking of tlb entries. Will clarifies his patch: "Basically, we tag each address space with an ASID (PCID on x86) which is resident in the TLB. This means we can elide TLB invalidation when pulling down a full mm because we won't ever assign that ASID to another mm without doing TLB invalidation elsewhere (which actually just nukes the whole TLB). I think that means that we could potentially not fault on a kernel uaccess, because we could hit in the TLB" There could be a window between complete_signal() sending IPI to other cores and all threads sharing this mm are really kicked off from cores. In this window, the oom reaper may calls tlb_flush_mmu_tlbonly() to flush TLB then frees pages. However, due to the above problem, the TLB entries are not really flushed on arm64. Other threads are possible to access these pages through TLB entries. Moreover, a copy_to_user() can also write to these pages without generating page fault, causes use-after-free bugs. This patch gathers each vma instead of gathering full vm space. In this case tlb->fullmm is not true. The behavior of oom reaper become similar to munmapping before do_exit, which should be safe for all archs. Link: http://lkml.kernel.org/r/20171107095453.179940-1-wangnan0@huawei.com Fixes: aac453635549 ("mm, oom: introduce oom reaper") Signed-off-by: Wang Nan Acked-by: Michal Hocko Acked-by: David Rientjes Cc: Minchan Kim Cc: Will Deacon Cc: Bob Liu Cc: Ingo Molnar Cc: Roman Gushchin Cc: Konstantin Khlebnikov Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [backported to 4.9 stable tree] Signed-off-by: Michal Hocko Signed-off-by: Greg Kroah-Hartman --- mm/oom_kill.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d631d251c150..4a184157cc3d 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -524,7 +524,6 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) */ set_bit(MMF_UNSTABLE, &mm->flags); - tlb_gather_mmu(&tlb, mm, 0, -1); for (vma = mm->mmap ; vma; vma = vma->vm_next) { if (is_vm_hugetlb_page(vma)) continue; @@ -546,11 +545,13 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) * we do not want to block exit_mmap by keeping mm ref * count elevated without a good reason. */ - if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) + if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { + tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end); unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end, &details); + tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end); + } } - tlb_finish_mmu(&tlb, 0, -1); pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", task_pid_nr(tsk), tsk->comm, K(get_mm_counter(mm, MM_ANONPAGES)), -- GitLab From 3789201628f5b2d58289682bc38f7a601807e2b3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Nov 2017 19:27:22 +0100 Subject: [PATCH 0051/1398] uas: Always apply US_FL_NO_ATA_1X quirk to Seagate devices commit 7fee72d5e8f1e7b8d8212e28291b1a0243ecf2f1 upstream. We've been adding this as a quirk on a per device basis hoping that newer disk enclosures would do better, but that has not happened, so simply apply this quirk to all Seagate devices. Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas-detect.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h index a155cd02bce2..ecc83c405a8b 100644 --- a/drivers/usb/storage/uas-detect.h +++ b/drivers/usb/storage/uas-detect.h @@ -111,6 +111,10 @@ static int uas_use_uas_driver(struct usb_interface *intf, } } + /* All Seagate disk enclosures have broken ATA pass-through support */ + if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bc2) + flags |= US_FL_NO_ATA_1X; + usb_stor_adjust_quirks(udev, &flags); if (flags & US_FL_IGNORE_UAS) { -- GitLab From 6ad14c9c646b6d8accb48e8bbcfb201506a9884b Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 14 Nov 2017 01:31:15 -0500 Subject: [PATCH 0052/1398] usb: quirks: Add no-lpm quirk for KY-688 USB 3.1 Type-C Hub commit e43a12f1793ae1fe006e26fe9327a8840a92233c upstream. KY-688 USB 3.1 Type-C Hub internally uses a Genesys Logic hub to connect to Realtek r8153. Similar to commit ("7496cfe5431f2 usb: quirks: Add no-lpm quirk for Moshi USB to Ethernet Adapter"), no-lpm can make r8153 ethernet work. Signed-off-by: Kai-Heng Feng Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 37c418e581fb..50010282c010 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -151,6 +151,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* appletouch */ { USB_DEVICE(0x05ac, 0x021a), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */ + { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM }, + /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */ { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM }, -- GitLab From 65e6599937d1006a278d29d4b8e742759947ac8f Mon Sep 17 00:00:00 2001 From: Matt Wilson Date: Mon, 13 Nov 2017 11:31:31 -0800 Subject: [PATCH 0053/1398] serial: 8250_pci: Add Amazon PCI serial device ID commit 3bfd1300abfe3adb18e84a89d97a0e82a22124bb upstream. This device will be used in future Amazon EC2 instances as the primary serial port (i.e., data sent to this port will be available via the GetConsoleOuput [1] EC2 API). [1] http://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_GetConsoleOutput.html Signed-off-by: Matt Wilson Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 22d32d295c5b..b80ea872b039 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -5568,6 +5568,9 @@ static struct pci_device_id serial_pci_tbl[] = { { PCI_DEVICE(0x1601, 0x0800), .driver_data = pbn_b0_4_1250000 }, { PCI_DEVICE(0x1601, 0xa801), .driver_data = pbn_b0_4_1250000 }, + /* Amazon PCI serial device */ + { PCI_DEVICE(0x1d0f, 0x8250), .driver_data = pbn_b0_1_115200 }, + /* * These entries match devices with class COMMUNICATION_SERIAL, * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL -- GitLab From 232b47b3c88af1da737cd7760f247c4ed58168cf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 11 Sep 2017 11:24:22 +0200 Subject: [PATCH 0054/1398] s390/runtime instrumentation: simplify task exit handling commit 8d9047f8b967ce6181fd824ae922978e1b055cc0 upstream. Free data structures required for runtime instrumentation from arch_release_task_struct(). This allows to simplify the code a bit, and also makes the semantics a bit easier: arch_release_task_struct() is never called from the task that is being removed. In addition this allows to get rid of exit_thread() in a later patch. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/runtime_instr.h | 4 +++- arch/s390/kernel/process.c | 3 +-- arch/s390/kernel/runtime_instr.c | 30 +++++++++++++-------------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h index 402ad6df4897..c54a9310d814 100644 --- a/arch/s390/include/asm/runtime_instr.h +++ b/arch/s390/include/asm/runtime_instr.h @@ -85,6 +85,8 @@ static inline void restore_ri_cb(struct runtime_instr_cb *cb_next, load_runtime_instr_cb(&runtime_instr_empty_cb); } -void exit_thread_runtime_instr(void); +struct task_struct; + +void runtime_instr_release(struct task_struct *tsk); #endif /* _RUNTIME_INSTR_H */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 172fe1121d99..8382fc62cde6 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -70,8 +70,6 @@ extern void kernel_thread_starter(void); */ void exit_thread(struct task_struct *tsk) { - if (tsk == current) - exit_thread_runtime_instr(); } void flush_thread(void) @@ -84,6 +82,7 @@ void release_thread(struct task_struct *dead_task) void arch_release_task_struct(struct task_struct *tsk) { + runtime_instr_release(tsk); } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index 70cdb03d4acd..fd03a7569e10 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -18,11 +18,24 @@ /* empty control block to disable RI by loading it */ struct runtime_instr_cb runtime_instr_empty_cb; +void runtime_instr_release(struct task_struct *tsk) +{ + kfree(tsk->thread.ri_cb); +} + static void disable_runtime_instr(void) { - struct pt_regs *regs = task_pt_regs(current); + struct task_struct *task = current; + struct pt_regs *regs; + if (!task->thread.ri_cb) + return; + regs = task_pt_regs(task); + preempt_disable(); load_runtime_instr_cb(&runtime_instr_empty_cb); + kfree(task->thread.ri_cb); + task->thread.ri_cb = NULL; + preempt_enable(); /* * Make sure the RI bit is deleted from the PSW. If the user did not @@ -43,19 +56,6 @@ static void init_runtime_instr_cb(struct runtime_instr_cb *cb) cb->valid = 1; } -void exit_thread_runtime_instr(void) -{ - struct task_struct *task = current; - - preempt_disable(); - if (!task->thread.ri_cb) - return; - disable_runtime_instr(); - kfree(task->thread.ri_cb); - task->thread.ri_cb = NULL; - preempt_enable(); -} - SYSCALL_DEFINE1(s390_runtime_instr, int, command) { struct runtime_instr_cb *cb; @@ -64,7 +64,7 @@ SYSCALL_DEFINE1(s390_runtime_instr, int, command) return -EOPNOTSUPP; if (command == S390_RUNTIME_INSTR_STOP) { - exit_thread_runtime_instr(); + disable_runtime_instr(); return 0; } -- GitLab From e4f12192d9e2fae46661b4be364c3ec375d3d7c3 Mon Sep 17 00:00:00 2001 From: Sebastian Sjoholm Date: Mon, 20 Nov 2017 19:29:32 +0100 Subject: [PATCH 0055/1398] USB: serial: option: add Quectel BG96 id commit c654b21ede93845863597de9ad774fd30db5f2ab upstream. Quectel BG96 is an Qualcomm MDM9206 based IoT modem, supporting both CAT-M and NB-IoT. Tested hardware is BG96 mounted on Quectel development board (EVB). The USB id is added to option.c to allow DIAG,GPS,AT and modem communication with the BG96. Signed-off-by: Sebastian Sjoholm Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index db3d34c2c82e..ffa8ec917ff5 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -241,6 +241,7 @@ static void option_instat_callback(struct urb *urb); /* These Quectel products use Quectel's vendor ID */ #define QUECTEL_PRODUCT_EC21 0x0121 #define QUECTEL_PRODUCT_EC25 0x0125 +#define QUECTEL_PRODUCT_BG96 0x0296 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -1185,6 +1186,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), -- GitLab From b0a46089fe818d33b42fc7c399239db5e4f7bfcf Mon Sep 17 00:00:00 2001 From: Boshi Wang Date: Fri, 20 Oct 2017 16:01:03 +0800 Subject: [PATCH 0056/1398] ima: fix hash algorithm initialization [ Upstream commit ebe7c0a7be92bbd34c6ff5b55810546a0ee05bee ] The hash_setup function always sets the hash_setup_done flag, even when the hash algorithm is invalid. This prevents the default hash algorithm defined as CONFIG_IMA_DEFAULT_HASH from being used. This patch sets hash_setup_done flag only for valid hash algorithms. Fixes: e7a2ad7eb6f4 "ima: enable support for larger default filedata hash algorithms" Signed-off-by: Boshi Wang Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 0e8762945e79..2b3def14b4fb 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -51,6 +51,8 @@ static int __init hash_setup(char *str) ima_hash_algo = HASH_ALGO_SHA1; else if (strncmp(str, "md5", 3) == 0) ima_hash_algo = HASH_ALGO_MD5; + else + return 1; goto out; } @@ -60,6 +62,8 @@ static int __init hash_setup(char *str) break; } } + if (i == HASH_ALGO__LAST) + return 1; out: hash_setup_done = 1; return 1; -- GitLab From 63074a793d69d9fd56e1456d598f4df9d35d9b70 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 30 Oct 2017 14:38:58 +0100 Subject: [PATCH 0057/1398] s390/pci: do not require AIS facility [ Upstream commit 48070c73058be6de9c0d754d441ed7092dfc8f12 ] As of today QEMU does not provide the AIS facility to its guest. This prevents Linux guests from using PCI devices as the ais facility is checked during init. As this is just a performance optimization, we can move the ais check into the code where we need it (calling the SIC instruction). This is used at initialization and on interrupt. Both places do not require any serialization, so we can simply skip the instruction. Since we will now get all interrupts, we can also avoid the 2nd scan. As we can have multiple interrupts in parallel we might trigger spurious irqs more often for the non-AIS case but the core code can handle that. Signed-off-by: Christian Borntraeger Reviewed-by: Pierre Morel Reviewed-by: Halil Pasic Acked-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/pci_insn.h | 2 +- arch/s390/pci/pci.c | 5 +++-- arch/s390/pci/pci_insn.c | 6 +++++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index 649eb62c52b3..9e02cb7955c1 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -81,6 +81,6 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range); int zpci_load(u64 *data, u64 req, u64 offset); int zpci_store(u64 data, u64 req, u64 offset); int zpci_store_block(const u64 *data, u64 req, u64 offset); -void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc); +int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc); #endif diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 15ffc19c8c0c..03a1d5976ff5 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -354,7 +354,8 @@ static void zpci_irq_handler(struct airq_struct *airq) /* End of second scan with interrupts on. */ break; /* First scan complete, reenable interrupts. */ - zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC)) + break; si = 0; continue; } @@ -928,7 +929,7 @@ static int __init pci_base_init(void) if (!s390_pci_probe) return 0; - if (!test_facility(69) || !test_facility(71) || !test_facility(72)) + if (!test_facility(69) || !test_facility(71)) return 0; rc = zpci_debug_init(); diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index fa8d7d4b9751..248146dcfce3 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -91,11 +92,14 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range) } /* Set Interruption Controls */ -void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) +int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) { + if (!test_facility(72)) + return -EIO; asm volatile ( " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused)); + return 0; } /* PCI Load */ -- GitLab From 8a95afc485767641f712a46086b858e4c7ae2996 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 4 Nov 2017 04:19:52 -0700 Subject: [PATCH 0058/1398] selftests/x86/ldt_get: Add a few additional tests for limits [ Upstream commit fec8f5ae1715a01c72ad52cb2ecd8aacaf142302 ] We weren't testing the .limit and .limit_in_pages fields very well. Add more tests. This addition seems to trigger the "bits 16:19 are undefined" issue that was fixed in an earlier patch. I think that, at least on my CPU, the high nibble of the limit ends in LAR bits 16:19. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/5601c15ea9b3113d288953fd2838b18bedf6bc67.1509794321.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/ldt_gdt.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index e717fed80219..f936a3cd3e35 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -360,9 +360,24 @@ static void do_simple_tests(void) install_invalid(&desc, false); desc.seg_not_present = 0; - desc.read_exec_only = 0; desc.seg_32bit = 1; + desc.read_exec_only = 0; + desc.limit = 0xfffff; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB); + + desc.limit_in_pages = 1; + + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G); + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G); + desc.contents = 1; + desc.read_exec_only = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G); + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G); + + desc.limit = 0; install_invalid(&desc, true); } -- GitLab From e5f2421df13523e46697d453575c020af57c945b Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Mon, 6 Nov 2017 01:32:20 +0000 Subject: [PATCH 0059/1398] staging: greybus: loopback: Fix iteration count on async path [ Upstream commit 44b02da39210e6dd67e39ff1f48d30c56d384240 ] Commit 12927835d211 ("greybus: loopback: Add asynchronous bi-directional support") does what it says on the tin - namely, adds support for asynchronous bi-directional loopback operations. What it neglects to do though is increment the per-connection gb->iteration_count on an asynchronous operation error. This patch fixes that omission. Fixes: 12927835d211 ("greybus: loopback: Add asynchronous bi-directional support") Signed-off-by: Bryan O'Donoghue Reported-by: Mitch Tasman Reviewed-by: Johan Hovold Cc: Alex Elder Cc: Mitch Tasman Cc: greybus-dev@lists.linaro.org Cc: devel@driverdev.osuosl.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/loopback.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/greybus/loopback.c b/drivers/staging/greybus/loopback.c index 29dc249b0c74..3c2c233c2e49 100644 --- a/drivers/staging/greybus/loopback.c +++ b/drivers/staging/greybus/loopback.c @@ -1034,8 +1034,10 @@ static int gb_loopback_fn(void *data) error = gb_loopback_async_sink(gb, size); } - if (error) + if (error) { gb->error++; + gb->iteration_count++; + } } else { /* We are effectively single threaded here */ if (type == GB_LOOPBACK_TYPE_PING) -- GitLab From 510f6573ef5da5cc678e0fdb955f95c6125cd3bd Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 5 Sep 2017 22:57:06 +1000 Subject: [PATCH 0060/1398] m68k: fix ColdFire node shift size calculation [ Upstream commit f55ab8f27548ff3431a6567d400c6757c49fd520 ] The m68k pg_data_table is a fix size array defined in arch/m68k/mm/init.c. Index numbers within it are defined based on memory size. But for Coldfire these don't take into account a non-zero physical RAM base address, and this causes us to access past the end of this array at system start time. Change the node shift calculation so that we keep the index inside its range. Reported-by: Angelo Dureghello Tested-by: Angelo Dureghello Signed-off-by: Greg Ungerer Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/m68k/mm/mcfmmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 87131cd3bc8f..6d3a50446b21 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -169,7 +169,7 @@ void __init cf_bootmem_alloc(void) max_pfn = max_low_pfn = PFN_DOWN(_ramend); high_memory = (void *)_ramend; - m68k_virt_to_node_shift = fls(_ramend - _rambase - 1) - 6; + m68k_virt_to_node_shift = fls(_ramend - 1) - 6; module_fixup(NULL, __start_fixup, __stop_fixup); /* setup bootmem data */ -- GitLab From 946d33e1e6cd99e7191a6446c82ebc66d5432aab Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 28 Oct 2017 11:35:49 +0200 Subject: [PATCH 0061/1398] serial: 8250_fintek: Fix rs485 disablement on invalid ioctl() [ Upstream commit 3236a965486ba0c6043cf2c7b51943d8b382ae29 ] This driver's ->rs485_config callback checks if SER_RS485_RTS_ON_SEND and SER_RS485_RTS_AFTER_SEND have the same value. If they do, it means the user has passed in invalid data with the TIOCSRS485 ioctl() since RTS must have a different polarity when sending and when not sending. In this case, rs485 mode is not enabled (the RS485_URA bit is not set in the RS485 Enable Register) and this is supposed to be signaled back to the user by clearing the SER_RS485_ENABLED bit in struct serial_rs485 ... except a missing tilde character is preventing that from happening. Fixes: 28e3fb6c4dce ("serial: Add support for Fintek F81216A LPC to 4 UART") Cc: Ricardo Ribalda Delgado Cc: "Ji-Ze Hong (Peter Hong)" Signed-off-by: Lukas Wunner Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_fintek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c index f8c31070a337..2ffebb7e5ff8 100644 --- a/drivers/tty/serial/8250/8250_fintek.c +++ b/drivers/tty/serial/8250/8250_fintek.c @@ -121,7 +121,7 @@ static int fintek_8250_rs485_config(struct uart_port *port, if ((!!(rs485->flags & SER_RS485_RTS_ON_SEND)) == (!!(rs485->flags & SER_RS485_RTS_AFTER_SEND))) - rs485->flags &= SER_RS485_ENABLED; + rs485->flags &= ~SER_RS485_ENABLED; else config |= RS485_URA; -- GitLab From ee08fca9a14d846120d5c00af803002dbac8223b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 20 Oct 2017 20:40:24 +0200 Subject: [PATCH 0062/1398] staging: rtl8188eu: avoid a null dereference on pmlmepriv [ Upstream commit 123c0aab0050cd0e07ce18e453389fbbb0a5a425 ] There is a check on pmlmepriv before dereferencing it when vfree'ing pmlmepriv->free_bss_buf however the previous call to rtw_free_mlme_priv_ie_data deferences pmlmepriv causing a null pointer deference if it is null. Avoid this by also calling rtw_free_mlme_priv_ie_data if the pointer is non-null. Detected by CoverityScan, CID#1230262 ("Dereference before null check") Fixes: 7b464c9fa5cc ("staging: r8188eu: Add files for new driver - part 4") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/core/rtw_mlme.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/rtl8188eu/core/rtw_mlme.c b/drivers/staging/rtl8188eu/core/rtw_mlme.c index ee2dcd05010f..0b60d1e0333e 100644 --- a/drivers/staging/rtl8188eu/core/rtw_mlme.c +++ b/drivers/staging/rtl8188eu/core/rtw_mlme.c @@ -107,10 +107,10 @@ void rtw_free_mlme_priv_ie_data(struct mlme_priv *pmlmepriv) void rtw_free_mlme_priv(struct mlme_priv *pmlmepriv) { - rtw_free_mlme_priv_ie_data(pmlmepriv); - - if (pmlmepriv) + if (pmlmepriv) { + rtw_free_mlme_priv_ie_data(pmlmepriv); vfree(pmlmepriv->free_bss_buf); + } } struct wlan_network *_rtw_alloc_network(struct mlme_priv *pmlmepriv) -- GitLab From b3d7cc5a84b438cf738fe89310cac92f7f8dc7fe Mon Sep 17 00:00:00 2001 From: Hiromitsu Yamasaki Date: Thu, 2 Nov 2017 10:32:36 +0100 Subject: [PATCH 0063/1398] spi: sh-msiof: Fix DMA transfer size check [ Upstream commit 36735783fdb599c94b9c86824583df367c65900b ] DMA supports 32-bit words only, even if BITLEN1 of SITMDR2 register is 16bit. Fixes: b0d0ce8b6b91 ("spi: sh-msiof: Add DMA support") Signed-off-by: Hiromitsu Yamasaki Signed-off-by: Simon Horman Acked-by: Geert Uytterhoeven Acked-by: Dirk Behme Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-sh-msiof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 1de3a772eb7d..cbf02ebb30a2 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -862,7 +862,7 @@ static int sh_msiof_transfer_one(struct spi_master *master, break; copy32 = copy_bswap32; } else if (bits <= 16) { - if (l & 1) + if (l & 3) break; copy32 = copy_wswap32; } else { -- GitLab From e042fd0f03181475711c5350e88ac1f9ed5dce12 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Oct 2017 11:35:27 +0100 Subject: [PATCH 0064/1398] spi: spi-axi: fix potential use-after-free after deregistration [ Upstream commit 4d5e0689dc9d5640ad46cdfbe1896b74d8df1661 ] Take an extra reference to the controller before deregistering it to prevent use-after-free in the interrupt handler in case an interrupt fires before the line is disabled. Fixes: b1353d1c1d45 ("spi: Add Analog Devices AXI SPI Engine controller support") Acked-by: Lars-Peter Clausen Signed-off-by: Johan Hovold Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-axi-spi-engine.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index c1eafbd7610a..da51fed143cd 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -553,7 +553,7 @@ static int spi_engine_probe(struct platform_device *pdev) static int spi_engine_remove(struct platform_device *pdev) { - struct spi_master *master = platform_get_drvdata(pdev); + struct spi_master *master = spi_master_get(platform_get_drvdata(pdev)); struct spi_engine *spi_engine = spi_master_get_devdata(master); int irq = platform_get_irq(pdev, 0); @@ -561,6 +561,8 @@ static int spi_engine_remove(struct platform_device *pdev) free_irq(irq, master); + spi_master_put(master); + writel_relaxed(0xff, spi_engine->base + SPI_ENGINE_REG_INT_PENDING); writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_INT_ENABLE); writel_relaxed(0x01, spi_engine->base + SPI_ENGINE_REG_RESET); -- GitLab From de139e81d350526d2b44f5e5192a32443be08238 Mon Sep 17 00:00:00 2001 From: Subhash Jadavani Date: Wed, 27 Sep 2017 11:04:40 +0530 Subject: [PATCH 0065/1398] mmc: sdhci-msm: fix issue with power irq [ Upstream commit c7ccee224d2d551f712752c4a16947f6529d6506 ] SDCC controller reset (SW_RST) during probe may trigger power irq if previous status of PWRCTL was either BUS_ON or IO_HIGH_V. So before we enable the power irq interrupt in GIC (by registering the interrupt handler), we need to ensure that any pending power irq interrupt status is acknowledged otherwise power irq interrupt handler would be fired prematurely. Signed-off-by: Subhash Jadavani Signed-off-by: Vijay Viswanath Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-msm.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 90ed2e12d345..80c89a31d790 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -642,6 +642,21 @@ static int sdhci_msm_probe(struct platform_device *pdev) CORE_VENDOR_SPEC_CAPABILITIES0); } + /* + * Power on reset state may trigger power irq if previous status of + * PWRCTL was either BUS_ON or IO_HIGH_V. So before enabling pwr irq + * interrupt in GIC, any pending power irq interrupt should be + * acknowledged. Otherwise power irq interrupt handler would be + * fired prematurely. + */ + sdhci_msm_voltage_switch(host); + + /* + * Ensure that above writes are propogated before interrupt enablement + * in GIC. + */ + mb(); + /* Setup IRQ for handling power/voltage tasks with PMIC */ msm_host->pwr_irq = platform_get_irq_byname(pdev, "pwr_irq"); if (msm_host->pwr_irq < 0) { @@ -651,6 +666,9 @@ static int sdhci_msm_probe(struct platform_device *pdev) goto clk_disable; } + /* Enable pwr irq interrupts */ + writel_relaxed(INT_MASK, msm_host->core_mem + CORE_PWRCTL_MASK); + ret = devm_request_threaded_irq(&pdev->dev, msm_host->pwr_irq, NULL, sdhci_msm_pwr_irq, IRQF_ONESHOT, dev_name(&pdev->dev), host); -- GitLab From 68dfd4dd09003e6171208ea12c521186c809d8db Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 21 Oct 2017 01:02:07 +0300 Subject: [PATCH 0066/1398] usb: phy: tahvo: fix error handling in tahvo_usb_probe() [ Upstream commit ce035409bfa892a2fabb89720b542e1b335c3426 ] If devm_extcon_dev_allocate() fails, we should disable clk before return. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Fixes: 860d2686fda7 ("usb: phy: tahvo: Use devm_extcon_dev_[allocate|register]() and replace deprecated API") Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-tahvo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/phy/phy-tahvo.c b/drivers/usb/phy/phy-tahvo.c index ab5d364f6e8c..335a1ef35224 100644 --- a/drivers/usb/phy/phy-tahvo.c +++ b/drivers/usb/phy/phy-tahvo.c @@ -368,7 +368,8 @@ static int tahvo_usb_probe(struct platform_device *pdev) tu->extcon = devm_extcon_dev_allocate(&pdev->dev, tahvo_cable); if (IS_ERR(tu->extcon)) { dev_err(&pdev->dev, "failed to allocate memory for extcon\n"); - return -ENOMEM; + ret = PTR_ERR(tu->extcon); + goto err_disable_clk; } ret = devm_extcon_dev_register(&pdev->dev, tu->extcon); -- GitLab From a816a7e1e70b757bd54528240b34d3c9b6049e1e Mon Sep 17 00:00:00 2001 From: Aaron Sierra Date: Wed, 4 Oct 2017 10:01:28 -0500 Subject: [PATCH 0067/1398] serial: 8250: Preserve DLD[7:4] for PORT_XR17V35X [ Upstream commit 0ab84da2e076948c49d36197ee7d254125c53eab ] The upper four bits of the XR17V35x fractional divisor register (DLD) control general chip function (RS-485 direction pin polarity, multidrop mode, XON/XOFF parity check, and fast IR mode). Don't allow these bits to be clobbered when setting the baudrate. Signed-off-by: Aaron Sierra Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 1ef31e3ee4a1..f6e4373a8850 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2526,8 +2526,11 @@ static void serial8250_set_divisor(struct uart_port *port, unsigned int baud, serial_dl_write(up, quot); /* XR17V35x UARTs have an extra fractional divisor register (DLD) */ - if (up->port.type == PORT_XR17V35X) + if (up->port.type == PORT_XR17V35X) { + /* Preserve bits not related to baudrate; DLD[7:4]. */ + quot_frac |= serial_port_in(port, 0x2) & 0xf0; serial_port_out(port, 0x2, quot_frac); + } } static unsigned int serial8250_get_baud_rate(struct uart_port *port, -- GitLab From 61b7a6f1cea31d5b96419e25054b0f4177e1f576 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 18 Oct 2017 10:21:07 -0700 Subject: [PATCH 0068/1398] x86/entry: Use SYSCALL_DEFINE() macros for sys_modify_ldt() [ Upstream commit da20ab35180780e4a6eadc804544f1fa967f3567 ] We do not have tracepoints for sys_modify_ldt() because we define it directly instead of using the normal SYSCALL_DEFINEx() macros. However, there is a reason sys_modify_ldt() does not use the macros: it has an 'int' return type instead of 'unsigned long'. This is a bug, but it's a bug cemented in the ABI. What does this mean? If we return -EINVAL from a function that returns 'int', we have 0x00000000ffffffea in %rax. But, if we return -EINVAL from a function returning 'unsigned long', we end up with 0xffffffffffffffea in %rax, which is wrong. To work around this and maintain the 'int' behavior while using the SYSCALL_DEFINEx() macros, so we add a cast to 'unsigned int' in both implementations of sys_modify_ldt(). Signed-off-by: Dave Hansen Reviewed-by: Andy Lutomirski Reviewed-by: Brian Gerst Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171018172107.1A79C532@viggo.jf.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/syscalls.h | 2 +- arch/x86/kernel/ldt.c | 16 +++++++++++++--- arch/x86/um/ldt.c | 7 +++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 91dfcafe27a6..bad25bb80679 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); asmlinkage long sys_iopl(unsigned int); /* kernel/ldt.c */ -asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); +asmlinkage long sys_modify_ldt(int, void __user *, unsigned long); /* kernel/signal.c */ asmlinkage long sys_rt_sigreturn(void); diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 6707039b9032..5f70014ca602 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -271,8 +272,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) return error; } -asmlinkage int sys_modify_ldt(int func, void __user *ptr, - unsigned long bytecount) +SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr , + unsigned long , bytecount) { int ret = -ENOSYS; @@ -290,5 +291,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr, ret = write_ldt(ptr, bytecount, 0); break; } - return ret; + /* + * The SYSCALL_DEFINE() macros give us an 'unsigned long' + * return type, but tht ABI for sys_modify_ldt() expects + * 'int'. This cast gives us an int-sized value in %rax + * for the return code. The 'unsigned' is necessary so + * the compiler does not try to sign-extend the negative + * return codes into the high half of the register when + * taking the value from int->long. + */ + return (unsigned int)ret; } diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c index 836a1eb5df43..3ee234b6234d 100644 --- a/arch/x86/um/ldt.c +++ b/arch/x86/um/ldt.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm) mm->arch.ldt.entry_count = 0; } -int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr , + unsigned long , bytecount) { - return do_modify_ldt_skas(func, ptr, bytecount); + /* See non-um modify_ldt() for why we do this cast */ + return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount); } -- GitLab From 7f44b524a0ad75c5c7821d3cfb83e35dafbc5ab8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 16 Oct 2017 12:40:29 -0500 Subject: [PATCH 0069/1398] EDAC, sb_edac: Fix missing break in switch [ Upstream commit a8e9b186f153a44690ad0363a56716e7077ad28c ] Add missing break statement in order to prevent the code from falling through. Signed-off-by: Gustavo A. R. Silva Cc: Qiuxu Zhuo Cc: linux-edac Link: http://lkml.kernel.org/r/20171016174029.GA19757@embeddedor.com Signed-off-by: Borislav Petkov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/edac/sb_edac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 54775221a01f..3c47e6361d81 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2510,6 +2510,7 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci, break; case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA: pvt->pci_ta = pdev; + break; case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS: pvt->pci_ras = pdev; break; -- GitLab From 5840a285a1a2d21c27bb9443236b09220ba40679 Mon Sep 17 00:00:00 2001 From: Jibin Xu Date: Sun, 10 Sep 2017 20:11:42 -0700 Subject: [PATCH 0070/1398] sysrq : fix Show Regs call trace on ARM [ Upstream commit b00bebbc301c8e1f74f230dc82282e56b7e7a6db ] When kernel configuration SMP,PREEMPT and DEBUG_PREEMPT are enabled, echo 1 >/proc/sys/kernel/sysrq echo p >/proc/sysrq-trigger kernel will print call trace as below: sysrq: SysRq : Show Regs BUG: using __this_cpu_read() in preemptible [00000000] code: sh/435 caller is __this_cpu_preempt_check+0x18/0x20 Call trace: [] dump_backtrace+0x0/0x1d0 [] show_stack+0x24/0x30 [] dump_stack+0x90/0xb0 [] check_preemption_disabled+0x100/0x108 [] __this_cpu_preempt_check+0x18/0x20 [] sysrq_handle_showregs+0x1c/0x40 [] __handle_sysrq+0x12c/0x1a0 [] write_sysrq_trigger+0x60/0x70 [] proc_reg_write+0x90/0xd0 [] __vfs_write+0x48/0x90 [] vfs_write+0xa4/0x190 [] SyS_write+0x54/0xb0 [] el0_svc_naked+0x24/0x28 This can be seen on a common board like an r-pi3. This happens because when echo p >/proc/sysrq-trigger, get_irq_regs() is called outside of IRQ context, if preemption is enabled in this situation,kernel will print the call trace. Since many prior discussions on the mailing lists have made it clear that get_irq_regs either just returns NULL or stale data when used outside of IRQ context,we simply avoid calling it outside of IRQ context. Signed-off-by: Jibin Xu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/sysrq.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 701c085bb19b..53cbf4ebef10 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -243,8 +243,10 @@ static void sysrq_handle_showallcpus(int key) * architecture has no support for it: */ if (!trigger_all_cpu_backtrace()) { - struct pt_regs *regs = get_irq_regs(); + struct pt_regs *regs = NULL; + if (in_irq()) + regs = get_irq_regs(); if (regs) { pr_info("CPU%d:\n", smp_processor_id()); show_regs(regs); @@ -263,7 +265,10 @@ static struct sysrq_key_op sysrq_showallcpus_op = { static void sysrq_handle_showregs(int key) { - struct pt_regs *regs = get_irq_regs(); + struct pt_regs *regs = NULL; + + if (in_irq()) + regs = get_irq_regs(); if (regs) show_regs(regs); perf_event_print_debug(); -- GitLab From 8f6e33aad04888d65c3571986dc9b4e81a6f9a92 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 1 Oct 2017 02:18:37 +0100 Subject: [PATCH 0071/1398] usbip: tools: Install all headers needed for libusbip development [ Upstream commit c15562c0dcb2c7f26e891923b784cf1926b8c833 ] usbip_host_driver.h now depends on several additional headers, which need to be installed along with it. Fixes: 021aed845303 ("staging: usbip: userspace: migrate usbip_host_driver ...") Fixes: 3391ba0e2792 ("usbip: tools: Extract generic code to be shared with ...") Signed-off-by: Ben Hutchings Acked-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/Makefile.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/usb/usbip/Makefile.am b/tools/usb/usbip/Makefile.am index 66f8bf038c9f..45eaa70a71e0 100644 --- a/tools/usb/usbip/Makefile.am +++ b/tools/usb/usbip/Makefile.am @@ -1,6 +1,7 @@ SUBDIRS := libsrc src includedir = @includedir@/usbip include_HEADERS := $(addprefix libsrc/, \ - usbip_common.h vhci_driver.h usbip_host_driver.h) + usbip_common.h vhci_driver.h usbip_host_driver.h \ + list.h sysfs_utils.h usbip_host_common.h) dist_man_MANS := $(addprefix doc/, usbip.8 usbipd.8) -- GitLab From a730e156bb75094da4deb3d704a7b00c5fbff34e Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 13 Sep 2017 10:12:09 +0200 Subject: [PATCH 0072/1398] perf test attr: Fix ignored test case result [ Upstream commit 22905582f6dd4bbd0c370fe5732c607452010c04 ] Command perf test -v 16 (Setup struct perf_event_attr test) always reports success even if the test case fails. It works correctly if you also specify -F (for don't fork). root@s35lp76 perf]# ./perf test -v 16 15: Setup struct perf_event_attr : --- start --- running './tests/attr/test-record-no-delay' [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB /tmp/tmp4E1h7R/perf.data (1 samples) ] expected task=0, got 1 expected precise_ip=0, got 3 expected wakeup_events=1, got 0 FAILED './tests/attr/test-record-no-delay' - match failure test child finished with 0 ---- end ---- Setup struct perf_event_attr: Ok The reason for the wrong error reporting is the return value of the system() library call. It is called in run_dir() file tests/attr.c and returns the exit status, in above case 0xff00. This value is given as parameter to the exit() function which can only handle values 0-0xff. The child process terminates with exit value of 0 and the parent does not detect any error. This patch corrects the error reporting and prints the correct test result. Signed-off-by: Thomas-Mich Richter Acked-by: Jiri Olsa Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Cc: Thomas-Mich Richter LPU-Reference: 20170913081209.39570-2-tmricht@linux.vnet.ibm.com Link: http://lkml.kernel.org/n/tip-rdube6rfcjsr1nzue72c7lqn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/tests/attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 28d1605b0338..b60a6fd66517 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -150,7 +150,7 @@ static int run_dir(const char *d, const char *perf) snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", d, d, perf, vcnt, v); - return system(cmd); + return system(cmd) ? TEST_FAIL : TEST_OK; } int test__attr(int subtest __maybe_unused) -- GitLab From a65f24aac76b3834743ec1eaf9281cbd42658269 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 19 Sep 2017 19:01:40 +0900 Subject: [PATCH 0073/1398] kprobes/x86: Disable preemption in ftrace-based jprobes [ Upstream commit 5bb4fc2d8641219732eb2bb654206775a4219aca ] Disable preemption in ftrace-based jprobe handlers as described in Documentation/kprobes.txt: "Probe handlers are run with preemption disabled." This will fix jprobes behavior when CONFIG_PREEMPT=y. Signed-off-by: Masami Hiramatsu Cc: Alexei Starovoitov Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Linus Torvalds Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/150581530024.32348.9863783558598926771.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/ftrace.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 5f8f0b3cc674..2c0b0b645a74 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -26,7 +26,7 @@ #include "common.h" static nokprobe_inline -int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, +void __skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, unsigned long orig_ip) { /* @@ -41,20 +41,21 @@ int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, __this_cpu_write(current_kprobe, NULL); if (orig_ip) regs->ip = orig_ip; - return 1; } int skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - if (kprobe_ftrace(p)) - return __skip_singlestep(p, regs, kcb, 0); - else - return 0; + if (kprobe_ftrace(p)) { + __skip_singlestep(p, regs, kcb, 0); + preempt_enable_no_resched(); + return 1; + } + return 0; } NOKPROBE_SYMBOL(skip_singlestep); -/* Ftrace callback handler for kprobes */ +/* Ftrace callback handler for kprobes -- called under preepmt disabed */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) { @@ -77,13 +78,17 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ regs->ip = ip + sizeof(kprobe_opcode_t); + /* To emulate trap based kprobes, preempt_disable here */ + preempt_disable(); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) + if (!p->pre_handler || !p->pre_handler(p, regs)) { __skip_singlestep(p, regs, kcb, orig_ip); + preempt_enable_no_resched(); + } /* * If pre_handler returns !0, it sets regs->ip and - * resets current kprobe. + * resets current kprobe, and keep preempt count +1. */ } end: -- GitLab From 7aa534b8a33f25dbaef0381c86b4c49bf0fedf25 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 21 Sep 2017 12:12:17 -0300 Subject: [PATCH 0074/1398] tools include: Do not use poison with C++ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6ae8eefc6c8fe050f057781b70a83262eb0a61ee ] LIST_POISON[12] are used to initialize list_head and hlist_node pointers, and do void pointer arithmetic, which C++ doesn't like, so, to avoid drifting from the kernel by introducing some HLIST_POISON to do away with void pointer math, just make those poisoned pointers be NULL when building it with a C++ compiler. Noticed with: $ make LLVM_CONFIG=/usr/bin/llvm-config-3.9 LIBCLANGLLVM=1 CXX util/c++/clang.o CXX util/c++/clang-test.o In file included from /home/lizj/linux/tools/include/linux/list.h:5:0, from /home/lizj/linux/tools/perf/util/namespaces.h:13, from /home/lizj/linux/tools/perf/util/util.h:15, from /home/lizj/linux/tools/perf/util/util-cxx.h:20, from util/c++/clang-c.h:5, from util/c++/clang-test.cpp:2: /home/lizj/linux/tools/include/linux/list.h: In function ‘void list_del(list_head*)’: /home/lizj/linux/tools/include/linux/poison.h:14:31: error: pointer of type ‘void *’ used in arithmetic [-Werror=pointer-arith] # define POISON_POINTER_DELTA 0 ^ /home/lizj/linux/tools/include/linux/poison.h:22:41: note: in expansion of macro ‘POISON_POINTER_DELTA’ #define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) ^ /home/lizj/linux/tools/include/linux/list.h:107:16: note: in expansion of macro ‘LIST_POISON1’ entry->next = LIST_POISON1; ^ In file included from /home/lizj/linux/tools/perf/util/namespaces.h:13:0, from /home/lizj/linux/tools/perf/util/util.h:15, from /home/lizj/linux/tools/perf/util/util-cxx.h:20, from util/c++/clang-c.h:5, from util/c++/clang-test.cpp:2: /home/lizj/linux/tools/include/linux/list.h:107:14: error: invalid conversion from ‘void*’ to ‘list_head*’ [-fpermissive] Reported-by: Li Zhijian Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Philip Li Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-m5ei2o0mjshucbr28baf5lqz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/include/linux/poison.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h index 51334edec506..f306a7642509 100644 --- a/tools/include/linux/poison.h +++ b/tools/include/linux/poison.h @@ -14,6 +14,10 @@ # define POISON_POINTER_DELTA 0 #endif +#ifdef __cplusplus +#define LIST_POISON1 NULL +#define LIST_POISON2 NULL +#else /* * These are non-NULL pointers that will result in page faults * under normal circumstances, used to verify that nobody uses @@ -21,6 +25,7 @@ */ #define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) #define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA) +#endif /********** include/linux/timer.h **********/ /* -- GitLab From 2383ba62b5352e02df75d3b6dff2dfd156ce8f1c Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 25 Aug 2017 07:39:16 +0200 Subject: [PATCH 0075/1398] iio: adc: ti-ads1015: add 10% to conversion wait time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fe895ac88b9fbdf2026f0bfd56c82747bb9d7c48 ] As user's guide "ADS1015EVM, ADS1115EVM, ADS1015EVM-PDK, ADS1115EVM-PDK User Guide (Rev. B)" (http://www.ti.com/lit/ug/sbau157b/sbau157b.pdf) states at page 16: "Note that both the ADS1115 and ADS1015 have internal clocks with a ±10% accuracy. If performing FFT tests, frequencies may appear to be incorrect as a result of this tolerance range.", add those 10% to converion wait time. Cc: Daniel Baluta Cc: Jonathan Cameron Signed-off-by: Ladislav Michl Reviewed-by: Akinobu Mita Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti-ads1015.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index 472641fc890c..af05e20c986b 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -269,6 +269,7 @@ int ads1015_get_adc_result(struct ads1015_data *data, int chan, int *val) conv_time = DIV_ROUND_UP(USEC_PER_SEC, data->data_rate[dr_old]); conv_time += DIV_ROUND_UP(USEC_PER_SEC, data->data_rate[dr]); + conv_time += conv_time / 10; /* 10% internal clock inaccuracy */ usleep_range(conv_time, conv_time + 1); data->conv_invalid = false; } -- GitLab From 771c831c1bdef159ff6fa01a6c49f0890b1f3066 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 10 Aug 2016 17:10:28 +0200 Subject: [PATCH 0076/1398] dax: Avoid page invalidation races and unnecessary radix tree traversals [ Upstream commit e3fce68cdbed297d927e993b3ea7b8b1cee545da ] Currently dax_iomap_rw() takes care of invalidating page tables and evicting hole pages from the radix tree when write(2) to the file happens. This invalidation is only necessary when there is some block allocation resulting from write(2). Furthermore in current place the invalidation is racy wrt page fault instantiating a hole page just after we have invalidated it. So perform the page invalidation inside dax_iomap_actor() where we can do it only when really necessary and after blocks have been allocated so nobody will be instantiating new hole pages anymore. Reviewed-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/dax.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index bf6218da7928..800748f10b3d 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1265,6 +1265,17 @@ iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED)) return -EIO; + /* + * Write can allocate block for an area which has a hole page mapped + * into page tables. We have to tear down these mappings so that data + * written by write(2) is visible in mmap. + */ + if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) { + invalidate_inode_pages2_range(inode->i_mapping, + pos >> PAGE_SHIFT, + (end - 1) >> PAGE_SHIFT); + } + while (pos < end) { unsigned offset = pos & (PAGE_SIZE - 1); struct blk_dax_ctl dax = { 0 }; @@ -1329,23 +1340,6 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == WRITE) flags |= IOMAP_WRITE; - /* - * Yes, even DAX files can have page cache attached to them: A zeroed - * page is inserted into the pagecache when we have to serve a write - * fault on a hole. It should never be dirtied and can simply be - * dropped from the pagecache once we get real data for the page. - * - * XXX: This is racy against mmap, and there's nothing we can do about - * it. We'll eventually need to shift this down even further so that - * we can check if we allocated blocks over a hole first. - */ - if (mapping->nrpages) { - ret = invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, - (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - } - while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, iter, iomap_dax_actor); -- GitLab From 55c050ae5165bbeff669aaf32c7ed4dfc8f513b4 Mon Sep 17 00:00:00 2001 From: Slava Shwartsman Date: Thu, 29 Dec 2016 18:37:12 +0200 Subject: [PATCH 0077/1398] net/mlx4_en: Fix type mismatch for 32-bit systems [ Upstream commit 61b6034c6cfdcb265bb453505c3d688e7567727a ] is_power_of_2 expects unsigned long and we pass u64 max_val_cycles, this will be truncated on 32 bit systems, and the result is not what we were expecting. div_u64 expects u32 as a second argument and we pass max_val_cycles_rounded which is u64 hence it will always be truncated. Fix was tested on both 64 and 32 bit systems and got same results for max_val_cycles and max_val_cycles_rounded. Fixes: 4850cf458157 ("net/mlx4_en: Resolve dividing by zero in 32-bit system") Signed-off-by: Slava Shwartsman Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index d4d97ca12e83..f9897d17f01d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -251,13 +251,9 @@ static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; - u64 tmp_rounded = - roundup_pow_of_two(max_val_cycles) > max_val_cycles ? - roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX; - u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? - max_val_cycles : tmp_rounded; + u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1); /* calculate max possible multiplier in order to fit in 64bit */ - u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); + u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded); /* This comes from the reverse of clocksource_khz2mult */ return ilog2(div_u64(max_mul * freq_khz, 1000000)); -- GitLab From d2d74d0e58b20d4464f9f24b33e083a3f6766aa6 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 30 Dec 2016 19:48:20 +0100 Subject: [PATCH 0078/1398] l2tp: take remote address into account in l2tp_ip and l2tp_ip6 socket lookups [ Upstream commit a9b2dff80be979432484afaf7f8d8e73f9e8838a ] For connected sockets, __l2tp_ip{,6}_bind_lookup() needs to check the remote IP when looking for a matching socket. Otherwise a connected socket can receive traffic not originating from its peer. Drop l2tp_ip_bind_lookup() and l2tp_ip6_bind_lookup() instead of updating their prototype, as these functions aren't used. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip.c | 19 ++++++------------- net/l2tp/l2tp_ip6.c | 20 ++++++-------------- 2 files changed, 12 insertions(+), 27 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 3468d5635d0a..9d77a54e8854 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -48,7 +48,8 @@ static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk) return (struct l2tp_ip_sock *)sk; } -static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) +static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr, + __be32 raddr, int dif, u32 tunnel_id) { struct sock *sk; @@ -62,6 +63,7 @@ static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif if ((l2tp->conn_id == tunnel_id) && net_eq(sock_net(sk), net) && !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && + (!inet->inet_daddr || !raddr || inet->inet_daddr == raddr) && (!sk->sk_bound_dev_if || !dif || sk->sk_bound_dev_if == dif)) goto found; @@ -72,15 +74,6 @@ static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif return sk; } -static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) -{ - struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id); - if (sk) - sock_hold(sk); - - return sk; -} - /* When processing receive frames, there are two cases to * consider. Data frames consist of a non-zero session-id and an * optional cookie. Control frames consist of a regular L2TP header @@ -186,8 +179,8 @@ static int l2tp_ip_recv(struct sk_buff *skb) struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(net, iph->daddr, inet_iif(skb), - tunnel_id); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, + inet_iif(skb), tunnel_id); if (!sk) { read_unlock_bh(&l2tp_ip_lock); goto discard; @@ -289,7 +282,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ write_lock_bh(&l2tp_ip_lock); - if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, + if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, 0, sk->sk_bound_dev_if, addr->l2tp_conn_id)) { write_unlock_bh(&l2tp_ip_lock); ret = -EADDRINUSE; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 1d522ce833e6..247097289fd0 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -59,12 +59,14 @@ static inline struct l2tp_ip6_sock *l2tp_ip6_sk(const struct sock *sk) static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct in6_addr *laddr, + const struct in6_addr *raddr, int dif, u32 tunnel_id) { struct sock *sk; sk_for_each_bound(sk, &l2tp_ip6_bind_table) { const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk); + const struct in6_addr *sk_raddr = &sk->sk_v6_daddr; struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); if (l2tp == NULL) @@ -73,6 +75,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, if ((l2tp->conn_id == tunnel_id) && net_eq(sock_net(sk), net) && (!sk_laddr || ipv6_addr_any(sk_laddr) || ipv6_addr_equal(sk_laddr, laddr)) && + (!raddr || ipv6_addr_any(sk_raddr) || ipv6_addr_equal(sk_raddr, raddr)) && (!sk->sk_bound_dev_if || !dif || sk->sk_bound_dev_if == dif)) goto found; @@ -83,17 +86,6 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, return sk; } -static inline struct sock *l2tp_ip6_bind_lookup(struct net *net, - struct in6_addr *laddr, - int dif, u32 tunnel_id) -{ - struct sock *sk = __l2tp_ip6_bind_lookup(net, laddr, dif, tunnel_id); - if (sk) - sock_hold(sk); - - return sk; -} - /* When processing receive frames, there are two cases to * consider. Data frames consist of a non-zero session-id and an * optional cookie. Control frames consist of a regular L2TP header @@ -200,8 +192,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb) struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); - sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, inet6_iif(skb), - tunnel_id); + sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, + inet6_iif(skb), tunnel_id); if (!sk) { read_unlock_bh(&l2tp_ip6_lock); goto discard; @@ -339,7 +331,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) rcu_read_unlock(); write_lock_bh(&l2tp_ip6_lock); - if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, bound_dev_if, + if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, NULL, bound_dev_if, addr->l2tp_conn_id)) { write_unlock_bh(&l2tp_ip6_lock); err = -EADDRINUSE; -- GitLab From 000e7180633f254aef401a8cbca7f470ead41fed Mon Sep 17 00:00:00 2001 From: M'boumba Cedric Madianga Date: Tue, 13 Dec 2016 14:40:43 +0100 Subject: [PATCH 0079/1398] dmaengine: stm32-dma: Set correct args number for DMA request from DT [ Upstream commit 7e96304d99477de1f70db42035071e56439da817 ] This patch sets the right number of arguments to be used for DMA clients which request channels from DT. Signed-off-by: M'boumba Cedric Madianga Reviewed-by: Ludovic BARRE Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/stm32-dma.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 307547f4848d..bd758e67843c 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -976,21 +976,18 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec, struct stm32_dma_chan *chan; struct dma_chan *c; - if (dma_spec->args_count < 3) + if (dma_spec->args_count < 4) return NULL; cfg.channel_id = dma_spec->args[0]; cfg.request_line = dma_spec->args[1]; cfg.stream_config = dma_spec->args[2]; - cfg.threshold = 0; + cfg.threshold = dma_spec->args[3]; if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) || (cfg.request_line >= STM32_DMA_MAX_REQUEST_ID)) return NULL; - if (dma_spec->args_count > 3) - cfg.threshold = dma_spec->args[3]; - chan = &dmadev->chan[cfg.channel_id]; c = dma_get_slave_channel(&chan->vchan.chan); -- GitLab From 7468e767d8f2115427f2066203e5262f5dee8c5c Mon Sep 17 00:00:00 2001 From: M'boumba Cedric Madianga Date: Tue, 13 Dec 2016 14:40:46 +0100 Subject: [PATCH 0080/1398] dmaengine: stm32-dma: Fix null pointer dereference in stm32_dma_tx_status [ Upstream commit 57b5a32135c813f2ab669039fb4ec16b30cb3305 ] chan->desc is always set to NULL when a DMA transfer is complete. As a DMA transfer could be complete during the call of stm32_dma_tx_status, we need to be sure that chan->desc is not NULL before using this variable to avoid a null pointer deference issue. Signed-off-by: M'boumba Cedric Madianga Reviewed-by: Ludovic BARRE Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman Acked-by: Pierre-Yves MORDRET --- drivers/dma/stm32-dma.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index bd758e67843c..ae3f60be7759 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -884,7 +884,7 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c, struct virt_dma_desc *vdesc; enum dma_status status; unsigned long flags; - u32 residue; + u32 residue = 0; status = dma_cookie_status(c, cookie, state); if ((status == DMA_COMPLETE) || (!state)) @@ -892,16 +892,12 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c, spin_lock_irqsave(&chan->vchan.lock, flags); vdesc = vchan_find_desc(&chan->vchan, cookie); - if (cookie == chan->desc->vdesc.tx.cookie) { + if (chan->desc && cookie == chan->desc->vdesc.tx.cookie) residue = stm32_dma_desc_residue(chan, chan->desc, chan->next_sg); - } else if (vdesc) { + else if (vdesc) residue = stm32_dma_desc_residue(chan, to_stm32_dma_desc(vdesc), 0); - } else { - residue = 0; - } - dma_set_residue(state, residue); spin_unlock_irqrestore(&chan->vchan.lock, flags); -- GitLab From 5eb97be8798189ee1080b225d0c8f14f54736bca Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Thu, 15 Dec 2016 12:47:42 +0000 Subject: [PATCH 0081/1398] usb: gadget: f_fs: Fix ExtCompat descriptor validation [ Upstream commit 354bc45bf329494ef6051f3229ef50b9e2a7ea2a ] Reserved1 is documented as expected to be set to 0, but this test fails when it it set to 0. Reverse the condition. Signed-off-by: Vincent Pelletier Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 273320fa30ae..4fce83266926 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2263,7 +2263,7 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, if (len < sizeof(*d) || d->bFirstInterfaceNumber >= ffs->interfaces_count || - !d->Reserved1) + d->Reserved1) return -EINVAL; for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i) if (d->Reserved2[i]) -- GitLab From 30bd002c454ad54ccca08dc056577c94021085af Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 3 Jan 2017 21:25:48 +0530 Subject: [PATCH 0082/1398] libcxgb: fix error check for ip6_route_output() [ Upstream commit a9a8cdb368d99bb655b5cdabea560446db0527cc ] ip6_route_output() never returns NULL so check dst->error instead of !dst. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index 0f0de5b63622..d04a6c163445 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -133,17 +133,15 @@ cxgb_find_route6(struct cxgb4_lld_info *lldi, if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) fl6.flowi6_oif = sin6_scope_id; dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!cxgb_our_interface(lldi, get_real_dev, - ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { + if (dst->error || + (!cxgb_our_interface(lldi, get_real_dev, + ip6_dst_idev(dst)->dev) && + !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK))) { dst_release(dst); - dst = NULL; + return NULL; } } -out: return dst; } EXPORT_SYMBOL(cxgb_find_route6); -- GitLab From bcc7511ef2566a1ca1945f449122e7e34d43de2a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 3 Jan 2017 16:34:48 -0800 Subject: [PATCH 0083/1398] net: systemport: Utilize skb_put_padto() [ Upstream commit bb7da333d0a9f3bddc08f84187b7579a3f68fd24 ] Since we need to pad our packets, utilize skb_put_padto() which increases skb->len by how much we need to pad, allowing us to eliminate the test on skb->len right below. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bcmsysport.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index be7ec5a76a54..c89e89979267 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1039,13 +1039,12 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, * (including FCS and tag) because the length verification is done after * the Broadcom tag is stripped off the ingress packet. */ - if (skb_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) { + if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) { ret = NETDEV_TX_OK; goto out; } - skb_len = skb->len < ETH_ZLEN + ENET_BRCM_TAG_LEN ? - ETH_ZLEN + ENET_BRCM_TAG_LEN : skb->len; + skb_len = skb->len; mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE); if (dma_mapping_error(kdev, mapping)) { -- GitLab From a97b72427e5f30c0711605fa16eed070ddb0a1b7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 3 Jan 2017 16:34:49 -0800 Subject: [PATCH 0084/1398] net: systemport: Pad packet before inserting TSB [ Upstream commit 38e5a85562a6cd911fc26d951d576551a688574c ] Inserting the TSB means adding an extra 8 bytes in front the of packet that is going to be used as metadata information by the TDMA engine, but stripped off, so it does not really help with the packet padding. For some odd packet sizes that fall below the 60 bytes payload (e.g: ARP) we can end-up padding them after the TSB insertion, thus making them 64 bytes, but with the TDMA stripping off the first 8 bytes, they could still be smaller than 64 bytes which is required to ingress the switch. Fix this by swapping the padding and TSB insertion, guaranteeing that the packets have the right sizes. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bcmsysport.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index c89e89979267..744ed6ddaf37 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1023,15 +1023,6 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, goto out; } - /* Insert TSB and checksum infos */ - if (priv->tsb_en) { - skb = bcm_sysport_insert_tsb(skb, dev); - if (!skb) { - ret = NETDEV_TX_OK; - goto out; - } - } - /* The Ethernet switch we are interfaced with needs packets to be at * least 64 bytes (including FCS) otherwise they will be discarded when * they enter the switch port logic. When Broadcom tags are enabled, we @@ -1044,6 +1035,15 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, goto out; } + /* Insert TSB and checksum infos */ + if (priv->tsb_en) { + skb = bcm_sysport_insert_tsb(skb, dev); + if (!skb) { + ret = NETDEV_TX_OK; + goto out; + } + } + skb_len = skb->len; mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE); -- GitLab From 9a777021eeb49279014e490835617dcb5f09d85c Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 3 Jan 2017 11:37:48 -0600 Subject: [PATCH 0085/1398] ARM: OMAP2+: Fix WL1283 Bluetooth Baud Rate [ Upstream commit a3ac350793d90d1da631c8beeee9352387974ed5 ] Commit 485fa1261f78 ("ARM: OMAP2+: LogicPD Torpedo + Wireless: Add Bluetooth") set the wrong baud rate for the UART. The Baud rate was 300,000 and it should be 3,000,000 for WL1283. Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/pdata-quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index da310bb779b9..88676fe9b119 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -147,7 +147,7 @@ static struct ti_st_plat_data wilink_pdata = { .nshutdown_gpio = 137, .dev_name = "/dev/ttyO1", .flow_cntrl = 1, - .baud_rate = 300000, + .baud_rate = 3000000, }; static struct platform_device wl18xx_device = { -- GitLab From 08bd389c9ef830e5463fa0dda69370b4e3b14de3 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 3 Jan 2017 13:22:34 +0200 Subject: [PATCH 0086/1398] ARM: OMAP1: DMA: Correct the number of logical channels [ Upstream commit 657279778af54f35e54b07b6687918f254a2992c ] OMAP1510, OMAP5910 and OMAP310 have only 9 logical channels. OMAP1610, OMAP5912, OMAP1710, OMAP730, and OMAP850 have 16 logical channels available. The wired 17 for the lch_count must have been used to cover the 16 + 1 dedicated LCD channel, in reality we can only use 9 or 16 channels. The d->chan_count is not used by the omap-dma stack, so we can skip the setup. chan_count was configured to the number of logical channels and not the actual number of physical channels anyways. Signed-off-by: Peter Ujfalusi Acked-by: Aaro Koskinen Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap1/dma.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/arm/mach-omap1/dma.c b/arch/arm/mach-omap1/dma.c index f6ba589cd312..c821c1d5610e 100644 --- a/arch/arm/mach-omap1/dma.c +++ b/arch/arm/mach-omap1/dma.c @@ -32,7 +32,6 @@ #include "soc.h" #define OMAP1_DMA_BASE (0xfffed800) -#define OMAP1_LOGICAL_DMA_CH_COUNT 17 static u32 enable_1510_mode; @@ -348,8 +347,6 @@ static int __init omap1_system_dma_init(void) goto exit_iounmap; } - d->lch_count = OMAP1_LOGICAL_DMA_CH_COUNT; - /* Valid attributes for omap1 plus processors */ if (cpu_is_omap15xx()) d->dev_caps = ENABLE_1510_MODE; @@ -366,13 +363,14 @@ static int __init omap1_system_dma_init(void) d->dev_caps |= CLEAR_CSR_ON_READ; d->dev_caps |= IS_WORD_16; - if (cpu_is_omap15xx()) - d->chan_count = 9; - else if (cpu_is_omap16xx() || cpu_is_omap7xx()) { - if (!(d->dev_caps & ENABLE_1510_MODE)) - d->chan_count = 16; + /* available logical channels */ + if (cpu_is_omap15xx()) { + d->lch_count = 9; + } else { + if (d->dev_caps & ENABLE_1510_MODE) + d->lch_count = 9; else - d->chan_count = 9; + d->lch_count = 16; } p = dma_plat_info; -- GitLab From 105b403ba0dedc930882e8aeae361d650b9c49d3 Mon Sep 17 00:00:00 2001 From: David Forster Date: Fri, 6 Jan 2017 10:27:59 +0000 Subject: [PATCH 0087/1398] vti6: fix device register to report IFLA_INFO_KIND [ Upstream commit 93e246f783e6bd1bc64fdfbfe68b18161f69b28e ] vti6 interface is registered before the rtnl_link_ops block is attached. As a result the resulting RTM_NEWLINK is missing IFLA_INFO_KIND. Re-order attachment of rtnl_link_ops block to fix. Signed-off-by: Dave Forster Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_vti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 816f79d1a8a3..67e882d49195 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -189,12 +189,12 @@ static int vti6_tnl_create2(struct net_device *dev) struct vti6_net *ip6n = net_generic(net, vti6_net_id); int err; + dev->rtnl_link_ops = &vti6_link_ops; err = register_netdevice(dev); if (err < 0) goto out; strcpy(t->parms.name, dev->name); - dev->rtnl_link_ops = &vti6_link_ops; dev_hold(dev); vti6_tnl_link(ip6n, t); -- GitLab From b4c3022e673a0c368e16d296691719a6f920ad74 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 6 Jan 2017 20:30:02 +0100 Subject: [PATCH 0088/1398] be2net: fix accesses to unicast list [ Upstream commit 1d0f110a2c6c4bca3dbcc4b0e27f1e3dc2d44a2c ] Commit 988d44b "be2net: Avoid redundant addition of mac address in HW" introduced be_dev_mac_add & be_uc_mac_add helpers that incorrectly access adapter->uc_list as an array of bytes instead of an array of be_eth_addr. Consequently NIC is not filled with valid data so unicast filtering is broken. Cc: Sathya Perla Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Fixes: 988d44b be2net: Avoid redundant addition of mac address in HW Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_main.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 5626908f3f7a..c82f22b244c1 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -275,8 +275,7 @@ static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac) /* Check if mac has already been added as part of uc-list */ for (i = 0; i < adapter->uc_macs; i++) { - if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN], - mac)) { + if (ether_addr_equal(adapter->uc_list[i].mac, mac)) { /* mac already added, skip addition */ adapter->pmac_id[0] = adapter->pmac_id[i + 1]; return 0; @@ -1679,14 +1678,12 @@ static void be_clear_mc_list(struct be_adapter *adapter) static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx) { - if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN], - adapter->dev_mac)) { + if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) { adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0]; return 0; } - return be_cmd_pmac_add(adapter, - (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN], + return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac, adapter->if_handle, &adapter->pmac_id[uc_idx + 1], 0); } -- GitLab From e3a252a9924cb80678092e841cada6c48773eef2 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 6 Jan 2017 21:59:30 +0100 Subject: [PATCH 0089/1398] be2net: fix unicast list filling [ Upstream commit 6052cd1af86f9833b6b0b60d5d4787c4a06d65ea ] The adapter->pmac_id[0] item is used for primary MAC address but this is not true for adapter->uc_list[0] as is assumed in be_set_uc_list(). There are N UC addresses copied first from net_device to adapter->uc_list[1..N] and then N UC addresses from adapter->uc_list[0..N-1] are sent to HW. So the last UC address is never stored into HW and address 00:00:00:00;00:00 (from uc_list[0]) is used instead. Cc: Sathya Perla Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Fixes: b717241 be2net: replace polling with sleeping in the FW completion path Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index c82f22b244c1..fbb396b08495 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1719,9 +1719,8 @@ static void be_set_uc_list(struct be_adapter *adapter) } if (adapter->update_uc_list) { - i = 1; /* First slot is claimed by the Primary MAC */ - /* cache the uc-list in adapter array */ + i = 0; netdev_for_each_uc_addr(ha, netdev) { ether_addr_copy(adapter->uc_list[i].mac, ha->addr); i++; -- GitLab From d9a4e70f23c3b648a1cb60827f2580a897642963 Mon Sep 17 00:00:00 2001 From: Vlad Tsyrklevich Date: Mon, 9 Jan 2017 20:57:48 +0700 Subject: [PATCH 0090/1398] net/appletalk: Fix kernel memory disclosure [ Upstream commit ce7e40c432ba84da104438f6799d460a4cad41bc ] ipddp_route structs contain alignment padding so kernel heap memory is leaked when they are copied to user space in ipddp_ioctl(SIOCFINDIPDDPRT). Change kmalloc() to kzalloc() to clear that memory. Signed-off-by: Vlad Tsyrklevich Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/appletalk/ipddp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c index e90c6a7333d7..2e4649655181 100644 --- a/drivers/net/appletalk/ipddp.c +++ b/drivers/net/appletalk/ipddp.c @@ -191,7 +191,7 @@ static netdev_tx_t ipddp_xmit(struct sk_buff *skb, struct net_device *dev) */ static int ipddp_create(struct ipddp_route *new_rt) { - struct ipddp_route *rt = kmalloc(sizeof(*rt), GFP_KERNEL); + struct ipddp_route *rt = kzalloc(sizeof(*rt), GFP_KERNEL); if (rt == NULL) return -ENOMEM; -- GitLab From c4081f91981b9db6ef27d4cbf9b475ccaf9a56fe Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 4 Jan 2017 17:37:27 +1300 Subject: [PATCH 0091/1398] libfs: Modify mount_pseudo_xattr to be clear it is not a userspace mount [ Upstream commit 75422726b0f717d67db3283c2eb5bc14fa2619c5 ] Add MS_KERNMOUNT to the flags that are passed. Use sget_userns and force &init_user_ns instead of calling sget so that even if called from a weird context the internal filesystem will be considered to be in the intial user namespace. Luis Ressel reported that the the failure to pass MS_KERNMOUNT into mount_pseudo broke his in development graphics driver that uses the generic drm infrastructure. I am not certain the deriver was bug free in it's usage of that infrastructure but since mount_pseudo_xattr can never be triggered by userspace it is clearer and less error prone, and less problematic for the code to be explicit. Reported-by: Luis Ressel Tested-by: Luis Ressel Acked-by: Al Viro Signed-off-by: "Eric W. Biederman" Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/libfs.c b/fs/libfs.c index 48826d4da189..9588780ad43e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -245,7 +245,8 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name, struct inode *root; struct qstr d_name = QSTR_INIT(name, strlen(name)); - s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL); + s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER, + &init_user_ns, NULL); if (IS_ERR(s)) return ERR_CAST(s); -- GitLab From 71ab86262b66a594976259e1be18879ecfb16c7c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 9 Jan 2017 14:31:58 -0800 Subject: [PATCH 0092/1398] net: qrtr: Mark 'buf' as little endian [ Upstream commit 3512a1ad56174308a9fd3e10f4b1e3e152e9ec01 ] Failure to mark this pointer as __le32 causes checkers like sparse to complain: net/qrtr/qrtr.c:274:16: warning: incorrect type in assignment (different base types) net/qrtr/qrtr.c:274:16: expected unsigned int [unsigned] [usertype] net/qrtr/qrtr.c:274:16: got restricted __le32 [usertype] net/qrtr/qrtr.c:275:16: warning: incorrect type in assignment (different base types) net/qrtr/qrtr.c:275:16: expected unsigned int [unsigned] [usertype] net/qrtr/qrtr.c:275:16: got restricted __le32 [usertype] net/qrtr/qrtr.c:276:16: warning: incorrect type in assignment (different base types) net/qrtr/qrtr.c:276:16: expected unsigned int [unsigned] [usertype] net/qrtr/qrtr.c:276:16: got restricted __le32 [usertype] Silence it. Cc: Bjorn Andersson Signed-off-by: Stephen Boyd Acked-by: Bjorn Andersson Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/qrtr/qrtr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index c985ecbe9bd6..ae5ac175b2be 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -252,7 +252,7 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, const int pkt_len = 20; struct qrtr_hdr *hdr; struct sk_buff *skb; - u32 *buf; + __le32 *buf; skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL); if (!skb) @@ -269,7 +269,7 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, hdr->dst_node_id = cpu_to_le32(dst_node); hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); - buf = (u32 *)skb_put(skb, pkt_len); + buf = (__le32 *)skb_put(skb, pkt_len); memset(buf, 0, pkt_len); buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX); buf[1] = cpu_to_le32(src_node); -- GitLab From fae478cd9311b1e8bf85f318a2098defabfb71e5 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 10 Jan 2017 16:57:39 -0800 Subject: [PATCH 0093/1398] mm: fix remote numa hits statistics [ Upstream commit 2df26639e708a88dcc22171949da638a9998f3bc ] Jia He has noticed that commit b9f00e147f27 ("mm, page_alloc: reduce branches in zone_statistics") has an unintentional side effect that remote node allocation requests are accounted as NUMA_MISS rathat than NUMA_HIT and NUMA_OTHER if such a request doesn't use __GFP_OTHER_NODE. There are many of these potentially because the flag is used very rarely while we have many users of __alloc_pages_node. Fix this by simply ignoring __GFP_OTHER_NODE (it can be removed in a follow up patch) and treat all allocations that were satisfied from the preferred zone's node as NUMA_HITS because this is the same node we requested the allocation from in most cases. If this is not the local node then we just account it as NUMA_OTHER rather than NUMA_LOCAL. One downsize would be that an allocation request for a node which is outside of the mempolicy nodemask would be reported as a hit which is a bit weird but that was the case before b9f00e147f27 already. Fixes: b9f00e147f27 ("mm, page_alloc: reduce branches in zone_statistics") Link: http://lkml.kernel.org/r/20170102153057.9451-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Jia He Reviewed-by: Vlastimil Babka # with cbmc[1] superpowers Acked-by: Mel Gorman Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Taku Izumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ef5ee56095e8..fbc38888252b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2592,30 +2592,23 @@ int __isolate_free_page(struct page *page, unsigned int order) * Update NUMA hit/miss statistics * * Must be called with interrupts disabled. - * - * When __GFP_OTHER_NODE is set assume the node of the preferred - * zone is the local node. This is useful for daemons who allocate - * memory on behalf of other processes. */ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags) { #ifdef CONFIG_NUMA - int local_nid = numa_node_id(); enum zone_stat_item local_stat = NUMA_LOCAL; - if (unlikely(flags & __GFP_OTHER_NODE)) { + if (z->node != numa_node_id()) local_stat = NUMA_OTHER; - local_nid = preferred_zone->node; - } - if (z->node == local_nid) { + if (z->node == preferred_zone->node) __inc_zone_state(z, NUMA_HIT); - __inc_zone_state(z, local_stat); - } else { + else { __inc_zone_state(z, NUMA_MISS); __inc_zone_state(preferred_zone, NUMA_FOREIGN); } + __inc_zone_state(z, local_stat); #endif } -- GitLab From bc13325a2410c5190478caa5c807e4c23edf4093 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Oct 2016 12:23:49 +0200 Subject: [PATCH 0094/1398] mac80211: calculate min channel width correctly [ Upstream commit 96aa2e7cf126773b16c6c19b7474a8a38d3c707e ] In the current minimum chandef code there's an issue in that the recalculation can happen after rate control is initialized for a station that has a wider bandwidth than the current chanctx, and then rate control can immediately start using those higher rates which could cause problems. Observe that first of all that this problem is because we don't take non-associated and non-uploaded stations into account. The restriction to non-associated is quite pointless and is one of the causes for the problem described above, since the rate init will happen before the station is set to associated; no frames could actually be sent until associated, but the rate table can already contain higher rates and that might cause problems. Also, rejecting non-uploaded stations is wrong, since the rate control can select higher rates for those as well. Secondly, it's then necessary to recalculate the minimal config before initializing rate control, so that when rate control is initialized, the higher rates are already available. This can be done easily by adding the necessary function call in rate init. Change-Id: Ib9bc02d34797078db55459d196993f39dcd43070 Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/chan.c | 3 --- net/mac80211/rate.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index e75cbf6ecc26..a0d901d8992e 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -231,9 +231,6 @@ ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata) !(sta->sdata->bss && sta->sdata->bss == sdata->bss)) continue; - if (!sta->uploaded || !test_sta_flag(sta, WLAN_STA_ASSOC)) - continue; - max_bw = max(max_bw, ieee80211_get_sta_bw(&sta->sta)); } rcu_read_unlock(); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 206698bc93f4..9e2641d45587 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -40,6 +40,8 @@ void rate_control_rate_init(struct sta_info *sta) ieee80211_sta_set_rx_nss(sta); + ieee80211_recalc_min_chandef(sta->sdata); + if (!ref) return; -- GitLab From c682ae717759622c32d3bbd720a6acfbd54c9726 Mon Sep 17 00:00:00 2001 From: Kazuya Mizuguchi Date: Thu, 12 Jan 2017 13:21:06 +0100 Subject: [PATCH 0095/1398] ravb: Remove Rx overflow log messages [ Upstream commit 18a3ed59d09cf81a6447aadf6931bf0c9ffec5e0 ] Remove Rx overflow log messages as in an environment where logging results in network traffic logging may cause further overflows. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Kazuya Mizuguchi [simon: reworked changelog] Signed-off-by: Simon Horman Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/ravb_main.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 11623aad0e8e..10d3a9f6349e 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -941,14 +941,10 @@ static int ravb_poll(struct napi_struct *napi, int budget) /* Receive error message handling */ priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; - if (priv->rx_over_errors != ndev->stats.rx_over_errors) { + if (priv->rx_over_errors != ndev->stats.rx_over_errors) ndev->stats.rx_over_errors = priv->rx_over_errors; - netif_err(priv, rx_err, ndev, "Receive Descriptor Empty\n"); - } - if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) { + if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; - netif_err(priv, rx_err, ndev, "Receive FIFO Overflow\n"); - } out: return budget - quota; } -- GitLab From 9f800573a31b8993313a8c11eef4e0bd881c785a Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 5 Jan 2017 10:20:16 -0500 Subject: [PATCH 0096/1398] nfs: Don't take a reference on fl->fl_file for LOCK operation [ Upstream commit 4b09ec4b14a168bf2c687e1f598140c3c11e9222 ] I have reports of a crash that look like __fput() was called twice for a NFSv4.0 file. It seems possible that the state manager could try to reclaim a lock and take a reference on the fl->fl_file at the same time the file is being released if, during the close(), a signal interrupts the wait for outstanding IO while removing locks which then skips the removal of that lock. Since 83bfff23e9ed ("nfs4: have do_vfs_lock take an inode pointer") has removed the need to traverse fl->fl_file->f_inode in nfs4_lock_done(), taking that reference is no longer necessary. Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 67845220fc27..4638654e26f3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -6006,7 +6005,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->server = server; atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); - get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); return p; out_free_seqid: @@ -6119,7 +6117,6 @@ static void nfs4_lock_release(void *calldata) nfs_free_seqid(data->arg.lock_seqid); nfs4_put_lock_state(data->lsp); put_nfs_open_context(data->ctx); - fput(data->fl.fl_file); kfree(data); dprintk("%s: done!\n", __func__); } -- GitLab From cca6bca0097359468c635876d726c657df5733f0 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 9 Jan 2017 15:33:02 +0100 Subject: [PATCH 0097/1398] drm/exynos/decon5433: update shadow registers iff there are active windows [ Upstream commit f65a7c9cb3770ed4d3e7c57c66d7032689081b5e ] Improper usage of DECON_UPDATE register leads to subtle errors. If it set in decon_commit when there are no active windows it results in slow registry updates - all subsequent shadow registry updates takes more than full vblank. On the other side if it is not set when there are active windows it results in garbage on the screen after suspend/resume of FB console. The patch hopefully fixes it. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 6ca1f3117fe8..ef7fcb5f044b 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -188,8 +188,6 @@ static void decon_commit(struct exynos_drm_crtc *crtc) /* enable output and display signal */ decon_set_bits(ctx, DECON_VIDCON0, VIDCON0_ENVID | VIDCON0_ENVID_F, ~0); - - decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0); } static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win, @@ -340,8 +338,9 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) for (i = ctx->first_win; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); - /* standalone update */ - decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0); + /* update iff there are active windows */ + if (crtc->base.state->plane_mask) + decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0); if (ctx->out_type & IFTYPE_I80) set_bit(BIT_WIN_UPDATED, &ctx->flags); -- GitLab From 72afbf76344ff89272b9971716bb74855bc99157 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 13 Jan 2017 10:20:58 +0100 Subject: [PATCH 0098/1398] drm/exynos/decon5433: set STANDALONE_UPDATE_F also if planes are disabled [ Upstream commit 821b40b79db7dedbfe15ab330dfd181e661a533f ] STANDALONE_UPDATE_F should be set if something changed in plane configurations, including plane disable. The patch fixes page-faults bugs, caused by decon still using framebuffers of disabled planes. v2: fixed clear-bit code (Thx Marek) v3: use test_and_clear_bit (Thx Joonyoung) Signed-off-by: Andrzej Hajda Tested-by: Joonyoung Shim Signed-off-by: Inki Dae Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index ef7fcb5f044b..09e8cc36948e 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -46,7 +46,8 @@ enum decon_flag_bits { BIT_CLKS_ENABLED, BIT_IRQS_ENABLED, BIT_WIN_UPDATED, - BIT_SUSPENDED + BIT_SUSPENDED, + BIT_REQUEST_UPDATE }; struct decon_context { @@ -313,6 +314,7 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, /* window enable */ decon_set_bits(ctx, DECON_WINCONx(win), WINCONx_ENWIN_F, ~0); + set_bit(BIT_REQUEST_UPDATE, &ctx->flags); } static void decon_disable_plane(struct exynos_drm_crtc *crtc, @@ -325,6 +327,7 @@ static void decon_disable_plane(struct exynos_drm_crtc *crtc, return; decon_set_bits(ctx, DECON_WINCONx(win), WINCONx_ENWIN_F, 0); + set_bit(BIT_REQUEST_UPDATE, &ctx->flags); } static void decon_atomic_flush(struct exynos_drm_crtc *crtc) @@ -338,8 +341,7 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) for (i = ctx->first_win; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); - /* update iff there are active windows */ - if (crtc->base.state->plane_mask) + if (test_and_clear_bit(BIT_REQUEST_UPDATE, &ctx->flags)) decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0); if (ctx->out_type & IFTYPE_I80) -- GitLab From ee01c59bf8386609fb87720c7c77e01b00941176 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 9 Jan 2017 12:18:56 +0100 Subject: [PATCH 0099/1398] KVM: arm/arm64: Fix occasional warning from the timer work function [ Upstream commit 63e41226afc3f7a044b70325566fa86ac3142538 ] When a VCPU blocks (WFI) and has programmed the vtimer, we program a soft timer to expire in the future to wake up the vcpu thread when appropriate. Because such as wake up involves a vcpu kick, and the timer expire function can get called from interrupt context, and the kick may sleep, we have to schedule the kick in the work function. The work function currently has a warning that gets raised if it turns out that the timer shouldn't fire when it's run, which was added because the idea was that in that case the work should never have been cancelled. However, it turns out that this whole thing is racy and we can get spurious warnings. The problem is that we clear the armed flag in the work function, which may run in parallel with the kvm_timer_unschedule->timer_disarm() call. This results in a possible situation where the timer_disarm() call does not call cancel_work_sync(), which effectively synchronizes the completion of the work function with running the VCPU. As a result, the VCPU thread proceeds before the work function completees, causing changes to the timer state such that kvm_timer_should_fire(vcpu) returns false in the work function. All we do in the work function is to kick the VCPU, and an occasional rare extra kick never harmed anyone. Since the race above is extremely rare, we don't bother checking if the race happens but simply remove the check and the clearing of the armed flag from the work function. Reported-by: Matthias Brugger Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/arch_timer.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 27a1f6341d41..7b49a1378c90 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -89,9 +89,6 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) struct kvm_vcpu *vcpu; vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); - vcpu->arch.timer_cpu.armed = false; - - WARN_ON(!kvm_timer_should_fire(vcpu)); /* * If the vcpu is blocked we want to wake it up so that it will see -- GitLab From cda72bbb7bac1b1ab0cd06051e7a223530689002 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 13 Jan 2017 13:32:51 +0100 Subject: [PATCH 0100/1398] mac80211: prevent skb/txq mismatch [ Upstream commit dbef53621116474bb883f76f0ba6b7640bc42332 ] Station structure is considered as not uploaded (to driver) until drv_sta_state() finishes. This call is however done after the structure is attached to mac80211 internal lists and hashes. This means mac80211 can lookup (and use) station structure before it is uploaded to a driver. If this happens (structure exists, but sta->uploaded is false) fast_tx path can still be taken. Deep in the fastpath call the sta->uploaded is checked against to derive "pubsta" argument for ieee80211_get_txq(). If sta->uploaded is false (and sta is actually non-NULL) ieee80211_get_txq() effectively downgraded to vif->txq. At first glance this may look innocent but coerces mac80211 into a state that is almost guaranteed (codel may drop offending skb) to crash because a station-oriented skb gets queued up on vif-oriented txq. The ieee80211_tx_dequeue() ends up looking at info->control.flags and tries to use txq->sta which in the fail case is NULL. It's probably pointless to pretend one can downgrade skb from sta-txq to vif-txq. Since downgrading unicast traffic to vif->txq must not be done there's no txq to put a frame on if sta->uploaded is false. Therefore the code is made to fall back to regular tx() op path if the described condition is hit. Only drivers using wake_tx_queue were affected. Example crash dump before fix: Unable to handle kernel paging request at virtual address ffffe26c PC is at ieee80211_tx_dequeue+0x204/0x690 [mac80211] [] (ieee80211_tx_dequeue [mac80211]) from [] (ath10k_mac_tx_push_txq+0x54/0x1c0 [ath10k_core]) [] (ath10k_mac_tx_push_txq [ath10k_core]) from [] (ath10k_htt_txrx_compl_task+0xd78/0x11d0 [ath10k_core]) [] (ath10k_htt_txrx_compl_task [ath10k_core]) [] (ath10k_pci_napi_poll+0x54/0xe8 [ath10k_pci]) [] (ath10k_pci_napi_poll [ath10k_pci]) from [] (net_rx_action+0xac/0x160) Reported-by: Mohammed Shafi Shajakhan Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 274c564bd9af..1ffd1e145c13 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1244,7 +1244,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, struct ieee80211_vif *vif, - struct ieee80211_sta *pubsta, + struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -1258,10 +1258,13 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, if (!ieee80211_is_data(hdr->frame_control)) return NULL; - if (pubsta) { + if (sta) { u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - txq = pubsta->txq[tid]; + if (!sta->uploaded) + return NULL; + + txq = sta->sta.txq[tid]; } else if (vif) { txq = vif->txq; } @@ -1499,23 +1502,17 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, struct fq *fq = &local->fq; struct ieee80211_vif *vif; struct txq_info *txqi; - struct ieee80211_sta *pubsta; if (!local->ops->wake_tx_queue || sdata->vif.type == NL80211_IFTYPE_MONITOR) return false; - if (sta && sta->uploaded) - pubsta = &sta->sta; - else - pubsta = NULL; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); vif = &sdata->vif; - txqi = ieee80211_get_txq(local, vif, pubsta, skb); + txqi = ieee80211_get_txq(local, vif, sta, skb); if (!txqi) return false; -- GitLab From 72232a3bc5df33b6908a0590049c97158e2f9b58 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 13 Jan 2017 13:31:32 -0500 Subject: [PATCH 0101/1398] NFSv4: Fix client recovery when server reboots multiple times [ Upstream commit c6180a6237174f481dc856ed6e890d8196b6f0fb ] If the server reboots multiple times, the client should rely on the server to tell it that it cannot reclaim state as per section 9.6.3.4 in RFC7530 and section 8.4.2.1 in RFC5661. Currently, the client is being to conservative, and is assuming that if the server reboots while state recovery is in progress, then it must ignore state that was not recovered before the reboot. Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4state.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 92671914067f..71deeae6eefd 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1718,7 +1718,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) break; case -NFS4ERR_STALE_CLIENTID: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_clear_reclaim_reboot(clp); nfs4_state_start_reclaim_reboot(clp); break; case -NFS4ERR_EXPIRED: -- GitLab From a88ff235e8adf50bb50f5243c242f5f82f7549fa Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 28 Dec 2016 14:31:03 +0100 Subject: [PATCH 0102/1398] perf/x86/intel: Account interrupts for PEBS errors [ Upstream commit 475113d937adfd150eb82b5e2c5507125a68e7af ] It's possible to set up PEBS events to get only errors and not any data, like on SNB-X (model 45) and IVB-EP (model 62) via 2 perf commands running simultaneously: taskset -c 1 ./perf record -c 4 -e branches:pp -j any -C 10 This leads to a soft lock up, because the error path of the intel_pmu_drain_pebs_nhm() does not account event->hw.interrupt for error PEBS interrupts, so in case you're getting ONLY errors you don't have a way to stop the event when it's over the max_samples_per_tick limit: NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [perf_fuzzer:5816] ... RIP: 0010:[] [] smp_call_function_single+0xe2/0x140 ... Call Trace: ? trace_hardirqs_on_caller+0xf5/0x1b0 ? perf_cgroup_attach+0x70/0x70 perf_install_in_context+0x199/0x1b0 ? ctx_resched+0x90/0x90 SYSC_perf_event_open+0x641/0xf90 SyS_perf_event_open+0x9/0x10 do_syscall_64+0x6c/0x1f0 entry_SYSCALL64_slow_path+0x25/0x25 Add perf_event_account_interrupt() which does the interrupt and frequency checks and call it from intel_pmu_drain_pebs_nhm()'s error path. We keep the pending_kill and pending_wakeup logic only in the __perf_event_overflow() path, because they make sense only if there's any data to deliver. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Vince Weaver Link: http://lkml.kernel.org/r/1482931866-6018-2-git-send-email-jolsa@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/ds.c | 6 ++++- include/linux/perf_event.h | 1 + kernel/events/core.c | 47 +++++++++++++++++++++++++------------- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index be202390bbd3..9dfeeeca0ea8 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) continue; /* log dropped samples number */ - if (error[bit]) + if (error[bit]) { perf_log_lost_samples(event, error[bit]); + if (perf_event_account_interrupt(event)) + x86_pmu_stop(event, 0); + } + if (counts[bit]) { __intel_pmu_pebs_event(event, iregs, base, top, bit, counts[bit]); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4741ecdb9817..78ed8105e64d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event); extern void perf_event_disable_local(struct perf_event *event); extern void perf_event_disable_inatomic(struct perf_event *event); extern void perf_event_task_tick(void); +extern int perf_event_account_interrupt(struct perf_event *event); #else /* !CONFIG_PERF_EVENTS: */ static inline void * perf_aux_output_begin(struct perf_output_handle *handle, diff --git a/kernel/events/core.c b/kernel/events/core.c index 36ff2d93f222..13b9784427b0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7088,25 +7088,12 @@ static void perf_log_itrace_start(struct perf_event *event) perf_output_end(&handle); } -/* - * Generic event overflow handling, sampling. - */ - -static int __perf_event_overflow(struct perf_event *event, - int throttle, struct perf_sample_data *data, - struct pt_regs *regs) +static int +__perf_event_account_interrupt(struct perf_event *event, int throttle) { - int events = atomic_read(&event->event_limit); struct hw_perf_event *hwc = &event->hw; - u64 seq; int ret = 0; - - /* - * Non-sampling counters might still use the PMI to fold short - * hardware counters, ignore those. - */ - if (unlikely(!is_sampling_event(event))) - return 0; + u64 seq; seq = __this_cpu_read(perf_throttled_seq); if (seq != hwc->interrupts_seq) { @@ -7134,6 +7121,34 @@ static int __perf_event_overflow(struct perf_event *event, perf_adjust_period(event, delta, hwc->last_period, true); } + return ret; +} + +int perf_event_account_interrupt(struct perf_event *event) +{ + return __perf_event_account_interrupt(event, 1); +} + +/* + * Generic event overflow handling, sampling. + */ + +static int __perf_event_overflow(struct perf_event *event, + int throttle, struct perf_sample_data *data, + struct pt_regs *regs) +{ + int events = atomic_read(&event->event_limit); + int ret = 0; + + /* + * Non-sampling counters might still use the PMI to fold short + * hardware counters, ignore those. + */ + if (unlikely(!is_sampling_event(event))) + return 0; + + ret = __perf_event_account_interrupt(event, throttle); + /* * XXX event_limit might not quite work as expected on inherited * events -- GitLab From 57e76dbbc5609c49c8abb2199561e92f54bdd7eb Mon Sep 17 00:00:00 2001 From: Reza Arbab Date: Tue, 3 Jan 2017 14:39:51 -0600 Subject: [PATCH 0103/1398] powerpc/mm: Fix memory hotplug BUG() on radix [ Upstream commit 32b53c012e0bfe20b2745962a89db0dc72ef3270 ] Memory hotplug is leading to hash page table calls, even on radix: arch_add_memory create_section_mapping htab_bolt_mapping BUG_ON(!ppc_md.hpte_insert); To fix, refactor {create,remove}_section_mapping() into hash__ and radix__ variants. Leave the radix versions stubbed for now. Reviewed-by: Aneesh Kumar K.V Acked-by: Balbir Singh Signed-off-by: Reza Arbab Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/book3s/64/hash.h | 4 ++++ arch/powerpc/mm/hash_utils_64.c | 4 ++-- arch/powerpc/mm/pgtable-book3s64.c | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index f61cad3de4e6..4c935f7504f7 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -201,6 +201,10 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start, unsigned long phys); extern void hash__vmemmap_remove_mapping(unsigned long start, unsigned long page_size); + +int hash__create_section_mapping(unsigned long start, unsigned long end); +int hash__remove_section_mapping(unsigned long start, unsigned long end); + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 78dabf065ba9..bd666287c5ed 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -747,7 +747,7 @@ static unsigned long __init htab_get_table_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int create_section_mapping(unsigned long start, unsigned long end) +int hash__create_section_mapping(unsigned long start, unsigned long end) { int rc = htab_bolt_mapping(start, end, __pa(start), pgprot_val(PAGE_KERNEL), mmu_linear_psize, @@ -761,7 +761,7 @@ int create_section_mapping(unsigned long start, unsigned long end) return rc; } -int remove_section_mapping(unsigned long start, unsigned long end) +int hash__remove_section_mapping(unsigned long start, unsigned long end) { int rc = htab_remove_mapping(start, end, mmu_linear_psize, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index f4f437cbabf1..0fad7f6742ff 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -125,3 +125,21 @@ void mmu_cleanup_all(void) else if (mmu_hash_ops.hpte_clear_all) mmu_hash_ops.hpte_clear_all(); } + +#ifdef CONFIG_MEMORY_HOTPLUG +int create_section_mapping(unsigned long start, unsigned long end) +{ + if (radix_enabled()) + return -ENODEV; + + return hash__create_section_mapping(start, end); +} + +int remove_section_mapping(unsigned long start, unsigned long end) +{ + if (radix_enabled()) + return -ENODEV; + + return hash__remove_section_mapping(start, end); +} +#endif /* CONFIG_MEMORY_HOTPLUG */ -- GitLab From d8a544c920878780506ee757f11fc2076c0ba724 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 23 Dec 2016 18:06:05 -0800 Subject: [PATCH 0104/1398] qla2xxx: Fix wrong IOCB type assumption [ Upstream commit bb1181c9a8b46b6f10e749d9ed94480336445d7f ] qlt_reset is called with Immedidate Notify IOCB only. Current code wrongly cast it as ATIO IOCB. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_target.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 91f5f55a8a9b..59059ffbb98c 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -668,11 +668,9 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) { struct qla_hw_data *ha = vha->hw; struct qla_tgt_sess *sess = NULL; - uint32_t unpacked_lun, lun = 0; uint16_t loop_id; int res = 0; struct imm_ntfy_from_isp *n = (struct imm_ntfy_from_isp *)iocb; - struct atio_from_isp *a = (struct atio_from_isp *)iocb; unsigned long flags; loop_id = le16_to_cpu(n->u.isp24.nport_handle); @@ -725,11 +723,7 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) "loop_id %d)\n", vha->host_no, sess, sess->port_name, mcmd, loop_id); - lun = a->u.isp24.fcp_cmnd.lun; - unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun); - - return qlt_issue_task_mgmt(sess, unpacked_lun, mcmd, - iocb, QLA24XX_MGMT_SEND_NACK); + return qlt_issue_task_mgmt(sess, 0, mcmd, iocb, QLA24XX_MGMT_SEND_NACK); } /* ha->tgt.sess_lock supposed to be held on entry */ -- GitLab From 4ee340c06bf2b271641cf08792c5173f55736184 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 10 Jan 2017 20:00:40 +0800 Subject: [PATCH 0105/1398] drm/amdgpu: fix bug set incorrect value to vce register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e05208ded1905e500cd5b369d624b071951c68b9 ] Set the proper bits for clockgating setup. Signed-off-by: Rex Zhu Acked-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 50f0cf2788b7..7522f796f19b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -182,7 +182,7 @@ static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev, WREG32(mmVCE_UENC_CLOCK_GATING_2, data); data = RREG32(mmVCE_UENC_REG_CLOCK_GATING); - data &= ~0xffc00000; + data &= ~0x3ff; WREG32(mmVCE_UENC_REG_CLOCK_GATING, data); data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL); -- GitLab From 2001ccb4dea45907e11b6e53888777d7d202beb9 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 17 Jan 2017 15:15:20 +0100 Subject: [PATCH 0106/1398] drm/exynos/decon5433: set STANDALONE_UPDATE_F on output enablement [ Upstream commit 11d8bcef7a0399e1d2519f207fd575fc404306b4 ] DECON_TV requires STANDALONE_UPDATE after output enabling, otherwise it does not start. This change is neutral for DECON. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 09e8cc36948e..6dd09c306bc1 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -189,6 +189,8 @@ static void decon_commit(struct exynos_drm_crtc *crtc) /* enable output and display signal */ decon_set_bits(ctx, DECON_VIDCON0, VIDCON0_ENVID | VIDCON0_ENVID_F, ~0); + + decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0); } static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win, -- GitLab From 05071c058d20c4b5e0b670c5c4fd6976e0547413 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 20 Jan 2017 13:01:57 +0000 Subject: [PATCH 0107/1398] net: sctp: fix array overrun read on sctp_timer_tbl [ Upstream commit 0e73fc9a56f22f2eec4d2b2910c649f7af67b74d ] The comparison on the timeout can lead to an array overrun read on sctp_timer_tbl because of an off-by-one error. Fix this by using < instead of <= and also compare to the array size rather than SCTP_EVENT_TIMEOUT_MAX. Fixes CoverityScan CID#1397639 ("Out-of-bounds read") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sctp/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/debug.c b/net/sctp/debug.c index 95d7b15dad21..e371a0d90068 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -166,7 +166,7 @@ static const char *const sctp_timer_tbl[] = { /* Lookup timer debug name. */ const char *sctp_tname(const sctp_subtype_t id) { - if (id.timeout <= SCTP_EVENT_TIMEOUT_MAX) + if (id.timeout < ARRAY_SIZE(sctp_timer_tbl)) return sctp_timer_tbl[id.timeout]; return "unknown_timer"; } -- GitLab From b75e47cf84390b8be2d93a08977270166831a2b3 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Sun, 22 Jan 2017 16:50:23 +0800 Subject: [PATCH 0108/1398] x86/fpu: Set the xcomp_bv when we fake up a XSAVES area [ Upstream commit 4c833368f0bf748d4147bf301b1f95bc8eccb3c0 ] I got the following calltrace on a Apollo Lake SoC with 32-bit kernel: WARNING: CPU: 2 PID: 261 at arch/x86/include/asm/fpu/internal.h:363 fpu__restore+0x1f5/0x260 [...] Hardware name: Intel Corp. Broxton P/NOTEBOOK, BIOS APLIRVPA.X64.0138.B35.1608091058 08/09/2016 Call Trace: dump_stack() __warn() ? fpu__restore() warn_slowpath_null() fpu__restore() __fpu__restore_sig() fpu__restore_sig() restore_sigcontext.isra.9() sys_sigreturn() do_int80_syscall_32() entry_INT80_32() The reason is that a #GP occurs when executing XRSTORS. The root cause is that we forget to set the xcomp_bv when we fake up the XSAVES area in the copyin_to_xsaves() function. Signed-off-by: Kevin Hao Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/1485075023-30161-1-git-send-email-haokexin@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/xstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 095ef7ddd6ae..abfbb61b18b8 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1077,6 +1077,7 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, * Add back in the features that came in from userspace: */ xsave->header.xfeatures |= xfeatures; + xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xsave->header.xfeatures; return 0; } -- GitLab From cab00a9c16c07045bdaaf9e6b4e92de81790f386 Mon Sep 17 00:00:00 2001 From: Xiangliang Yu Date: Thu, 19 Jan 2017 09:57:41 +0800 Subject: [PATCH 0109/1398] drm/amdgpu: fix unload driver issue for virtual display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3a1d19a29670aa7eb58576a31883d0aa9fb77549 ] Virtual display doesn't allocate amdgpu_encoder when initializing, so will get invaild pointer if try to free amdgpu_encoder when unloading driver. Signed-off-by: Xiangliang Yu Reviewed-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/dce_virtual.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index c2bd9f045532..6d75fd0e3105 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -565,11 +565,8 @@ static const struct drm_encoder_helper_funcs dce_virtual_encoder_helper_funcs = static void dce_virtual_encoder_destroy(struct drm_encoder *encoder) { - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - - kfree(amdgpu_encoder->enc_priv); drm_encoder_cleanup(encoder); - kfree(amdgpu_encoder); + kfree(encoder); } static const struct drm_encoder_funcs dce_virtual_encoder_funcs = { -- GitLab From dee48e57ae22d8dedb37eda71e029e731034b0e6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Jan 2017 09:29:09 +0100 Subject: [PATCH 0110/1398] mac80211: don't try to sleep in rate_control_rate_init() [ Upstream commit 115865fa0826ed18ca04717cf72d0fe874c0fe7f ] In my previous patch, I missed that rate_control_rate_init() is called from some places that cannot sleep, so it cannot call ieee80211_recalc_min_chandef(). Remove that call for now to fix the context bug, we'll have to find a different way to fix the minimum channel width issue. Fixes: 96aa2e7cf126 ("mac80211: calculate min channel width correctly") Reported-by: Xiaolong Ye (via lkp-robot) Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rate.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 9e2641d45587..206698bc93f4 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -40,8 +40,6 @@ void rate_control_rate_init(struct sta_info *sta) ieee80211_sta_set_rx_nss(sta); - ieee80211_recalc_min_chandef(sta->sdata); - if (!ref) return; -- GitLab From 8d4198f8c66bff208cf5c6191ce8d9ad71e2c048 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 24 Jan 2017 13:50:34 +0200 Subject: [PATCH 0111/1398] RDMA/qedr: Return success when not changing QP state [ Upstream commit 865cea40b69741c3da2574176876463233b2b67c ] If the user is requesting us to change the QP state to the same state that it is already in, return success instead of failure. Signed-off-by: Ram Amrani Signed-off-by: Michal Kalderon Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/qedr/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 4ba019e3dc56..35d5b89decb4 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1653,7 +1653,7 @@ static int qedr_update_qp_state(struct qedr_dev *dev, int status = 0; if (new_state == qp->state) - return 1; + return 0; switch (qp->state) { case QED_ROCE_QP_STATE_RESET: -- GitLab From e8240244880b1a88ff9e0baf1fda8eccf6b7590b Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 24 Jan 2017 13:51:41 +0200 Subject: [PATCH 0112/1398] RDMA/qedr: Fix RDMA CM loopback [ Upstream commit af2b14b8b8ae21b0047a52c767ac8b44f435a280 ] The loopback logic in RDMA CM packets compares Ethernet addresses and was accidently inverse. Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/qedr/qedr_cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c index 63890ebb72bd..eccf7039aaca 100644 --- a/drivers/infiniband/hw/qedr/qedr_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_cm.c @@ -404,9 +404,9 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev, } if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h)) - packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW; - else packet->tx_dest = QED_ROCE_LL2_TX_DEST_LB; + else + packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW; packet->roce_mode = roce_mode; memcpy(packet->header.vaddr, ud_header_buffer, header_size); -- GitLab From 6313adb8de021b841184115ca0e232cb0d533634 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 24 Jan 2017 13:00:46 +0100 Subject: [PATCH 0113/1398] tipc: fix nametbl_lock soft lockup at module exit [ Upstream commit 9dc3abdd1f7ea524e8552e0a3ef01219892ed1f4 ] Commit 333f796235a527 ("tipc: fix a race condition leading to subscriber refcnt bug") reveals a soft lockup while acquiring nametbl_lock. Before commit 333f796235a527, we call tipc_conn_shutdown() from tipc_close_conn() in the context of tipc_topsrv_stop(). In that context, we are allowed to grab the nametbl_lock. Commit 333f796235a527, moved tipc_conn_release (renamed from tipc_conn_shutdown) to the connection refcount cleanup. This allows either tipc_nametbl_withdraw() or tipc_topsrv_stop() to the cleanup. Since tipc_exit_net() first calls tipc_topsrv_stop() and then tipc_nametble_withdraw() increases the chances for the later to perform the connection cleanup. The soft lockup occurs in the call chain of tipc_nametbl_withdraw(), when it performs the tipc_conn_kref_release() as it tries to grab nametbl_lock again while holding it already. tipc_nametbl_withdraw() grabs nametbl_lock tipc_nametbl_remove_publ() tipc_subscrp_report_overlap() tipc_subscrp_send_event() tipc_conn_sendmsg() << if (con->flags != CF_CONNECTED) we do conn_put(), triggering the cleanup as refcount=0. >> tipc_conn_kref_release tipc_sock_release tipc_conn_release tipc_subscrb_delete tipc_subscrp_delete tipc_nametbl_unsubscribe << Soft Lockup >> The previous changes in this series fixes the race conditions fixed by commit 333f796235a527. Hence we can now revert the commit. Fixes: 333f796235a52727 ("tipc: fix a race condition leading to subscriber refcnt bug") Reported-and-Tested-by: John Thompson Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/tipc/server.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index f89c0c2e8c16..04ff441b8065 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -86,7 +86,6 @@ struct outqueue_entry { static void tipc_recv_work(struct work_struct *work); static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); -static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { @@ -104,7 +103,6 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); - tipc_sock_release(con); sock_release(sock); con->sock = NULL; @@ -194,19 +192,15 @@ static void tipc_unregister_callbacks(struct tipc_conn *con) write_unlock_bh(&sk->sk_callback_lock); } -static void tipc_sock_release(struct tipc_conn *con) +static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; - if (con->conid) - s->tipc_conn_release(con->conid, con->usr_data); - - tipc_unregister_callbacks(con); -} - -static void tipc_close_conn(struct tipc_conn *con) -{ if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { + tipc_unregister_callbacks(con); + + if (con->conid) + s->tipc_conn_release(con->conid, con->usr_data); /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs -- GitLab From aad54ba21dd8742810f195527e62f99ee0a1cee1 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 24 Jan 2017 13:00:48 +0100 Subject: [PATCH 0114/1398] tipc: fix cleanup at module unload [ Upstream commit 35e22e49a5d6a741ebe7f2dd280b2052c3003ef7 ] In tipc_server_stop(), we iterate over the connections with limiting factor as server's idr_in_use. We ignore the fact that this variable is decremented in tipc_close_conn(), leading to premature exit. In this commit, we iterate until the we have no connections left. Acked-by: Ying Xue Acked-by: Jon Maloy Tested-by: John Thompson Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/tipc/server.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 04ff441b8065..3cd6402e812c 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -619,14 +619,12 @@ int tipc_server_start(struct tipc_server *s) void tipc_server_stop(struct tipc_server *s) { struct tipc_conn *con; - int total = 0; int id; spin_lock_bh(&s->idr_lock); - for (id = 0; total < s->idr_in_use; id++) { + for (id = 0; s->idr_in_use; id++) { con = idr_find(&s->conn_idr, id); if (con) { - total++; spin_unlock_bh(&s->idr_lock); tipc_close_conn(con); spin_lock_bh(&s->idr_lock); -- GitLab From 49c3164e6d84ce32121d18f060bc05825001df8e Mon Sep 17 00:00:00 2001 From: Iago Abal Date: Wed, 11 Jan 2017 14:00:21 +0100 Subject: [PATCH 0115/1398] dmaengine: pl330: fix double lock [ Upstream commit 91539eb1fda2d530d3b268eef542c5414e54bf1a ] The static bug finder EBA (http://www.iagoabal.eu/eba/) reported the following double-lock bug: Double lock: 1. spin_lock_irqsave(pch->lock, flags) at pl330_free_chan_resources:2236; 2. call to function `pl330_release_channel' immediately after; 3. call to function `dma_pl330_rqcb' in line 1753; 4. spin_lock_irqsave(pch->lock, flags) at dma_pl330_rqcb:1505. I have fixed it as suggested by Marek Szyprowski. First, I have replaced `pch->lock' with `pl330->lock' in functions `pl330_alloc_chan_resources' and `pl330_free_chan_resources'. This avoids the double-lock by acquiring a different lock than `dma_pl330_rqcb'. NOTE that, as a result, `pl330_free_chan_resources' executes `list_splice_tail_init' on `pch->work_list' under lock `pl330->lock', whereas in the rest of the code `pch->work_list' is protected by `pch->lock'. I don't know if this may cause race conditions. Similarly `pch->cyclic' is written by `pl330_alloc_chan_resources' under `pl330->lock' but read by `pl330_tx_submit' under `pch->lock'. Second, I have removed locking from `pl330_request_channel' and `pl330_release_channel' functions. Function `pl330_request_channel' is only called from `pl330_alloc_chan_resources', so the lock is already held. Function `pl330_release_channel' is called from `pl330_free_chan_resources', which already holds the lock, and from `pl330_del'. Function `pl330_del' is called in an error path of `pl330_probe' and at the end of `pl330_remove', but I assume that there cannot be concurrent accesses to the protected data at those points. Signed-off-by: Iago Abal Reviewed-by: Marek Szyprowski Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/pl330.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 9f3dbc8c63d2..fb2e7476d96b 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1694,7 +1694,6 @@ static bool _chan_ns(const struct pl330_dmac *pl330, int i) static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330) { struct pl330_thread *thrd = NULL; - unsigned long flags; int chans, i; if (pl330->state == DYING) @@ -1702,8 +1701,6 @@ static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330) chans = pl330->pcfg.num_chan; - spin_lock_irqsave(&pl330->lock, flags); - for (i = 0; i < chans; i++) { thrd = &pl330->channels[i]; if ((thrd->free) && (!_manager_ns(thrd) || @@ -1721,8 +1718,6 @@ static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330) thrd = NULL; } - spin_unlock_irqrestore(&pl330->lock, flags); - return thrd; } @@ -1740,7 +1735,6 @@ static inline void _free_event(struct pl330_thread *thrd, int ev) static void pl330_release_channel(struct pl330_thread *thrd) { struct pl330_dmac *pl330; - unsigned long flags; if (!thrd || thrd->free) return; @@ -1752,10 +1746,8 @@ static void pl330_release_channel(struct pl330_thread *thrd) pl330 = thrd->dmac; - spin_lock_irqsave(&pl330->lock, flags); _free_event(thrd, thrd->ev); thrd->free = true; - spin_unlock_irqrestore(&pl330->lock, flags); } /* Initialize the structure for PL330 configuration, that can be used @@ -2120,20 +2112,20 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan) struct pl330_dmac *pl330 = pch->dmac; unsigned long flags; - spin_lock_irqsave(&pch->lock, flags); + spin_lock_irqsave(&pl330->lock, flags); dma_cookie_init(chan); pch->cyclic = false; pch->thread = pl330_request_channel(pl330); if (!pch->thread) { - spin_unlock_irqrestore(&pch->lock, flags); + spin_unlock_irqrestore(&pl330->lock, flags); return -ENOMEM; } tasklet_init(&pch->task, pl330_tasklet, (unsigned long) pch); - spin_unlock_irqrestore(&pch->lock, flags); + spin_unlock_irqrestore(&pl330->lock, flags); return 1; } @@ -2236,12 +2228,13 @@ static int pl330_pause(struct dma_chan *chan) static void pl330_free_chan_resources(struct dma_chan *chan) { struct dma_pl330_chan *pch = to_pchan(chan); + struct pl330_dmac *pl330 = pch->dmac; unsigned long flags; tasklet_kill(&pch->task); pm_runtime_get_sync(pch->dmac->ddma.dev); - spin_lock_irqsave(&pch->lock, flags); + spin_lock_irqsave(&pl330->lock, flags); pl330_release_channel(pch->thread); pch->thread = NULL; @@ -2249,7 +2242,7 @@ static void pl330_free_chan_resources(struct dma_chan *chan) if (pch->cyclic) list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool); - spin_unlock_irqrestore(&pch->lock, flags); + spin_unlock_irqrestore(&pl330->lock, flags); pm_runtime_mark_last_busy(pch->dmac->ddma.dev); pm_runtime_put_autosuspend(pch->dmac->ddma.dev); } -- GitLab From fe8bdc9b79ee644177715a9f72a7b332dd881847 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Tue, 24 Jan 2017 21:49:41 -0500 Subject: [PATCH 0116/1398] tcp: correct memory barrier usage in tcp_check_space() [ Upstream commit 56d806222ace4c3aeae516cd7a855340fb2839d8 ] sock_reset_flag() maps to __clear_bit() not the atomic version clear_bit(). Thus, we need smp_mb(), smp_mb__after_atomic() is not sufficient. Fixes: 3c7151275c0c ("tcp: add memory barriers to write space paths") Cc: Eric Dumazet Cc: Oleg Nesterov Signed-off-by: Jason Baron Acked-by: Eric Dumazet Reported-by: Oleg Nesterov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8fcd0c642742..05255a286888 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5081,7 +5081,7 @@ static void tcp_check_space(struct sock *sk) if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); /* pairs with tcp_poll() */ - smp_mb__after_atomic(); + smp_mb(); if (sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) tcp_new_space(sk); -- GitLab From 6bd89953fb1f8e45dc9f93ab59f50c950ef44187 Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Mon, 16 Jan 2017 15:49:38 +0100 Subject: [PATCH 0117/1398] i2c: i2c-cadence: Initialize configuration before probing devices [ Upstream commit 0e1929dedea36781e25902118c93edd8d8f09af1 ] The cadence I2C driver calls cdns_i2c_writereg(..) to setup a workaround in the controller, but did so after calling i2c_add_adapter() which starts probing devices on the bus. Change the order so that the configuration is completely finished before using the adapter. Signed-off-by: Mike Looijmans Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-cadence.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 686971263bef..45d6771fac8c 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -962,10 +962,6 @@ static int cdns_i2c_probe(struct platform_device *pdev) goto err_clk_dis; } - ret = i2c_add_adapter(&id->adap); - if (ret < 0) - goto err_clk_dis; - /* * Cadence I2C controller has a bug wherein it generates * invalid read transaction after HW timeout in master receiver mode. @@ -975,6 +971,10 @@ static int cdns_i2c_probe(struct platform_device *pdev) */ cdns_i2c_writereg(CDNS_I2C_TIMEOUT_MAX, CDNS_I2C_TIME_OUT_OFFSET); + ret = i2c_add_adapter(&id->adap); + if (ret < 0) + goto err_clk_dis; + dev_info(&pdev->dev, "%u kHz mmio %08lx irq %d\n", id->i2c_clk / 1000, (unsigned long)r_mem->start, id->irq); -- GitLab From 3976dd677e891c0b2c63d08028d445663539472c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 1 Jan 2017 13:41:56 +0200 Subject: [PATCH 0118/1398] nvmet: cancel fatal error and flush async work before free controller [ Upstream commit 06406d81a2d7cfb8abcc4fa6cdfeb8e5897007c5 ] Make sure they are not running and we can free the controller safely. Signed-off-by: Roy Shterman Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 55ce769cecee..fbd6d487103f 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -816,6 +816,9 @@ static void nvmet_ctrl_free(struct kref *ref) list_del(&ctrl->subsys_entry); mutex_unlock(&subsys->lock); + flush_work(&ctrl->async_event_work); + cancel_work_sync(&ctrl->fatal_err_work); + ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid); nvmet_subsys_put(subsys); -- GitLab From d812be8288325ea28297e6bbbd8afb24b7d80f65 Mon Sep 17 00:00:00 2001 From: Andreas Schultz Date: Fri, 27 Jan 2017 10:40:57 +0100 Subject: [PATCH 0119/1398] gtp: clear DF bit on GTP packet tx [ Upstream commit c6ce1d08eede4c2968ed08aafa3165e8e183c5a1 ] 3GPP TS 29.281 and 3GPP TS 29.060 imply that GTP-U packets should be sent with the DF bit cleared. For example 3GPP TS 29.060, Release 8, Section 13.2.2: > Backbone router: Any router in the backbone may fragment the GTP > packet if needed, according to IPv4. Signed-off-by: Andreas Schultz Acked-by: Harald Welte Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index cebde074d196..5b67f8a8b79a 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -612,7 +612,7 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) pktinfo.fl4.saddr, pktinfo.fl4.daddr, pktinfo.iph->tos, ip4_dst_hoplimit(&pktinfo.rt->dst), - htons(IP_DF), + 0, pktinfo.gtph_port, pktinfo.gtph_port, true, false); break; -- GitLab From c8d6f83d3b6e2a16d542d44e757c67a4bbb35a28 Mon Sep 17 00:00:00 2001 From: Andreas Schultz Date: Fri, 27 Jan 2017 10:40:58 +0100 Subject: [PATCH 0120/1398] gtp: fix cross netns recv on gtp socket [ Upstream commit 3ab1b469e847ba425af3c5ad5068cc94b55b38d0 ] The use of the passed through netlink src_net to check for a cross netns operation was wrong. Using the GTP socket and the GTP netdevice is always correct (even if the netdev has been moved to new netns after link creation). Remove the now obsolete net field from gtp_dev. Signed-off-by: Andreas Schultz Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 5b67f8a8b79a..cb206e5526c4 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -69,7 +69,6 @@ struct gtp_dev { struct socket *sock0; struct socket *sock1u; - struct net *net; struct net_device *dev; unsigned int hash_size; @@ -316,7 +315,7 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb) netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk); - xnet = !net_eq(gtp->net, dev_net(gtp->dev)); + xnet = !net_eq(sock_net(sk), dev_net(gtp->dev)); switch (udp_sk(sk)->encap_type) { case UDP_ENCAP_GTP0: @@ -658,7 +657,7 @@ static void gtp_link_setup(struct net_device *dev) static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize); static void gtp_hashtable_free(struct gtp_dev *gtp); static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp, - int fd_gtp0, int fd_gtp1, struct net *src_net); + int fd_gtp0, int fd_gtp1); static int gtp_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) @@ -675,7 +674,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, fd0 = nla_get_u32(data[IFLA_GTP_FD0]); fd1 = nla_get_u32(data[IFLA_GTP_FD1]); - err = gtp_encap_enable(dev, gtp, fd0, fd1, src_net); + err = gtp_encap_enable(dev, gtp, fd0, fd1); if (err < 0) goto out_err; @@ -821,7 +820,7 @@ static void gtp_hashtable_free(struct gtp_dev *gtp) } static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp, - int fd_gtp0, int fd_gtp1, struct net *src_net) + int fd_gtp0, int fd_gtp1) { struct udp_tunnel_sock_cfg tuncfg = {NULL}; struct socket *sock0, *sock1u; @@ -858,7 +857,6 @@ static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp, gtp->sock0 = sock0; gtp->sock1u = sock1u; - gtp->net = src_net; tuncfg.sk_user_data = gtp; tuncfg.encap_rcv = gtp_encap_recv; -- GitLab From 3325615d2b500b9c84f908adbdb843a31acc3126 Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Fri, 27 Jan 2017 21:39:03 +0100 Subject: [PATCH 0121/1398] net: phy: micrel: KSZ8795 do not set SUPPORTED_[Asym_]Pause [ Upstream commit cf626c3b252b2c9d131be0dd66096ec3bf729e54 ] As pr commit "net: phy: phy drivers should not set SUPPORTED_[Asym_]Pause" this phy driver should not set these feature bits. Signed-off-by: Sean Nyekjaer Fixes: 9d162ed69f51 ("net: phy: micrel: add support for KSZ8795") Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/micrel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 222918828655..fbf5945ce00d 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1020,7 +1020,7 @@ static struct phy_driver ksphy_driver[] = { .phy_id = PHY_ID_KSZ8795, .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8795", - .features = (SUPPORTED_Pause | SUPPORTED_Asym_Pause), + .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, .config_init = kszphy_config_init, .config_aneg = ksz8873mll_config_aneg, -- GitLab From 6a9ffb5a87d874b038a35e5687a502e5afe6dad4 Mon Sep 17 00:00:00 2001 From: Vincent Date: Mon, 30 Jan 2017 15:06:43 +0100 Subject: [PATCH 0122/1398] net: thunderx: avoid dereferencing xcv when NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c73e44269369e936165f0f9b61f1f09a11dae01c ] This fixes the following smatch and coccinelle warnings: drivers/net/ethernet/cavium/thunder/thunder_xcv.c:119 xcv_setup_link() error: we previously assumed 'xcv' could be null (see line 118) [smatch] drivers/net/ethernet/cavium/thunder/thunder_xcv.c:119:16-20: ERROR: xcv is NULL but dereferenced. [coccinelle] Fixes: 6465859aba1e66a5 ("net: thunderx: Add RGMII interface type support") Signed-off-by: Vincent Stehlé Cc: Sunil Goutham Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/thunder/thunder_xcv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c index 67befedef709..578c7f8f11bf 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c @@ -116,8 +116,7 @@ void xcv_setup_link(bool link_up, int link_speed) int speed = 2; if (!xcv) { - dev_err(&xcv->pdev->dev, - "XCV init not done, probe may have failed\n"); + pr_err("XCV init not done, probe may have failed\n"); return; } -- GitLab From 6129fd98822520dccf60389a4e2ef5d0148aad78 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 31 Jan 2017 20:01:31 +0100 Subject: [PATCH 0123/1398] be2net: fix initial MAC setting [ Upstream commit 4993b39ab04b083ff6ee1147e7e7f120feb6bf7f ] Recent commit 34393529163a ("be2net: fix MAC addr setting on privileged BE3 VFs") allows privileged BE3 VFs to set its MAC address during initialization. Although the initial MAC for such VFs is already programmed by parent PF the subsequent setting performed by VF is OK, but in certain cases (after fresh boot) this command in VF can fail. The MAC should be initialized only when: 1) no MAC is programmed (always except BE3 VFs during first init) 2) programmed MAC is different from requested (e.g. MAC is set when interface is down). In this case the initial MAC programmed by PF needs to be deleted. The adapter->dev_mac contains MAC address currently programmed in HW so it should be zeroed when the MAC is deleted from HW and should not be filled when MAC is set when interface is down in be_mac_addr_set() as no programming is performed in this case. Example of failure without the fix (immediately after fresh boot): # ip link set eth0 up <- eth0 is BE3 PF be2net 0000:01:00.0 eth0: Link is Up # echo 1 > /sys/class/net/eth0/device/sriov_numvfs <- Create 1 VF ... be2net 0000:01:04.0: Emulex OneConnect(be3): VF port 0 # ip link set eth8 up <- eth8 is created privileged VF be2net 0000:01:04.0: opcode 59-1 failed:status 1-76 RTNETLINK answers: Input/output error # echo 0 > /sys/class/net/eth0/device/sriov_numvfs <- Delete VF iommu: Removing device 0000:01:04.0 from group 33 ... # echo 1 > /sys/class/net/eth0/device/sriov_numvfs <- Create it again iommu: Removing device 0000:01:04.0 from group 33 ... # ip link set eth8 up be2net 0000:01:04.0 eth8: Link is Up Initialization is now OK. v2 - Corrected the comment and condition check suggested by Suresh & Harsha Fixes: 34393529163a ("be2net: fix MAC addr setting on privileged BE3 VFs") Cc: Sathya Perla Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Signed-off-by: Ivan Vecera Acked-by: Sriharsha Basavapatna Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_main.c | 33 +++++++++++++++++---- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index fbb396b08495..1644896568c4 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -362,8 +362,10 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) status = -EPERM; goto err; } -done: + + /* Remember currently programmed MAC */ ether_addr_copy(adapter->dev_mac, addr->sa_data); +done: ether_addr_copy(netdev->dev_addr, addr->sa_data); dev_info(dev, "MAC address changed to %pM\n", addr->sa_data); return 0; @@ -3635,8 +3637,10 @@ static void be_disable_if_filters(struct be_adapter *adapter) { /* Don't delete MAC on BE3 VFs without FILTMGMT privilege */ if (!BEx_chip(adapter) || !be_virtfn(adapter) || - check_privilege(adapter, BE_PRIV_FILTMGMT)) + check_privilege(adapter, BE_PRIV_FILTMGMT)) { be_dev_mac_del(adapter, adapter->pmac_id[0]); + eth_zero_addr(adapter->dev_mac); + } be_clear_uc_list(adapter); be_clear_mc_list(adapter); @@ -3790,12 +3794,27 @@ static int be_enable_if_filters(struct be_adapter *adapter) if (status) return status; - /* Don't add MAC on BE3 VFs without FILTMGMT privilege */ - if (!BEx_chip(adapter) || !be_virtfn(adapter) || - check_privilege(adapter, BE_PRIV_FILTMGMT)) { + /* Normally this condition usually true as the ->dev_mac is zeroed. + * But on BE3 VFs the initial MAC is pre-programmed by PF and + * subsequent be_dev_mac_add() can fail (after fresh boot) + */ + if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) { + int old_pmac_id = -1; + + /* Remember old programmed MAC if any - can happen on BE3 VF */ + if (!is_zero_ether_addr(adapter->dev_mac)) + old_pmac_id = adapter->pmac_id[0]; + status = be_dev_mac_add(adapter, adapter->netdev->dev_addr); if (status) return status; + + /* Delete the old programmed MAC as we successfully programmed + * a new MAC + */ + if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0]) + be_dev_mac_del(adapter, old_pmac_id); + ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr); } @@ -4569,6 +4588,10 @@ static int be_mac_setup(struct be_adapter *adapter) memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN); memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN); + + /* Initial MAC for BE3 VFs is already programmed by PF */ + if (BEx_chip(adapter) && be_virtfn(adapter)) + memcpy(adapter->dev_mac, mac, ETH_ALEN); } return 0; -- GitLab From 3e550debcf7557ca966388d5462519063aa75524 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 1 Feb 2017 14:26:16 +1100 Subject: [PATCH 0124/1398] vfio/spapr: Fix missing mutex unlock when creating a window [ Upstream commit 2da64d20a0b20046d688e44f4033efd09157e29d ] Commit d9c728949ddc ("vfio/spapr: Postpone default window creation") added an additional exit to the VFIO_IOMMU_SPAPR_TCE_CREATE case and made it possible to return from tce_iommu_ioctl() without unlocking container->lock; this fixes the issue. Fixes: d9c728949ddc ("vfio/spapr: Postpone default window creation") Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_spapr_tce.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 85d3e648bdea..59b3f62a2d64 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -1123,12 +1123,11 @@ static long tce_iommu_ioctl(void *iommu_data, mutex_lock(&container->lock); ret = tce_iommu_create_default_window(container); - if (ret) - return ret; - - ret = tce_iommu_create_window(container, create.page_shift, - create.window_size, create.levels, - &create.start_addr); + if (!ret) + ret = tce_iommu_create_window(container, + create.page_shift, + create.window_size, create.levels, + &create.start_addr); mutex_unlock(&container->lock); -- GitLab From c93c09a057b71fa152de9d56f971fd82248d6499 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Feb 2017 14:30:53 -0800 Subject: [PATCH 0125/1398] mm: avoid returning VM_FAULT_RETRY from ->page_mkwrite handlers [ Upstream commit 0911d0041c22922228ca52a977d7b0b0159fee4b ] Some ->page_mkwrite handlers may return VM_FAULT_RETRY as its return code (GFS2 or Lustre can definitely do this). However VM_FAULT_RETRY from ->page_mkwrite is completely unhandled by the mm code and results in locking and writeably mapping the page which definitely is not what the caller wanted. Fix Lustre and block_page_mkwrite_ret() used by other filesystems (notably GFS2) to return VM_FAULT_NOPAGE instead which results in bailing out from the fault code, the CPU then retries the access, and we fault again effectively doing what the handler wanted. Link: http://lkml.kernel.org/r/20170203150729.15863-1-jack@suse.cz Signed-off-by: Jan Kara Reported-by: Al Viro Reviewed-by: Jinshan Xiong Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lustre/llite/llite_mmap.c | 4 +--- include/linux/buffer_head.h | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c index 436691814a5e..27333d973bcd 100644 --- a/drivers/staging/lustre/lustre/llite/llite_mmap.c +++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c @@ -401,15 +401,13 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) result = VM_FAULT_LOCKED; break; case -ENODATA: + case -EAGAIN: case -EFAULT: result = VM_FAULT_NOPAGE; break; case -ENOMEM: result = VM_FAULT_OOM; break; - case -EAGAIN: - result = VM_FAULT_RETRY; - break; default: result = VM_FAULT_SIGBUS; break; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 447a915db25d..4431ea2c8802 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -239,12 +239,10 @@ static inline int block_page_mkwrite_return(int err) { if (err == 0) return VM_FAULT_LOCKED; - if (err == -EFAULT) + if (err == -EFAULT || err == -EAGAIN) return VM_FAULT_NOPAGE; if (err == -ENOMEM) return VM_FAULT_OOM; - if (err == -EAGAIN) - return VM_FAULT_RETRY; /* -ENOSPC, -EDQUOT, -EIO ... */ return VM_FAULT_SIGBUS; } -- GitLab From 017a499be5746df215a664775b14e80c087bc941 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Wed, 8 Feb 2017 10:57:37 +0000 Subject: [PATCH 0126/1398] xen-netfront: Improve error handling during initialization [ Upstream commit e2e004acc7cbe3c531e752a270a74e95cde3ea48 ] This fixes a crash when running out of grant refs when creating many queues across many netdevs. * If creating queues fails (i.e. there are no grant refs available), call xenbus_dev_fatal() to ensure that the xenbus device is set to the closed state. * If no queues are created, don't call xennet_disconnect_backend as netdev->real_num_tx_queues will not have been set correctly. * If setup_netfront() fails, ensure that all the queues created are cleaned up, not just those that have been set up. * If any queues were set up and an error occurs, call xennet_destroy_queues() to clean up the napi context. * If any fatal error occurs, unregister and destroy the netdev to avoid leaving around a half setup network device. Signed-off-by: Ross Lagerwall Reviewed-by: Boris Ostrovsky Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index cd442e46afb4..265e2095f8d9 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1854,27 +1854,19 @@ static int talk_to_netback(struct xenbus_device *dev, xennet_destroy_queues(info); err = xennet_create_queues(info, &num_queues); - if (err < 0) - goto destroy_ring; + if (err < 0) { + xenbus_dev_fatal(dev, err, "creating queues"); + kfree(info->queues); + info->queues = NULL; + goto out; + } /* Create shared ring, alloc event channel -- for each queue */ for (i = 0; i < num_queues; ++i) { queue = &info->queues[i]; err = setup_netfront(dev, queue, feature_split_evtchn); - if (err) { - /* setup_netfront() will tidy up the current - * queue on error, but we need to clean up - * those already allocated. - */ - if (i > 0) { - rtnl_lock(); - netif_set_real_num_tx_queues(info->netdev, i); - rtnl_unlock(); - goto destroy_ring; - } else { - goto out; - } - } + if (err) + goto destroy_ring; } again: @@ -1964,9 +1956,10 @@ static int talk_to_netback(struct xenbus_device *dev, xenbus_transaction_end(xbt, 1); destroy_ring: xennet_disconnect_backend(info); - kfree(info->queues); - info->queues = NULL; + xennet_destroy_queues(info); out: + unregister_netdev(info->netdev); + xennet_free_netdev(info->netdev); return err; } -- GitLab From 959b8e0ba293fe0fddf6bbd5c9c874b7ab1feeb8 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 11 Feb 2017 09:24:46 -0200 Subject: [PATCH 0127/1398] cec: initiator should be the same as the destination for, poll [ Upstream commit 42980da2eb7eb9695d8efc0c0ef145cbbb993b2c ] Poll messages that are used to allocate a logical address should use the same initiator as the destination. Instead, it expected that the initiator was 0xf which is not according to the standard. This also had consequences for the message checks in cec_transmit_msg_fh that incorrectly rejected poll messages with the same initiator and destination. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/cec/cec-adap.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index 499d7bfe7147..75e6d5e0504f 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -608,8 +608,7 @@ int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg, } memset(msg->msg + msg->len, 0, sizeof(msg->msg) - msg->len); if (msg->len == 1) { - if (cec_msg_initiator(msg) != 0xf || - cec_msg_destination(msg) == 0xf) { + if (cec_msg_destination(msg) == 0xf) { dprintk(1, "cec_transmit_msg: invalid poll message\n"); return -EINVAL; } @@ -634,7 +633,7 @@ int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg, dprintk(1, "cec_transmit_msg: destination is the adapter itself\n"); return -EINVAL; } - if (cec_msg_initiator(msg) != 0xf && + if (msg->len > 1 && adap->is_configured && !cec_has_log_addr(adap, cec_msg_initiator(msg))) { dprintk(1, "cec_transmit_msg: initiator has unknown logical address %d\n", cec_msg_initiator(msg)); @@ -883,7 +882,7 @@ static int cec_config_log_addr(struct cec_adapter *adap, /* Send poll message */ msg.len = 1; - msg.msg[0] = 0xf0 | log_addr; + msg.msg[0] = (log_addr << 4) | log_addr; err = cec_transmit_msg_fh(adap, &msg, NULL, true); /* -- GitLab From a767c866735c0f23e53aa2a4153c7db76988d230 Mon Sep 17 00:00:00 2001 From: Mart van Santen Date: Fri, 10 Feb 2017 12:02:18 +0000 Subject: [PATCH 0128/1398] xen-netback: vif counters from int/long to u64 [ Upstream commit ebf692f85ff78092cd238166d8d7ec51419f9c02 ] This patch fixes an issue where the type of counters in the queue(s) and interface are not in sync (queue counters are int, interface counters are long), causing incorrect reporting of tx/rx values of the vif interface and unclear counter overflows. This patch sets both counters to the u64 type. Signed-off-by: Mart van Santen Reviewed-by: Paul Durrant Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/common.h | 8 ++++---- drivers/net/xen-netback/interface.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index cb7365bdf6e0..5b1d2e8402d9 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -113,10 +113,10 @@ struct xenvif_stats { * A subset of struct net_device_stats that contains only the * fields that are updated in netback.c for each queue. */ - unsigned int rx_bytes; - unsigned int rx_packets; - unsigned int tx_bytes; - unsigned int tx_packets; + u64 rx_bytes; + u64 rx_packets; + u64 tx_bytes; + u64 tx_packets; /* Additional stats used by xenvif */ unsigned long rx_gso_checksum_fixup; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 5bfaf5578810..618013e7f87b 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -225,10 +225,10 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; - unsigned long rx_bytes = 0; - unsigned long rx_packets = 0; - unsigned long tx_bytes = 0; - unsigned long tx_packets = 0; + u64 rx_bytes = 0; + u64 rx_packets = 0; + u64 tx_bytes = 0; + u64 tx_packets = 0; unsigned int index; spin_lock(&vif->lock); -- GitLab From c13a05a8017b01cae73edf90098f1702a5aa1c77 Mon Sep 17 00:00:00 2001 From: Rui Sousa Date: Mon, 13 Feb 2017 10:01:25 +0800 Subject: [PATCH 0129/1398] net: fec: fix multicast filtering hardware setup [ Upstream commit 01f8902bcf3ff124d0aeb88a774180ebcec20ace ] Fix hardware setup of multicast address hash: - Never clear the hardware hash (to avoid packet loss) - Construct the hash register values in software and then write once to hardware Signed-off-by: Rui Sousa Signed-off-by: Fugang Duan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/fec_main.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 12aef1b15356..849b8712ec81 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2923,6 +2923,7 @@ static void set_multicast_list(struct net_device *ndev) struct netdev_hw_addr *ha; unsigned int i, bit, data, crc, tmp; unsigned char hash; + unsigned int hash_high = 0, hash_low = 0; if (ndev->flags & IFF_PROMISC) { tmp = readl(fep->hwp + FEC_R_CNTRL); @@ -2945,11 +2946,7 @@ static void set_multicast_list(struct net_device *ndev) return; } - /* Clear filter and add the addresses in hash register - */ - writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); - writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW); - + /* Add the addresses in hash register */ netdev_for_each_mc_addr(ha, ndev) { /* calculate crc32 value of mac address */ crc = 0xffffffff; @@ -2967,16 +2964,14 @@ static void set_multicast_list(struct net_device *ndev) */ hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f; - if (hash > 31) { - tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH); - tmp |= 1 << (hash - 32); - writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); - } else { - tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_LOW); - tmp |= 1 << hash; - writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_LOW); - } + if (hash > 31) + hash_high |= 1 << (hash - 32); + else + hash_low |= 1 << hash; } + + writel(hash_high, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); + writel(hash_low, fep->hwp + FEC_GRP_HASH_TABLE_LOW); } /* Set a MAC change in hardware. */ -- GitLab From b53525eaac5590d06785f5fbdd8265b73ecbc911 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 29 Jun 2017 13:59:24 +0100 Subject: [PATCH 0130/1398] dma-buf/dma-fence: Extract __dma_fence_is_later() commit 8111477663813caa1a4469cfe6afaae36cd04513 upstream. Often we have the task of comparing two seqno known to be on the same context, so provide a common __dma_fence_is_later(). Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Sean Paul Cc: Gustavo Padovan Reviewed-by: Sean Paul Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170629125930.821-1-chris@chris-wilson.co.uk [renamed to __fence_is_later() - gregkh] Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- include/linux/fence.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/include/linux/fence.h b/include/linux/fence.h index 0d763053f97a..fcb69f99d717 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -280,6 +280,19 @@ fence_is_signaled(struct fence *fence) return false; } +/** + * __fence_is_later - return if f1 is chronologically later than f2 + * @f1: [in] the first fence's seqno + * @f2: [in] the second fence's seqno from the same context + * + * Returns true if f1 is chronologically later than f2. Both fences must be + * from the same context, since a seqno is not common across contexts. + */ +static inline bool __fence_is_later(u32 f1, u32 f2) +{ + return (int)(f1 - f2) > 0; +} + /** * fence_is_later - return if f1 is chronologically later than f2 * @f1: [in] the first fence from the same context @@ -293,7 +306,7 @@ static inline bool fence_is_later(struct fence *f1, struct fence *f2) if (WARN_ON(f1->context != f2->context)) return false; - return (int)(f1->seqno - f2->seqno) > 0; + return __fence_is_later(f1->seqno, f2->seqno); } /** -- GitLab From fc839ecb8edae9ec6fc757e6874f14ffecda7b3d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 29 Jun 2017 13:59:25 +0100 Subject: [PATCH 0131/1398] dma-buf/sw-sync: Fix the is-signaled test to handle u32 wraparound commit 61894b02716f122dd7662d5d89f5b2245ca551e2 upstream. Use the canonical __dma_fence_is_later() to compare the fence seqno against the timeline seqno to check if the fence is signaled. Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Sean Paul Cc: Gustavo Padovan Reviewed-by: Sean Paul Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170629125930.821-2-chris@chris-wilson.co.uk [s/dma_fence/fence/g - gregkh] Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/sw_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 62e8e6dc7953..77ede2a0fd5a 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -219,7 +219,7 @@ static bool timeline_fence_signaled(struct fence *fence) { struct sync_timeline *parent = fence_parent(fence); - return (fence->seqno > parent->value) ? false : true; + return !__fence_is_later(fence->seqno, parent->value); } static bool timeline_fence_enable_signaling(struct fence *fence) -- GitLab From 985b5b238436f015e4368d82c032d3ce085efe17 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 29 Jun 2017 13:59:26 +0100 Subject: [PATCH 0132/1398] dma-buf/sw-sync: Prevent user overflow on timeline advance commit 8f66d3aa1735bc95ae58d846a157357e8d41abb8 upstream. The timeline is u32, which limits any single advance to INT_MAX so that we can detect all fences that need signaling. Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Sean Paul Cc: Gustavo Padovan Reviewed-by: Sean Paul Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170629125930.821-3-chris@chris-wilson.co.uk [s/dma_fence/fence/g - gregkh] Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/sw_sync.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 77ede2a0fd5a..1098af298af6 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -345,6 +345,11 @@ static long sw_sync_ioctl_inc(struct sync_timeline *obj, unsigned long arg) if (copy_from_user(&value, (void __user *)arg, sizeof(value))) return -EFAULT; + while (value > INT_MAX) { + sync_timeline_signal(obj, INT_MAX); + value -= INT_MAX; + } + sync_timeline_signal(obj, value); return 0; -- GitLab From f14ad42b8743897d140808467ed4ae3ce93bd0a5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 29 Jun 2017 13:59:27 +0100 Subject: [PATCH 0133/1398] dma-buf/sw-sync: Reduce irqsave/irqrestore from known context commit a6aa8fca4d792c72947e341d7842d2f700534335 upstream. If we know the context under which we are called, then we can use the simpler form of spin_lock_irq (saving the save/restore). Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Sean Paul Cc: Gustavo Padovan Reviewed-by: Sean Paul Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170629125930.821-4-chris@chris-wilson.co.uk [s/dma_fence/fence/g - gregkh] Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/sw_sync.c | 15 +++++++++------ drivers/dma-buf/sync_debug.c | 14 ++++++-------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 1098af298af6..30aaa4216e82 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -135,12 +135,11 @@ static void sync_timeline_put(struct sync_timeline *obj) */ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) { - unsigned long flags; struct sync_pt *pt, *next; trace_sync_timeline(obj); - spin_lock_irqsave(&obj->child_list_lock, flags); + spin_lock_irq(&obj->child_list_lock); obj->value += inc; @@ -150,7 +149,7 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) list_del_init(&pt->active_list); } - spin_unlock_irqrestore(&obj->child_list_lock, flags); + spin_unlock_irq(&obj->child_list_lock); } /** @@ -167,7 +166,6 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) static struct sync_pt *sync_pt_create(struct sync_timeline *obj, int size, unsigned int value) { - unsigned long flags; struct sync_pt *pt; if (size < sizeof(*pt)) @@ -177,13 +175,16 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, int size, if (!pt) return NULL; - spin_lock_irqsave(&obj->child_list_lock, flags); + spin_lock_irq(&obj->child_list_lock); + sync_timeline_get(obj); fence_init(&pt->base, &timeline_fence_ops, &obj->child_list_lock, obj->context, value); list_add_tail(&pt->child_list, &obj->child_list_head); INIT_LIST_HEAD(&pt->active_list); - spin_unlock_irqrestore(&obj->child_list_lock, flags); + + spin_unlock_irq(&obj->child_list_lock); + return pt; } @@ -206,9 +207,11 @@ static void timeline_fence_release(struct fence *fence) unsigned long flags; spin_lock_irqsave(fence->lock, flags); + list_del(&pt->child_list); if (!list_empty(&pt->active_list)) list_del(&pt->active_list); + spin_unlock_irqrestore(fence->lock, flags); sync_timeline_put(parent); diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index 2dd4c3db6caa..b684e0b46fd7 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -116,17 +116,16 @@ static void sync_print_fence(struct seq_file *s, struct fence *fence, bool show) static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) { struct list_head *pos; - unsigned long flags; seq_printf(s, "%s: %d\n", obj->name, obj->value); - spin_lock_irqsave(&obj->child_list_lock, flags); + spin_lock_irq(&obj->child_list_lock); list_for_each(pos, &obj->child_list_head) { struct sync_pt *pt = container_of(pos, struct sync_pt, child_list); sync_print_fence(s, &pt->base, false); } - spin_unlock_irqrestore(&obj->child_list_lock, flags); + spin_unlock_irq(&obj->child_list_lock); } static void sync_print_sync_file(struct seq_file *s, @@ -149,12 +148,11 @@ static void sync_print_sync_file(struct seq_file *s, static int sync_debugfs_show(struct seq_file *s, void *unused) { - unsigned long flags; struct list_head *pos; seq_puts(s, "objs:\n--------------\n"); - spin_lock_irqsave(&sync_timeline_list_lock, flags); + spin_lock_irq(&sync_timeline_list_lock); list_for_each(pos, &sync_timeline_list_head) { struct sync_timeline *obj = container_of(pos, struct sync_timeline, @@ -163,11 +161,11 @@ static int sync_debugfs_show(struct seq_file *s, void *unused) sync_print_obj(s, obj); seq_puts(s, "\n"); } - spin_unlock_irqrestore(&sync_timeline_list_lock, flags); + spin_unlock_irq(&sync_timeline_list_lock); seq_puts(s, "fences:\n--------------\n"); - spin_lock_irqsave(&sync_file_list_lock, flags); + spin_lock_irq(&sync_file_list_lock); list_for_each(pos, &sync_file_list_head) { struct sync_file *sync_file = container_of(pos, struct sync_file, sync_file_list); @@ -175,7 +173,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused) sync_print_sync_file(s, sync_file); seq_puts(s, "\n"); } - spin_unlock_irqrestore(&sync_file_list_lock, flags); + spin_unlock_irq(&sync_file_list_lock); return 0; } -- GitLab From 1bf0b23c07f1e23f23c05d4f8223ebdebd13c3f1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 29 Jun 2017 13:59:28 +0100 Subject: [PATCH 0134/1398] dma-buf/sw-sync: sync_pt is private and of fixed size commit 3b52ce44e720c240afc4c4b03140d7b7811b23bd upstream. Since sync_pt is only allocated from a single location and is no longer the base class for fences (that is struct dma_fence) it no longer needs a generic unsized allocator. Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Sean Paul Cc: Gustavo Padovan Reviewed-by: Sean Paul Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170629125930.821-5-chris@chris-wilson.co.uk [s/dma_fence/fence/g - gregkh] Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/sw_sync.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 30aaa4216e82..a5f47f31a251 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -155,7 +155,6 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) /** * sync_pt_create() - creates a sync pt * @parent: fence's parent sync_timeline - * @size: size to allocate for this pt * @inc: value of the fence * * Creates a new sync_pt as a child of @parent. @size bytes will be @@ -163,15 +162,12 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) * the generic sync_timeline struct. Returns the sync_pt object or * NULL in case of error. */ -static struct sync_pt *sync_pt_create(struct sync_timeline *obj, int size, - unsigned int value) +static struct sync_pt *sync_pt_create(struct sync_timeline *obj, + unsigned int value) { struct sync_pt *pt; - if (size < sizeof(*pt)) - return NULL; - - pt = kzalloc(size, GFP_KERNEL); + pt = kzalloc(sizeof(*pt), GFP_KERNEL); if (!pt) return NULL; @@ -312,7 +308,7 @@ static long sw_sync_ioctl_create_fence(struct sync_timeline *obj, goto err; } - pt = sync_pt_create(obj, sizeof(*pt), data.value); + pt = sync_pt_create(obj, data.value); if (!pt) { err = -ENOMEM; goto err; -- GitLab From e82ecb230924314b848b68f5dade1ccf38bea838 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 29 Jun 2017 22:05:32 +0100 Subject: [PATCH 0135/1398] dma-buf/sw-sync: Fix locking around sync_timeline lists commit d3862e44daa7a0c94d2f6193502a8c49379acfce upstream. The sync_pt were not adding themselves atomically to the timeline lists, corruption imminent. Only a single list is required to track the unsignaled sync_pt, so reduce it and rename the lock more appropriately along with using idiomatic names to distinguish a list from links along it. v2: Prevent spinlock recursion on free during create (next patch) and fixup crossref in kerneldoc Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Sean Paul Cc: Gustavo Padovan Reviewed-by: Sean Paul Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170629210532.5617-1-chris@chris-wilson.co.uk [s/dma_fence/fence/g - gregkh] Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/sw_sync.c | 48 ++++++++++++++---------------------- drivers/dma-buf/sync_debug.c | 9 +++---- drivers/dma-buf/sync_debug.h | 22 ++++++----------- 3 files changed, 31 insertions(+), 48 deletions(-) diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index a5f47f31a251..a53dddbbf394 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -96,9 +96,8 @@ struct sync_timeline *sync_timeline_create(const char *name) obj->context = fence_context_alloc(1); strlcpy(obj->name, name, sizeof(obj->name)); - INIT_LIST_HEAD(&obj->child_list_head); - INIT_LIST_HEAD(&obj->active_list_head); - spin_lock_init(&obj->child_list_lock); + INIT_LIST_HEAD(&obj->pt_list); + spin_lock_init(&obj->lock); sync_timeline_debug_add(obj); @@ -139,17 +138,15 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) trace_sync_timeline(obj); - spin_lock_irq(&obj->child_list_lock); + spin_lock_irq(&obj->lock); obj->value += inc; - list_for_each_entry_safe(pt, next, &obj->active_list_head, - active_list) { + list_for_each_entry_safe(pt, next, &obj->pt_list, link) if (fence_is_signaled_locked(&pt->base)) - list_del_init(&pt->active_list); - } + list_del_init(&pt->link); - spin_unlock_irq(&obj->child_list_lock); + spin_unlock_irq(&obj->lock); } /** @@ -171,15 +168,15 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, if (!pt) return NULL; - spin_lock_irq(&obj->child_list_lock); - sync_timeline_get(obj); - fence_init(&pt->base, &timeline_fence_ops, &obj->child_list_lock, + fence_init(&pt->base, &timeline_fence_ops, &obj->lock, obj->context, value); - list_add_tail(&pt->child_list, &obj->child_list_head); - INIT_LIST_HEAD(&pt->active_list); + INIT_LIST_HEAD(&pt->link); - spin_unlock_irq(&obj->child_list_lock); + spin_lock_irq(&obj->lock); + if (!fence_is_signaled_locked(&pt->base)) + list_add_tail(&pt->link, &obj->pt_list); + spin_unlock_irq(&obj->lock); return pt; } @@ -200,15 +197,15 @@ static void timeline_fence_release(struct fence *fence) { struct sync_pt *pt = fence_to_sync_pt(fence); struct sync_timeline *parent = fence_parent(fence); - unsigned long flags; - spin_lock_irqsave(fence->lock, flags); + if (!list_empty(&pt->link)) { + unsigned long flags; - list_del(&pt->child_list); - if (!list_empty(&pt->active_list)) - list_del(&pt->active_list); - - spin_unlock_irqrestore(fence->lock, flags); + spin_lock_irqsave(fence->lock, flags); + if (!list_empty(&pt->link)) + list_del(&pt->link); + spin_unlock_irqrestore(fence->lock, flags); + } sync_timeline_put(parent); fence_free(fence); @@ -223,13 +220,6 @@ static bool timeline_fence_signaled(struct fence *fence) static bool timeline_fence_enable_signaling(struct fence *fence) { - struct sync_pt *pt = fence_to_sync_pt(fence); - struct sync_timeline *parent = fence_parent(fence); - - if (timeline_fence_signaled(fence)) - return false; - - list_add_tail(&pt->active_list, &parent->active_list_head); return true; } diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index b684e0b46fd7..39386517175b 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -119,13 +119,12 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) seq_printf(s, "%s: %d\n", obj->name, obj->value); - spin_lock_irq(&obj->child_list_lock); - list_for_each(pos, &obj->child_list_head) { - struct sync_pt *pt = - container_of(pos, struct sync_pt, child_list); + spin_lock_irq(&obj->lock); + list_for_each(pos, &obj->pt_list) { + struct sync_pt *pt = container_of(pos, struct sync_pt, link); sync_print_fence(s, &pt->base, false); } - spin_unlock_irq(&obj->child_list_lock); + spin_unlock_irq(&obj->lock); } static void sync_print_sync_file(struct seq_file *s, diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index d269aa6783aa..7acb7ab2e1a1 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -24,43 +24,37 @@ * struct sync_timeline - sync object * @kref: reference count on fence. * @name: name of the sync_timeline. Useful for debugging - * @child_list_head: list of children sync_pts for this sync_timeline - * @child_list_lock: lock protecting @child_list_head and fence.status - * @active_list_head: list of active (unsignaled/errored) sync_pts + * @lock: lock protecting @pt_list and @value + * @pt_list: list of active (unsignaled/errored) sync_pts * @sync_timeline_list: membership in global sync_timeline_list */ struct sync_timeline { struct kref kref; char name[32]; - /* protected by child_list_lock */ + /* protected by lock */ u64 context; int value; - struct list_head child_list_head; - spinlock_t child_list_lock; - - struct list_head active_list_head; + struct list_head pt_list; + spinlock_t lock; struct list_head sync_timeline_list; }; static inline struct sync_timeline *fence_parent(struct fence *fence) { - return container_of(fence->lock, struct sync_timeline, - child_list_lock); + return container_of(fence->lock, struct sync_timeline, lock); } /** * struct sync_pt - sync_pt object * @base: base fence object - * @child_list: sync timeline child's list - * @active_list: sync timeline active child's list + * @link: link on the sync timeline's list */ struct sync_pt { struct fence base; - struct list_head child_list; - struct list_head active_list; + struct list_head link; }; #ifdef CONFIG_SW_SYNC -- GitLab From db767404ad09b419779704fff4709a25f1dc252b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 29 Jun 2017 22:12:53 +0100 Subject: [PATCH 0136/1398] dma-buf/sw-sync: Use an rbtree to sort fences in the timeline commit f1e8c67123cf171e2b0357e885e426328b241d7d upstream. Reduce the list iteration when incrementing the timeline by storing the fences in increasing order. v2: Prevent spinlock recursion on free during create v3: Fixup rebase conflict inside comments that escaped the compiler. Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Sean Paul Cc: Gustavo Padovan Reviewed-by: Sean Paul Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170629211253.22766-1-chris@chris-wilson.co.uk [s/dma_fence/fence/g - gregkh] Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/sw_sync.c | 49 +++++++++++++++++++++++++++++++----- drivers/dma-buf/sync_debug.h | 5 ++++ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index a53dddbbf394..505ce3e77376 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -96,6 +96,7 @@ struct sync_timeline *sync_timeline_create(const char *name) obj->context = fence_context_alloc(1); strlcpy(obj->name, name, sizeof(obj->name)); + obj->pt_tree = RB_ROOT; INIT_LIST_HEAD(&obj->pt_list); spin_lock_init(&obj->lock); @@ -142,9 +143,13 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) obj->value += inc; - list_for_each_entry_safe(pt, next, &obj->pt_list, link) - if (fence_is_signaled_locked(&pt->base)) - list_del_init(&pt->link); + list_for_each_entry_safe(pt, next, &obj->pt_list, link) { + if (!fence_is_signaled_locked(&pt->base)) + break; + + list_del_init(&pt->link); + rb_erase(&pt->node, &obj->pt_tree); + } spin_unlock_irq(&obj->lock); } @@ -174,8 +179,38 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, INIT_LIST_HEAD(&pt->link); spin_lock_irq(&obj->lock); - if (!fence_is_signaled_locked(&pt->base)) - list_add_tail(&pt->link, &obj->pt_list); + if (!fence_is_signaled_locked(&pt->base)) { + struct rb_node **p = &obj->pt_tree.rb_node; + struct rb_node *parent = NULL; + + while (*p) { + struct sync_pt *other; + int cmp; + + parent = *p; + other = rb_entry(parent, typeof(*pt), node); + cmp = value - other->base.seqno; + if (cmp > 0) { + p = &parent->rb_right; + } else if (cmp < 0) { + p = &parent->rb_left; + } else { + if (fence_get_rcu(&other->base)) { + fence_put(&pt->base); + pt = other; + goto unlock; + } + p = &parent->rb_left; + } + } + rb_link_node(&pt->node, parent, p); + rb_insert_color(&pt->node, &obj->pt_tree); + + parent = rb_next(&pt->node); + list_add_tail(&pt->link, + parent ? &rb_entry(parent, typeof(*pt), node)->link : &obj->pt_list); + } +unlock: spin_unlock_irq(&obj->lock); return pt; @@ -202,8 +237,10 @@ static void timeline_fence_release(struct fence *fence) unsigned long flags; spin_lock_irqsave(fence->lock, flags); - if (!list_empty(&pt->link)) + if (!list_empty(&pt->link)) { list_del(&pt->link); + rb_erase(&pt->node, &parent->pt_tree); + } spin_unlock_irqrestore(fence->lock, flags); } diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index 7acb7ab2e1a1..9615dc0385b5 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -14,6 +14,7 @@ #define _LINUX_SYNC_H #include +#include #include #include @@ -25,6 +26,7 @@ * @kref: reference count on fence. * @name: name of the sync_timeline. Useful for debugging * @lock: lock protecting @pt_list and @value + * @pt_tree: rbtree of active (unsignaled/errored) sync_pts * @pt_list: list of active (unsignaled/errored) sync_pts * @sync_timeline_list: membership in global sync_timeline_list */ @@ -36,6 +38,7 @@ struct sync_timeline { u64 context; int value; + struct rb_root pt_tree; struct list_head pt_list; spinlock_t lock; @@ -51,10 +54,12 @@ static inline struct sync_timeline *fence_parent(struct fence *fence) * struct sync_pt - sync_pt object * @base: base fence object * @link: link on the sync timeline's list + * @node: node in the sync timeline's tree */ struct sync_pt { struct fence base; struct list_head link; + struct rb_node node; }; #ifdef CONFIG_SW_SYNC -- GitLab From 4496b88c06744a9a92088e1924c0b39e225898b2 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Sat, 29 Jul 2017 12:22:15 -0300 Subject: [PATCH 0137/1398] dma-buf/sw_sync: move timeline_fence_ops around commit 150b6a9d7d6fffb95c0a5349960a10569e8218b5 upstream. We are going to use timeline_fence_signaled() in a internal function in the next commit. Cc: Chris Wilson Signed-off-by: Gustavo Padovan Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20170729152217.8362-1-gustavo@padovan.org [s/dma_fence/fence/g - gregkh] Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/sw_sync.c | 138 +++++++++++++++++++------------------- 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 505ce3e77376..dec6d8529392 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -125,6 +125,75 @@ static void sync_timeline_put(struct sync_timeline *obj) kref_put(&obj->kref, sync_timeline_free); } +static const char *timeline_fence_get_driver_name(struct fence *fence) +{ + return "sw_sync"; +} + +static const char *timeline_fence_get_timeline_name(struct fence *fence) +{ + struct sync_timeline *parent = fence_parent(fence); + + return parent->name; +} + +static void timeline_fence_release(struct fence *fence) +{ + struct sync_pt *pt = fence_to_sync_pt(fence); + struct sync_timeline *parent = fence_parent(fence); + + if (!list_empty(&pt->link)) { + unsigned long flags; + + spin_lock_irqsave(fence->lock, flags); + if (!list_empty(&pt->link)) { + list_del(&pt->link); + rb_erase(&pt->node, &parent->pt_tree); + } + spin_unlock_irqrestore(fence->lock, flags); + } + + sync_timeline_put(parent); + fence_free(fence); +} + +static bool timeline_fence_signaled(struct fence *fence) +{ + struct sync_timeline *parent = fence_parent(fence); + + return !__fence_is_later(fence->seqno, parent->value); +} + +static bool timeline_fence_enable_signaling(struct fence *fence) +{ + return true; +} + +static void timeline_fence_value_str(struct fence *fence, + char *str, int size) +{ + snprintf(str, size, "%d", fence->seqno); +} + +static void timeline_fence_timeline_value_str(struct fence *fence, + char *str, int size) +{ + struct sync_timeline *parent = fence_parent(fence); + + snprintf(str, size, "%d", parent->value); +} + +static const struct fence_ops timeline_fence_ops = { + .get_driver_name = timeline_fence_get_driver_name, + .get_timeline_name = timeline_fence_get_timeline_name, + .enable_signaling = timeline_fence_enable_signaling, + .signaled = timeline_fence_signaled, + .wait = fence_default_wait, + .release = timeline_fence_release, + .fence_value_str = timeline_fence_value_str, + .timeline_value_str = timeline_fence_timeline_value_str, +}; + /** * sync_timeline_signal() - signal a status change on a sync_timeline * @obj: sync_timeline to signal @@ -216,75 +285,6 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, return pt; } -static const char *timeline_fence_get_driver_name(struct fence *fence) -{ - return "sw_sync"; -} - -static const char *timeline_fence_get_timeline_name(struct fence *fence) -{ - struct sync_timeline *parent = fence_parent(fence); - - return parent->name; -} - -static void timeline_fence_release(struct fence *fence) -{ - struct sync_pt *pt = fence_to_sync_pt(fence); - struct sync_timeline *parent = fence_parent(fence); - - if (!list_empty(&pt->link)) { - unsigned long flags; - - spin_lock_irqsave(fence->lock, flags); - if (!list_empty(&pt->link)) { - list_del(&pt->link); - rb_erase(&pt->node, &parent->pt_tree); - } - spin_unlock_irqrestore(fence->lock, flags); - } - - sync_timeline_put(parent); - fence_free(fence); -} - -static bool timeline_fence_signaled(struct fence *fence) -{ - struct sync_timeline *parent = fence_parent(fence); - - return !__fence_is_later(fence->seqno, parent->value); -} - -static bool timeline_fence_enable_signaling(struct fence *fence) -{ - return true; -} - -static void timeline_fence_value_str(struct fence *fence, - char *str, int size) -{ - snprintf(str, size, "%d", fence->seqno); -} - -static void timeline_fence_timeline_value_str(struct fence *fence, - char *str, int size) -{ - struct sync_timeline *parent = fence_parent(fence); - - snprintf(str, size, "%d", parent->value); -} - -static const struct fence_ops timeline_fence_ops = { - .get_driver_name = timeline_fence_get_driver_name, - .get_timeline_name = timeline_fence_get_timeline_name, - .enable_signaling = timeline_fence_enable_signaling, - .signaled = timeline_fence_signaled, - .wait = fence_default_wait, - .release = timeline_fence_release, - .fence_value_str = timeline_fence_value_str, - .timeline_value_str = timeline_fence_timeline_value_str, -}; - /* * *WARNING* * -- GitLab From 9ae85fabdfc0c74c2ac32219e116546e2a5a6309 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Sat, 29 Jul 2017 12:22:16 -0300 Subject: [PATCH 0138/1398] dma-buf/sw_sync: clean up list before signaling the fence commit 3792b7c1a70815fe4e954221c096f9278638fd21 upstream. If userspace already dropped its own reference by closing the sw_sync fence fd we might end up in a deadlock where dma_fence_is_signaled_locked() will trigger the release of the fence and thus try to hold the lock to remove the fence from the list. dma_fence_is_signaled_locked() tries to release/free the fence and hold the lock in the process. We fix that by changing the order operation and clean up the list and rb-tree first. v2: Drop fence get/put dance and manipulate the list first (Chris Wilson) Cc: Chris Wilson Signed-off-by: Gustavo Padovan Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20170729152217.8362-2-gustavo@padovan.org [s/dma_fence/fence/g - gregkh] Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/sw_sync.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index dec6d8529392..b0ee084a7fdc 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -213,11 +213,21 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) obj->value += inc; list_for_each_entry_safe(pt, next, &obj->pt_list, link) { - if (!fence_is_signaled_locked(&pt->base)) + if (!timeline_fence_signaled(&pt->base)) break; list_del_init(&pt->link); rb_erase(&pt->node, &obj->pt_tree); + + /* + * A signal callback may release the last reference to this + * fence, causing it to be freed. That operation has to be + * last to avoid a use after free inside this loop, and must + * be after we remove the fence from the timeline in order to + * prevent deadlocking on timeline->lock inside + * timeline_fence_release(). + */ + fence_signal_locked(&pt->base); } spin_unlock_irq(&obj->lock); -- GitLab From f55d17e9c3af7bf2fdb38c52a9abe4d4cfc0137c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Jan 2017 14:12:20 +0000 Subject: [PATCH 0139/1398] dma-fence: Clear fence->status during dma_fence_init() commit 83dd1376fd92f33bdeca9e83d479534a4e7f870b upstream. As the fence->status is an optional field that may be set before dma_fence_signal() is called to convey that the fence completed with an error, we have to ensure that it is always set to zero on initialisation so that the typical use (i.e. unset) always flags a successful completion. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Reviewed-by: Daniel Vetter Reviewed-by: Sumit Semwal Signed-off-by: Sumit Semwal Link: http://patchwork.freedesktop.org/patch/msgid/20170104141222.6992-1-chris@chris-wilson.co.uk [s/dma_fence/fence/g - gregkh] Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/fence.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c index 4d51f9e83fa8..0f67d5294cd6 100644 --- a/drivers/dma-buf/fence.c +++ b/drivers/dma-buf/fence.c @@ -526,6 +526,7 @@ fence_init(struct fence *fence, const struct fence_ops *ops, fence->context = context; fence->seqno = seqno; fence->flags = 0UL; + fence->status = 0; trace_fence_init(fence); } -- GitLab From d3b029a44e14f5226fd9b410a0a70eb9b7ed6beb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Jan 2017 14:12:21 +0000 Subject: [PATCH 0140/1398] dma-fence: Wrap querying the fence->status commit d6c99f4bf093a58d3ab47caaec74b81f18bc4e3f upstream. The fence->status is an optional field that is only valid once the fence has been signaled. (Driver may fill the fence->status with an error code prior to calling dma_fence_signal().) Given the restriction upon its validity, wrap querying of the fence->status into a helper dma_fence_get_status(). Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Reviewed-by: Sumit Semwal Signed-off-by: Sumit Semwal Link: http://patchwork.freedesktop.org/patch/msgid/20170104141222.6992-2-chris@chris-wilson.co.uk [s/dma_fence/fence/g - gregkh] Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/fence.c | 25 +++++++++++++++++++++++++ drivers/dma-buf/sync_debug.c | 20 ++++++++++---------- drivers/dma-buf/sync_file.c | 6 ++---- include/linux/fence.h | 24 ++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 14 deletions(-) diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c index 0f67d5294cd6..c7a0eefb93ab 100644 --- a/drivers/dma-buf/fence.c +++ b/drivers/dma-buf/fence.c @@ -280,6 +280,31 @@ int fence_add_callback(struct fence *fence, struct fence_cb *cb, } EXPORT_SYMBOL(fence_add_callback); +/** + * fence_get_status - returns the status upon completion + * @fence: [in] the fence to query + * + * This wraps fence_get_status_locked() to return the error status + * condition on a signaled fence. See fence_get_status_locked() for more + * details. + * + * Returns 0 if the fence has not yet been signaled, 1 if the fence has + * been signaled without an error condition, or a negative error code + * if the fence has been completed in err. + */ +int fence_get_status(struct fence *fence) +{ + unsigned long flags; + int status; + + spin_lock_irqsave(fence->lock, flags); + status = fence_get_status_locked(fence); + spin_unlock_irqrestore(fence->lock, flags); + + return status; +} +EXPORT_SYMBOL(fence_get_status); + /** * fence_remove_callback - remove a callback from the signaling list * @fence: [in] the fence to wait on diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index 39386517175b..858263dbecd4 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -62,29 +62,29 @@ void sync_file_debug_remove(struct sync_file *sync_file) static const char *sync_status_str(int status) { - if (status == 0) - return "signaled"; + if (status < 0) + return "error"; if (status > 0) - return "active"; + return "signaled"; - return "error"; + return "active"; } -static void sync_print_fence(struct seq_file *s, struct fence *fence, bool show) +static void sync_print_fence(struct seq_file *s, + struct fence *fence, bool show) { - int status = 1; struct sync_timeline *parent = fence_parent(fence); + int status; - if (fence_is_signaled_locked(fence)) - status = fence->status; + status = fence_get_status_locked(fence); seq_printf(s, " %s%sfence %s", show ? parent->name : "", show ? "_" : "", sync_status_str(status)); - if (status <= 0) { + if (status) { struct timespec64 ts64 = ktime_to_timespec64(fence->timestamp); @@ -133,7 +133,7 @@ static void sync_print_sync_file(struct seq_file *s, int i; seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name, - sync_status_str(!fence_is_signaled(sync_file->fence))); + sync_status_str(fence_get_status(sync_file->fence))); if (fence_is_array(sync_file->fence)) { struct fence_array *array = to_fence_array(sync_file->fence); diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index b29a9e817320..eb3922031c3b 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -377,10 +377,8 @@ static void sync_fill_fence_info(struct fence *fence, sizeof(info->obj_name)); strlcpy(info->driver_name, fence->ops->get_driver_name(fence), sizeof(info->driver_name)); - if (fence_is_signaled(fence)) - info->status = fence->status >= 0 ? 1 : fence->status; - else - info->status = 0; + + info->status = fence_get_status(fence); info->timestamp_ns = ktime_to_ns(fence->timestamp); } diff --git a/include/linux/fence.h b/include/linux/fence.h index fcb69f99d717..2dfed6957b61 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -334,6 +334,30 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2) return fence_is_signaled(f2) ? NULL : f2; } +/** + * fence_get_status_locked - returns the status upon completion + * @fence: [in] the fence to query + * + * Drivers can supply an optional error status condition before they signal + * the fence (to indicate whether the fence was completed due to an error + * rather than success). The value of the status condition is only valid + * if the fence has been signaled, fence_get_status_locked() first checks + * the signal state before reporting the error status. + * + * Returns 0 if the fence has not yet been signaled, 1 if the fence has + * been signaled without an error condition, or a negative error code + * if the fence has been completed in err. + */ +static inline int fence_get_status_locked(struct fence *fence) +{ + if (fence_is_signaled_locked(fence)) + return fence->status < 0 ? fence->status : 1; + else + return 0; +} + +int fence_get_status(struct fence *fence); + signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout); signed long fence_wait_any_timeout(struct fence **fences, uint32_t count, bool intr, signed long timeout); -- GitLab From f5e0724e76c2c8efb4d3dc43c24228b409bbbfe9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Jan 2017 14:12:22 +0000 Subject: [PATCH 0141/1398] dma-fence: Introduce drm_fence_set_error() helper commit a009e975da5c7d42a7f5eaadc54946eb5f76c9af upstream. The dma_fence.error field (formerly known as dma_fence.status) is an optional field that may be set by drivers before calling dma_fence_signal(). The field can be used to indicate that the fence was completed in err rather than with success, and is visible to other consumers of the fence and to userspace via sync_file. This patch renames the field from status to error so that its meaning is hopefully more clear (and distinct from dma_fence_get_status() which is a composite between the error state and signal state) and adds a helper that validates the preconditions of when it is suitable to adjust the error field. Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Reviewed-by: Sumit Semwal Signed-off-by: Sumit Semwal Link: http://patchwork.freedesktop.org/patch/msgid/20170104141222.6992-3-chris@chris-wilson.co.uk [s/dma_fence/fence/g - gregkh] Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/fence.c | 2 +- include/linux/fence.h | 30 +++++++++++++++++++++++++----- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c index c7a0eefb93ab..04bf29808200 100644 --- a/drivers/dma-buf/fence.c +++ b/drivers/dma-buf/fence.c @@ -551,7 +551,7 @@ fence_init(struct fence *fence, const struct fence_ops *ops, fence->context = context; fence->seqno = seqno; fence->flags = 0UL; - fence->status = 0; + fence->error = 0; trace_fence_init(fence); } diff --git a/include/linux/fence.h b/include/linux/fence.h index 2dfed6957b61..9bb2c0c97a21 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -47,7 +47,7 @@ struct fence_cb; * can be compared to decide which fence would be signaled later. * @flags: A mask of FENCE_FLAG_* defined below * @timestamp: Timestamp when the fence was signaled. - * @status: Optional, only valid if < 0, must be set before calling + * @error: Optional, only valid if < 0, must be set before calling * fence_signal, indicates that the fence has completed with an error. * * the flags member must be manipulated and read using the appropriate @@ -79,7 +79,7 @@ struct fence { unsigned seqno; unsigned long flags; ktime_t timestamp; - int status; + int error; }; enum fence_flag_bits { @@ -132,7 +132,7 @@ struct fence_cb { * or some failure occurred that made it impossible to enable * signaling. True indicates successful enabling. * - * fence->status may be set in enable_signaling, but only when false is + * fence->error may be set in enable_signaling, but only when false is * returned. * * Calling fence_signal before enable_signaling is called allows @@ -144,7 +144,7 @@ struct fence_cb { * the second time will be a noop since it was already signaled. * * Notes on signaled: - * May set fence->status if returning true. + * May set fence->error if returning true. * * Notes on wait: * Must not be NULL, set to fence_default_wait for default implementation. @@ -351,13 +351,33 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2) static inline int fence_get_status_locked(struct fence *fence) { if (fence_is_signaled_locked(fence)) - return fence->status < 0 ? fence->status : 1; + return fence->error ?: 1; else return 0; } int fence_get_status(struct fence *fence); +/** + * fence_set_error - flag an error condition on the fence + * @fence: [in] the fence + * @error: [in] the error to store + * + * Drivers can supply an optional error status condition before they signal + * the fence, to indicate that the fence was completed due to an error + * rather than success. This must be set before signaling (so that the value + * is visible before any waiters on the signal callback are woken). This + * helper exists to help catching erroneous setting of #fence.error. + */ +static inline void fence_set_error(struct fence *fence, + int error) +{ + BUG_ON(test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)); + BUG_ON(error >= 0 || error < -MAX_ERRNO); + + fence->error = error; +} + signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout); signed long fence_wait_any_timeout(struct fence **fences, uint32_t count, bool intr, signed long timeout); -- GitLab From 424bdc5df002f9f9d802f488f27696a68c0a5c59 Mon Sep 17 00:00:00 2001 From: Dominik Behr Date: Thu, 7 Sep 2017 16:02:46 -0300 Subject: [PATCH 0142/1398] dma-buf/sw_sync: force signal all unsignaled fences on dying timeline commit ea4d5a270b57fa8d4871f372ca9b97b7697fdfda upstream. To avoid hanging userspace components that might have been waiting on the active fences of the destroyed timeline we need to signal with error all remaining fences on such timeline. This restore the default behaviour of the Android sw_sync framework, which Android still relies on. It was broken on the dma fence conversion a few years ago and never fixed. v2: Do not bother with cleanup do the list (Chris Wilson) Reviewed-by: Chris Wilson Signed-off-by: Dominik Behr Signed-off-by: Gustavo Padovan Link: https://patchwork.freedesktop.org/patch/msgid/20170907190246.16425-2-gustavo@padovan.org [s/dma_fence/fence/g - gregkh] Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/sw_sync.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index b0ee084a7fdc..4f3511415b29 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -321,8 +321,16 @@ static int sw_sync_debugfs_open(struct inode *inode, struct file *file) static int sw_sync_debugfs_release(struct inode *inode, struct file *file) { struct sync_timeline *obj = file->private_data; + struct sync_pt *pt, *next; + + spin_lock_irq(&obj->lock); + + list_for_each_entry_safe(pt, next, &obj->pt_list, link) { + fence_set_error(&pt->base, -ENOENT); + fence_signal_locked(&pt->base); + } - smp_wmb(); + spin_unlock_irq(&obj->lock); sync_timeline_put(obj); return 0; -- GitLab From 3a83421d482e7d7c6f4251dcd825548e5262c054 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Wed, 19 Oct 2016 15:48:32 -0200 Subject: [PATCH 0143/1398] dma-buf/sync_file: hold reference to fence when creating sync_file commit 30cd85dd6edc86ea8d8589efb813f1fad41ef233 upstream. fence referencing was out of balance. It was not taking any ref to the fence at creating time, but it was putting a reference when freeing the sync file. This patch fixes the balancing issue by getting a reference for the fence when creating the sync_file. Signed-off-by: Gustavo Padovan Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1476899313-22241-1-git-send-email-gustavo@padovan.org Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/sync_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index eb3922031c3b..6847fe6d84c2 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -79,7 +79,7 @@ struct sync_file *sync_file_create(struct fence *fence) if (!sync_file) return NULL; - sync_file->fence = fence; + sync_file->fence = fence_get(fence); snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d", fence->ops->get_driver_name(fence), -- GitLab From 0af23e49316d7870b8f2550a72052bf7d608f230 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 9 Dec 2016 19:53:06 +0100 Subject: [PATCH 0144/1398] dma-buf: Update kerneldoc for sync_file_create commit 24a367348a017555f982a9ee137070a7a821fa97 upstream. This was missed when adding a dma_fence_get call. While at it also remove the kerneldoc for the static inline helper - no point documenting internals down to every detail. Fixes: 30cd85dd6edc ("dma-buf/sync_file: hold reference to fence when creating sync_file") Cc: Gustavo Padovan Cc: Sean Paul Cc: linux-doc@vger.kernel.org Cc: Jonathan Corbet Cc: Sumit Semwal Signed-off-by: Daniel Vetter Reviewed-by: Gustavo Padovan Signed-off-by: Sumit Semwal Link: http://patchwork.freedesktop.org/patch/msgid/20161209185309.1682-3-daniel.vetter@ffwll.ch Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/sync_file.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 6847fe6d84c2..f0c374d6ab40 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -67,9 +67,10 @@ static void fence_check_cb_func(struct fence *f, struct fence_cb *cb) * sync_file_create() - creates a sync file * @fence: fence to add to the sync_fence * - * Creates a sync_file containg @fence. Once this is called, the sync_file - * takes ownership of @fence. The sync_file can be released with - * fput(sync_file->file). Returns the sync_file or NULL in case of error. + * Creates a sync_file containg @fence. This function acquires and additional + * reference of @fence for the newly-created &sync_file, if it succeeds. The + * sync_file can be released with fput(sync_file->file). Returns the + * sync_file or NULL in case of error. */ struct sync_file *sync_file_create(struct fence *fence) { @@ -90,13 +91,6 @@ struct sync_file *sync_file_create(struct fence *fence) } EXPORT_SYMBOL(sync_file_create); -/** - * sync_file_fdget() - get a sync_file from an fd - * @fd: fd referencing a fence - * - * Ensures @fd references a valid sync_file, increments the refcount of the - * backing file. Returns the sync_file or NULL in case of error. - */ static struct sync_file *sync_file_fdget(int fd) { struct file *file = fget(fd); -- GitLab From e328dff288e68a94ae1ef0c43e3f8c6ad9fbf297 Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Thu, 9 Nov 2017 13:16:46 +0100 Subject: [PATCH 0145/1398] usb: hub: Cycle HUB power when initialization fails commit 973593a960ddac0f14f0d8877d2d0abe0afda795 upstream. Sometimes the USB device gets confused about the state of the initialization and the connection fails. In particular, the device thinks that it's already set up and running while the host thinks the device still needs to be configured. To work around this issue, power-cycle the hub's output to issue a sort of "reset" to the device. This makes the device restart its state machine and then the initialization succeeds. This fixes problems where the kernel reports a list of errors like this: usb 1-1.3: device not accepting address 19, error -71 The end result is a non-functioning device. After this patch, the sequence becomes like this: usb 1-1.3: new high-speed USB device number 18 using ci_hdrc usb 1-1.3: device not accepting address 18, error -71 usb 1-1.3: new high-speed USB device number 19 using ci_hdrc usb 1-1.3: device not accepting address 19, error -71 usb 1-1-port3: attempt power cycle usb 1-1.3: new high-speed USB device number 21 using ci_hdrc usb-storage 1-1.3:1.2: USB Mass Storage device detected Signed-off-by: Mike Looijmans Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 706b3d6a7614..d0d3f9ef9f10 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4925,6 +4925,15 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, usb_put_dev(udev); if ((status == -ENOTCONN) || (status == -ENOTSUPP)) break; + + /* When halfway through our retry count, power-cycle the port */ + if (i == (SET_CONFIG_TRIES / 2) - 1) { + dev_info(&port_dev->dev, "attempt power cycle\n"); + usb_hub_set_port_power(hdev, hub, port1, false); + msleep(2 * hub_power_on_good_delay(hub)); + usb_hub_set_port_power(hdev, hub, port1, true); + msleep(hub_power_on_good_delay(hub)); + } } if (hub->hdev->parent || !hcd->driver->port_handed_over || -- GitLab From 34ba2f04a480101c3d60918a45f68ff66973ef49 Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Fri, 1 Dec 2017 13:41:20 +0200 Subject: [PATCH 0146/1398] usb: xhci: fix panic in xhci_free_virt_devices_depth_first commit 80e457699a8dbdd70f2d26911e46f538645c55fc upstream. Check vdev->real_port 0 to avoid panic [ 9.261347] [] xhci_free_virt_devices_depth_first+0x58/0x108 [ 9.261352] [] xhci_mem_cleanup+0x1bc/0x570 [ 9.261355] [] xhci_stop+0x140/0x1c8 [ 9.261365] [] usb_remove_hcd+0xfc/0x1d0 [ 9.261369] [] xhci_plat_remove+0x6c/0xa8 [ 9.261377] [] platform_drv_remove+0x2c/0x70 [ 9.261384] [] __device_release_driver+0x80/0x108 [ 9.261387] [] device_release_driver+0x2c/0x40 [ 9.261392] [] bus_remove_device+0xe0/0x120 [ 9.261396] [] device_del+0x114/0x210 [ 9.261399] [] platform_device_del+0x30/0xa0 [ 9.261403] [] dwc3_otg_work+0x204/0x488 [ 9.261407] [] event_work+0x304/0x5b8 [ 9.261414] [] process_one_work+0x148/0x490 [ 9.261417] [] worker_thread+0x50/0x4a0 [ 9.261421] [] kthread+0xe8/0x100 [ 9.261427] [] ret_from_fork+0x10/0x50 The problem can occur if xhci_plat_remove() is called shortly after xhci_plat_probe(). While xhci_free_virt_devices_depth_first been called before the device has been setup and get real_port initialized. The problem occurred on Hikey960 and was reproduced by Guenter Roeck on Kevin with chromeos-4.4. Fixes: ee8665e28e8d ("xhci: free xhci virtual devices with leaf nodes first") Cc: Guenter Roeck Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Fan Ning Signed-off-by: Li Rui Signed-off-by: yangdi Signed-off-by: Yu Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index b7114c3f52aa..a3ecd8bd5324 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -996,6 +996,12 @@ void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_id) if (!vdev) return; + if (vdev->real_port == 0 || + vdev->real_port > HCS_MAX_PORTS(xhci->hcs_params1)) { + xhci_dbg(xhci, "Bad vdev->real_port.\n"); + goto out; + } + tt_list_head = &(xhci->rh_bw[vdev->real_port - 1].tts); list_for_each_entry_safe(tt_info, next, tt_list_head, tt_list) { /* is this a hub device that added a tt_info to the tts list */ @@ -1009,6 +1015,7 @@ void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_id) } } } +out: /* we are now at a leaf device */ xhci_free_virt_device(xhci, slot_id); } -- GitLab From 05ffc7ed55279d659a3e6786e1ffa54eb280a80d Mon Sep 17 00:00:00 2001 From: Masakazu Mokuno Date: Fri, 10 Nov 2017 01:25:50 +0900 Subject: [PATCH 0147/1398] USB: core: Add type-specific length check of BOS descriptors commit 81cf4a45360f70528f1f64ba018d61cb5767249a upstream. As most of BOS descriptors are longer in length than their header 'struct usb_dev_cap_header', comparing solely with it is not sufficient to avoid out-of-bounds access to BOS descriptors. This patch adds descriptor type specific length check in usb_get_bos_descriptor() to fix the issue. Signed-off-by: Masakazu Mokuno Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 28 ++++++++++++++++++++++++---- include/uapi/linux/usb/ch9.h | 3 +++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 5008f71fb08d..5ebe04d3598b 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -900,14 +900,25 @@ void usb_release_bos_descriptor(struct usb_device *dev) } } +static const __u8 bos_desc_len[256] = { + [USB_CAP_TYPE_WIRELESS_USB] = USB_DT_USB_WIRELESS_CAP_SIZE, + [USB_CAP_TYPE_EXT] = USB_DT_USB_EXT_CAP_SIZE, + [USB_SS_CAP_TYPE] = USB_DT_USB_SS_CAP_SIZE, + [USB_SSP_CAP_TYPE] = USB_DT_USB_SSP_CAP_SIZE(1), + [CONTAINER_ID_TYPE] = USB_DT_USB_SS_CONTN_ID_SIZE, + [USB_PTM_CAP_TYPE] = USB_DT_USB_PTM_ID_SIZE, +}; + /* Get BOS descriptor set */ int usb_get_bos_descriptor(struct usb_device *dev) { struct device *ddev = &dev->dev; struct usb_bos_descriptor *bos; struct usb_dev_cap_header *cap; + struct usb_ssp_cap_descriptor *ssp_cap; unsigned char *buffer; - int length, total_len, num, i; + int length, total_len, num, i, ssac; + __u8 cap_type; int ret; bos = kzalloc(sizeof(struct usb_bos_descriptor), GFP_KERNEL); @@ -960,7 +971,13 @@ int usb_get_bos_descriptor(struct usb_device *dev) dev->bos->desc->bNumDeviceCaps = i; break; } + cap_type = cap->bDevCapabilityType; length = cap->bLength; + if (bos_desc_len[cap_type] && length < bos_desc_len[cap_type]) { + dev->bos->desc->bNumDeviceCaps = i; + break; + } + total_len -= length; if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { @@ -968,7 +985,7 @@ int usb_get_bos_descriptor(struct usb_device *dev) continue; } - switch (cap->bDevCapabilityType) { + switch (cap_type) { case USB_CAP_TYPE_WIRELESS_USB: /* Wireless USB cap descriptor is handled by wusb */ break; @@ -981,8 +998,11 @@ int usb_get_bos_descriptor(struct usb_device *dev) (struct usb_ss_cap_descriptor *)buffer; break; case USB_SSP_CAP_TYPE: - dev->bos->ssp_cap = - (struct usb_ssp_cap_descriptor *)buffer; + ssp_cap = (struct usb_ssp_cap_descriptor *)buffer; + ssac = (le32_to_cpu(ssp_cap->bmAttributes) & + USB_SSP_SUBLINK_SPEED_ATTRIBS) + 1; + if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac)) + dev->bos->ssp_cap = ssp_cap; break; case CONTAINER_ID_TYPE: dev->bos->ss_id = diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 5e64a86989a5..ab1dadba9923 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -854,6 +854,8 @@ struct usb_wireless_cap_descriptor { /* Ultra Wide Band */ __u8 bReserved; } __attribute__((packed)); +#define USB_DT_USB_WIRELESS_CAP_SIZE 11 + /* USB 2.0 Extension descriptor */ #define USB_CAP_TYPE_EXT 2 @@ -1046,6 +1048,7 @@ struct usb_ptm_cap_descriptor { __u8 bDevCapabilityType; } __attribute__((packed)); +#define USB_DT_USB_PTM_ID_SIZE 3 /* * The size of the descriptor for the Sublink Speed Attribute Count * (SSAC) specified in bmAttributes[4:0]. -- GitLab From d6ab871c432dbb30d8a186347f4d9ebd22041b6a Mon Sep 17 00:00:00 2001 From: Mateusz Berezecki Date: Wed, 21 Dec 2016 09:19:14 -0800 Subject: [PATCH 0148/1398] USB: Increase usbfs transfer limit commit 1129d270cbfbb7e2b1ec3dede4a13930bdd10e41 upstream. Promote a variable keeping track of USB transfer memory usage to a wider data type and allow for higher bandwidth transfers from a large number of USB devices connected to a single host. Signed-off-by: Mateusz Berezecki Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 43 +++++++++++++++------------------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index fa619354c5c5..ec847e94286f 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -134,42 +134,35 @@ enum snoop_when { #define USB_DEVICE_DEV MKDEV(USB_DEVICE_MAJOR, 0) /* Limit on the total amount of memory we can allocate for transfers */ -static unsigned usbfs_memory_mb = 16; +static u32 usbfs_memory_mb = 16; module_param(usbfs_memory_mb, uint, 0644); MODULE_PARM_DESC(usbfs_memory_mb, "maximum MB allowed for usbfs buffers (0 = no limit)"); -/* Hard limit, necessary to avoid arithmetic overflow */ -#define USBFS_XFER_MAX (UINT_MAX / 2 - 1000000) - -static atomic_t usbfs_memory_usage; /* Total memory currently allocated */ +static atomic64_t usbfs_memory_usage; /* Total memory currently allocated */ /* Check whether it's okay to allocate more memory for a transfer */ -static int usbfs_increase_memory_usage(unsigned amount) +static int usbfs_increase_memory_usage(u64 amount) { - unsigned lim; + u64 lim; - /* - * Convert usbfs_memory_mb to bytes, avoiding overflows. - * 0 means use the hard limit (effectively unlimited). - */ lim = ACCESS_ONCE(usbfs_memory_mb); - if (lim == 0 || lim > (USBFS_XFER_MAX >> 20)) - lim = USBFS_XFER_MAX; - else - lim <<= 20; + lim <<= 20; - atomic_add(amount, &usbfs_memory_usage); - if (atomic_read(&usbfs_memory_usage) <= lim) - return 0; - atomic_sub(amount, &usbfs_memory_usage); - return -ENOMEM; + atomic64_add(amount, &usbfs_memory_usage); + + if (lim > 0 && atomic64_read(&usbfs_memory_usage) > lim) { + atomic64_sub(amount, &usbfs_memory_usage); + return -ENOMEM; + } + + return 0; } /* Memory for a transfer is being deallocated */ -static void usbfs_decrease_memory_usage(unsigned amount) +static void usbfs_decrease_memory_usage(u64 amount) { - atomic_sub(amount, &usbfs_memory_usage); + atomic64_sub(amount, &usbfs_memory_usage); } static int connected(struct usb_dev_state *ps) @@ -1191,7 +1184,7 @@ static int proc_bulk(struct usb_dev_state *ps, void __user *arg) if (!usb_maxpacket(dev, pipe, !(bulk.ep & USB_DIR_IN))) return -EINVAL; len1 = bulk.len; - if (len1 >= USBFS_XFER_MAX) + if (len1 >= (INT_MAX - sizeof(struct urb))) return -EINVAL; ret = usbfs_increase_memory_usage(len1 + sizeof(struct urb)); if (ret) @@ -1584,10 +1577,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb return -EINVAL; } - if (uurb->buffer_length >= USBFS_XFER_MAX) { - ret = -EINVAL; - goto error; - } if (uurb->buffer_length > 0 && !access_ok(is_in ? VERIFY_WRITE : VERIFY_READ, uurb->buffer, uurb->buffer_length)) { -- GitLab From 545c10375330c9212227a6a7efd84f49ccd6c9b2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 22 Sep 2017 23:43:25 +0300 Subject: [PATCH 0149/1398] USB: devio: Prevent integer overflow in proc_do_submiturb() commit 57999d1107c1e60c2ca7088f2ac0f819e2f554b3 upstream. There used to be an integer overflow check in proc_do_submiturb() but we removed it. It turns out that it's still required. The uurb->buffer_length variable is a signed integer and it's controlled by the user. It can lead to an integer overflow when we do: num_sgs = DIV_ROUND_UP(uurb->buffer_length, USB_SG_SIZE); If we strip away the macro then that line looks like this: num_sgs = (uurb->buffer_length + USB_SG_SIZE - 1) / USB_SG_SIZE; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ It's the first addition which can overflow. Fixes: 1129d270cbfb ("USB: Increase usbfs transfer limit") Signed-off-by: Dan Carpenter Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index ec847e94286f..6790a98bffdb 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -139,6 +139,9 @@ module_param(usbfs_memory_mb, uint, 0644); MODULE_PARM_DESC(usbfs_memory_mb, "maximum MB allowed for usbfs buffers (0 = no limit)"); +/* Hard limit, necessary to avoid arithmetic overflow */ +#define USBFS_XFER_MAX (UINT_MAX / 2 - 1000000) + static atomic64_t usbfs_memory_usage; /* Total memory currently allocated */ /* Check whether it's okay to allocate more memory for a transfer */ @@ -1459,6 +1462,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb USBDEVFS_URB_ZERO_PACKET | USBDEVFS_URB_NO_INTERRUPT)) return -EINVAL; + if ((unsigned int)uurb->buffer_length >= USBFS_XFER_MAX) + return -EINVAL; if (uurb->buffer_length > 0 && !uurb->buffer) return -EINVAL; if (!(uurb->type == USBDEVFS_URB_TYPE_CONTROL && -- GitLab From 20c315ebabc5a63b0bb169ddd60455ee0b70633e Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 23 Nov 2017 16:39:52 +0100 Subject: [PATCH 0150/1398] USB: usbfs: Filter flags passed in from user space commit 446f666da9f019ce2ffd03800995487e79a91462 upstream. USBDEVFS_URB_ISO_ASAP must be accepted only for ISO endpoints. Improve sanity checking. Reported-by: Andrey Konovalov Signed-off-by: Oliver Neukum Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 6790a98bffdb..893ebae51029 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1454,14 +1454,18 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb int number_of_packets = 0; unsigned int stream_id = 0; void *buf; - - if (uurb->flags & ~(USBDEVFS_URB_ISO_ASAP | - USBDEVFS_URB_SHORT_NOT_OK | + unsigned long mask = USBDEVFS_URB_SHORT_NOT_OK | USBDEVFS_URB_BULK_CONTINUATION | USBDEVFS_URB_NO_FSBR | USBDEVFS_URB_ZERO_PACKET | - USBDEVFS_URB_NO_INTERRUPT)) - return -EINVAL; + USBDEVFS_URB_NO_INTERRUPT; + /* USBDEVFS_URB_ISO_ASAP is a special case */ + if (uurb->type == USBDEVFS_URB_TYPE_ISO) + mask |= USBDEVFS_URB_ISO_ASAP; + + if (uurb->flags & ~mask) + return -EINVAL; + if ((unsigned int)uurb->buffer_length >= USBFS_XFER_MAX) return -EINVAL; if (uurb->buffer_length > 0 && !uurb->buffer) -- GitLab From 08dd03812e8b1dba97eb00092a8cfbe8a7ebc13c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 7 Nov 2017 16:45:04 +0000 Subject: [PATCH 0151/1398] usb: host: fix incorrect updating of offset commit 1d5a31582ef046d3b233f0da1a68ae26519b2f0a upstream. The variable temp is incorrectly being updated, instead it should be offset otherwise the loop just reads the same capability value and loops forever. Thanks to Alan Stern for pointing out the correct fix to my original fix. Fix also cleans up clang warning: drivers/usb/host/ehci-dbg.c:840:4: warning: Value stored to 'temp' is never read Fixes: d49d43174400 ("USB: misc ehci updates") Signed-off-by: Colin Ian King Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-dbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c index 1a2614aae42c..3ff6468a1f5f 100644 --- a/drivers/usb/host/ehci-dbg.c +++ b/drivers/usb/host/ehci-dbg.c @@ -837,7 +837,7 @@ static ssize_t fill_registers_buffer(struct debug_buffer *buf) default: /* unknown */ break; } - temp = (cap >> 8) & 0xff; + offset = (cap >> 8) & 0xff; } } #endif -- GitLab From 3671e6728f796dee5c557f0782725eeb6a9ad721 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 11 May 2017 13:58:06 +0200 Subject: [PATCH 0152/1398] xen-netfront: avoid crashing on resume after a failure in talk_to_netback() commit d86b5672b1adb98b4cdd6fbf0224bbfb03db6e2e upstream. Unavoidable crashes in netfront_resume() and netback_changed() after a previous fail in talk_to_netback() (e.g. when we fail to read MAC from xenstore) were discovered. The failure path in talk_to_netback() does unregister/free for netdev but we don't reset drvdata and we try accessing it after resume. Fix the bug by removing the whole xen device completely with device_unregister(), this guarantees we won't have any calls into netfront after a failure. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David S. Miller Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 265e2095f8d9..8d498a997e25 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1958,8 +1958,7 @@ static int talk_to_netback(struct xenbus_device *dev, xennet_disconnect_backend(info); xennet_destroy_queues(info); out: - unregister_netdev(info->netdev); - xennet_free_netdev(info->netdev); + device_unregister(&dev->dev); return err; } -- GitLab From 3781db07c79e7e4c5273c902f9c4718795866a1c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 9 Dec 2017 22:01:57 +0100 Subject: [PATCH 0153/1398] Linux 4.9.68 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 70546af61a0a..dfe17af517b2 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 67 +SUBLEVEL = 68 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From fb66dc2a6e5ec417ab7f6df01531aa1ccc9827f8 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 18 Sep 2017 00:10:09 +0100 Subject: [PATCH 0154/1398] ANDROID: dma-buf/sw_sync: Rename active_list to link Upstream commit d3862e44daa7 ("dma-buf/sw-sync: Fix locking around sync_timeline lists") renamed sync_pt -> active_list to -> link and we run into folowing build error: drivers/dma-buf/sw_sync.c:176:19: error: 'struct sync_pt' has no member named 'active_list' Reported at KernelCI: https://kernelci.org/build/android/branch/android-4.9/kernel/ASB-2017-11-06_4.9-o-release-1566-gfdeec8fdb714/ Fixes: Change-Id: I05c34dcf74438c28405438c7ead0706b1f810fff ("CHROMIUM: android: fix warning when releasing active sync point") Signed-off-by: Amit Pundir --- drivers/dma-buf/sw_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 0b3aafe41500..9dc86d3303ae 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -173,7 +173,7 @@ static void timeline_fence_disable_signaling(struct fence *fence) { struct sync_pt *pt = container_of(fence, struct sync_pt, base); - list_del_init(&pt->active_list); + list_del_init(&pt->link); } static void timeline_fence_value_str(struct fence *fence, -- GitLab From fbe9a14180d6a57badfbca5986b1539db330a554 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 13 Nov 2017 17:59:18 +0900 Subject: [PATCH 0155/1398] usb: gadget: udc: renesas_usb3: fix number of the pipes commit a58204ab91ad8cae4d8474aa0ba5d1fc504860c9 upstream. This controller on R-Car Gen3 has 6 pipes that included PIPE 0 for control actually. But, the datasheet has error in writing as it has 31 pipes. (However, the previous code defined 30 pipes wrongly...) Anyway, this patch fixes it. Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller") Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/renesas_usb3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index bb89e24c48b4..2197a50ed2ab 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -222,7 +222,7 @@ #define USB3_EP0_SS_MAX_PACKET_SIZE 512 #define USB3_EP0_HSFS_MAX_PACKET_SIZE 64 #define USB3_EP0_BUF_SIZE 8 -#define USB3_MAX_NUM_PIPES 30 +#define USB3_MAX_NUM_PIPES 6 /* This includes PIPE 0 */ #define USB3_WAIT_US 3 struct renesas_usb3; -- GitLab From 6a3a713880a5892cf7e169f397dc26d2352b0e2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=A4bler?= Date: Mon, 20 Nov 2017 14:45:15 +0100 Subject: [PATCH 0156/1398] can: ti_hecc: Fix napi poll return value for repoll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f6c23b174c3c96616514827407769cbcfc8005cf upstream. After commit d75b1ade567f ("net: less interrupt masking in NAPI") napi repoll is done only when work_done == budget. So we need to return budget if there are still packets to receive. Signed-off-by: Oliver Stäbler Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/ti_hecc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 6749b1829469..4d01d7bc24ef 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -652,6 +652,9 @@ static int ti_hecc_rx_poll(struct napi_struct *napi, int quota) mbx_mask = hecc_read(priv, HECC_CANMIM); mbx_mask |= HECC_TX_MBOX_MASK; hecc_write(priv, HECC_CANMIM, mbx_mask); + } else { + /* repoll is done only if whole budget is used */ + num_pkts = quota; } return num_pkts; -- GitLab From f89682bb3cf20c373d19cbbdb2d1ddd65024fbf5 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Tue, 21 Nov 2017 08:22:26 +0100 Subject: [PATCH 0157/1398] can: kvaser_usb: free buf in error paths commit 435019b48033138581a6171093b181fc6b4d3d30 upstream. The allocated buffer was not freed if usb_submit_urb() failed. Signed-off-by: Jimmy Assarsson Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 4224e066cb16..c427e19d07aa 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -813,6 +813,7 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv, if (err) { netdev_err(netdev, "Error transmitting URB\n"); usb_unanchor_urb(urb); + kfree(buf); usb_free_urb(urb); return err; } @@ -1768,6 +1769,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, spin_unlock_irqrestore(&priv->tx_contexts_lock, flags); usb_unanchor_urb(urb); + kfree(buf); stats->tx_dropped++; -- GitLab From 561384a9cf1713b1aa9c4e47ca25c214050bb7db Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Tue, 21 Nov 2017 08:22:27 +0100 Subject: [PATCH 0158/1398] can: kvaser_usb: Fix comparison bug in kvaser_usb_read_bulk_callback() commit e84f44eb5523401faeb9cc1c97895b68e3cfb78d upstream. The conditon in the while-loop becomes true when actual_length is less than 2 (MSG_HEADER_LEN). In best case we end up with a former, already dispatched msg, that got msg->len greater than actual_length. This will result in a "Format error" error printout. Problem seen when unplugging a Kvaser USB device connected to a vbox guest. warning: comparison between signed and unsigned integer expressions [-Wsign-compare] Signed-off-by: Jimmy Assarsson Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index c427e19d07aa..c0d03b6b4307 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1334,7 +1334,7 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) goto resubmit_urb; } - while (pos <= urb->actual_length - MSG_HEADER_LEN) { + while (pos <= (int)(urb->actual_length - MSG_HEADER_LEN)) { msg = urb->transfer_buffer + pos; /* The Kvaser firmware can only read and write messages that -- GitLab From d724a677f0c1dfcfea43985f54a0a65429f6199f Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Tue, 21 Nov 2017 08:22:28 +0100 Subject: [PATCH 0159/1398] can: kvaser_usb: ratelimit errors if incomplete messages are received commit 8bd13bd522ff7dfa0eb371921aeb417155f7a3be upstream. Avoid flooding the kernel log with "Formate error", if incomplete message are received. Signed-off-by: Jimmy Assarsson Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index c0d03b6b4307..e719b907745e 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -609,8 +609,8 @@ static int kvaser_usb_wait_msg(const struct kvaser_usb *dev, u8 id, } if (pos + tmp->len > actual_len) { - dev_err(dev->udev->dev.parent, - "Format error\n"); + dev_err_ratelimited(dev->udev->dev.parent, + "Format error\n"); break; } @@ -1353,7 +1353,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) } if (pos + msg->len > urb->actual_length) { - dev_err(dev->udev->dev.parent, "Format error\n"); + dev_err_ratelimited(dev->udev->dev.parent, + "Format error\n"); break; } -- GitLab From 4665483e4797f86e0d4286f7ca8d73353046f073 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Tue, 5 Dec 2017 11:15:49 -0800 Subject: [PATCH 0160/1398] can: kvaser_usb: cancel urb on -EPIPE and -EPROTO commit 6aa8d5945502baf4687d80de59b7ac865e9e666b upstream. In mcba_usb, we have observed that when you unplug the device, the driver will endlessly resubmit failing URBs, which can cause CPU stalls. This issue is fixed in mcba_usb by catching the codes seen on device disconnect (-EPIPE and -EPROTO). This driver also resubmits in the case of -EPIPE and -EPROTO, so fix it in the same way. Signed-off-by: Martin Kelly Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index e719b907745e..c9d61a6dfb7a 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1326,6 +1326,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) case 0: break; case -ENOENT: + case -EPIPE: + case -EPROTO: case -ESHUTDOWN: return; default: -- GitLab From d3b72254789fd96701ef06778a3fc5c4a3d24cbe Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Tue, 5 Dec 2017 11:15:47 -0800 Subject: [PATCH 0161/1398] can: ems_usb: cancel urb on -EPIPE and -EPROTO commit bd352e1adfe0d02d3ea7c8e3fb19183dc317e679 upstream. In mcba_usb, we have observed that when you unplug the device, the driver will endlessly resubmit failing URBs, which can cause CPU stalls. This issue is fixed in mcba_usb by catching the codes seen on device disconnect (-EPIPE and -EPROTO). This driver also resubmits in the case of -EPIPE and -EPROTO, so fix it in the same way. Signed-off-by: Martin Kelly Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/ems_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index b3d02759c226..b00358297424 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -288,6 +288,8 @@ static void ems_usb_read_interrupt_callback(struct urb *urb) case -ECONNRESET: /* unlink */ case -ENOENT: + case -EPIPE: + case -EPROTO: case -ESHUTDOWN: return; -- GitLab From 0cdac09e6aeefed8058e913943c7c8765017afdc Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Tue, 5 Dec 2017 11:15:48 -0800 Subject: [PATCH 0162/1398] can: esd_usb2: cancel urb on -EPIPE and -EPROTO commit 7a31ced3de06e9878e4f9c3abe8f87d9344d8144 upstream. In mcba_usb, we have observed that when you unplug the device, the driver will endlessly resubmit failing URBs, which can cause CPU stalls. This issue is fixed in mcba_usb by catching the codes seen on device disconnect (-EPIPE and -EPROTO). This driver also resubmits in the case of -EPIPE and -EPROTO, so fix it in the same way. Signed-off-by: Martin Kelly Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/esd_usb2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 9fdb0f0bfa06..c6dcf93675c0 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -393,6 +393,8 @@ static void esd_usb2_read_bulk_callback(struct urb *urb) break; case -ENOENT: + case -EPIPE: + case -EPROTO: case -ESHUTDOWN: return; -- GitLab From 4b4e6f675846c0700fae441f30bcb18dca074af7 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Tue, 5 Dec 2017 11:15:50 -0800 Subject: [PATCH 0163/1398] can: usb_8dev: cancel urb on -EPIPE and -EPROTO commit 12147edc434c9e4c7c2f5fee2e5519b2e5ac34ce upstream. In mcba_usb, we have observed that when you unplug the device, the driver will endlessly resubmit failing URBs, which can cause CPU stalls. This issue is fixed in mcba_usb by catching the codes seen on device disconnect (-EPIPE and -EPROTO). This driver also resubmits in the case of -EPIPE and -EPROTO, so fix it in the same way. Signed-off-by: Martin Kelly Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/usb_8dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index d000cb62d6ae..27861c417c94 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -524,6 +524,8 @@ static void usb_8dev_read_bulk_callback(struct urb *urb) break; case -ENOENT: + case -EPIPE: + case -EPROTO: case -ESHUTDOWN: return; -- GitLab From 893fab0c338b16724fc67bcbd3b95f5bc97108f6 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Wed, 29 Nov 2017 09:23:01 +0800 Subject: [PATCH 0164/1398] virtio: release virtio index when fail to device_register commit e60ea67bb60459b95a50a156296041a13e0e380e upstream. index can be reused by other virtio device. Signed-off-by: weiping zhang Reviewed-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 7062bb0975a5..462e183609b6 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -323,6 +323,8 @@ int register_virtio_device(struct virtio_device *dev) /* device_register() causes the bus infrastructure to look for a * matching driver. */ err = device_register(&dev->dev); + if (err) + ida_simple_remove(&virtio_index_ida, dev->index); out: if (err) add_status(dev, VIRTIO_CONFIG_S_FAILED); -- GitLab From 3aa6d7f8542e1c78bd0072dc4a668cb476360e56 Mon Sep 17 00:00:00 2001 From: Paul Meyer Date: Tue, 14 Nov 2017 13:06:47 -0700 Subject: [PATCH 0165/1398] hv: kvp: Avoid reading past allocated blocks from KVP file commit 297d6b6e56c2977fc504c61bbeeaa21296923f89 upstream. While reading in more than one block (50) of KVP records, the allocation goes per block, but the reads used the total number of allocated records (without resetting the pointer/stream). This causes the records buffer to overrun when the refresh reads more than one block over the previous capacity (e.g. reading more than 100 KVP records whereas the in-memory database was empty before). Fix this by reading the correct number of KVP records from file each time. Signed-off-by: Paul Meyer Signed-off-by: Long Li Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- tools/hv/hv_kvp_daemon.c | 70 ++++++++-------------------------------- 1 file changed, 14 insertions(+), 56 deletions(-) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index bc7adb84e679..60a94b3e532e 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -193,11 +193,14 @@ static void kvp_update_mem_state(int pool) for (;;) { readp = &record[records_read]; records_read += fread(readp, sizeof(struct kvp_record), - ENTRIES_PER_BLOCK * num_blocks, - filep); + ENTRIES_PER_BLOCK * num_blocks - records_read, + filep); if (ferror(filep)) { - syslog(LOG_ERR, "Failed to read file, pool: %d", pool); + syslog(LOG_ERR, + "Failed to read file, pool: %d; error: %d %s", + pool, errno, strerror(errno)); + kvp_release_lock(pool); exit(EXIT_FAILURE); } @@ -210,6 +213,7 @@ static void kvp_update_mem_state(int pool) if (record == NULL) { syslog(LOG_ERR, "malloc failed"); + kvp_release_lock(pool); exit(EXIT_FAILURE); } continue; @@ -224,15 +228,11 @@ static void kvp_update_mem_state(int pool) fclose(filep); kvp_release_lock(pool); } + static int kvp_file_init(void) { int fd; - FILE *filep; - size_t records_read; char *fname; - struct kvp_record *record; - struct kvp_record *readp; - int num_blocks; int i; int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK; @@ -246,61 +246,19 @@ static int kvp_file_init(void) for (i = 0; i < KVP_POOL_COUNT; i++) { fname = kvp_file_info[i].fname; - records_read = 0; - num_blocks = 1; sprintf(fname, "%s/.kvp_pool_%d", KVP_CONFIG_LOC, i); fd = open(fname, O_RDWR | O_CREAT | O_CLOEXEC, 0644 /* rw-r--r-- */); if (fd == -1) return 1; - - filep = fopen(fname, "re"); - if (!filep) { - close(fd); - return 1; - } - - record = malloc(alloc_unit * num_blocks); - if (record == NULL) { - fclose(filep); - close(fd); - return 1; - } - for (;;) { - readp = &record[records_read]; - records_read += fread(readp, sizeof(struct kvp_record), - ENTRIES_PER_BLOCK, - filep); - - if (ferror(filep)) { - syslog(LOG_ERR, "Failed to read file, pool: %d", - i); - exit(EXIT_FAILURE); - } - - if (!feof(filep)) { - /* - * We have more data to read. - */ - num_blocks++; - record = realloc(record, alloc_unit * - num_blocks); - if (record == NULL) { - fclose(filep); - close(fd); - return 1; - } - continue; - } - break; - } kvp_file_info[i].fd = fd; - kvp_file_info[i].num_blocks = num_blocks; - kvp_file_info[i].records = record; - kvp_file_info[i].num_records = records_read; - fclose(filep); - + kvp_file_info[i].num_blocks = 1; + kvp_file_info[i].records = malloc(alloc_unit); + if (kvp_file_info[i].records == NULL) + return 1; + kvp_file_info[i].num_records = 0; + kvp_update_mem_state(i); } return 0; -- GitLab From 1db069c057a1a35adde371547be223f4af6458eb Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 8 Nov 2017 10:23:11 -0500 Subject: [PATCH 0166/1398] isa: Prevent NULL dereference in isa_bus driver callbacks commit 5a244727f428a06634f22bb890e78024ab0c89f3 upstream. The isa_driver structure for an isa_bus device is stored in the device platform_data member of the respective device structure. This platform_data member may be reset to NULL if isa_driver match callback for the device fails, indicating a device unsupported by the ISA driver. This patch fixes a possible NULL pointer dereference if one of the isa_driver callbacks to attempted for an unsupported device. This error should not occur in practice since ISA devices are typically manually configured and loaded by the users, but we may as well prevent this error from popping up for the 0day testers. Fixes: a5117ba7da37 ("[PATCH] Driver model: add ISA bus") Signed-off-by: William Breathitt Gray Acked-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/base/isa.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/base/isa.c b/drivers/base/isa.c index cd6ccdcf9df0..372d10af2600 100644 --- a/drivers/base/isa.c +++ b/drivers/base/isa.c @@ -39,7 +39,7 @@ static int isa_bus_probe(struct device *dev) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->probe) + if (isa_driver && isa_driver->probe) return isa_driver->probe(dev, to_isa_dev(dev)->id); return 0; @@ -49,7 +49,7 @@ static int isa_bus_remove(struct device *dev) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->remove) + if (isa_driver && isa_driver->remove) return isa_driver->remove(dev, to_isa_dev(dev)->id); return 0; @@ -59,7 +59,7 @@ static void isa_bus_shutdown(struct device *dev) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->shutdown) + if (isa_driver && isa_driver->shutdown) isa_driver->shutdown(dev, to_isa_dev(dev)->id); } @@ -67,7 +67,7 @@ static int isa_bus_suspend(struct device *dev, pm_message_t state) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->suspend) + if (isa_driver && isa_driver->suspend) return isa_driver->suspend(dev, to_isa_dev(dev)->id, state); return 0; @@ -77,7 +77,7 @@ static int isa_bus_resume(struct device *dev) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->resume) + if (isa_driver && isa_driver->resume) return isa_driver->resume(dev, to_isa_dev(dev)->id); return 0; -- GitLab From 4cb4d78c57f8ba642b5071d439d71217291d9e71 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 21 Nov 2017 14:23:37 +0100 Subject: [PATCH 0167/1398] scsi: dma-mapping: always provide dma_get_cache_alignment commit 860dd4424f344400b491b212ee4acb3a358ba9d9 upstream. Provide the dummy version of dma_get_cache_alignment that always returns 1 even if CONFIG_HAS_DMA is not set, so that drivers and subsystems can use it without ifdefs. Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- include/linux/dma-mapping.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 08528afdf58b..704caae69c42 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -659,7 +659,6 @@ static inline void *dma_zalloc_coherent(struct device *dev, size_t size, return ret; } -#ifdef CONFIG_HAS_DMA static inline int dma_get_cache_alignment(void) { #ifdef ARCH_DMA_MINALIGN @@ -667,7 +666,6 @@ static inline int dma_get_cache_alignment(void) #endif return 1; } -#endif /* flags for the coherent memory api */ #define DMA_MEMORY_MAP 0x01 -- GitLab From 326ebe77f0c584e1de142c788f61b447bcd170b3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 21 Nov 2017 14:23:38 +0100 Subject: [PATCH 0168/1398] scsi: use dma_get_cache_alignment() as minimum DMA alignment commit 90addc6b3c9cda0146fbd62a08e234c2b224a80c upstream. In non-coherent DMA mode, kernel uses cache flushing operations to maintain I/O coherency, so scsi's block queue should be aligned to the value returned by dma_get_cache_alignment(). Otherwise, If a DMA buffer and a kernel structure share a same cache line, and if the kernel structure has dirty data, cache_invalidate (no writeback) will cause data corruption. Signed-off-by: Huacai Chen [hch: rebased and updated the comment and changelog] Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_lib.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d8099c7cab00..c7b770075caa 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2041,11 +2041,13 @@ static void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) q->limits.cluster = 0; /* - * set a reasonable default alignment on word boundaries: the - * host and device may alter it using - * blk_queue_update_dma_alignment() later. + * Set a reasonable default alignment: The larger of 32-byte (dword), + * which is a common minimum for HBAs, and the minimum DMA alignment, + * which is set by the platform. + * + * Devices that require a bigger alignment can increase it later. */ - blk_queue_dma_alignment(q, 0x03); + blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment()) - 1); } struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, -- GitLab From 35b4bfbda54b8082e41b472641be9d51144f85ad Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 21 Nov 2017 14:23:39 +0100 Subject: [PATCH 0169/1398] scsi: libsas: align sata_device's rps_resp on a cacheline commit c2e8fbf908afd81ad502b567a6639598f92c9b9d upstream. The rps_resp buffer in ata_device is a DMA target, but it isn't explicitly cacheline aligned. Due to this, adjacent fields can be overwritten with stale data from memory on non-coherent architectures. As a result, the kernel is sometimes unable to communicate with an SATA device behind a SAS expander. Fix this by ensuring that the rps_resp buffer is cacheline aligned. This issue is similar to that fixed by Commit 84bda12af31f93 ("libata: align ap->sector_buf") and Commit 4ee34ea3a12396f35b26 ("libata: Align ata_device's id on a cacheline"). Signed-off-by: Huacai Chen Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- include/scsi/libsas.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index dae99d7d2bc0..706a7017885c 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -165,11 +165,11 @@ struct expander_device { struct sata_device { unsigned int class; - struct smp_resp rps_resp; /* report_phy_sata_resp */ u8 port_no; /* port number, if this is a PM (Port) */ struct ata_port *ap; struct ata_host ata_host; + struct smp_resp rps_resp ____cacheline_aligned; /* report_phy_sata_resp */ u8 fis[ATA_RESP_FIS_SIZE]; }; -- GitLab From 29c3b7a85409b584e35f4be69f71fa206eeaece0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 6 Dec 2017 09:50:08 +0000 Subject: [PATCH 0170/1398] efi: Move some sysfs files to be read-only by root commit af97a77bc01ce49a466f9d4c0125479e2e2230b6 upstream. Thanks to the scripts/leaking_addresses.pl script, it was found that some EFI values should not be readable by non-root users. So make them root-only, and to do that, add a __ATTR_RO_MODE() macro to make this easier, and use it in other places at the same time. Reported-by: Linus Torvalds Tested-by: Dave Young Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ard Biesheuvel Cc: H. Peter Anvin Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20171206095010.24170-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efi.c | 3 +-- drivers/firmware/efi/esrt.c | 15 ++++++--------- drivers/firmware/efi/runtime-map.c | 10 +++++----- include/linux/sysfs.h | 6 ++++++ 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index a4944e22f294..2f48f848865f 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -120,8 +120,7 @@ static ssize_t systab_show(struct kobject *kobj, return str - buf; } -static struct kobj_attribute efi_attr_systab = - __ATTR(systab, 0400, systab_show, NULL); +static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400); #define EFI_FIELD(var) efi.var diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index 14914074f716..2c02572ed41e 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -106,7 +106,7 @@ static const struct sysfs_ops esre_attr_ops = { }; /* Generic ESRT Entry ("ESRE") support. */ -static ssize_t esre_fw_class_show(struct esre_entry *entry, char *buf) +static ssize_t fw_class_show(struct esre_entry *entry, char *buf) { char *str = buf; @@ -117,18 +117,16 @@ static ssize_t esre_fw_class_show(struct esre_entry *entry, char *buf) return str - buf; } -static struct esre_attribute esre_fw_class = __ATTR(fw_class, 0400, - esre_fw_class_show, NULL); +static struct esre_attribute esre_fw_class = __ATTR_RO_MODE(fw_class, 0400); #define esre_attr_decl(name, size, fmt) \ -static ssize_t esre_##name##_show(struct esre_entry *entry, char *buf) \ +static ssize_t name##_show(struct esre_entry *entry, char *buf) \ { \ return sprintf(buf, fmt "\n", \ le##size##_to_cpu(entry->esre.esre1->name)); \ } \ \ -static struct esre_attribute esre_##name = __ATTR(name, 0400, \ - esre_##name##_show, NULL) +static struct esre_attribute esre_##name = __ATTR_RO_MODE(name, 0400) esre_attr_decl(fw_type, 32, "%u"); esre_attr_decl(fw_version, 32, "%u"); @@ -193,14 +191,13 @@ static int esre_create_sysfs_entry(void *esre, int entry_num) /* support for displaying ESRT fields at the top level */ #define esrt_attr_decl(name, size, fmt) \ -static ssize_t esrt_##name##_show(struct kobject *kobj, \ +static ssize_t name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, char *buf)\ { \ return sprintf(buf, fmt "\n", le##size##_to_cpu(esrt->name)); \ } \ \ -static struct kobj_attribute esrt_##name = __ATTR(name, 0400, \ - esrt_##name##_show, NULL) +static struct kobj_attribute esrt_##name = __ATTR_RO_MODE(name, 0400) esrt_attr_decl(fw_resource_count, 32, "%u"); esrt_attr_decl(fw_resource_count_max, 32, "%u"); diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c index 8e64b77aeac9..f377609ff141 100644 --- a/drivers/firmware/efi/runtime-map.c +++ b/drivers/firmware/efi/runtime-map.c @@ -63,11 +63,11 @@ static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr, return map_attr->show(entry, buf); } -static struct map_attribute map_type_attr = __ATTR_RO(type); -static struct map_attribute map_phys_addr_attr = __ATTR_RO(phys_addr); -static struct map_attribute map_virt_addr_attr = __ATTR_RO(virt_addr); -static struct map_attribute map_num_pages_attr = __ATTR_RO(num_pages); -static struct map_attribute map_attribute_attr = __ATTR_RO(attribute); +static struct map_attribute map_type_attr = __ATTR_RO_MODE(type, 0400); +static struct map_attribute map_phys_addr_attr = __ATTR_RO_MODE(phys_addr, 0400); +static struct map_attribute map_virt_addr_attr = __ATTR_RO_MODE(virt_addr, 0400); +static struct map_attribute map_num_pages_attr = __ATTR_RO_MODE(num_pages, 0400); +static struct map_attribute map_attribute_attr = __ATTR_RO_MODE(attribute, 0400); /* * These are default attributes that are added for every memmap entry. diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c6f0f0d0e17e..00a1f330f93a 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -116,6 +116,12 @@ struct attribute_group { .show = _name##_show, \ } +#define __ATTR_RO_MODE(_name, _mode) { \ + .attr = { .name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ + .show = _name##_show, \ +} + #define __ATTR_WO(_name) { \ .attr = { .name = __stringify(_name), .mode = S_IWUSR }, \ .store = _name##_store, \ -- GitLab From 8b5106e1d22c6a57b62c71edd77d491203de3b8b Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 6 Dec 2017 09:50:09 +0000 Subject: [PATCH 0171/1398] efi/esrt: Use memunmap() instead of kfree() to free the remapping commit 89c5a2d34bda58319e3075e8e7dd727ea25a435c upstream. The remapping result of memremap() should be freed with memunmap(), not kfree(). Signed-off-by: Pan Bian Signed-off-by: Ard Biesheuvel Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20171206095010.24170-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/esrt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index 2c02572ed41e..307ec1c11276 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -428,7 +428,7 @@ static int __init esrt_sysfs_init(void) err_remove_esrt: kobject_put(esrt_kobj); err: - kfree(esrt); + memunmap(esrt); esrt = NULL; return error; } -- GitLab From 608845ad4986c3022675c748a69cabdbacd2d4a6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 8 Dec 2017 15:13:27 +0000 Subject: [PATCH 0172/1398] ASN.1: fix out-of-bounds read when parsing indefinite length item commit e0058f3a874ebb48b25be7ff79bc3b4e59929f90 upstream. In asn1_ber_decoder(), indefinitely-sized ASN.1 items were being passed to the action functions before their lengths had been computed, using the bogus length of 0x80 (ASN1_INDEFINITE_LENGTH). This resulted in reading data past the end of the input buffer, when given a specially crafted message. Fix it by rearranging the code so that the indefinite length is resolved before the action is called. This bug was originally found by fuzzing the X.509 parser in userspace using libFuzzer from the LLVM project. KASAN report (cleaned up slightly): BUG: KASAN: slab-out-of-bounds in memcpy ./include/linux/string.h:341 [inline] BUG: KASAN: slab-out-of-bounds in x509_fabricate_name.constprop.1+0x1a4/0x940 crypto/asymmetric_keys/x509_cert_parser.c:366 Read of size 128 at addr ffff880035dd9eaf by task keyctl/195 CPU: 1 PID: 195 Comm: keyctl Not tainted 4.14.0-09238-g1d3b78bbc6e9 #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0xd1/0x175 lib/dump_stack.c:53 print_address_description+0x78/0x260 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x23f/0x350 mm/kasan/report.c:409 memcpy+0x1f/0x50 mm/kasan/kasan.c:302 memcpy ./include/linux/string.h:341 [inline] x509_fabricate_name.constprop.1+0x1a4/0x940 crypto/asymmetric_keys/x509_cert_parser.c:366 asn1_ber_decoder+0xb4a/0x1fd0 lib/asn1_decoder.c:447 x509_cert_parse+0x1c7/0x620 crypto/asymmetric_keys/x509_cert_parser.c:89 x509_key_preparse+0x61/0x750 crypto/asymmetric_keys/x509_public_key.c:174 asymmetric_key_preparse+0xa4/0x150 crypto/asymmetric_keys/asymmetric_type.c:388 key_create_or_update+0x4d4/0x10a0 security/keys/key.c:850 SYSC_add_key security/keys/keyctl.c:122 [inline] SyS_add_key+0xe8/0x290 security/keys/keyctl.c:62 entry_SYSCALL_64_fastpath+0x1f/0x96 Allocated by task 195: __do_kmalloc_node mm/slab.c:3675 [inline] __kmalloc_node+0x47/0x60 mm/slab.c:3682 kvmalloc ./include/linux/mm.h:540 [inline] SYSC_add_key security/keys/keyctl.c:104 [inline] SyS_add_key+0x19e/0x290 security/keys/keyctl.c:62 entry_SYSCALL_64_fastpath+0x1f/0x96 Fixes: 42d5ec27f873 ("X.509: Add an ASN.1 decoder") Reported-by: Alexander Potapenko Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- lib/asn1_decoder.c | 47 +++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index 1ef0cec38d78..d77cdfc4b554 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -313,42 +313,47 @@ int asn1_ber_decoder(const struct asn1_decoder *decoder, /* Decide how to handle the operation */ switch (op) { - case ASN1_OP_MATCH_ANY_ACT: - case ASN1_OP_MATCH_ANY_ACT_OR_SKIP: - case ASN1_OP_COND_MATCH_ANY_ACT: - case ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP: - ret = actions[machine[pc + 1]](context, hdr, tag, data + dp, len); - if (ret < 0) - return ret; - goto skip_data; - - case ASN1_OP_MATCH_ACT: - case ASN1_OP_MATCH_ACT_OR_SKIP: - case ASN1_OP_COND_MATCH_ACT_OR_SKIP: - ret = actions[machine[pc + 2]](context, hdr, tag, data + dp, len); - if (ret < 0) - return ret; - goto skip_data; - case ASN1_OP_MATCH: case ASN1_OP_MATCH_OR_SKIP: + case ASN1_OP_MATCH_ACT: + case ASN1_OP_MATCH_ACT_OR_SKIP: case ASN1_OP_MATCH_ANY: case ASN1_OP_MATCH_ANY_OR_SKIP: + case ASN1_OP_MATCH_ANY_ACT: + case ASN1_OP_MATCH_ANY_ACT_OR_SKIP: case ASN1_OP_COND_MATCH_OR_SKIP: + case ASN1_OP_COND_MATCH_ACT_OR_SKIP: case ASN1_OP_COND_MATCH_ANY: case ASN1_OP_COND_MATCH_ANY_OR_SKIP: - skip_data: + case ASN1_OP_COND_MATCH_ANY_ACT: + case ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP: + if (!(flags & FLAG_CONS)) { if (flags & FLAG_INDEFINITE_LENGTH) { + size_t tmp = dp; + ret = asn1_find_indefinite_length( - data, datalen, &dp, &len, &errmsg); + data, datalen, &tmp, &len, &errmsg); if (ret < 0) goto error; - } else { - dp += len; } pr_debug("- LEAF: %zu\n", len); } + + if (op & ASN1_OP_MATCH__ACT) { + unsigned char act; + + if (op & ASN1_OP_MATCH__ANY) + act = machine[pc + 1]; + else + act = machine[pc + 2]; + ret = actions[act](context, hdr, tag, data + dp, len); + if (ret < 0) + return ret; + } + + if (!(flags & FLAG_CONS)) + dp += len; pc += asn1_op_lengths[op]; goto next_op; -- GitLab From 44acfcb6dd7368ad500cf79397f7c1805089a94a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 8 Dec 2017 15:13:27 +0000 Subject: [PATCH 0173/1398] ASN.1: check for error from ASN1_OP_END__ACT actions commit 81a7be2cd69b412ab6aeacfe5ebf1bb6e5bce955 upstream. asn1_ber_decoder() was ignoring errors from actions associated with the opcodes ASN1_OP_END_SEQ_ACT, ASN1_OP_END_SET_ACT, ASN1_OP_END_SEQ_OF_ACT, and ASN1_OP_END_SET_OF_ACT. In practice, this meant the pkcs7_note_signed_info() action (since that was the only user of those opcodes). Fix it by checking for the error, just like the decoder does for actions associated with the other opcodes. This bug allowed users to leak slab memory by repeatedly trying to add a specially crafted "pkcs7_test" key (requires CONFIG_PKCS7_TEST_KEY). In theory, this bug could also be used to bypass module signature verification, by providing a PKCS#7 message that is misparsed such that a signature's ->authattrs do not contain its ->msgdigest. But it doesn't seem practical in normal cases, due to restrictions on the format of the ->authattrs. Fixes: 42d5ec27f873 ("X.509: Add an ASN.1 decoder") Signed-off-by: Eric Biggers Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: Greg Kroah-Hartman --- lib/asn1_decoder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index d77cdfc4b554..dc14beae2c9a 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -439,6 +439,8 @@ int asn1_ber_decoder(const struct asn1_decoder *decoder, else act = machine[pc + 1]; ret = actions[act](context, hdr, 0, data + tdp, len); + if (ret < 0) + return ret; } pc += asn1_op_lengths[op]; goto next_op; -- GitLab From 982707eb4ff84d4ae21618c02dd8926801b10a07 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 8 Dec 2017 15:13:27 +0000 Subject: [PATCH 0174/1398] KEYS: add missing permission check for request_key() destination commit 4dca6ea1d9432052afb06baf2e3ae78188a4410b upstream. When the request_key() syscall is not passed a destination keyring, it links the requested key (if constructed) into the "default" request-key keyring. This should require Write permission to the keyring. However, there is actually no permission check. This can be abused to add keys to any keyring to which only Search permission is granted. This is because Search permission allows joining the keyring. keyctl_set_reqkey_keyring(KEY_REQKEY_DEFL_SESSION_KEYRING) then will set the default request-key keyring to the session keyring. Then, request_key() can be used to add keys to the keyring. Both negatively and positively instantiated keys can be added using this method. Adding negative keys is trivial. Adding a positive key is a bit trickier. It requires that either /sbin/request-key positively instantiates the key, or that another thread adds the key to the process keyring at just the right time, such that request_key() misses it initially but then finds it in construct_alloc_key(). Fix this bug by checking for Write permission to the keyring in construct_get_dest_keyring() when the default keyring is being used. We don't do the permission check for non-default keyrings because that was already done by the earlier call to lookup_user_key(). Also, request_key_and_link() is currently passed a 'struct key *' rather than a key_ref_t, so the "possessed" bit is unavailable. We also don't do the permission check for the "requestor keyring", to continue to support the use case described by commit 8bbf4976b59f ("KEYS: Alter use of key instantiation link-to-keyring argument") where /sbin/request-key recursively calls request_key() to add keys to the original requestor's destination keyring. (I don't know of any users who actually do that, though...) Fixes: 3e30148c3d52 ("[PATCH] Keys: Make request-key create an authorisation key") Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- security/keys/request_key.c | 46 +++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 5030fcf23681..cb7f8f730c6d 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -250,11 +250,12 @@ static int construct_key(struct key *key, const void *callout_info, * The keyring selected is returned with an extra reference upon it which the * caller must release. */ -static void construct_get_dest_keyring(struct key **_dest_keyring) +static int construct_get_dest_keyring(struct key **_dest_keyring) { struct request_key_auth *rka; const struct cred *cred = current_cred(); struct key *dest_keyring = *_dest_keyring, *authkey; + int ret; kenter("%p", dest_keyring); @@ -263,6 +264,8 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) /* the caller supplied one */ key_get(dest_keyring); } else { + bool do_perm_check = true; + /* use a default keyring; falling through the cases until we * find one that we actually have */ switch (cred->jit_keyring) { @@ -277,8 +280,10 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) dest_keyring = key_get(rka->dest_keyring); up_read(&authkey->sem); - if (dest_keyring) + if (dest_keyring) { + do_perm_check = false; break; + } } case KEY_REQKEY_DEFL_THREAD_KEYRING: @@ -313,11 +318,29 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) default: BUG(); } + + /* + * Require Write permission on the keyring. This is essential + * because the default keyring may be the session keyring, and + * joining a keyring only requires Search permission. + * + * However, this check is skipped for the "requestor keyring" so + * that /sbin/request-key can itself use request_key() to add + * keys to the original requestor's destination keyring. + */ + if (dest_keyring && do_perm_check) { + ret = key_permission(make_key_ref(dest_keyring, 1), + KEY_NEED_WRITE); + if (ret) { + key_put(dest_keyring); + return ret; + } + } } *_dest_keyring = dest_keyring; kleave(" [dk %d]", key_serial(dest_keyring)); - return; + return 0; } /* @@ -443,11 +466,15 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx, if (ctx->index_key.type == &key_type_keyring) return ERR_PTR(-EPERM); - user = key_user_lookup(current_fsuid()); - if (!user) - return ERR_PTR(-ENOMEM); + ret = construct_get_dest_keyring(&dest_keyring); + if (ret) + goto error; - construct_get_dest_keyring(&dest_keyring); + user = key_user_lookup(current_fsuid()); + if (!user) { + ret = -ENOMEM; + goto error_put_dest_keyring; + } ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key); key_user_put(user); @@ -462,7 +489,7 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx, } else if (ret == -EINPROGRESS) { ret = 0; } else { - goto couldnt_alloc_key; + goto error_put_dest_keyring; } key_put(dest_keyring); @@ -472,8 +499,9 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx, construction_failed: key_negate_and_link(key, key_negative_timeout, NULL, NULL); key_put(key); -couldnt_alloc_key: +error_put_dest_keyring: key_put(dest_keyring); +error: kleave(" = %d", ret); return ERR_PTR(ret); } -- GitLab From d4f134700c2071aa7f5c3c99e6d8f4aac2b3d880 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 8 Dec 2017 15:13:27 +0000 Subject: [PATCH 0175/1398] X.509: reject invalid BIT STRING for subjectPublicKey commit 0f30cbea005bd3077bd98cd29277d7fc2699c1da upstream. Adding a specially crafted X.509 certificate whose subjectPublicKey ASN.1 value is zero-length caused x509_extract_key_data() to set the public key size to SIZE_MAX, as it subtracted the nonexistent BIT STRING metadata byte. Then, x509_cert_parse() called kmemdup() with that bogus size, triggering the WARN_ON_ONCE() in kmalloc_slab(). This appears to be harmless, but it still must be fixed since WARNs are never supposed to be user-triggerable. Fix it by updating x509_cert_parse() to validate that the value has a BIT STRING metadata byte, and that the byte is 0 which indicates that the number of bits in the bitstring is a multiple of 8. It would be nice to handle the metadata byte in asn1_ber_decoder() instead. But that would be tricky because in the general case a BIT STRING could be implicitly tagged, and/or could legitimately have a length that is not a whole number of bytes. Here was the WARN (cleaned up slightly): WARNING: CPU: 1 PID: 202 at mm/slab_common.c:971 kmalloc_slab+0x5d/0x70 mm/slab_common.c:971 Modules linked in: CPU: 1 PID: 202 Comm: keyctl Tainted: G B 4.14.0-09238-g1d3b78bbc6e9 #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 task: ffff880033014180 task.stack: ffff8800305c8000 Call Trace: __do_kmalloc mm/slab.c:3706 [inline] __kmalloc_track_caller+0x22/0x2e0 mm/slab.c:3726 kmemdup+0x17/0x40 mm/util.c:118 kmemdup include/linux/string.h:414 [inline] x509_cert_parse+0x2cb/0x620 crypto/asymmetric_keys/x509_cert_parser.c:106 x509_key_preparse+0x61/0x750 crypto/asymmetric_keys/x509_public_key.c:174 asymmetric_key_preparse+0xa4/0x150 crypto/asymmetric_keys/asymmetric_type.c:388 key_create_or_update+0x4d4/0x10a0 security/keys/key.c:850 SYSC_add_key security/keys/keyctl.c:122 [inline] SyS_add_key+0xe8/0x290 security/keys/keyctl.c:62 entry_SYSCALL_64_fastpath+0x1f/0x96 Fixes: 42d5ec27f873 ("X.509: Add an ASN.1 decoder") Signed-off-by: Eric Biggers Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: Greg Kroah-Hartman --- crypto/asymmetric_keys/x509_cert_parser.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index c80765b211cf..029f7051f2be 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -408,6 +408,8 @@ int x509_extract_key_data(void *context, size_t hdrlen, ctx->cert->pub->pkey_algo = "rsa"; /* Discard the BIT STRING metadata */ + if (vlen < 1 || *(const u8 *)value != 0) + return -EBADMSG; ctx->key = value + 1; ctx->key_size = vlen - 1; return 0; -- GitLab From 3d4696248c089854a3637265e9f8b001880e345d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 8 Dec 2017 15:13:29 +0000 Subject: [PATCH 0176/1398] X.509: fix comparisons of ->pkey_algo commit 54c1fb39fe0495f846539ab765925b008f86801c upstream. ->pkey_algo used to be an enum, but was changed to a string by commit 4e8ae72a75aa ("X.509: Make algo identifiers text instead of enum"). But two comparisons were not updated. Fix them to use strcmp(). This bug broke signature verification in certain configurations, depending on whether the string constants were deduplicated or not. Fixes: 4e8ae72a75aa ("X.509: Make algo identifiers text instead of enum") Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- crypto/asymmetric_keys/pkcs7_verify.c | 2 +- crypto/asymmetric_keys/x509_public_key.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index 2ffd69769466..5a37962d2199 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -150,7 +150,7 @@ static int pkcs7_find_key(struct pkcs7_message *pkcs7, pr_devel("Sig %u: Found cert serial match X.509[%u]\n", sinfo->index, certix); - if (x509->pub->pkey_algo != sinfo->sig->pkey_algo) { + if (strcmp(x509->pub->pkey_algo, sinfo->sig->pkey_algo) != 0) { pr_warn("Sig %u: X.509 algo and PKCS#7 sig algo don't match\n", sinfo->index); continue; diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index fb732296cd36..e16009a8da9c 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -125,7 +125,7 @@ int x509_check_for_self_signed(struct x509_certificate *cert) } ret = -EKEYREJECTED; - if (cert->pub->pkey_algo != cert->sig->pkey_algo) + if (strcmp(cert->pub->pkey_algo, cert->sig->pkey_algo) != 0) goto out; ret = public_key_verify_signature(cert->pub, cert->sig); -- GitLab From 6662a55fa6eb87638eee0012e38815fe642a003b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 1 Dec 2017 15:08:12 +0100 Subject: [PATCH 0177/1398] x86/PCI: Make broadcom_postcore_init() check acpi_disabled commit ddec3bdee05b06f1dda20ded003c3e10e4184cab upstream. acpi_os_get_root_pointer() may return a valid address even if acpi_disabled is set, but the host bridge information from the ACPI tables is not going to be used in that case and the Broadcom host bridge initialization should not be skipped then, So make broadcom_postcore_init() check acpi_disabled too to avoid this issue. Fixes: 6361d72b04d1 (x86/PCI: read Broadcom CNB20LE host bridge info before PCI scan) Reported-by: Dave Hansen Signed-off-by: Rafael J. Wysocki Signed-off-by: Thomas Gleixner Cc: Bjorn Helgaas Cc: Linux PCI Link: https://lkml.kernel.org/r/3186627.pxZj1QbYNg@aspire.rjw.lan Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/broadcom_bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index bb461cfd01ab..526536c81ddc 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c @@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void) * We should get host bridge information from ACPI unless the BIOS * doesn't support it. */ - if (acpi_os_get_root_pointer()) + if (!acpi_disabled && acpi_os_get_root_pointer()) return 0; #endif -- GitLab From 9cf0eaf88d74e54c48b4f9580332e596b9d04290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 30 Nov 2017 19:05:45 +0100 Subject: [PATCH 0178/1398] KVM: x86: fix APIC page invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b1394e745b9453dcb5b0671c205b770e87dedb87 upstream. Implementation of the unpinned APIC page didn't update the VMCS address cache when invalidation was done through range mmu notifiers. This became a problem when the page notifier was removed. Re-introduce the arch-specific helper and call it from ...range_start. Reported-by: Fabian Grünbichler Fixes: 38b9917350cb ("kvm: vmx: Implement set_apic_access_page_addr") Fixes: 369ea8242c0f ("mm/rmap: update to new mmu_notifier semantic v2") Reviewed-by: Paolo Bonzini Reviewed-by: Andrea Arcangeli Tested-by: Wanpeng Li Tested-by: Fabian Grünbichler Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/x86.c | 14 ++++++++++++++ virt/kvm/kvm_main.c | 8 ++++++++ 3 files changed, 25 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bdde80731f49..cbd1d44da2d3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1397,4 +1397,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #endif } +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end); + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02d45296a97c..26b580ad268f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6526,6 +6526,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) kvm_x86_ops->tlb_flush(vcpu); } +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) +{ + unsigned long apic_address; + + /* + * The physical address of apic access page is stored in the VMCS. + * Update it when it becomes invalid. + */ + apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + if (start <= apic_address && apic_address < end) + kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); +} + void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { struct page *page = NULL; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f4c6d4f6d2e8..4569fdcab701 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -125,6 +125,11 @@ EXPORT_SYMBOL_GPL(kvm_rebooting); static bool largepages_enabled = true; +__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) +{ +} + bool kvm_is_reserved_pfn(kvm_pfn_t pfn) { if (pfn_valid(pfn)) @@ -361,6 +366,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, kvm_flush_remote_tlbs(kvm); spin_unlock(&kvm->mmu_lock); + + kvm_arch_mmu_notifier_invalidate_range(kvm, start, end); + srcu_read_unlock(&kvm->srcu, idx); } -- GitLab From 8974b0320cc83b99a86d5fcca51e7c875254ec0d Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 4 Dec 2017 13:11:45 -0500 Subject: [PATCH 0179/1398] btrfs: fix missing error return in btrfs_drop_snapshot commit e19182c0fff451e3744c1107d98f072e7ca377a0 upstream. If btrfs_del_root fails in btrfs_drop_snapshot, we'll pick up the error but then return 0 anyway due to mixing err and ret. Fixes: 79787eaab4612 ("btrfs: replace many BUG_ONs with proper error handling") Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c4cff5cc9c93..a29730c44850 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9362,6 +9362,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, ret = btrfs_del_root(trans, tree_root, &root->root_key); if (ret) { btrfs_abort_transaction(trans, ret); + err = ret; goto out_end_trans; } -- GitLab From 45ddff3ce4e9a3d7e935d5b596686d9e176ed4a9 Mon Sep 17 00:00:00 2001 From: Robb Glasser Date: Tue, 5 Dec 2017 09:16:55 -0800 Subject: [PATCH 0180/1398] ALSA: pcm: prevent UAF in snd_pcm_info commit 362bca57f5d78220f8b5907b875961af9436e229 upstream. When the device descriptor is closed, the `substream->runtime` pointer is freed. But another thread may be in the ioctl handler, case SNDRV_CTL_IOCTL_PCM_INFO. This case calls snd_pcm_info_user() which calls snd_pcm_info() which accesses the now freed `substream->runtime`. Note: this fixes CVE-2017-0861 Signed-off-by: Robb Glasser Signed-off-by: Nick Desaulniers Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 8e980aa678d0..074363b63cc4 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -149,7 +149,9 @@ static int snd_pcm_control_ioctl(struct snd_card *card, err = -ENXIO; goto _error; } + mutex_lock(&pcm->open_mutex); err = snd_pcm_info_user(substream, info); + mutex_unlock(&pcm->open_mutex); _error: mutex_unlock(®ister_mutex); return err; -- GitLab From 20ca63e0968ed1614fa91c836eee8f61d7f54c7e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 30 Nov 2017 10:08:28 +0100 Subject: [PATCH 0181/1398] ALSA: seq: Remove spurious WARN_ON() at timer check commit 43a3542870328601be02fcc9d27b09db467336ef upstream. The use of snd_BUG_ON() in ALSA sequencer timer may lead to a spurious WARN_ON() when a slave timer is deployed as its backend and a corresponding master timer stops meanwhile. The symptom was triggered by syzkaller spontaneously. Since the NULL timer is valid there, rip off snd_BUG_ON(). Reported-by: syzbot Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index 37d9cfbc29f9..b80985fbc334 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -355,7 +355,7 @@ static int initialize_timer(struct snd_seq_timer *tmr) unsigned long freq; t = tmr->timeri->timer; - if (snd_BUG_ON(!t)) + if (!t) return -EINVAL; freq = tmr->preferred_resolution; -- GitLab From c40457c952ecd218731a81aab33c422491f557a0 Mon Sep 17 00:00:00 2001 From: Jaejoong Kim Date: Mon, 4 Dec 2017 15:31:48 +0900 Subject: [PATCH 0182/1398] ALSA: usb-audio: Fix out-of-bound error commit 251552a2b0d454badc8f486e6d79100970c744b0 upstream. The snd_usb_copy_string_desc() retrieves the usb string corresponding to the index number through the usb_string(). The problem is that the usb_string() returns the length of the string (>= 0) when successful, but it can also return a negative value about the error case or status of usb_control_msg(). If iClockSource is '0' as shown below, usb_string() will returns -EINVAL. This will result in '0' being inserted into buf[-22], and the following KASAN out-of-bound error message will be output. AudioControl Interface Descriptor: bLength 8 bDescriptorType 36 bDescriptorSubtype 10 (CLOCK_SOURCE) bClockID 1 bmAttributes 0x07 Internal programmable Clock (synced to SOF) bmControls 0x07 Clock Frequency Control (read/write) Clock Validity Control (read-only) bAssocTerminal 0 iClockSource 0 To fix it, check usb_string()'return value and bail out. ================================================================== BUG: KASAN: stack-out-of-bounds in parse_audio_unit+0x1327/0x1960 [snd_usb_audio] Write of size 1 at addr ffff88007e66735a by task systemd-udevd/18376 CPU: 0 PID: 18376 Comm: systemd-udevd Not tainted 4.13.0+ #3 Hardware name: LG Electronics 15N540-RFLGL/White Tip Mountain, BIOS 15N5 Call Trace: dump_stack+0x63/0x8d print_address_description+0x70/0x290 ? parse_audio_unit+0x1327/0x1960 [snd_usb_audio] kasan_report+0x265/0x350 __asan_store1+0x4a/0x50 parse_audio_unit+0x1327/0x1960 [snd_usb_audio] ? save_stack+0xb5/0xd0 ? save_stack_trace+0x1b/0x20 ? save_stack+0x46/0xd0 ? kasan_kmalloc+0xad/0xe0 ? kmem_cache_alloc_trace+0xff/0x230 ? snd_usb_create_mixer+0xb0/0x4b0 [snd_usb_audio] ? usb_audio_probe+0x4de/0xf40 [snd_usb_audio] ? usb_probe_interface+0x1f5/0x440 ? driver_probe_device+0x3ed/0x660 ? build_feature_ctl+0xb10/0xb10 [snd_usb_audio] ? save_stack_trace+0x1b/0x20 ? init_object+0x69/0xa0 ? snd_usb_find_csint_desc+0xa8/0xf0 [snd_usb_audio] snd_usb_mixer_controls+0x1dc/0x370 [snd_usb_audio] ? build_audio_procunit+0x890/0x890 [snd_usb_audio] ? snd_usb_create_mixer+0xb0/0x4b0 [snd_usb_audio] ? kmem_cache_alloc_trace+0xff/0x230 ? usb_ifnum_to_if+0xbd/0xf0 snd_usb_create_mixer+0x25b/0x4b0 [snd_usb_audio] ? snd_usb_create_stream+0x255/0x2c0 [snd_usb_audio] usb_audio_probe+0x4de/0xf40 [snd_usb_audio] ? snd_usb_autosuspend.part.7+0x30/0x30 [snd_usb_audio] ? __pm_runtime_idle+0x90/0x90 ? kernfs_activate+0xa6/0xc0 ? usb_match_one_id_intf+0xdc/0x130 ? __pm_runtime_set_status+0x2d4/0x450 usb_probe_interface+0x1f5/0x440 Signed-off-by: Jaejoong Kim Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 9133d3e53d9d..1196bb2bd5ff 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -204,6 +204,10 @@ static int snd_usb_copy_string_desc(struct mixer_build *state, int index, char *buf, int maxlen) { int len = usb_string(state->chip->dev, index, buf, maxlen - 1); + + if (len < 0) + return 0; + buf[len] = 0; return len; } -- GitLab From 173c8c346587cfdca49f73b6cc2ac6500f425112 Mon Sep 17 00:00:00 2001 From: Jaejoong Kim Date: Mon, 4 Dec 2017 15:31:49 +0900 Subject: [PATCH 0183/1398] ALSA: usb-audio: Add check return value for usb_string() commit 89b89d121ffcf8d9546633b98ded9d18b8f75891 upstream. snd_usb_copy_string_desc() returns zero if usb_string() fails. In case of failure, we need to check the snd_usb_copy_string_desc()'s return value and add an exception case Signed-off-by: Jaejoong Kim Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 1196bb2bd5ff..24c897f0b571 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2172,13 +2172,14 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, if (len) ; else if (nameid) - snd_usb_copy_string_desc(state, nameid, kctl->id.name, + len = snd_usb_copy_string_desc(state, nameid, kctl->id.name, sizeof(kctl->id.name)); - else { + else len = get_term_name(state, &state->oterm, kctl->id.name, sizeof(kctl->id.name), 0); - if (!len) - strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name)); + + if (!len) { + strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name)); if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR) append_ctl_name(kctl, " Clock Source"); -- GitLab From e17f2b51617dfa528e97ae81b4ea084f511585a4 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 28 Sep 2017 15:14:01 +0100 Subject: [PATCH 0184/1398] iommu/vt-d: Fix scatterlist offset handling commit 29a90b70893817e2f2bb3cea40a29f5308e21b21 upstream. The intel-iommu DMA ops fail to correctly handle scatterlists where sg->offset is greater than PAGE_SIZE - the IOVA allocation is computed appropriately based on the page-aligned portion of the offset, but the mapping is set up relative to sg->page, which means it fails to actually cover the whole buffer (and in the worst case doesn't cover it at all): (sg->dma_address + sg->dma_len) ----+ sg->dma_address ---------+ | iov_pfn------+ | | | | | v v v iova: a b c d e f |--------|--------|--------|--------|--------| <...calculated....> [_____mapped______] pfn: 0 1 2 3 4 5 |--------|--------|--------|--------|--------| ^ ^ ^ | | | sg->page ----+ | | sg->offset --------------+ | (sg->offset + sg->length) ----------+ As a result, the caller ends up overrunning the mapping into whatever lies beyond, which usually goes badly: [ 429.645492] DMAR: DRHD: handling fault status reg 2 [ 429.650847] DMAR: [DMA Write] Request device [02:00.4] fault addr f2682000 ... Whilst this is a fairly rare occurrence, it can happen from the result of intermediate scatterlist processing such as scatterwalk_ffwd() in the crypto layer. Whilst that particular site could be fixed up, it still seems worthwhile to bring intel-iommu in line with other DMA API implementations in handling this robustly. To that end, fix the intel_map_sg() path to line up the mapping correctly (in units of MM pages rather than VT-d pages to match the aligned_nrpages() calculation) regardless of the offset, and use sg_phys() consistently for clarity. Reported-by: Harsh Jain Signed-off-by: Robin Murphy Reviewed by: Ashok Raj Tested by: Jacob Pan Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 002f8a421efa..88bbc8ccc5e3 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2245,10 +2245,12 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, uint64_t tmp; if (!sg_res) { + unsigned int pgoff = sg->offset & ~PAGE_MASK; + sg_res = aligned_nrpages(sg->offset, sg->length); - sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; + sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff; sg->dma_length = sg->length; - pteval = page_to_phys(sg_page(sg)) | prot; + pteval = (sg_phys(sg) - pgoff) | prot; phys_pfn = pteval >> VTD_PAGE_SHIFT; } @@ -3894,7 +3896,7 @@ static int intel_nontranslate_map_sg(struct device *hddev, for_each_sg(sglist, sg, nelems, i) { BUG_ON(!sg_page(sg)); - sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset; + sg->dma_address = sg_phys(sg); sg->dma_length = sg->length; } return nelems; -- GitLab From ff3d4fd5374f62a322dba722f23cb3250dcebb4e Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 28 Nov 2017 21:19:53 +0800 Subject: [PATCH 0185/1398] smp/hotplug: Move step CPUHP_AP_SMPCFD_DYING to the correct place commit 46febd37f9c758b05cd25feae8512f22584742fe upstream. Commit 31487f8328f2 ("smp/cfd: Convert core to hotplug state machine") accidently put this step on the wrong place. The step should be at the cpuhp_ap_states[] rather than the cpuhp_bp_states[]. grep smpcfd /sys/devices/system/cpu/hotplug/states 40: smpcfd:prepare 129: smpcfd:dying "smpcfd:dying" was missing before. So was the invocation of the function smpcfd_dying_cpu(). Fixes: 31487f8328f2 ("smp/cfd: Convert core to hotplug state machine") Signed-off-by: Lai Jiangshan Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Richard Weinberger Cc: Sebastian Andrzej Siewior Cc: Boris Ostrovsky Link: https://lkml.kernel.org/r/20171128131954.81229-1-jiangshanlai@gmail.com Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 26a4f74bff83..e1436ca4aed0 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1321,11 +1321,6 @@ static struct cpuhp_step cpuhp_bp_states[] = { .teardown.single = NULL, .cant_stop = true, }, - [CPUHP_AP_SMPCFD_DYING] = { - .name = "smpcfd:dying", - .startup.single = NULL, - .teardown.single = smpcfd_dying_cpu, - }, /* * Handled on controll processor until the plugged processor manages * this itself. @@ -1367,6 +1362,11 @@ static struct cpuhp_step cpuhp_ap_states[] = { .startup.single = NULL, .teardown.single = rcutree_dying_cpu, }, + [CPUHP_AP_SMPCFD_DYING] = { + .name = "smpcfd:dying", + .startup.single = NULL, + .teardown.single = smpcfd_dying_cpu, + }, /* Entry state on starting. Interrupts enabled from here on. Transient * state for synchronsization */ [CPUHP_AP_ONLINE] = { -- GitLab From bd6a7055b84bf53b2937d4f99005a0f1beadcbe9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 6 Dec 2017 16:11:27 +0100 Subject: [PATCH 0186/1398] s390: fix compat system call table commit e779498df587dd2189b30fe5b9245aefab870eb8 upstream. When wiring up the socket system calls the compat entries were incorrectly set. Not all of them point to the corresponding compat wrapper functions, which clear the upper 33 bits of user space pointers, like it is required. Fixes: 977108f89c989 ("s390: wire up separate socketcalls system calls") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/syscalls.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 9b59e6212d8f..709da452413d 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -369,10 +369,10 @@ SYSCALL(sys_recvmmsg,compat_sys_recvmmsg) SYSCALL(sys_sendmmsg,compat_sys_sendmmsg) SYSCALL(sys_socket,sys_socket) SYSCALL(sys_socketpair,compat_sys_socketpair) /* 360 */ -SYSCALL(sys_bind,sys_bind) -SYSCALL(sys_connect,sys_connect) +SYSCALL(sys_bind,compat_sys_bind) +SYSCALL(sys_connect,compat_sys_connect) SYSCALL(sys_listen,sys_listen) -SYSCALL(sys_accept4,sys_accept4) +SYSCALL(sys_accept4,compat_sys_accept4) SYSCALL(sys_getsockopt,compat_sys_getsockopt) /* 365 */ SYSCALL(sys_setsockopt,compat_sys_setsockopt) SYSCALL(sys_getsockname,compat_sys_getsockname) -- GitLab From ffb17c0cce843d2a07db0765b30ee754c9ef3bec Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 4 Dec 2017 12:19:11 +0100 Subject: [PATCH 0187/1398] KVM: s390: Fix skey emulation permission check commit ca76ec9ca871e67d8cd0b6caba24aca3d3ac4546 upstream. All skey functions call skey_check_enable at their start, which checks if we are in the PSTATE and injects a privileged operation exception if we are. Unfortunately they continue processing afterwards and perform the operation anyhow as skey_check_enable does not deliver an error if the exception injection was successful. Let's move the PSTATE check into the skey functions and exit them on such an occasion, also we now do not enable skey handling anymore in such a case. Signed-off-by: Janosch Frank Reviewed-by: Christian Borntraeger Fixes: a7e19ab ("KVM: s390: handle missing storage-key facility") Reviewed-by: Cornelia Huck Reviewed-by: Thomas Huth Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/priv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index e18435355c16..c2905a10cb37 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -197,8 +197,6 @@ static int try_handle_skey(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return -EAGAIN; } - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); return 0; } @@ -209,6 +207,9 @@ static int handle_iske(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + rc = try_handle_skey(vcpu); if (rc) return rc != -EAGAIN ? rc : 0; @@ -238,6 +239,9 @@ static int handle_rrbe(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + rc = try_handle_skey(vcpu); if (rc) return rc != -EAGAIN ? rc : 0; @@ -273,6 +277,9 @@ static int handle_sske(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + rc = try_handle_skey(vcpu); if (rc) return rc != -EAGAIN ? rc : 0; -- GitLab From 8950c982fffd9700ca7ef2fc2586638e47669f19 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 6 Dec 2017 18:21:14 +1000 Subject: [PATCH 0188/1398] powerpc/64s: Initialize ISAv3 MMU registers before setting partition table commit 371b80447ff33ddac392c189cf884a5a3e18faeb upstream. kexec can leave MMU registers set when booting into a new kernel, the PIDR (Process Identification Register) in particular. The boot sequence does not zero PIDR, so it only gets set when CPUs first switch to a userspace processes (until then it's running a kernel thread with effective PID = 0). This leaves a window where a process table entry and page tables are set up due to user processes running on other CPUs, that happen to match with a stale PID. The CPU with that PID may cause speculative accesses that address quadrant 0 (aka userspace addresses), which will result in cached translations and PWC (Page Walk Cache) for that process, on a CPU which is not in the mm_cpumask and so they will not be invalidated properly. The most common result is the kernel hanging in infinite page fault loops soon after kexec (usually in schedule_tail, which is usually the first non-speculative quadrant 0 access to a new PID) due to a stale PWC. However being a stale translation error, it could result in anything up to security and data corruption problems. Fix this by zeroing out PIDR at boot and kexec. Fixes: 7e381c0ff618 ("powerpc/mm/radix: Add mmu context handling callback for radix") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/cpu_setup_power.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 7803756998e2..9e05c8828ee2 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -97,6 +97,7 @@ _GLOBAL(__setup_cpu_power9) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PID,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) or r3, r3, r4 @@ -119,6 +120,7 @@ _GLOBAL(__restore_cpu_power9) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PID,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) or r3, r3, r4 -- GitLab From 4a70f07da85313aa817c0888d2b623ea30cbbbe5 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Sat, 25 Nov 2017 21:39:25 +0100 Subject: [PATCH 0189/1398] brcmfmac: change driver unbind order of the sdio function devices commit 5c3de777bdaf48bd0cfb43097c0d0fb85056cab7 upstream. In the function brcmf_sdio_firmware_callback() the driver is unbound from the sdio function devices in the error path. However, the order in which it is done resulted in a use-after-free issue (see brcmf_ops_sdio_remove() in bcmsdh.c). Hence change the order and first unbind sdio function #2 device and then unbind sdio function #1 device. Fixes: 7a51461fc2da ("brcmfmac: unbind all devices upon failure in firmware callback") Reported-by: Stefan Wahren Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 8e3c6f4bdaa0..edffe5aeeeb1 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -4080,8 +4080,8 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err, sdio_release_host(sdiodev->func[1]); fail: brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err); - device_release_driver(dev); device_release_driver(&sdiodev->func[2]->dev); + device_release_driver(dev); } struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) -- GitLab From 30b18ee253a09a36d43a1e126b124296b0d649a7 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Mon, 2 Mar 2015 14:13:36 +0000 Subject: [PATCH 0190/1398] kdb: Fix handling of kallsyms_symbol_next() return value commit c07d35338081d107e57cf37572d8cc931a8e32e2 upstream. kallsyms_symbol_next() returns a boolean (true on success). Currently kdb_read() tests the return value with an inequality that unconditionally evaluates to true. This is fixed in the obvious way and, since the conditional branch is supposed to be unreachable, we also add a WARN_ON(). Reported-by: Dan Carpenter Signed-off-by: Daniel Thompson Signed-off-by: Jason Wessel Signed-off-by: Greg Kroah-Hartman --- kernel/debug/kdb/kdb_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index fc1ef736253c..77777d918676 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -349,7 +349,7 @@ static char *kdb_read(char *buffer, size_t bufsize) } kdb_printf("\n"); for (i = 0; i < count; i++) { - if (kallsyms_symbol_next(p_tmp, i) < 0) + if (WARN_ON(!kallsyms_symbol_next(p_tmp, i))) break; kdb_printf("%s ", p_tmp); *(p_tmp + len) = '\0'; -- GitLab From 4f128c8aa35b81f418617c2eab7edd807908cbf1 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 22 Nov 2017 14:14:47 +0100 Subject: [PATCH 0191/1398] drm/exynos: gem: Drop NONCONTIG flag for buffers allocated without IOMMU commit 120a264f9c2782682027d931d83dcbd22e01da80 upstream. When no IOMMU is available, all GEM buffers allocated by Exynos DRM driver are contiguous, because of the underlying dma_alloc_attrs() function provides only such buffers. In such case it makes no sense to keep BO_NONCONTIG flag for the allocated GEM buffers. This allows to avoid failures for buffer contiguity checks in the subsequent operations on GEM objects. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index f2ae72ba7d5a..2abc47b554ab 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -246,6 +246,15 @@ struct exynos_drm_gem *exynos_drm_gem_create(struct drm_device *dev, if (IS_ERR(exynos_gem)) return exynos_gem; + if (!is_drm_iommu_supported(dev) && (flags & EXYNOS_BO_NONCONTIG)) { + /* + * when no IOMMU is available, all allocated buffers are + * contiguous anyway, so drop EXYNOS_BO_NONCONTIG flag + */ + flags &= ~EXYNOS_BO_NONCONTIG; + DRM_WARN("Non-contiguous allocation is not supported without IOMMU, falling back to contiguous buffer\n"); + } + /* set memory type and cache attribute from user side. */ exynos_gem->flags = flags; -- GitLab From 730810d9232714ece9410d0e091455ba71624696 Mon Sep 17 00:00:00 2001 From: Laurent Caumont Date: Sat, 11 Nov 2017 12:44:46 -0500 Subject: [PATCH 0192/1398] media: dvb: i2c transfers over usb cannot be done from stack commit 6d33377f2abbf9f0e561b116dd468d1c3ff36a6a upstream. Signed-off-by: Laurent Caumont Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/dibusb-common.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dibusb-common.c b/drivers/media/usb/dvb-usb/dibusb-common.c index 8207e6900656..bcacb0f22028 100644 --- a/drivers/media/usb/dvb-usb/dibusb-common.c +++ b/drivers/media/usb/dvb-usb/dibusb-common.c @@ -223,8 +223,20 @@ EXPORT_SYMBOL(dibusb_i2c_algo); int dibusb_read_eeprom_byte(struct dvb_usb_device *d, u8 offs, u8 *val) { - u8 wbuf[1] = { offs }; - return dibusb_i2c_msg(d, 0x50, wbuf, 1, val, 1); + u8 *buf; + int rc; + + buf = kmalloc(2, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf[0] = offs; + + rc = dibusb_i2c_msg(d, 0x50, &buf[0], 1, &buf[1], 1); + *val = buf[1]; + kfree(buf); + + return rc; } EXPORT_SYMBOL(dibusb_read_eeprom_byte); -- GitLab From 63fba9ff76f0ef8af6009903ccc903bd5cee9876 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Thu, 16 Nov 2017 17:58:20 +0000 Subject: [PATCH 0193/1398] arm64: KVM: fix VTTBR_BADDR_MASK BUG_ON off-by-one commit 26aa7b3b1c0fb3f1a6176a0c1847204ef4355693 upstream. VTTBR_BADDR_MASK is used to sanity check the size and alignment of the VTTBR address. It seems to currently be off by one, thereby only allowing up to 47-bit addresses (instead of 48-bit) and also insufficiently checking the alignment. This patch fixes it. As an example, with 4k pages, before this patch we have: PHYS_MASK_SHIFT = 48 VTTBR_X = 37 - 24 = 13 VTTBR_BADDR_SHIFT = 13 - 1 = 12 VTTBR_BADDR_MASK = ((1 << 35) - 1) << 12 = 0x00007ffffffff000 Which is wrong, because the mask doesn't allow bit 47 of the VTTBR address to be set, and only requires the address to be 12-bit (4k) aligned, while it actually needs to be 13-bit (8k) aligned because we concatenate two 4k tables. With this patch, the mask becomes 0x0000ffffffffe000, which is what we want. Fixes: 0369f6a34b9f ("arm64: KVM: EL2 register definitions") Reviewed-by: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Kristina Martsenko Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_arm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 2a2752b5b6aa..0dbc1c6ab7dc 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -170,8 +170,7 @@ #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) #define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) -#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT (UL(48)) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) -- GitLab From 6ed459712e26acd638b85c4f06dd355960e741c2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Nov 2017 17:58:21 +0000 Subject: [PATCH 0194/1398] arm: KVM: Fix VTTBR_BADDR_MASK BUG_ON off-by-one commit 5553b142be11e794ebc0805950b2e8313f93d718 upstream. VTTBR_BADDR_MASK is used to sanity check the size and alignment of the VTTBR address. It seems to currently be off by one, thereby only allowing up to 39-bit addresses (instead of 40-bit) and also insufficiently checking the alignment. This patch fixes it. This patch is the 32bit pendent of Kristina's arm64 fix, and she deserves the actual kudos for pinpointing that one. Fixes: f7ed45be3ba52 ("KVM: ARM: World-switch implementation") Reported-by: Kristina Martsenko Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_arm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index e22089fb44dc..5f5defdad025 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -161,8 +161,7 @@ #else #define VTTBR_X (5 - KVM_T0SZ) #endif -#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT _AC(48, ULL) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) -- GitLab From 6ead44d4b5b8b1ecfcbd2302f15028dab7774da3 Mon Sep 17 00:00:00 2001 From: Andrew Honig Date: Fri, 1 Dec 2017 10:21:09 -0800 Subject: [PATCH 0195/1398] KVM: VMX: remove I/O port 0x80 bypass on Intel hosts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d59d51f088014f25c2562de59b9abff4f42a7468 upstream. This fixes CVE-2017-1000407. KVM allows guests to directly access I/O port 0x80 on Intel hosts. If the guest floods this port with writes it generates exceptions and instability in the host kernel, leading to a crash. With this change guest writes to port 0x80 on Intel will behave the same as they currently behave on AMD systems. Prevent the flooding by removing the code that sets port 0x80 as a passthrough port. This is essentially the same as upstream patch 99f85a28a78e96d28907fe036e1671a218fee597, except that patch was for AMD chipsets and this patch is for Intel. Signed-off-by: Andrew Honig Signed-off-by: Jim Mattson Fixes: fdef3ad1b386 ("KVM: VMX: Enable io bitmaps to avoid IO port 0x80 VMEXITs") Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f0d3de153e29..2e65760e546f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6413,12 +6413,7 @@ static __init int hardware_setup(void) memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); - /* - * Allow direct access to the PC debug port (it is often used for I/O - * delays, but the vmexits simply slow things down). - */ memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); - clear_bit(0x80, vmx_io_bitmap_a); memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); -- GitLab From cf6668d57a26e7d72d12117e9aba0c3c1578bfc5 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 3 Dec 2017 23:54:41 +0100 Subject: [PATCH 0196/1398] KVM: arm/arm64: Fix broken GICH_ELRSR big endian conversion commit fc396e066318c0a02208c1d3f0b62950a7714999 upstream. We are incorrectly rearranging 32-bit words inside a 64-bit typed value for big endian systems, which would result in never marking a virtual interrupt as inactive on big endian systems (assuming 32 or fewer LRs on the hardware). Fix this by not doing any word order manipulation for the typed values. Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/hyp/vgic-v2-sr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index c8aeb7b91ec8..95021246ee26 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -77,11 +77,7 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) else elrsr1 = 0; -#ifdef CONFIG_CPU_BIG_ENDIAN - cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1; -#else cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; -#endif } static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) -- GitLab From 42c3f4c55f10102b3e522ae99754a9a0ea41235d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Nov 2017 17:58:15 +0000 Subject: [PATCH 0197/1398] KVM: arm/arm64: vgic-irqfd: Fix MSI entry allocation commit 150009e2c70cc3c6e97f00e7595055765d32fb85 upstream. Using the size of the structure we're allocating is a good idea and avoids any surprise... In this case, we're happilly confusing kvm_kernel_irq_routing_entry and kvm_irq_routing_entry... Fixes: 95b110ab9a09 ("KVM: arm/arm64: Enable irqchip routing") Reported-by: AKASHI Takahiro Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-irqfd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index f138ed2e9c63..a26c6773d6df 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -112,8 +112,7 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) u32 nr = dist->nr_spis; int i, ret; - entries = kcalloc(nr, sizeof(struct kvm_kernel_irq_routing_entry), - GFP_KERNEL); + entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL); if (!entries) return -ENOMEM; -- GitLab From 7df3dbef3dd5bd1e0a7e1902aab7c5bdd8923a81 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Nov 2017 17:58:18 +0000 Subject: [PATCH 0198/1398] KVM: arm/arm64: vgic-its: Check result of allocation before use commit 686f294f2f1ae40705283dd413ca1e4c14f20f93 upstream. We miss a test against NULL after allocation. Fixes: 6d03a68f8054 ("KVM: arm64: vgic-its: Turn device_id validation into generic ID validation") Reported-by: AKASHI Takahiro Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-its.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 4660a7d04eea..6af22fe9e082 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -687,6 +687,8 @@ static int vgic_its_alloc_collection(struct vgic_its *its, return E_ITS_MAPC_COLLECTION_OOR; collection = kzalloc(sizeof(*collection), GFP_KERNEL); + if (!collection) + return -ENOMEM; collection->collection_id = coll_id; collection->target_addr = COLLECTION_NOT_MAPPED; -- GitLab From a39224199e97e2f49870933f8a9caaaa1edf736c Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 5 Dec 2017 14:56:42 +0000 Subject: [PATCH 0199/1398] arm64: fpsimd: Prevent registers leaking from dead tasks commit 071b6d4a5d343046f253a5a8835d477d93992002 upstream. Currently, loading of a task's fpsimd state into the CPU registers is skipped if that task's state is already present in the registers of that CPU. However, the code relies on the struct fpsimd_state * (and by extension struct task_struct *) to unambiguously identify a task. There is a particular case in which this doesn't work reliably: when a task exits, its task_struct may be recycled to describe a new task. Consider the following scenario: 1) Task P loads its fpsimd state onto cpu C. per_cpu(fpsimd_last_state, C) := P; P->thread.fpsimd_state.cpu := C; 2) Task X is scheduled onto C and loads its fpsimd state on C. per_cpu(fpsimd_last_state, C) := X; X->thread.fpsimd_state.cpu := C; 3) X exits, causing X's task_struct to be freed. 4) P forks a new child T, which obtains X's recycled task_struct. T == X. T->thread.fpsimd_state.cpu == C (inherited from P). 5) T is scheduled on C. T's fpsimd state is not loaded, because per_cpu(fpsimd_last_state, C) == T (== X) && T->thread.fpsimd_state.cpu == C. (This is the check performed by fpsimd_thread_switch().) So, T gets X's registers because the last registers loaded onto C were those of X, in (2). This patch fixes the problem by ensuring that the sched-in check fails in (5): fpsimd_flush_task_state(T) is called when T is forked, so that T->thread.fpsimd_state.cpu == C cannot be true. This relies on the fact that T is not schedulable until after copy_thread() completes. Once T's fpsimd state has been loaded on some CPU C there may still be other cpus D for which per_cpu(fpsimd_last_state, D) == &X->thread.fpsimd_state. But D is necessarily != C in this case, and the check in (5) must fail. An alternative fix would be to do refcounting on task_struct. This would result in each CPU holding a reference to the last task whose fpsimd state was loaded there. It's not clear whether this is preferable, and it involves higher overhead than the fix proposed in this patch. It would also move all the task_struct freeing work into the context switch critical section, or otherwise some deferred cleanup mechanism would need to be introduced, neither of which seems obviously justified. Fixes: 005f78cd8849 ("arm64: defer reloading a task's FPSIMD state to userland resume") Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel [will: word-smithed the comment so it makes more sense] Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/process.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 01753cd7d3f0..0e7394915c70 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -255,6 +255,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); + /* + * In case p was allocated the same task_struct pointer as some + * other recently-exited task, make sure p is disassociated from + * any cpu that may have run that now-exited task recently. + * Otherwise we could erroneously skip reloading the FPSIMD + * registers for p. + */ + fpsimd_flush_task_state(p); + if (likely(!(p->flags & PF_KTHREAD))) { *childregs = *current_pt_regs(); childregs->regs[0] = 0; -- GitLab From a0a2f97d03758c3d08ca68f587fe8062ff768fea Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Oct 2017 18:14:13 +0100 Subject: [PATCH 0200/1398] bus: arm-cci: Fix use of smp_processor_id() in preemptible context commit 4608af8aa53e7f3922ddee695d023b7bcd5cb35b upstream. The ARM CCI driver seem to be using smp_processor_id() in a preemptible context, which is likely to make a DEBUG_PREMPT kernel scream at boot time. Turn this into a get_cpu()/put_cpu() that extends over the CPU hotplug registration, making sure that we don't race against a CPU down operation. Signed-off-by: Marc Zyngier Acked-by: Mark Rutland Signed-off-by: Pawel Moll Signed-off-by: Greg Kroah-Hartman --- drivers/bus/arm-cci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 890082315054..10f56133b281 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -1755,14 +1755,17 @@ static int cci_pmu_probe(struct platform_device *pdev) raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock); mutex_init(&cci_pmu->reserve_mutex); atomic_set(&cci_pmu->active_events, 0); - cpumask_set_cpu(smp_processor_id(), &cci_pmu->cpus); + cpumask_set_cpu(get_cpu(), &cci_pmu->cpus); ret = cci_pmu_init(cci_pmu, pdev); - if (ret) + if (ret) { + put_cpu(); return ret; + } cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE, &cci_pmu->node); + put_cpu(); pr_info("ARM %s PMU driver probed", cci_pmu->model->name); return 0; } -- GitLab From 595aca74c67a2a6ffa78c3760a0c3027b90c36a7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 27 Aug 2017 11:06:50 +0100 Subject: [PATCH 0201/1398] bus: arm-ccn: Check memory allocation failure commit 24771179c5c138f0ea3ef88b7972979f62f2d5db upstream. Check memory allocation failures and return -ENOMEM in such cases This avoids a potential NULL pointer dereference. Signed-off-by: Christophe JAILLET Acked-by: Scott Branden Signed-off-by: Pawel Moll Signed-off-by: Greg Kroah-Hartman --- drivers/bus/arm-ccn.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index aee83462b796..45dfcc80523b 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -1271,6 +1271,10 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) int len = snprintf(NULL, 0, "ccn_%d", ccn->dt.id); name = devm_kzalloc(ccn->dev, len + 1, GFP_KERNEL); + if (!name) { + err = -ENOMEM; + goto error_choose_name; + } snprintf(name, len + 1, "ccn_%d", ccn->dt.id); } @@ -1318,6 +1322,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) error_pmu_register: error_set_affinity: +error_choose_name: ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id); for (i = 0; i < ccn->num_xps; i++) writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL); -- GitLab From 793eed33da5fd9745f8b62e2a93b4b4a3e36caa3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Oct 2017 18:14:12 +0100 Subject: [PATCH 0202/1398] bus: arm-ccn: Fix use of smp_processor_id() in preemptible context commit b18c2b9487d8e797fc0a757e57ac3645348c5fba upstream. Booting a DEBUG_PREEMPT enabled kernel on a CCN-based system results in the following splat: [...] arm-ccn e8000000.ccn: No access to interrupts, using timer. BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 caller is debug_smp_processor_id+0x1c/0x28 CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.13.0 #6111 Hardware name: AMD Seattle/Seattle, BIOS 17:08:23 Jun 26 2017 Call trace: [] dump_backtrace+0x0/0x278 [] show_stack+0x24/0x30 [] dump_stack+0x8c/0xb0 [] check_preemption_disabled+0xfc/0x100 [] debug_smp_processor_id+0x1c/0x28 [] arm_ccn_probe+0x358/0x4f0 [...] as we use smp_processor_id() in the wrong context. Turn this into a get_cpu()/put_cpu() that extends over the CPU hotplug registration, making sure that we don't race against a CPU down operation. Signed-off-by: Marc Zyngier Acked-by: Mark Rutland Signed-off-by: Pawel Moll Signed-off-by: Greg Kroah-Hartman --- drivers/bus/arm-ccn.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 45dfcc80523b..96d50e4b38c6 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -1301,7 +1301,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) } /* Pick one CPU which we will use to collect data from CCN... */ - cpumask_set_cpu(smp_processor_id(), &ccn->dt.cpu); + cpumask_set_cpu(get_cpu(), &ccn->dt.cpu); /* Also make sure that the overflow interrupt is handled by this CPU */ if (ccn->irq) { @@ -1318,10 +1318,12 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE, &ccn->dt.node); + put_cpu(); return 0; error_pmu_register: error_set_affinity: + put_cpu(); error_choose_name: ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id); for (i = 0; i < ccn->num_xps; i++) -- GitLab From 3f0597ae9a382f5771690005b3d43fb4c868cd99 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 11 Oct 2017 22:33:24 +0100 Subject: [PATCH 0203/1398] bus: arm-ccn: fix module unloading Error: Removing state 147 which has instances left. commit b69f63ebf553504739cc8534cbed31bd530c6f0b upstream. Unregistering the driver before calling cpuhp_remove_multi_state() removes any remaining hotplug cpu instances so __cpuhp_remove_state_cpuslocked() doesn't emit this warning: [ 268.748362] Error: Removing state 147 which has instances left. [ 268.748373] ------------[ cut here ]------------ [ 268.748386] WARNING: CPU: 2 PID: 5476 at kernel/cpu.c:1734 __cpuhp_remove_state_cpuslocked+0x454/0x4f0 [ 268.748389] Modules linked in: arm_ccn(-) [last unloaded: arm_ccn] [ 268.748403] CPU: 2 PID: 5476 Comm: rmmod Tainted: G W 4.14.0-rc4+ #3 [ 268.748406] Hardware name: AMD Seattle/Seattle, BIOS 10:18:39 Dec 8 2016 [ 268.748410] task: ffff8001a18ca000 task.stack: ffff80019c120000 [ 268.748416] PC is at __cpuhp_remove_state_cpuslocked+0x454/0x4f0 [ 268.748421] LR is at __cpuhp_remove_state_cpuslocked+0x448/0x4f0 [ 268.748425] pc : [] lr : [] pstate: 60000145 [ 268.748427] sp : ffff80019c127d30 [ 268.748430] x29: ffff80019c127d30 x28: ffff8001a18ca000 [ 268.748437] x27: ffff20000c2cb000 x26: 1fffe4000042d490 [ 268.748443] x25: ffff20000216a480 x24: 0000000000000000 [ 268.748449] x23: ffff20000b08e000 x22: 0000000000000001 [ 268.748455] x21: 0000000000000093 x20: 00000000000016f8 [ 268.748460] x19: ffff20000c2cbb80 x18: 0000ffffb5fe7c58 [ 268.748466] x17: 00000000004402d0 x16: 1fffe40001864f01 [ 268.748472] x15: ffff20000c4bf8b0 x14: 0000000000000000 [ 268.748477] x13: 0000000000007032 x12: ffff20000829ae48 [ 268.748483] x11: ffff20000c4bf000 x10: 0000000000000004 [ 268.748488] x9 : 0000000000006fbc x8 : ffff20000c318a40 [ 268.748494] x7 : 0000000000000000 x6 : ffff040001864f02 [ 268.748500] x5 : 0000000000000000 x4 : 0000000000000000 [ 268.748505] x3 : 0000000000000007 x2 : dfff200000000000 [ 268.748510] x1 : 000000000000ad3d x0 : 00000000000001f0 [ 268.748516] Call trace: [ 268.748521] Exception stack(0xffff80019c127bf0 to 0xffff80019c127d30) [ 268.748526] 7be0: 00000000000001f0 000000000000ad3d [ 268.748531] 7c00: dfff200000000000 0000000000000007 0000000000000000 0000000000000000 [ 268.748535] 7c20: ffff040001864f02 0000000000000000 ffff20000c318a40 0000000000006fbc [ 268.748539] 7c40: 0000000000000004 ffff20000c4bf000 ffff20000829ae48 0000000000007032 [ 268.748544] 7c60: 0000000000000000 ffff20000c4bf8b0 1fffe40001864f01 00000000004402d0 [ 268.748548] 7c80: 0000ffffb5fe7c58 ffff20000c2cbb80 00000000000016f8 0000000000000093 [ 268.748553] 7ca0: 0000000000000001 ffff20000b08e000 0000000000000000 ffff20000216a480 [ 268.748557] 7cc0: 1fffe4000042d490 ffff20000c2cb000 ffff8001a18ca000 ffff80019c127d30 [ 268.748562] 7ce0: ffff2000081729e0 ffff80019c127d30 ffff2000081729ec 0000000060000145 [ 268.748566] 7d00: 00000000000001f0 0000000000000000 0001000000000000 0000000000000000 [ 268.748569] 7d20: ffff80019c127d30 ffff2000081729ec [ 268.748575] [] __cpuhp_remove_state_cpuslocked+0x454/0x4f0 [ 268.748580] [] __cpuhp_remove_state+0x54/0x80 [ 268.748597] [] arm_ccn_exit+0x2c/0x70 [arm_ccn] [ 268.748604] [] SyS_delete_module+0x5a4/0x708 [ 268.748607] Exception stack(0xffff80019c127ec0 to 0xffff80019c128000) [ 268.748612] 7ec0: 0000000019bb7258 0000000000000800 ba64d0fb3d26a800 00000000000000da [ 268.748616] 7ee0: 0000ffffb6144e28 0000ffffcd95b409 fefefefefefefeff 7f7f7f7f7f7f7f7f [ 268.748621] 7f00: 000000000000006a 1999999999999999 0000ffffb6179000 0000000000bbcc6d [ 268.748625] 7f20: 0000ffffb6176b98 0000ffffcd95c2d0 0000ffffb5fe7b58 0000ffffb6163000 [ 268.748630] 7f40: 0000ffffb60ad3e0 00000000004402d0 0000ffffb5fe7c58 0000000019bb71f0 [ 268.748634] 7f60: 0000ffffcd95c740 0000000000000000 0000000019bb71f0 0000000000416700 [ 268.748639] 7f80: 0000000000000000 00000000004402e8 0000000019bb6010 0000ffffcd95c748 [ 268.748643] 7fa0: 0000000000000000 0000ffffcd95c460 00000000004113a8 0000ffffcd95c460 [ 268.748648] 7fc0: 0000ffffb60ad3e8 0000000080000000 0000000019bb7258 000000000000006a [ 268.748652] 7fe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 268.748657] [] __sys_trace_return+0x0/0x4 [ 268.748661] ---[ end trace a996d358dcaa7f9c ]--- Fixes: 8df038725ad5 ("bus/arm-ccn: Use cpu-hp's multi instance support instead custom list") Signed-off-by: Kim Phillips Acked-by: Sebastian Andrzej Siewior Signed-off-by: Pawel Moll Signed-off-by: Greg Kroah-Hartman --- drivers/bus/arm-ccn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 96d50e4b38c6..f0249899fc96 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -1585,8 +1585,8 @@ static int __init arm_ccn_init(void) static void __exit arm_ccn_exit(void) { - cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE); platform_driver_unregister(&arm_ccn_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE); } module_init(arm_ccn_init); -- GitLab From bde6667a7df125cbbb1b9ce197e575b0fba15338 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 6 Oct 2017 15:04:33 +0200 Subject: [PATCH 0204/1398] crypto: talitos - fix AEAD test failures commit ec8c7d14acc0a477429d3a6fade5dab72c996c82 upstream. AEAD tests fail when destination SG list has more than 1 element. [ 2.058752] alg: aead: Test 1 failed on encryption for authenc-hmac-sha1-cbc-aes-talitos [ 2.066965] 00000000: 53 69 6e 67 6c 65 20 62 6c 6f 63 6b 20 6d 73 67 00000010: c0 43 ff 74 c0 43 ff e0 de 83 d1 20 de 84 8e 54 00000020: de 83 d7 c4 [ 2.082138] alg: aead: Test 1 failed on encryption for authenc-hmac-sha1-cbc-aes-talitos [ 2.090435] 00000000: 53 69 6e 67 6c 65 20 62 6c 6f 63 6b 20 6d 73 67 00000010: de 84 ea 58 c0 93 1a 24 de 84 e8 59 de 84 f1 20 00000020: 00 00 00 00 [ 2.105721] alg: aead: Test 1 failed on encryption for authenc-hmac-sha1-cbc-3des-talitos [ 2.114259] 00000000: 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 73 74 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 00000030: 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 00000040: 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 00000050: 65 72 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 00000060: 72 63 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 00000070: 63 65 65 72 73 74 54 20 6f 6f 4d 20 6e 61 0a 79 00000080: c0 50 f1 ac c0 50 f3 38 c0 50 f3 94 c0 50 f5 30 00000090: c0 99 74 3c [ 2.166410] alg: aead: Test 1 failed on encryption for authenc-hmac-sha1-cbc-3des-talitos [ 2.174794] 00000000: 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 73 74 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 00000030: 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 00000040: 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 00000050: 65 72 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 00000060: 72 63 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 00000070: 63 65 65 72 73 74 54 20 6f 6f 4d 20 6e 61 0a 79 00000080: c0 50 f1 ac c0 50 f3 38 c0 50 f3 94 c0 50 f5 30 00000090: c0 99 74 3c [ 2.226486] alg: No test for authenc(hmac(sha224),cbc(aes)) (authenc-hmac-sha224-cbc-aes-talitos) [ 2.236459] alg: No test for authenc(hmac(sha224),cbc(aes)) (authenc-hmac-sha224-cbc-aes-talitos) [ 2.247196] alg: aead: Test 1 failed on encryption for authenc-hmac-sha224-cbc-3des-talitos [ 2.255555] 00000000: 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 73 74 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 00000030: 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 00000040: 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 00000050: 65 72 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 00000060: 72 63 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 00000070: 63 65 65 72 73 74 54 20 6f 6f 4d 20 6e 61 0a 79 00000080: c0 50 f1 ac c0 50 f3 38 c0 50 f3 94 c0 50 f5 30 00000090: c0 99 74 3c c0 96 e5 b8 [ 2.309004] alg: aead: Test 1 failed on encryption for authenc-hmac-sha224-cbc-3des-talitos [ 2.317562] 00000000: 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 73 74 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 00000030: 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 00000040: 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 00000050: 65 72 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 00000060: 72 63 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 00000070: 63 65 65 72 73 74 54 20 6f 6f 4d 20 6e 61 0a 79 00000080: c0 50 f1 ac c0 50 f3 38 c0 50 f3 94 c0 50 f5 30 00000090: c0 99 74 3c c0 96 e5 b8 [ 2.370710] alg: aead: Test 1 failed on encryption for authenc-hmac-sha256-cbc-aes-talitos [ 2.379177] 00000000: 53 69 6e 67 6c 65 20 62 6c 6f 63 6b 20 6d 73 67 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 [ 2.397863] alg: aead: Test 1 failed on encryption for authenc-hmac-sha256-cbc-aes-talitos [ 2.406134] 00000000: 53 69 6e 67 6c 65 20 62 6c 6f 63 6b 20 6d 73 67 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 [ 2.424789] alg: aead: Test 1 failed on encryption for authenc-hmac-sha256-cbc-3des-talitos [ 2.433491] 00000000: 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 73 74 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 00000030: 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 00000040: 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 00000050: 65 72 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 00000060: 72 63 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 00000070: 63 65 65 72 73 74 54 20 6f 6f 4d 20 6e 61 0a 79 00000080: c0 50 f1 ac c0 50 f3 38 c0 50 f3 94 c0 50 f5 30 00000090: c0 99 74 3c c0 96 e5 b8 c0 96 e9 20 c0 00 3d dc [ 2.488832] alg: aead: Test 1 failed on encryption for authenc-hmac-sha256-cbc-3des-talitos [ 2.497387] 00000000: 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 73 74 00000010: 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 74 65 00000020: 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 65 72 00000030: 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 72 63 00000040: 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 63 65 00000050: 65 72 73 74 54 20 6f 6f 4d 20 6e 61 20 79 65 53 00000060: 72 63 74 65 20 73 6f 54 20 6f 61 4d 79 6e 53 20 00000070: 63 65 65 72 73 74 54 20 6f 6f 4d 20 6e 61 0a 79 00000080: c0 50 f1 ac c0 50 f3 38 c0 50 f3 94 c0 50 f5 30 00000090: c0 99 74 3c c0 96 e5 b8 c0 96 e9 20 c0 00 3d dc This patch fixes that. Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index e2d323fa2437..18a1729969e2 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1232,12 +1232,11 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, sg_link_tbl_len += authsize; } - sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc, - &desc->ptr[4], sg_count, areq->assoclen, - tbl_off); + ret = talitos_sg_map(dev, areq->src, cryptlen, edesc, &desc->ptr[4], + sg_count, areq->assoclen, tbl_off); - if (sg_count > 1) { - tbl_off += sg_count; + if (ret > 1) { + tbl_off += ret; sync_needed = true; } -- GitLab From 5272b0e1c38296d28e90af857f10159812c523f7 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 6 Oct 2017 15:04:35 +0200 Subject: [PATCH 0205/1398] crypto: talitos - fix memory corruption on SEC2 commit e04a61bebc5da1535b6f194b464295b8d558e2fc upstream. On SEC2, when using the old descriptors type (hmac snoop no afeu) for doing IPsec, the CICV out pointeur points out of the allocated memory. [ 2.502554] ============================================================================= [ 2.510740] BUG dma-kmalloc-256 (Not tainted): Redzone overwritten [ 2.516907] ----------------------------------------------------------------------------- [ 2.516907] [ 2.526535] Disabling lock debugging due to kernel taint [ 2.531845] INFO: 0xde858108-0xde85810b. First byte 0xf8 instead of 0xcc [ 2.538549] INFO: Allocated in 0x806181a9 age=0 cpu=0 pid=58 [ 2.544229] __kmalloc+0x374/0x564 [ 2.547649] talitos_edesc_alloc+0x17c/0x48c [ 2.551929] aead_edesc_alloc+0x80/0x154 [ 2.555863] aead_encrypt+0x30/0xe0 [ 2.559368] __test_aead+0x5a0/0x1f3c [ 2.563042] test_aead+0x2c/0x110 [ 2.566371] alg_test_aead+0x5c/0xf4 [ 2.569958] alg_test+0x1dc/0x5a0 [ 2.573305] cryptomgr_test+0x50/0x70 [ 2.576984] kthread+0xd8/0x134 [ 2.580155] ret_from_kernel_thread+0x5c/0x64 [ 2.584534] INFO: Freed in ipsec_esp_encrypt_done+0x130/0x240 age=6 cpu=0 pid=0 [ 2.591839] ipsec_esp_encrypt_done+0x130/0x240 [ 2.596395] flush_channel+0x1dc/0x488 [ 2.600161] talitos2_done_4ch+0x30/0x200 [ 2.604185] tasklet_action+0xa0/0x13c [ 2.607948] __do_softirq+0x148/0x6cc [ 2.611623] irq_exit+0xc0/0x124 [ 2.614869] call_do_irq+0x24/0x3c [ 2.618292] do_IRQ+0x78/0x108 [ 2.621369] ret_from_except+0x0/0x14 [ 2.625055] finish_task_switch+0x58/0x350 [ 2.629165] schedule+0x80/0x134 [ 2.632409] schedule_preempt_disabled+0x38/0xc8 [ 2.637042] cpu_startup_entry+0xe4/0x190 [ 2.641074] start_kernel+0x3f4/0x408 [ 2.644741] 0x3438 [ 2.646857] INFO: Slab 0xdffbdb00 objects=9 used=1 fp=0xde8581c0 flags=0x0080 [ 2.653978] INFO: Object 0xde858008 @offset=8 fp=0xca4395df [ 2.653978] [ 2.661032] Redzone de858000: cc cc cc cc cc cc cc cc ........ [ 2.669029] Object de858008: 00 00 00 02 00 00 00 02 00 6b 6b 6b 1e 83 ea 28 .........kkk...( [ 2.677628] Object de858018: 00 00 00 70 1e 85 80 64 ff 73 1d 21 6b 6b 6b 6b ...p...d.s.!kkkk [ 2.686228] Object de858028: 00 20 00 00 1e 84 17 24 00 10 00 00 1e 85 70 00 . .....$......p. [ 2.694829] Object de858038: 00 18 00 00 1e 84 17 44 00 08 00 00 1e 83 ea 28 .......D.......( [ 2.703430] Object de858048: 00 80 00 00 1e 84 f0 00 00 80 00 00 1e 85 70 10 ..............p. [ 2.712030] Object de858058: 00 20 6b 00 1e 85 80 f4 6b 6b 6b 6b 00 80 02 00 . k.....kkkk.... [ 2.720629] Object de858068: 1e 84 f0 00 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b ....kkkkkkkkkkkk [ 2.729230] Object de858078: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.737830] Object de858088: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.746429] Object de858098: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.755029] Object de8580a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.763628] Object de8580b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.772229] Object de8580c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.780829] Object de8580d8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [ 2.789430] Object de8580e8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 73 b0 ea 9f kkkkkkkkkkkks... [ 2.798030] Object de8580f8: e8 18 80 d6 56 38 44 c0 db e3 4f 71 f7 ce d1 d3 ....V8D...Oq.... [ 2.806629] Redzone de858108: f8 bd 3e 4f ..>O [ 2.814279] Padding de8581b0: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ [ 2.822283] CPU: 0 PID: 0 Comm: swapper Tainted: G B 4.9.50-g995be12679 #179 [ 2.831819] Call Trace: [ 2.834301] [dffefd20] [c01aa9a8] check_bytes_and_report+0x100/0x194 (unreliable) [ 2.841801] [dffefd50] [c01aac3c] check_object+0x200/0x530 [ 2.847306] [dffefd80] [c01ae584] free_debug_processing+0x290/0x690 [ 2.853585] [dffefde0] [c01aec8c] __slab_free+0x308/0x628 [ 2.859000] [dffefe80] [c05057f4] ipsec_esp_encrypt_done+0x130/0x240 [ 2.865378] [dffefeb0] [c05002c4] flush_channel+0x1dc/0x488 [ 2.870968] [dffeff10] [c05007a8] talitos2_done_4ch+0x30/0x200 [ 2.876814] [dffeff30] [c002fe38] tasklet_action+0xa0/0x13c [ 2.882399] [dffeff60] [c002f118] __do_softirq+0x148/0x6cc [ 2.887896] [dffeffd0] [c002f954] irq_exit+0xc0/0x124 [ 2.892968] [dffefff0] [c0013adc] call_do_irq+0x24/0x3c [ 2.898213] [c0d4be00] [c000757c] do_IRQ+0x78/0x108 [ 2.903113] [c0d4be30] [c0015c08] ret_from_except+0x0/0x14 [ 2.908634] --- interrupt: 501 at finish_task_switch+0x70/0x350 [ 2.908634] LR = finish_task_switch+0x58/0x350 [ 2.919327] [c0d4bf20] [c085e1d4] schedule+0x80/0x134 [ 2.924398] [c0d4bf50] [c085e2c0] schedule_preempt_disabled+0x38/0xc8 [ 2.930853] [c0d4bf60] [c007f064] cpu_startup_entry+0xe4/0x190 [ 2.936707] [c0d4bfb0] [c096c434] start_kernel+0x3f4/0x408 [ 2.942198] [c0d4bff0] [00003438] 0x3438 [ 2.946137] FIX dma-kmalloc-256: Restoring 0xde858108-0xde85810b=0xcc [ 2.946137] [ 2.954158] FIX dma-kmalloc-256: Object at 0xde858008 not freed This patch reworks the handling of the CICV out in order to properly handle all cases. Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 42 ++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 18a1729969e2..3cfd1005935e 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1247,14 +1247,15 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE); } - sg_count = talitos_sg_map(dev, areq->dst, cryptlen, edesc, - &desc->ptr[5], sg_count, areq->assoclen, - tbl_off); + ret = talitos_sg_map(dev, areq->dst, cryptlen, edesc, &desc->ptr[5], + sg_count, areq->assoclen, tbl_off); if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) to_talitos_ptr_ext_or(&desc->ptr[5], authsize, is_sec1); - if (sg_count > 1) { + /* ICV data */ + if (ret > 1) { + tbl_off += ret; edesc->icv_ool = true; sync_needed = true; @@ -1264,9 +1265,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, sizeof(struct talitos_ptr) + authsize; /* Add an entry to the link table for ICV data */ - tbl_ptr += sg_count - 1; - to_talitos_ptr_ext_set(tbl_ptr, 0, is_sec1); - tbl_ptr++; + to_talitos_ptr_ext_set(tbl_ptr - 1, 0, is_sec1); to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN, is_sec1); to_talitos_ptr_len(tbl_ptr, authsize, is_sec1); @@ -1274,18 +1273,33 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, /* icv data follows link tables */ to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + offset, is_sec1); + } else { + dma_addr_t addr = edesc->dma_link_tbl; + + if (is_sec1) + addr += areq->assoclen + cryptlen; + else + addr += sizeof(struct talitos_ptr) * tbl_off; + + to_talitos_ptr(&desc->ptr[6], addr, is_sec1); + to_talitos_ptr_len(&desc->ptr[6], authsize, is_sec1); + } + } else if (!(desc->hdr & DESC_HDR_TYPE_IPSEC_ESP)) { + ret = talitos_sg_map(dev, areq->dst, authsize, edesc, + &desc->ptr[6], sg_count, areq->assoclen + + cryptlen, + tbl_off); + if (ret > 1) { + tbl_off += ret; + edesc->icv_ool = true; + sync_needed = true; + } else { + edesc->icv_ool = false; } } else { edesc->icv_ool = false; } - /* ICV data */ - if (!(desc->hdr & DESC_HDR_TYPE_IPSEC_ESP)) { - to_talitos_ptr_len(&desc->ptr[6], authsize, is_sec1); - to_talitos_ptr(&desc->ptr[6], edesc->dma_link_tbl + - areq->assoclen + cryptlen, is_sec1); - } - /* iv out */ if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, -- GitLab From 552f74cbd6ec806ab5ce45336a26cae1e015dbe2 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 6 Oct 2017 15:04:37 +0200 Subject: [PATCH 0206/1398] crypto: talitos - fix setkey to check key weakness commit f384cdc4faf350fdb6ad93c5f26952b9ba7c7566 upstream. Crypto manager test report the following failures: [ 3.061081] alg: skcipher: setkey failed on test 5 for ecb-des-talitos: flags=100 [ 3.069342] alg: skcipher-ddst: setkey failed on test 5 for ecb-des-talitos: flags=100 [ 3.077754] alg: skcipher-ddst: setkey failed on test 5 for ecb-des-talitos: flags=100 This is due to setkey being expected to detect weak keys. Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 3cfd1005935e..cc596bdd6874 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1507,12 +1507,20 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, const u8 *key, unsigned int keylen) { struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); + u32 tmp[DES_EXPKEY_WORDS]; if (keylen > TALITOS_MAX_KEY_SIZE) { crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } + if (unlikely(crypto_ablkcipher_get_flags(cipher) & + CRYPTO_TFM_REQ_WEAK_KEY) && + !des_ekey(tmp, key)) { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_WEAK_KEY); + return -EINVAL; + } + memcpy(&ctx->key, key, keylen); ctx->keylen = keylen; -- GitLab From 6bf30e664221488800acc9b9cbe7b0059865f02d Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 6 Oct 2017 15:04:39 +0200 Subject: [PATCH 0207/1398] crypto: talitos - fix AEAD for sha224 on non sha224 capable chips commit 6cda075aff67a1b9b5ba1b2818091dc939643b6c upstream. sha224 AEAD test fails with: [ 2.803125] talitos ff020000.crypto: DEUISR 0x00000000_00000000 [ 2.808743] talitos ff020000.crypto: MDEUISR 0x80100000_00000000 [ 2.814678] talitos ff020000.crypto: DESCBUF 0x20731f21_00000018 [ 2.820616] talitos ff020000.crypto: DESCBUF 0x0628d64c_00000010 [ 2.826554] talitos ff020000.crypto: DESCBUF 0x0631005c_00000018 [ 2.832492] talitos ff020000.crypto: DESCBUF 0x0628d664_00000008 [ 2.838430] talitos ff020000.crypto: DESCBUF 0x061b13a0_00000080 [ 2.844369] talitos ff020000.crypto: DESCBUF 0x0631006c_00000080 [ 2.850307] talitos ff020000.crypto: DESCBUF 0x0631006c_00000018 [ 2.856245] talitos ff020000.crypto: DESCBUF 0x063100ec_00000000 [ 2.884972] talitos ff020000.crypto: failed to reset channel 0 [ 2.890503] talitos ff020000.crypto: done overflow, internal time out, or rngu error: ISR 0x20000000_00020000 [ 2.900652] alg: aead: encryption failed on test 1 for authenc-hmac-sha224-cbc-3des-talitos: ret=22 This is due to SHA224 not being supported by the HW. Allthough for hash we are able to init the hash context by SW, it is not possible for AEAD. Therefore SHA224 AEAD has to be deactivated. Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index cc596bdd6874..70036f02f4d1 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -3068,6 +3068,11 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, t_alg->algt.alg.aead.setkey = aead_setkey; t_alg->algt.alg.aead.encrypt = aead_encrypt; t_alg->algt.alg.aead.decrypt = aead_decrypt; + if (!(priv->features & TALITOS_FTR_SHA224_HWINIT) && + !strncmp(alg->cra_name, "authenc(hmac(sha224)", 20)) { + kfree(t_alg); + return ERR_PTR(-ENOTSUPP); + } break; case CRYPTO_ALG_TYPE_AHASH: alg = &t_alg->algt.alg.hash.halg.base; -- GitLab From 24ab6e7f6b6199964331eda3928a3dafdd20adbe Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 6 Oct 2017 15:04:41 +0200 Subject: [PATCH 0208/1398] crypto: talitos - fix use of sg_link_tbl_len commit fbb22137c4d9bab536958b152d096fb3f98020ea upstream. sg_link_tbl_len shall be used instead of cryptlen, otherwise SECs which perform HW CICV verification will fail. Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 70036f02f4d1..1ae09c2a8430 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1232,8 +1232,8 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, sg_link_tbl_len += authsize; } - ret = talitos_sg_map(dev, areq->src, cryptlen, edesc, &desc->ptr[4], - sg_count, areq->assoclen, tbl_off); + ret = talitos_sg_map(dev, areq->src, sg_link_tbl_len, edesc, + &desc->ptr[4], sg_count, areq->assoclen, tbl_off); if (ret > 1) { tbl_off += ret; -- GitLab From 47ab72034e9d7abf4538bf4580610cff8aaa576c Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 6 Oct 2017 15:04:43 +0200 Subject: [PATCH 0209/1398] crypto: talitos - fix ctr-aes-talitos commit 70d355ccea899dad47dc22d3a4406998f55143fd upstream. ctr-aes-talitos test fails as follows on SEC2 [ 0.837427] alg: skcipher: Test 1 failed (invalid result) on encryption for ctr-aes-talitos [ 0.845763] 00000000: 16 36 d5 ee 34 f8 06 25 d7 7f 8e 56 ca 88 43 45 [ 0.852345] 00000010: f9 3f f7 17 2a b2 12 23 30 43 09 15 82 dd e1 97 [ 0.858940] 00000020: a7 f7 32 b5 eb 25 06 13 9a ec f5 29 25 f8 4d 66 [ 0.865366] 00000030: b0 03 5b 8e aa 9a 42 b6 19 33 8a e2 9d 65 96 95 This patch fixes the descriptor type which is special for CTR AES Fixes: 5e75ae1b3cef6 ("crypto: talitos - add new crypto modes") Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 1ae09c2a8430..1c8d79d93098 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -2635,7 +2635,7 @@ static struct talitos_alg_template driver_algs[] = { .ivsize = AES_BLOCK_SIZE, } }, - .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + .desc_hdr_template = DESC_HDR_TYPE_AESU_CTR_NONSNOOP | DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_CTR, }, -- GitLab From 112b8a8f558de4004c2a0f7a6d04aba3520fd4ff Mon Sep 17 00:00:00 2001 From: John Keeping Date: Mon, 27 Nov 2017 18:15:40 +0000 Subject: [PATCH 0210/1398] usb: f_fs: Force Reserved1=1 in OS_DESC_EXT_COMPAT commit a3acc696085e112733d191a77b106e67a4fa110b upstream. The specification says that the Reserved1 field in OS_DESC_EXT_COMPAT must have the value "1", but when this feature was first implemented we rejected any non-zero values. This was adjusted to accept all non-zero values (while now rejecting zero) in commit 53642399aa71 ("usb: gadget: f_fs: Fix wrong check on reserved1 of OS_DESC_EXT_COMPAT"), but that breaks any userspace programs that worked previously by returning EINVAL when Reserved1 == 0 which was previously the only value that succeeded! If we just set the field to "1" ourselves, both old and new userspace programs continue to work correctly and, as a bonus, old programs are now compliant with the specification without having to fix anything themselves. Fixes: 53642399aa71 ("usb: gadget: f_fs: Fix wrong check on reserved1 of OS_DESC_EXT_COMPAT") Signed-off-by: John Keeping Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 4fce83266926..346a630cebd5 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2262,9 +2262,18 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, int i; if (len < sizeof(*d) || - d->bFirstInterfaceNumber >= ffs->interfaces_count || - d->Reserved1) + d->bFirstInterfaceNumber >= ffs->interfaces_count) return -EINVAL; + if (d->Reserved1 != 1) { + /* + * According to the spec, Reserved1 must be set to 1 + * but older kernels incorrectly rejected non-zero + * values. We fix it here to avoid returning EINVAL + * in response to values we used to accept. + */ + pr_debug("usb_ext_compat_desc::Reserved1 forced to 1\n"); + d->Reserved1 = 1; + } for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i) if (d->Reserved2[i]) return -EINVAL; -- GitLab From 6192f870abf25d6bb83cb5cb85b072b5dcc40ba4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 24 Nov 2017 23:49:34 +0000 Subject: [PATCH 0211/1398] ARM: BUG if jumping to usermode address in kernel mode commit 8bafae202c82dc257f649ea3c275a0f35ee15113 upstream. Detect if we are returning to usermode via the normal kernel exit paths but the saved PSR value indicates that we are in kernel mode. This could occur due to corrupted stack state, which has been observed with "ftracetest". This ensures that we catch the problem case before we get to user code. Signed-off-by: Russell King Cc: Alex Shi Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/assembler.h | 18 ++++++++++++++++++ arch/arm/kernel/entry-header.S | 6 ++++++ 2 files changed, 24 insertions(+) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 68b06f9c65de..12f99fd2e3b2 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -516,4 +516,22 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm + .macro bug, msg, line +#ifdef CONFIG_THUMB2_KERNEL +1: .inst 0xde02 +#else +1: .inst 0xe7f001f2 +#endif +#ifdef CONFIG_DEBUG_BUGVERBOSE + .pushsection .rodata.str, "aMS", %progbits, 1 +2: .asciz "\msg" + .popsection + .pushsection __bug_table, "aw" + .align 2 + .word 1b, 2b + .hword \line + .popsection +#endif + .endm + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 6391728c8f03..75f7a4e8541a 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -299,6 +299,8 @@ mov r2, sp ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr ldr lr, [r2, #\offset + S_PC]! @ get pc + tst r1, #0xcf + bne 1f msr spsr_cxsf, r1 @ save in spsr_svc #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) @ We must avoid clrex due to Cortex-A15 erratum #830321 @@ -313,6 +315,7 @@ @ after ldm {}^ add sp, sp, #\offset + PT_REGS_SIZE movs pc, lr @ return & move spsr_svc into cpsr +1: bug "Returning to usermode but unexpected PSR bits set?", \@ #elif defined(CONFIG_CPU_V7M) @ V7M restore. @ Note that we don't need to do clrex here as clearing the local @@ -328,6 +331,8 @@ ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC] @ get pc add sp, sp, #\offset + S_SP + tst r1, #0xcf + bne 1f msr spsr_cxsf, r1 @ save in spsr_svc @ We must avoid clrex due to Cortex-A15 erratum #830321 @@ -340,6 +345,7 @@ .endif add sp, sp, #PT_REGS_SIZE - S_SP movs pc, lr @ return & move spsr_svc into cpsr +1: bug "Returning to usermode but unexpected PSR bits set?", \@ #endif /* !CONFIG_THUMB2_KERNEL */ .endm -- GitLab From 5bcb9c842a5362bba61383cbb22799f2f0ce63a7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Nov 2017 11:22:42 +0000 Subject: [PATCH 0212/1398] ARM: avoid faulting on qemu commit 3aaf33bebda8d4ffcc0fc8ef39e6c1ac68823b11 upstream. When qemu starts a kernel in a bare environment, the default SCR has the AW and FW bits clear, which means that the kernel can't modify the PSR A or PSR F bits, and means that FIQs and imprecise aborts are always masked. When running uboot under qemu, the AW and FW SCR bits are set, and the kernel functions normally - and this is how real hardware behaves. Fix this for qemu by ignoring the FIQ bit. Fixes: 8bafae202c82 ("ARM: BUG if jumping to usermode address in kernel mode") Signed-off-by: Russell King Cc: Alex Shi Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/entry-header.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 75f7a4e8541a..e056c9a9aa9d 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -299,7 +299,7 @@ mov r2, sp ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr ldr lr, [r2, #\offset + S_PC]! @ get pc - tst r1, #0xcf + tst r1, #PSR_I_BIT | 0x0f bne 1f msr spsr_cxsf, r1 @ save in spsr_svc #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) @@ -331,7 +331,7 @@ ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC] @ get pc add sp, sp, #\offset + S_SP - tst r1, #0xcf + tst r1, #PSR_I_BIT | 0x0f bne 1f msr spsr_cxsf, r1 @ save in spsr_svc -- GitLab From 7bdd685cef4b4ae665970f330bb426f85c2be029 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 13 Apr 2017 14:56:17 -0700 Subject: [PATCH 0213/1398] thp: reduce indentation level in change_huge_pmd() commit 0a85e51d37645e9ce57e5e1a30859e07810ed07c upstream. Patch series "thp: fix few MADV_DONTNEED races" For MADV_DONTNEED to work properly with huge pages, it's critical to not clear pmd intermittently unless you hold down_write(mmap_sem). Otherwise MADV_DONTNEED can miss the THP which can lead to userspace breakage. See example of such race in commit message of patch 2/4. All these races are found by code inspection. I haven't seen them triggered. I don't think it's worth to apply them to stable@. This patch (of 4): Restructure code in preparation for a fix. Link: http://lkml.kernel.org/r/20170302151034.27829-2-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Hillf Danton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [jwang: adjust context for 4.9] Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 52 ++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 3cae1dcf069c..660e7323fb2d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1509,37 +1509,37 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, { struct mm_struct *mm = vma->vm_mm; spinlock_t *ptl; - int ret = 0; + pmd_t entry; + bool preserve_write; + int ret; ptl = __pmd_trans_huge_lock(pmd, vma); - if (ptl) { - pmd_t entry; - bool preserve_write = prot_numa && pmd_write(*pmd); - ret = 1; + if (!ptl) + return 0; - /* - * Avoid trapping faults against the zero page. The read-only - * data is likely to be read-cached on the local CPU and - * local/remote hits to the zero page are not interesting. - */ - if (prot_numa && is_huge_zero_pmd(*pmd)) { - spin_unlock(ptl); - return ret; - } + preserve_write = prot_numa && pmd_write(*pmd); + ret = 1; - if (!prot_numa || !pmd_protnone(*pmd)) { - entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); - entry = pmd_modify(entry, newprot); - if (preserve_write) - entry = pmd_mkwrite(entry); - ret = HPAGE_PMD_NR; - set_pmd_at(mm, addr, pmd, entry); - BUG_ON(vma_is_anonymous(vma) && !preserve_write && - pmd_write(entry)); - } - spin_unlock(ptl); - } + /* + * Avoid trapping faults against the zero page. The read-only + * data is likely to be read-cached on the local CPU and + * local/remote hits to the zero page are not interesting. + */ + if (prot_numa && is_huge_zero_pmd(*pmd)) + goto unlock; + if (prot_numa && pmd_protnone(*pmd)) + goto unlock; + + entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); + entry = pmd_modify(entry, newprot); + if (preserve_write) + entry = pmd_mkwrite(entry); + ret = HPAGE_PMD_NR; + set_pmd_at(mm, addr, pmd, entry); + BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry)); +unlock: + spin_unlock(ptl); return ret; } -- GitLab From c2edc33d4abfbdedcfb068d86dffd216f6fca69f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 13 Apr 2017 14:56:20 -0700 Subject: [PATCH 0214/1398] thp: fix MADV_DONTNEED vs. numa balancing race commit ced108037c2aa542b3ed8b7afd1576064ad1362a upstream. In case prot_numa, we are under down_read(mmap_sem). It's critical to not clear pmd intermittently to avoid race with MADV_DONTNEED which is also under down_read(mmap_sem): CPU0: CPU1: change_huge_pmd(prot_numa=1) pmdp_huge_get_and_clear_notify() madvise_dontneed() zap_pmd_range() pmd_trans_huge(*pmd) == 0 (without ptl) // skip the pmd set_pmd_at(); // pmd is re-established The race makes MADV_DONTNEED miss the huge pmd and don't clear it which may break userspace. Found by code analysis, never saw triggered. Link: http://lkml.kernel.org/r/20170302151034.27829-3-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Hillf Danton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [jwang: adjust context for 4.9 ] Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 660e7323fb2d..c234c078693c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1531,7 +1531,39 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (prot_numa && pmd_protnone(*pmd)) goto unlock; - entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); + /* + * In case prot_numa, we are under down_read(mmap_sem). It's critical + * to not clear pmd intermittently to avoid race with MADV_DONTNEED + * which is also under down_read(mmap_sem): + * + * CPU0: CPU1: + * change_huge_pmd(prot_numa=1) + * pmdp_huge_get_and_clear_notify() + * madvise_dontneed() + * zap_pmd_range() + * pmd_trans_huge(*pmd) == 0 (without ptl) + * // skip the pmd + * set_pmd_at(); + * // pmd is re-established + * + * The race makes MADV_DONTNEED miss the huge pmd and don't clear it + * which may break userspace. + * + * pmdp_invalidate() is required to make sure we don't miss + * dirty/young flags set by hardware. + */ + entry = *pmd; + pmdp_invalidate(vma, addr, pmd); + + /* + * Recover dirty/young flags. It relies on pmdp_invalidate to not + * corrupt them. + */ + if (pmd_dirty(*pmd)) + entry = pmd_mkdirty(entry); + if (pmd_young(*pmd)) + entry = pmd_mkyoung(entry); + entry = pmd_modify(entry, newprot); if (preserve_write) entry = pmd_mkwrite(entry); -- GitLab From 6a53078b9357bccfb2f1e30ee0f3dc174ab740d3 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 13 Apr 2017 14:56:23 -0700 Subject: [PATCH 0215/1398] mm: drop unused pmdp_huge_get_and_clear_notify() commit c0c379e2931b05facef538e53bf3b21f283d9a0b upstream. Dave noticed that after fixing MADV_DONTNEED vs numa balancing race the last pmdp_huge_get_and_clear_notify() user is gone. Let's drop the helper. Link: http://lkml.kernel.org/r/20170306112047.24809-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [jwang: adjust context for 4.9] Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- include/linux/mmu_notifier.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 25c0dc31f084..854dfa6fa6e3 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -381,18 +381,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) ___pmd; \ }) -#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \ -({ \ - unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ - pmd_t ___pmd; \ - \ - ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \ - mmu_notifier_invalidate_range(__mm, ___haddr, \ - ___haddr + HPAGE_PMD_SIZE); \ - \ - ___pmd; \ -}) - /* * set_pte_at_notify() sets the pte _after_ running the notifier. * This is safe to start by updating the secondary MMUs, because the primary MMU @@ -480,7 +468,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define pmdp_clear_young_notify pmdp_test_and_clear_young #define ptep_clear_flush_notify ptep_clear_flush #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush -#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear #define set_pte_at_notify set_pte_at #endif /* CONFIG_MMU_NOTIFIER */ -- GitLab From b6c15a7c6e79da70b1dd0b3a7b12cb979a329c97 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 7 Dec 2017 23:21:06 -0500 Subject: [PATCH 0216/1398] Revert "drm/armada: Fix compile fail" This reverts commit 82f260d472c3b4dbb7324624e395c3e91f73a040. Not required on < 4.10. Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/armada/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/armada/Makefile b/drivers/gpu/drm/armada/Makefile index 26412d2f8c98..ffd673615772 100644 --- a/drivers/gpu/drm/armada/Makefile +++ b/drivers/gpu/drm/armada/Makefile @@ -4,5 +4,3 @@ armada-y += armada_510.o armada-$(CONFIG_DEBUG_FS) += armada_debugfs.o obj-$(CONFIG_DRM_ARMADA) := armada.o - -CFLAGS_armada_trace.o := -I$(src) -- GitLab From 7c4615c853fa7969755f7b0e8bcfaaa2ec264243 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 7 Dec 2017 23:23:42 -0500 Subject: [PATCH 0217/1398] Revert "spi: SPI_FSL_DSPI should depend on HAS_DMA" This reverts commit dadab2d4e3cf708ceba22ecddd94aedfecb39199. Not required on < 4.10. Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/spi/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 8e281e47afec..b7995474148c 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -365,7 +365,6 @@ config SPI_FSL_SPI config SPI_FSL_DSPI tristate "Freescale DSPI controller" select REGMAP_MMIO - depends on HAS_DMA depends on SOC_VF610 || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST help This enables support for the Freescale DSPI controller in master -- GitLab From 5d4d0a95437efc94c3b10a2ea04a81e5e3863dfb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 16 Feb 2017 01:43:58 +0100 Subject: [PATCH 0218/1398] ARM: 8657/1: uaccess: consistently check object sizes [ Upstream commit 32b143637e8180f5d5cea54320c769210dea4f19 ] In commit 76624175dcae ("arm64: uaccess: consistently check object sizes"), the object size checks are moved outside the access_ok() so that bad destinations are detected before hitting the "memset(dest, 0, size)" in the copy_from_user() failure path. This makes the same change for arm, with attention given to possibly extracting the uaccess routines into a common header file for all architectures in the future. Suggested-by: Mark Rutland Signed-off-by: Kees Cook Signed-off-by: Russell King Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/uaccess.h | 44 ++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 1f59ea051bab..b7e0125c0bbf 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -478,11 +478,10 @@ extern unsigned long __must_check arm_copy_from_user(void *to, const void __user *from, unsigned long n); static inline unsigned long __must_check -__copy_from_user(void *to, const void __user *from, unsigned long n) +__arch_copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned int __ua_flags; - check_object_size(to, n, false); __ua_flags = uaccess_save_and_enable(); n = arm_copy_from_user(to, from, n); uaccess_restore(__ua_flags); @@ -495,18 +494,15 @@ extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n); static inline unsigned long __must_check -__copy_to_user(void __user *to, const void *from, unsigned long n) +__arch_copy_to_user(void __user *to, const void *from, unsigned long n) { #ifndef CONFIG_UACCESS_WITH_MEMCPY unsigned int __ua_flags; - - check_object_size(from, n, true); __ua_flags = uaccess_save_and_enable(); n = arm_copy_to_user(to, from, n); uaccess_restore(__ua_flags); return n; #else - check_object_size(from, n, true); return arm_copy_to_user(to, from, n); #endif } @@ -526,25 +522,49 @@ __clear_user(void __user *addr, unsigned long n) } #else -#define __copy_from_user(to, from, n) (memcpy(to, (void __force *)from, n), 0) -#define __copy_to_user(to, from, n) (memcpy((void __force *)to, from, n), 0) +#define __arch_copy_from_user(to, from, n) \ + (memcpy(to, (void __force *)from, n), 0) +#define __arch_copy_to_user(to, from, n) \ + (memcpy((void __force *)to, from, n), 0) #define __clear_user(addr, n) (memset((void __force *)addr, 0, n), 0) #endif -static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) +static inline unsigned long __must_check +__copy_from_user(void *to, const void __user *from, unsigned long n) +{ + check_object_size(to, n, false); + return __arch_copy_from_user(to, from, n); +} + +static inline unsigned long __must_check +copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; + + check_object_size(to, n, false); + if (likely(access_ok(VERIFY_READ, from, n))) - res = __copy_from_user(to, from, n); + res = __arch_copy_from_user(to, from, n); if (unlikely(res)) memset(to + (n - res), 0, res); return res; } -static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) +static inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) { + check_object_size(from, n, true); + + return __arch_copy_to_user(to, from, n); +} + +static inline unsigned long __must_check +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + check_object_size(from, n, true); + if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); + n = __arch_copy_to_user(to, from, n); return n; } -- GitLab From f14f6fabfb54defc4224cc8d8b008af2d45cfd94 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 15 Feb 2017 11:38:58 +0100 Subject: [PATCH 0219/1398] vti6: Don't report path MTU below IPV6_MIN_MTU. [ Upstream commit e3dc847a5f85b43ee2bfc8eae407a7e383483228 ] In vti6_xmit(), the check for IPV6_MIN_MTU before we send a ICMPV6_PKT_TOOBIG message is missing. So we might report a PMTU below 1280. Fix this by adding the required check. Fixes: ccd740cbc6e ("vti6: Add pmtu handling to vti6_xmit.") Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_vti.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 67e882d49195..912333586de6 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -485,11 +485,15 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (!skb->ignore_df && skb->len > mtu) { skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); - if (skb->protocol == htons(ETH_P_IPV6)) + if (skb->protocol == htons(ETH_P_IPV6)) { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - else + } else { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + } return -EMSGSIZE; } -- GitLab From 93247ff1fafe4ab5111942d3e44efac139346497 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Sat, 11 Feb 2017 14:02:49 +0100 Subject: [PATCH 0220/1398] ARM: OMAP2+: gpmc-onenand: propagate error on initialization failure [ Upstream commit 7807e086a2d1f69cc1a57958cac04fea79fc2112 ] gpmc_probe_onenand_child returns success even on gpmc_onenand_init failure. Fix that. Signed-off-by: Ladislav Michl Acked-by: Roger Quadros Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/gpmc-onenand.c | 10 ++++++---- drivers/memory/omap-gpmc.c | 4 +--- include/linux/omap-gpmc.h | 5 +++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c index 8633c703546a..2944af820558 100644 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ b/arch/arm/mach-omap2/gpmc-onenand.c @@ -367,7 +367,7 @@ static int gpmc_onenand_setup(void __iomem *onenand_base, int *freq_ptr) return ret; } -void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) +int gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) { int err; struct device *dev = &gpmc_onenand_device.dev; @@ -393,15 +393,17 @@ void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) if (err < 0) { dev_err(dev, "Cannot request GPMC CS %d, error %d\n", gpmc_onenand_data->cs, err); - return; + return err; } gpmc_onenand_resource.end = gpmc_onenand_resource.start + ONENAND_IO_SIZE - 1; - if (platform_device_register(&gpmc_onenand_device) < 0) { + err = platform_device_register(&gpmc_onenand_device); + if (err) { dev_err(dev, "Unable to register OneNAND device\n"); gpmc_cs_free(gpmc_onenand_data->cs); - return; } + + return err; } diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index 5457c361ad58..bf0fe0137dfe 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -1947,9 +1947,7 @@ static int gpmc_probe_onenand_child(struct platform_device *pdev, if (!of_property_read_u32(child, "dma-channel", &val)) gpmc_onenand_data->dma_channel = val; - gpmc_onenand_init(gpmc_onenand_data); - - return 0; + return gpmc_onenand_init(gpmc_onenand_data); } #else static int gpmc_probe_onenand_child(struct platform_device *pdev, diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 35d0fd7a4948..e821a3132a3e 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -88,10 +88,11 @@ static inline int gpmc_nand_init(struct omap_nand_platform_data *d, #endif #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) -extern void gpmc_onenand_init(struct omap_onenand_platform_data *d); +extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); #else #define board_onenand_data NULL -static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d) +static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d) { + return 0; } #endif -- GitLab From b2cb09597b1f1f9cf2a515e47b54bd9bd37e4df4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 13 Feb 2017 13:13:36 +0300 Subject: [PATCH 0221/1398] x86/selftests: Add clobbers for int80 on x86_64 [ Upstream commit 2a4d0c627f5374f365a873dea4e10ae0bb437680 ] Kernel erases R8..R11 registers prior returning to userspace from int80: https://lkml.org/lkml/2009/10/1/164 GCC can reuse these registers and doesn't expect them to change during syscall invocation. I met this kind of bug in CRIU once GCC 6.1 and CLANG stored local variables in those registers and the kernel zerofied them during syscall: https://github.com/xemul/criu/commit/990d33f1a1cdd17bca6c2eb059ab3be2564f7fa2 By that reason I suggest to add those registers to clobbers in selftests. Also, as noted by Andy - removed unneeded clobber for flags in INT $0x80 inline asm. Signed-off-by: Dmitry Safonov Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Shuah Khan Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Link: http://lkml.kernel.org/r/20170213101336.20486-1-dsafonov@virtuozzo.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/fsgsbase.c | 2 +- tools/testing/selftests/x86/ldt_gdt.c | 16 +++++++++++----- tools/testing/selftests/x86/ptrace_syscall.c | 3 ++- .../testing/selftests/x86/single_step_syscall.c | 5 ++++- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 9b4610c6d3fb..f249e042b3b5 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -245,7 +245,7 @@ void do_unexpected_base(void) long ret; asm volatile ("int $0x80" : "=a" (ret) : "a" (243), "b" (low_desc) - : "flags"); + : "r8", "r9", "r10", "r11"); memcpy(&desc, low_desc, sizeof(desc)); munmap(low_desc, sizeof(desc)); diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index f936a3cd3e35..ac1a7a3f87b2 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -45,6 +45,12 @@ #define AR_DB (1 << 22) #define AR_G (1 << 23) +#ifdef __x86_64__ +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" +#else +# define INT80_CLOBBERS +#endif + static int nerrs; /* Points to an array of 1024 ints, each holding its own index. */ @@ -649,7 +655,7 @@ static int invoke_set_thread_area(void) asm volatile ("int $0x80" : "=a" (ret), "+m" (low_user_desc) : "a" (243), "b" (low_user_desc) - : "flags"); + : INT80_CLOBBERS); return ret; } @@ -718,7 +724,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -749,7 +755,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -782,7 +788,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base); @@ -835,7 +841,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base); diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index b037ce9cf116..eaea92439708 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args) asm volatile ("int $0x80" : "+a" (args->nr), "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2), - "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)); + "+S" (args->arg3), "+D" (args->arg4), "+r" (bp) + : : "r8", "r9", "r10", "r11"); args->arg5 = bp; #else sys32_helper(args, int80_and_ret); diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index 50c26358e8b7..a48da95c18fd 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps; #ifdef __x86_64__ # define REG_IP REG_RIP # define WIDTH "q" +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" #else # define REG_IP REG_EIP # define WIDTH "l" +# define INT80_CLOBBERS #endif static unsigned long get_eflags(void) @@ -140,7 +142,8 @@ int main() printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); - asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)); + asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) + : INT80_CLOBBERS); check_result(); /* -- GitLab From 29dc610cfc7c2df653c25b798513ed9cc9b16e4b Mon Sep 17 00:00:00 2001 From: Andrew Banman Date: Fri, 17 Feb 2017 11:07:49 -0600 Subject: [PATCH 0222/1398] x86/platform/uv/BAU: Fix HUB errors by remove initial write to sw-ack register [ Upstream commit 1b17c6df852851b40c3c27c66b8fa2fd99cf25d8 ] Writing to the software acknowledge clear register when there are no pending messages causes a HUB error to assert. The original intent of this write was to clear the pending bits before start of operation, but this is an incorrect method and has been determined to be unnecessary. Signed-off-by: Andrew Banman Acked-by: Mike Travis Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: rja@hpe.com Cc: sivanich@hpe.com Link: http://lkml.kernel.org/r/1487351269-181133-1-git-send-email-abanman@hpe.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/platform/uv/tlb_uv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 9e42842e924a..0f0175186f1b 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1848,7 +1848,6 @@ static void pq_init(int node, int pnode) ops.write_payload_first(pnode, first); ops.write_payload_last(pnode, last); - ops.write_g_sw_ack(pnode, 0xffffUL); /* in effect, all msg_type's are set to MSG_NOOP */ memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); -- GitLab From 4e4a9ebe33a6d2908b46151d57ffa0dc68e2e195 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 11:24:35 +0100 Subject: [PATCH 0223/1398] sched/fair: Make select_idle_cpu() more aggressive [ Upstream commit 4c77b18cf8b7ab37c7d5737b4609010d2ceec5f0 ] Kitsunyan reported desktop latency issues on his Celeron 887 because of commit: 1b568f0aabf2 ("sched/core: Optimize SCHED_SMT") ... even though his CPU doesn't do SMT. The effect of running the SMT code on a !SMT part is basically a more aggressive select_idle_cpu(). Removing the avg condition fixed things for him. I also know FB likes this test gone, even though other workloads like having it. For now, take it out by default, until we get a better idea. Reported-by: kitsunyan Signed-off-by: Peter Zijlstra (Intel) Cc: Chris Mason Cc: Linus Torvalds Cc: Mike Galbraith Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 2 +- kernel/sched/features.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7a68c631d5b5..3d862f5b0331 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5451,7 +5451,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t * Due to large variance we need a large fuzz factor; hackbench in * particularly is sensitive here. */ - if ((avg_idle / 512) < avg_cost) + if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost) return -1; time = local_clock(); diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 69631fa46c2f..1b3c8189b286 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true) */ SCHED_FEAT(TTWU_QUEUE, true) +/* + * When doing wakeups, attempt to limit superfluous scans of the LLC domain. + */ +SCHED_FEAT(SIS_AVG_CPU, false) + #ifdef HAVE_RT_PUSH_IPI /* * In order to avoid a thundering herd attack of CPUs that are -- GitLab From 8c6ebeb4087e1765ae2ee2aea048a9b37a3a8ca8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 1 Mar 2017 21:10:17 +0100 Subject: [PATCH 0224/1398] x86/hpet: Prevent might sleep splat on resume [ Upstream commit bb1a2c26165640ba2cbcfe06c81e9f9d6db4e643 ] Sergey reported a might sleep warning triggered from the hpet resume path. It's caused by the call to disable_irq() from interrupt disabled context. The problem with the low level resume code is that it is not accounted as a special system_state like we do during the boot process. Calling the same code during system boot would not trigger the warning. That's inconsistent at best. In this particular case it's trivial to replace the disable_irq() with disable_hardirq() because this particular code path is solely used from system resume and the involved hpet interrupts can never be force threaded. Reported-and-tested-by: Sergey Senozhatsky Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Sergey Senozhatsky Cc: Borislav Petkov Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703012108460.3684@nanos Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 932348fbb6ea..9512529e8eab 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_irq(hdev->irq); + disable_hardirq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } -- GitLab From 0bf8f6e7205ab20884dcf249f59ae8a9309f5c98 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 27 Feb 2017 14:32:41 +1100 Subject: [PATCH 0225/1398] powerpc/64: Invalidate process table caching after setting process table [ Upstream commit 7a70d7288c926ae88e0c773fbb506aa374e99c2d ] The POWER9 MMU reads and caches entries from the process table. When we kexec from one kernel to another, the second kernel sets its process table pointer but doesn't currently do anything to make the CPU invalidate any cached entries from the old process table. This adds a tlbie (TLB invalidate entry) instruction with parameters to invalidate caching of the process table after the new process table is installed. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/pgtable-radix.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 9a25dce87875..44c33ee397a0 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -173,6 +173,10 @@ static void __init radix_init_pgtable(void) */ register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } static void __init radix_init_partition_table(void) -- GitLab From cd662c8e51a1056407f851a02c08e1a74a3f9d0c Mon Sep 17 00:00:00 2001 From: Sachin Sant Date: Sun, 26 Feb 2017 11:38:39 +0530 Subject: [PATCH 0226/1398] selftest/powerpc: Fix false failures for skipped tests [ Upstream commit a6d8a21596df041f36f4c2ccc260c459e3e851f1 ] Tests under alignment subdirectory are skipped when executed on previous generation hardware, but harness still marks them as failed. test: test_copy_unaligned tags: git_version:unknown [SKIP] Test skipped on line 26 skip: test_copy_unaligned selftests: copy_unaligned [FAIL] The MAGIC_SKIP_RETURN_VALUE value assigned to rc variable is retained till the program exit which causes the test to be marked as failed. This patch resets the value before returning to the main() routine. With this patch the test o/p is as follows: test: test_copy_unaligned tags: git_version:unknown [SKIP] Test skipped on line 26 skip: test_copy_unaligned selftests: copy_unaligned [PASS] Signed-off-by: Sachin Sant Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/powerpc/harness.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 248a820048df..66d31de60b9a 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -114,9 +114,11 @@ int test_harness(int (test_function)(void), char *name) rc = run_test(test_function, name); - if (rc == MAGIC_SKIP_RETURN_VALUE) + if (rc == MAGIC_SKIP_RETURN_VALUE) { test_skip(name); - else + /* so that skipped test is not marked as failed */ + rc = 0; + } else test_finish(name, rc); return rc; -- GitLab From 7ae7408c4352747e0f5e4fde32bf1c692e38baea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2016 13:46:20 +1100 Subject: [PATCH 0227/1398] powerpc: Fix compiling a BE kernel with a powerpc64le toolchain [ Upstream commit 4dc831aa88132f835cefe876aa0206977c4d7710 ] GCC can compile with either endian, but the default ABI version is set based on the default endianness of the toolchain. Alan Modra says: you need both -mbig and -mabi=elfv1 to make a powerpc64le gcc generate powerpc64 code The opposite is true for powerpc64 when generating -mlittle it requires -mabi=elfv2 to generate v2 ABI, which we were already doing. This change adds ABI annotations together with endianness for all cases, LE and BE. This fixes the case of building a BE kernel with a toolchain that is LE by default. Signed-off-by: Nicholas Piggin Tested-by: Naveen N. Rao Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 617dece67924..a60c9c6e5cc1 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -72,8 +72,15 @@ GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +ifdef CONFIG_PPC64 +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc) +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 +endif + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) ifneq ($(cc-name),clang) cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align endif @@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) +AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) -- GitLab From f348a1030eb6d1d1f569a316667719c2280f14e4 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Mon, 13 Feb 2017 10:35:44 -0200 Subject: [PATCH 0228/1398] lirc: fix dead lock between open and wakeup_filter [ Upstream commit db5b15b74ed9a5c04bb808d18ffa2c773f5c18c0 ] The locking in lirc needs improvement, but for now just fix this potential deadlock. ====================================================== [ INFO: possible circular locking dependency detected ] 4.10.0-rc1+ #1 Not tainted ------------------------------------------------------- bash/2502 is trying to acquire lock: (ir_raw_handler_lock){+.+.+.}, at: [] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] but task is already holding lock: (&dev->lock){+.+.+.}, at: [] store_filter+0x9f/0x240 [rc_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&dev->lock){+.+.+.}: [] lock_acquire+0xfd/0x200 [] mutex_lock_nested+0x77/0x6d0 [] rc_open+0x2a/0x80 [rc_core] [] lirc_dev_fop_open+0xda/0x1e0 [lirc_dev] [] chrdev_open+0xb0/0x210 [] do_dentry_open+0x20a/0x2f0 [] vfs_open+0x4c/0x80 [] path_openat+0x5bc/0xc00 [] do_filp_open+0x91/0x100 [] do_sys_open+0x130/0x220 [] SyS_open+0x1e/0x20 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 -> #1 (lirc_dev_lock){+.+.+.}: [] lock_acquire+0xfd/0x200 [] mutex_lock_nested+0x77/0x6d0 [] lirc_register_driver+0x67/0x59b [lirc_dev] [] ir_lirc_register+0x1f4/0x260 [ir_lirc_codec] [] ir_raw_handler_register+0x7c/0xb0 [rc_core] [] 0xffffffffc0398010 [] do_one_initcall+0x52/0x1b0 [] do_init_module+0x5f/0x1fa [] load_module+0x2675/0x2b00 [] SYSC_finit_module+0xdf/0x110 [] SyS_finit_module+0xe/0x10 [] do_syscall_64+0x6c/0x1f0 [] return_from_SYSCALL_64+0x0/0x7a -> #0 (ir_raw_handler_lock){+.+.+.}: [] __lock_acquire+0x10f7/0x1290 [] lock_acquire+0xfd/0x200 [] mutex_lock_nested+0x77/0x6d0 [] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] [] loop_set_wakeup_filter+0x62/0xbd [rc_loopback] [] store_filter+0x1aa/0x240 [rc_core] [] dev_attr_store+0x18/0x30 [] sysfs_kf_write+0x45/0x60 [] kernfs_fop_write+0x155/0x1e0 [] __vfs_write+0x37/0x160 [] vfs_write+0xc8/0x1e0 [] SyS_write+0x58/0xc0 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 other info that might help us debug this: Chain exists of: ir_raw_handler_lock --> lirc_dev_lock --> &dev->lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&dev->lock); lock(lirc_dev_lock); lock(&dev->lock); lock(ir_raw_handler_lock); *** DEADLOCK *** 4 locks held by bash/2502: #0: (sb_writers#4){.+.+.+}, at: [] vfs_write+0x195/0x1e0 #1: (&of->mutex){+.+.+.}, at: [] kernfs_fop_write+0x11f/0x1e0 #2: (s_active#215){.+.+.+}, at: [] kernfs_fop_write+0x128/0x1e0 #3: (&dev->lock){+.+.+.}, at: [] store_filter+0x9f/0x240 [rc_core] stack backtrace: CPU: 3 PID: 2502 Comm: bash Not tainted 4.10.0-rc1+ #1 Hardware name: /DG45ID, BIOS IDG4510H.86A.0135.2011.0225.1100 02/25/2011 Call Trace: dump_stack+0x86/0xc3 print_circular_bug+0x1be/0x210 __lock_acquire+0x10f7/0x1290 lock_acquire+0xfd/0x200 ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] mutex_lock_nested+0x77/0x6d0 ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] ? loop_set_wakeup_filter+0x44/0xbd [rc_loopback] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] loop_set_wakeup_filter+0x62/0xbd [rc_loopback] ? loop_set_tx_duty_cycle+0x70/0x70 [rc_loopback] store_filter+0x1aa/0x240 [rc_core] dev_attr_store+0x18/0x30 sysfs_kf_write+0x45/0x60 kernfs_fop_write+0x155/0x1e0 __vfs_write+0x37/0x160 ? rcu_read_lock_sched_held+0x4a/0x80 ? rcu_sync_lockdep_assert+0x2f/0x60 ? __sb_start_write+0x10c/0x220 ? vfs_write+0x195/0x1e0 ? security_file_permission+0x3b/0xc0 vfs_write+0xc8/0x1e0 SyS_write+0x58/0xc0 entry_SYSCALL_64_fastpath+0x1f/0xc2 Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/lirc_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 6ebe89551961..f4509ef9922b 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -446,6 +446,8 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) return -ERESTARTSYS; ir = irctls[iminor(inode)]; + mutex_unlock(&lirc_dev_lock); + if (!ir) { retval = -ENODEV; goto error; @@ -486,8 +488,6 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) } error: - mutex_unlock(&lirc_dev_lock); - nonseekable_open(inode, file); return retval; -- GitLab From d57cb693c592d5148551b3ac858f14eb9e4812f3 Mon Sep 17 00:00:00 2001 From: David Daney Date: Wed, 1 Mar 2017 14:04:53 -0800 Subject: [PATCH 0229/1398] module: set __jump_table alignment to 8 [ Upstream commit ab42632156becd35d3884ee5c14da2bedbf3149a ] For powerpc the __jump_table section in modules is not aligned, this causes a WARN_ON() splat when loading a module containing a __jump_table. Strict alignment became necessary with commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key"), currently in linux-next, which uses the two least significant bits of pointers to __jump_table elements. Fix by forcing __jump_table to 8, which is the same alignment used for this section in the kernel proper. Link: http://lkml.kernel.org/r/20170301220453.4756-1-david.daney@cavium.com Reviewed-by: Jason Baron Acked-by: Jessica Yu Acked-by: Michael Ellerman (powerpc) Tested-by: Sachin Sant Signed-off-by: David Daney Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- scripts/module-common.lds | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 73a2c7da0e55..53234e85192a 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -19,4 +19,6 @@ SECTIONS { . = ALIGN(8); .init_array 0 : { *(SORT(.init_array.*)) *(.init_array) } + + __jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) } } -- GitLab From 4aff028047156e58b08b06346a19063c723c565a Mon Sep 17 00:00:00 2001 From: Shile Zhang Date: Sat, 4 Feb 2017 17:03:40 +0800 Subject: [PATCH 0230/1398] powerpc/64: Fix checksum folding in csum_add() [ Upstream commit 6ad966d7303b70165228dba1ee8da1a05c10eefe ] Paul's patch to fix checksum folding, commit b492f7e4e07a ("powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold") missed a case in csum_add(). Fix it. Signed-off-by: Shile Zhang Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 1e8fceb308a5..a67bb09585f4 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -100,7 +100,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #ifdef __powerpc64__ res += (__force u64)addend; - return (__force __wsum)((u32)res + (res >> 32)); + return (__force __wsum) from64to32(res); #else asm("addc %0,%0,%1;" "addze %0,%0;" -- GitLab From d3954c5ced8eb8c8548312f49c9bb28717812057 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 4 Mar 2017 07:02:10 -0800 Subject: [PATCH 0231/1398] ARM: OMAP2+: Fix device node reference counts [ Upstream commit 10e5778f54765c96fe0c8f104b7a030e5b35bc72 ] After commit 0549bde0fcb1 ("of: fix of_node leak caused in of_find_node_opts_by_path"), the following error may be reported when running omap images. OF: ERROR: Bad of_node_put() on /ocp@68000000 CPU: 0 PID: 0 Comm: swapper Not tainted 4.10.0-rc7-next-20170210 #1 Hardware name: Generic OMAP3-GP (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x98/0xac) [] (dump_stack) from [] (kobject_release+0x48/0x7c) [] (kobject_release) from [] (of_find_node_by_name+0x74/0x94) [] (of_find_node_by_name) from [] (omap3xxx_hwmod_is_hs_ip_block_usable+0x24/0x2c) [] (omap3xxx_hwmod_is_hs_ip_block_usable) from [] (omap3xxx_hwmod_init+0x180/0x274) [] (omap3xxx_hwmod_init) from [] (omap3_init_early+0xa0/0x11c) [] (omap3_init_early) from [] (omap3430_init_early+0x8/0x30) [] (omap3430_init_early) from [] (setup_arch+0xc04/0xc34) [] (setup_arch) from [] (start_kernel+0x68/0x38c) [] (start_kernel) from [<8020807c>] (0x8020807c) of_find_node_by_name() drops the reference to the passed device node. The commit referenced above exposes this problem. To fix the problem, use of_get_child_by_name() instead of of_find_node_by_name(); of_get_child_by_name() does not drop the reference count of passed device nodes. While semantically different, we only look for immediate children of the passed device node, so of_get_child_by_name() is a more appropriate function to use anyway. Release the reference to the device node obtained with of_get_child_by_name() after it is no longer needed to avoid another device node leak. While at it, clean up the code and change the return type of omap3xxx_hwmod_is_hs_ip_block_usable() to bool to match its use and the return type of of_device_is_available(). Cc: Qi Hou Cc: Peter Rosin Cc: Rob Herring Signed-off-by: Guenter Roeck Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 1cc4a6f3954e..483658d86f80 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -3828,16 +3828,20 @@ static struct omap_hwmod_ocp_if *omap3xxx_dss_hwmod_ocp_ifs[] __initdata = { * Return: 0 if device named @dev_name is not likely to be accessible, * or 1 if it is likely to be accessible. */ -static int __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus, - const char *dev_name) +static bool __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus, + const char *dev_name) { + struct device_node *node; + bool available; + if (!bus) - return (omap_type() == OMAP2_DEVICE_TYPE_GP) ? 1 : 0; + return omap_type() == OMAP2_DEVICE_TYPE_GP; - if (of_device_is_available(of_find_node_by_name(bus, dev_name))) - return 1; + node = of_get_child_by_name(bus, dev_name); + available = of_device_is_available(node); + of_node_put(node); - return 0; + return available; } int __init omap3xxx_hwmod_init(void) -- GitLab From 202c73946455e29db412cd594c0d7e47efcb28fa Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 4 Mar 2017 07:02:11 -0800 Subject: [PATCH 0232/1398] ARM: OMAP2+: Release device node after it is no longer needed. [ Upstream commit b92675d998a9fa37fe9e0e35053a95b4a23c158b ] The device node returned by of_find_node_by_name() needs to be released after it is no longer needed to avoid a device node leak. Signed-off-by: Guenter Roeck Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 483658d86f80..bca54154e14f 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -3910,15 +3910,20 @@ int __init omap3xxx_hwmod_init(void) if (h_sham && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "sham")) { r = omap_hwmod_register_links(h_sham); - if (r < 0) + if (r < 0) { + of_node_put(bus); return r; + } } if (h_aes && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "aes")) { r = omap_hwmod_register_links(h_aes); - if (r < 0) + if (r < 0) { + of_node_put(bus); return r; + } } + of_node_put(bus); /* * Register hwmod links specific to certain ES levels of a -- GitLab From c7c8667117996129fb14d10e99c5f45d9946c670 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 3 Mar 2017 04:25:09 +0000 Subject: [PATCH 0233/1398] ASoC: rcar: avoid SSI_MODEx settings for SSI8 [ Upstream commit 4b30eebfc35c67771b5f58d9274d3e321b72d7a8 ] SSI8 is is sharing pin with SSI7, and nothing to do for SSI_MODEx. It is special pin and it needs special settings whole system, but we can't confirm it, because we never have SSI8 available board. This patch fixup SSI_MODEx settings error for SSI8 on connection test, but should be confirmed behavior on real board in the future. Signed-off-by: Kuninori Morimoto Tested-by: Hiroyuki Yokoyama Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/ssiu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 6f9b388ec5a8..3f95d6b88f8c 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -44,7 +44,11 @@ static int rsnd_ssiu_init(struct rsnd_mod *mod, mask1 = (1 << 4) | (1 << 20); /* mask sync bit */ mask2 = (1 << 4); /* mask sync bit */ val1 = val2 = 0; - if (rsnd_ssi_is_pin_sharing(io)) { + if (id == 8) { + /* + * SSI8 pin is sharing with SSI7, nothing to do. + */ + } else if (rsnd_ssi_is_pin_sharing(io)) { int shift = -1; switch (id) { -- GitLab From 1bc827aabc70aa14b8a8be0b5b53901edb3b3daa Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Mon, 20 Feb 2017 09:41:45 +0800 Subject: [PATCH 0234/1398] gpio: altera: Use handle_level_irq when configured as a level_high [ Upstream commit f759921cfbf4847319d197a6ed7c9534d593f8bc ] When a threaded irq handler is chained attached to one of the gpio pins when configure for level irq the altera_gpio_irq_leveL_high_handler does not mask the interrupt while being handled by the chained irq. This resulting in the threaded irq not getting enough cycles to complete quickly enough before the irq was disabled as faulty. handle_level_irq should be used in this situation instead of handle_simple_irq. In gpiochip_irqchip_add set default handler to handle_bad_irq as per Documentation/gpio/driver.txt. Then set the correct handler in the set_type callback. Signed-off-by: Phil Reid Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-altera.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c index 5bddbd507ca9..3fe6a21e05a5 100644 --- a/drivers/gpio/gpio-altera.c +++ b/drivers/gpio/gpio-altera.c @@ -90,21 +90,18 @@ static int altera_gpio_irq_set_type(struct irq_data *d, altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d)); - if (type == IRQ_TYPE_NONE) + if (type == IRQ_TYPE_NONE) { + irq_set_handler_locked(d, handle_bad_irq); return 0; - if (type == IRQ_TYPE_LEVEL_HIGH && - altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH) - return 0; - if (type == IRQ_TYPE_EDGE_RISING && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING) - return 0; - if (type == IRQ_TYPE_EDGE_FALLING && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING) - return 0; - if (type == IRQ_TYPE_EDGE_BOTH && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH) + } + if (type == altera_gc->interrupt_trigger) { + if (type == IRQ_TYPE_LEVEL_HIGH) + irq_set_handler_locked(d, handle_level_irq); + else + irq_set_handler_locked(d, handle_simple_irq); return 0; - + } + irq_set_handler_locked(d, handle_bad_irq); return -EINVAL; } @@ -230,7 +227,6 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc) chained_irq_exit(chip, desc); } - static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc) { struct altera_gpio_chip *altera_gc; @@ -310,7 +306,7 @@ static int altera_gpio_probe(struct platform_device *pdev) altera_gc->interrupt_trigger = reg; ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0, - handle_simple_irq, IRQ_TYPE_NONE); + handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_err(&pdev->dev, "could not add irqchip\n"); -- GitLab From 881b5d4841fc0d37d0d7362182d0819db927b712 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Fri, 17 Feb 2017 07:40:52 -0600 Subject: [PATCH 0235/1398] HID: chicony: Add support for another ASUS Zen AiO keyboard [ Upstream commit f2f10b7e722a75c6d75a7f7cd06b0eee3ae20f7c ] Add support for media keys on the keyboard that comes with the Asus V221ID and ZN241IC All In One computers. The keys to support here are WLAN, BRIGHTNESSDOWN and BRIGHTNESSUP. This device is not visibly branded as Chicony, and the USB Vendor ID suggests that it is a JESS device. However this seems like the right place to put it: the usage codes are identical to the currently supported devices, and this driver already supports the ASUS AIO keyboard AK1D. Signed-off-by: Daniel Drake Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/Kconfig | 4 ++-- drivers/hid/hid-chicony.c | 1 + drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index cd4599c0523b..db607d51ee2b 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -175,11 +175,11 @@ config HID_CHERRY Support for Cherry Cymotion keyboard. config HID_CHICONY - tristate "Chicony Tactical pad" + tristate "Chicony devices" depends on HID default !EXPERT ---help--- - Support for Chicony Tactical pad. + Support for Chicony Tactical pad and special keys on Chicony keyboards. config HID_CORSAIR tristate "Corsair devices" diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c index bc3cec199fee..f04ed9aabc3f 100644 --- a/drivers/hid/hid-chicony.c +++ b/drivers/hid/hid-chicony.c @@ -86,6 +86,7 @@ static const struct hid_device_id ch_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, + { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, { } }; MODULE_DEVICE_TABLE(hid, ch_devices); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 4f3f5749b0c1..bdde8859e191 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1906,6 +1906,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) }, { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) }, { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, + { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 08fd3f831d62..433d5f675c03 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -558,6 +558,7 @@ #define USB_VENDOR_ID_JESS 0x0c45 #define USB_DEVICE_ID_JESS_YUREX 0x1010 +#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD 0x5112 #define USB_VENDOR_ID_JESS2 0x0f30 #define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111 -- GitLab From 59682e70c98054eb8f97e3670625992a0c85d24e Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 28 Feb 2017 10:55:30 +0000 Subject: [PATCH 0236/1398] usb: gadget: configs: plug memory leak [ Upstream commit 38355b2a44776c25b0f2ad466e8c51bb805b3032 ] When binding a gadget to a device, "name" is stored in gi->udc_name, but this does not happen when unregistering and the string is leaked. Signed-off-by: John Keeping Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 502a096fc380..a5ca409dc97e 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -269,6 +269,7 @@ static ssize_t gadget_dev_desc_UDC_store(struct config_item *item, ret = unregister_gadget(gi); if (ret) goto err; + kfree(name); } else { if (gi->composite.gadget_driver.udc_name) { ret = -EBUSY; -- GitLab From 8fe9ea81b1e9b8ad1b7b53a8c4295e4f4c47464d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 21 Feb 2017 22:33:11 +0100 Subject: [PATCH 0237/1398] USB: gadgetfs: Fix a potential memory leak in 'dev_config()' [ Upstream commit b6e7aeeaf235901c42ec35de4633c7c69501d303 ] 'kbuf' is allocated just a few lines above using 'memdup_user()'. If the 'if (dev->buf)' test fails, this memory is never released. Signed-off-by: Christophe JAILLET Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index f69dbd4bcd18..b8534d3f8bb0 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1819,8 +1819,10 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) spin_lock_irq (&dev->lock); value = -EINVAL; - if (dev->buf) + if (dev->buf) { + kfree(kbuf); goto fail; + } dev->buf = kbuf; /* full or low speed config */ -- GitLab From 7abf66d78e89800eb3a5759bf7592b6e53721c20 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 15 Feb 2017 14:16:26 +0200 Subject: [PATCH 0238/1398] usb: dwc3: gadget: Fix system suspend/resume on TI platforms [ Upstream commit 1551e35ea4189c1f7199fe278395fc94196715f2 ] On TI platforms (dra7, am437x), the DWC3_DSTS_DEVCTRLHLT bit is not set after the device controller is stopped via DWC3_DCTL_RUN_STOP. If we don't disconnect and stop the gadget, it stops working after a system resume with the trace below. There is no point in preventing gadget disconnect and gadget stop during system suspend/resume as we're going to suspend in any case, whether DEVCTRLHLT timed out or not. [ 141.727480] ------------[ cut here ]------------ [ 141.732349] WARNING: CPU: 1 PID: 2135 at drivers/usb/dwc3/gadget.c:2384 dwc3_stop_active_transfer.constprop.4+0xc4/0xe4 [dwc3] [ 141.744299] Modules linked in: usb_f_ss_lb g_zero libcomposite xhci_plat_hcd xhci_hcd usbcore dwc3 evdev udc_core m25p80 usb_common spi_nor snd_soc_davinci_mcasp snd_soc_simple_card snd_soc_edma snd_soc_tlv3e [ 141.792163] CPU: 1 PID: 2135 Comm: irq/456-dwc3 Not tainted 4.10.0-rc8 #1138 [ 141.799547] Hardware name: Generic DRA74X (Flattened Device Tree) [ 141.805940] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 141.814066] [] (show_stack) from [] (dump_stack+0xac/0xe0) [ 141.821648] [] (dump_stack) from [] (__warn+0xd8/0x104) [ 141.828955] [] (__warn) from [] (warn_slowpath_null+0x20/0x28) [ 141.836902] [] (warn_slowpath_null) from [] (dwc3_stop_active_transfer.constprop.4+0xc4/0xe4 [dwc3]) [ 141.848329] [] (dwc3_stop_active_transfer.constprop.4 [dwc3]) from [] (__dwc3_gadget_ep_disable+0x64/0x528 [dwc3]) [ 141.861034] [] (__dwc3_gadget_ep_disable [dwc3]) from [] (dwc3_gadget_ep_disable+0x3c/0xc8 [dwc3]) [ 141.872280] [] (dwc3_gadget_ep_disable [dwc3]) from [] (usb_ep_disable+0x11c/0x18c [udc_core]) [ 141.883160] [] (usb_ep_disable [udc_core]) from [] (disable_ep+0x18/0x54 [usb_f_ss_lb]) [ 141.893408] [] (disable_ep [usb_f_ss_lb]) from [] (disable_endpoints+0x18/0x50 [usb_f_ss_lb]) [ 141.904168] [] (disable_endpoints [usb_f_ss_lb]) from [] (disable_source_sink+0x2c/0x34 [usb_f_ss_lb]) [ 141.915771] [] (disable_source_sink [usb_f_ss_lb]) from [] (reset_config+0x48/0x7c [libcomposite]) [ 141.927012] [] (reset_config [libcomposite]) from [] (composite_disconnect+0x2c/0x54 [libcomposite]) [ 141.938444] [] (composite_disconnect [libcomposite]) from [] (usb_gadget_udc_reset+0x10/0x34 [udc_core]) [ 141.950237] [] (usb_gadget_udc_reset [udc_core]) from [] (dwc3_gadget_reset_interrupt+0x64/0x698 [dwc3]) [ 141.962022] [] (dwc3_gadget_reset_interrupt [dwc3]) from [] (dwc3_thread_interrupt+0x618/0x1a3c [dwc3]) [ 141.973723] [] (dwc3_thread_interrupt [dwc3]) from [] (irq_thread_fn+0x1c/0x54) [ 141.983215] [] (irq_thread_fn) from [] (irq_thread+0x120/0x1f0) [ 141.991247] [] (irq_thread) from [] (kthread+0xf8/0x138) [ 141.998641] [] (kthread) from [] (ret_from_fork+0x14/0x24) [ 142.006213] ---[ end trace b4ecfe9f175b9a9c ]--- Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index c61ddbf94bc7..16c67120d72b 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3092,15 +3092,10 @@ void dwc3_gadget_exit(struct dwc3 *dwc) int dwc3_gadget_suspend(struct dwc3 *dwc) { - int ret; - if (!dwc->gadget_driver) return 0; - ret = dwc3_gadget_run_stop(dwc, false, false); - if (ret < 0) - return ret; - + dwc3_gadget_run_stop(dwc, false, false); dwc3_disconnect_gadget(dwc); __dwc3_gadget_stop(dwc); -- GitLab From 82f79a4423fc5f4ab16acb0170aeb06222e1c13b Mon Sep 17 00:00:00 2001 From: Petr Cvek Date: Fri, 24 Feb 2017 02:54:56 +0100 Subject: [PATCH 0239/1398] usb: gadget: pxa27x: Test for a valid argument pointer [ Upstream commit df7545719a14fa7b481896fb8689e23d0a00f682 ] A call usb_put_phy(udc->transceiver) must be tested for a valid pointer. Use an already existing test for usb_unregister_notifier call. Acked-by: Robert Jarzmik Reported-by: Robert Jarzmik Signed-off-by: Petr Cvek Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/pxa27x_udc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c index 7fa60f5b7ae4..afd6b86458c5 100644 --- a/drivers/usb/gadget/udc/pxa27x_udc.c +++ b/drivers/usb/gadget/udc/pxa27x_udc.c @@ -2534,9 +2534,10 @@ static int pxa_udc_remove(struct platform_device *_dev) usb_del_gadget_udc(&udc->gadget); pxa_cleanup_debugfs(udc); - if (!IS_ERR_OR_NULL(udc->transceiver)) + if (!IS_ERR_OR_NULL(udc->transceiver)) { usb_unregister_notifier(udc->transceiver, &pxa27x_udc_phy); - usb_put_phy(udc->transceiver); + usb_put_phy(udc->transceiver); + } udc->transceiver = NULL; the_controller = NULL; -- GitLab From 2afb2d892c1d54e560c0dc047919bff6129baa1a Mon Sep 17 00:00:00 2001 From: Raz Manor Date: Thu, 9 Feb 2017 09:41:08 +0200 Subject: [PATCH 0240/1398] usb: gadget: udc: net2280: Fix tmp reusage in net2280 driver [ Upstream commit ef5e2fa9f65befa12f1113c734602d2c1964d2a5 ] In the function scan_dma_completions() there is a reusage of tmp variable. That coused a wrong value being used in some case when reading a short packet terminated transaction from an endpoint, in 2 concecutive reads. This was my logic for the patch: The req->td->dmadesc equals to 0 iff: -- There was a transaction ending with a short packet, and -- The read() to read it was shorter than the transaction length, and -- The read() to complete it is longer than the residue. I believe this is true from the printouts of various cases, but I can't be positive it is correct. Entering this if, there should be no more data in the endpoint (a short packet terminated the transaction). If there is, the transaction wasn't really done and we should exit and wait for it to finish entirely. That is the inner if. That inner if should never happen, but it is there to be on the safe side. That is why it is marked with the comment /* paranoia */. The size of the data available in the endpoint is ep->dma->dmacount and it is read to tmp. This entire clause is based on my own educated guesses. If we passed that inner if without breaking in the original code, than tmp & DMA_BYTE_MASK_COUNT== 0. That means we will always pass dma bytes count of 0 to dma_done(), meaning all the requested bytes were read. dma_done() reports back to the upper layer that the request (read()) was done and how many bytes were read. In the original code that would always be the request size, regardless of the actual size of the data. That did not make sense to me at all. However, the original value of tmp is req->td->dmacount, which is the dmacount value when the request's dma transaction was finished. And that is a much more reasonable value to report back to the caller. To recreate the problem: Read from a bulk out endpoint in a loop, 1024 * n bytes in each iteration. Connect the PLX to a host you can control. Send to that endpoint 1024 * n + x bytes, such that 0 < x < 1024 * n and (x % 1024) != 0 You would expect the first read() to return 1024 * n and the second read() to return x. But you will get the first read to return 1024 * n and the second one to return 1024 * n. That is true for every positive integer n. Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Signed-off-by: Raz Manor Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/net2280.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index 33f3987218f7..d133252ef2c3 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -1146,15 +1146,15 @@ static int scan_dma_completions(struct net2280_ep *ep) */ while (!list_empty(&ep->queue)) { struct net2280_request *req; - u32 tmp; + u32 req_dma_count; req = list_entry(ep->queue.next, struct net2280_request, queue); if (!req->valid) break; rmb(); - tmp = le32_to_cpup(&req->td->dmacount); - if ((tmp & BIT(VALID_BIT)) != 0) + req_dma_count = le32_to_cpup(&req->td->dmacount); + if ((req_dma_count & BIT(VALID_BIT)) != 0) break; /* SHORT_PACKET_TRANSFERRED_INTERRUPT handles "usb-short" @@ -1163,40 +1163,41 @@ static int scan_dma_completions(struct net2280_ep *ep) */ if (unlikely(req->td->dmadesc == 0)) { /* paranoia */ - tmp = readl(&ep->dma->dmacount); - if (tmp & DMA_BYTE_COUNT_MASK) + u32 const ep_dmacount = readl(&ep->dma->dmacount); + + if (ep_dmacount & DMA_BYTE_COUNT_MASK) break; /* single transfer mode */ - dma_done(ep, req, tmp, 0); + dma_done(ep, req, req_dma_count, 0); num_completed++; break; } else if (!ep->is_in && (req->req.length % ep->ep.maxpacket) && !(ep->dev->quirks & PLX_PCIE)) { - tmp = readl(&ep->regs->ep_stat); + u32 const ep_stat = readl(&ep->regs->ep_stat); /* AVOID TROUBLE HERE by not issuing short reads from * your gadget driver. That helps avoids errata 0121, * 0122, and 0124; not all cases trigger the warning. */ - if ((tmp & BIT(NAK_OUT_PACKETS)) == 0) { + if ((ep_stat & BIT(NAK_OUT_PACKETS)) == 0) { ep_warn(ep->dev, "%s lost packet sync!\n", ep->ep.name); req->req.status = -EOVERFLOW; } else { - tmp = readl(&ep->regs->ep_avail); - if (tmp) { + u32 const ep_avail = readl(&ep->regs->ep_avail); + if (ep_avail) { /* fifo gets flushed later */ ep->out_overflow = 1; ep_dbg(ep->dev, "%s dma, discard %d len %d\n", - ep->ep.name, tmp, + ep->ep.name, ep_avail, req->req.length); req->req.status = -EOVERFLOW; } } } - dma_done(ep, req, tmp, 0); + dma_done(ep, req, req_dma_count, 0); num_completed++; } -- GitLab From 29deec403d5614e67443cf9bf6ca3260c4f97d8a Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 2 Mar 2017 12:41:48 -0800 Subject: [PATCH 0241/1398] kvm: nVMX: VMCLEAR should not cause the vCPU to shut down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 587d7e72aedca91cee80c0a56811649c3efab765 ] VMCLEAR should silently ignore a failure to clear the launch state of the VMCS referenced by the operand. Signed-off-by: Jim Mattson [Changed "kvm_write_guest(vcpu->kvm" to "kvm_vcpu_write_guest(vcpu".] Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2e65760e546f..6b15b48d905f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7203,9 +7203,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) static int handle_vmclear(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 zero = 0; gpa_t vmptr; - struct vmcs12 *vmcs12; - struct page *page; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -7216,22 +7215,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - /* - * For accurate processor emulation, VMCLEAR beyond available - * physical memory should do nothing at all. However, it is - * possible that a nested vmx bug, not a guest hypervisor bug, - * resulted in this case, so let's shut down before doing any - * more damage: - */ - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); - return 1; - } - vmcs12 = kmap(page); - vmcs12->launch_state = 0; - kunmap(page); - nested_release_page(page); + kvm_vcpu_write_guest(vcpu, + vmptr + offsetof(struct vmcs12, launch_state), + &zero, sizeof(zero)); nested_free_vmcs02(vmx, vmptr); -- GitLab From da6a95b0ef8202ab541ece19c8b1844b1fadcd9e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 6 Mar 2017 15:26:54 -0500 Subject: [PATCH 0242/1398] libata: drop WARN from protocol error in ata_sff_qc_issue() [ Upstream commit 0580b762a4d6b70817476b90042813f8573283fa ] ata_sff_qc_issue() expects upper layers to never issue commands on a command protocol that it doesn't implement. While the assumption holds fine with the usual IO path, nothing filters based on the command protocol in the passthrough path (which was added later), allowing the warning to be tripped with a passthrough command with the right (well, wrong) protocol. Failing with AC_ERR_SYSTEM is the right thing to do anyway. Remove the unnecessary WARN. Reported-by: Dmitry Vyukov Link: http://lkml.kernel.org/r/CACT4Y+bXkvevNZU8uP6X0QVqsj6wNoUA_1exfTSOzc+SmUtMOA@mail.gmail.com Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-sff.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 051b6158d1b7..8d22acdf90f0 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1481,7 +1481,6 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) break; default: - WARN_ON_ONCE(1); return AC_ERR_SYSTEM; } -- GitLab From 757e1845d6c3e66da90c00e4e487d053b62ce646 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 6 Mar 2017 15:33:42 -0500 Subject: [PATCH 0243/1398] workqueue: trigger WARN if queue_delayed_work() is called with NULL @wq [ Upstream commit 637fdbae60d6cb9f6e963c1079d7e0445c86ff7d ] If queue_delayed_work() gets called with NULL @wq, the kernel will oops asynchronuosly on timer expiration which isn't too helpful in tracking down the offender. This actually happened with smc. __queue_delayed_work() already does several input sanity checks synchronously. Add NULL @wq check. Reported-by: Dave Jones Link: http://lkml.kernel.org/r/20170227171439.jshx3qplflyrgcv7@codemonkey.org.uk Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 296dcca77f33..181c2ad0cb54 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1506,6 +1506,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; + WARN_ON_ONCE(!wq); WARN_ON_ONCE(timer->function != delayed_work_timer_fn || timer->data != (unsigned long)dwork); WARN_ON_ONCE(timer_pending(timer)); -- GitLab From 1e7208d7592859b9ee15baa9fb884afe68116445 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 2 Mar 2017 17:14:47 -0800 Subject: [PATCH 0244/1398] scsi: qla2xxx: Fix ql_dump_buffer [ Upstream commit 23456565acf6d452e0368f7380aecd584c019c67 ] Recent printk changes for KERN_CONT cause this logging to be defectively emitted on multiple lines. Fix it. Also reduces object size a trivial amount. $ size drivers/scsi/qla2xxx/qla_dbg.o* text data bss dec hex filename 39125 0 0 39125 98d5 drivers/scsi/qla2xxx/qla_dbg.o.new 39164 0 0 39164 98fc drivers/scsi/qla2xxx/qla_dbg.o.old Signed-off-by: Joe Perches Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_dbg.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 658e4d15cb71..ce4ac769a9a2 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -2707,13 +2707,9 @@ ql_dump_buffer(uint32_t level, scsi_qla_host_t *vha, int32_t id, "%-+5d 0 1 2 3 4 5 6 7 8 9 A B C D E F\n", size); ql_dbg(level, vha, id, "----- -----------------------------------------------\n"); - for (cnt = 0; cnt < size; cnt++, buf++) { - if (cnt % 16 == 0) - ql_dbg(level, vha, id, "%04x:", cnt & ~0xFU); - printk(" %02x", *buf); - if (cnt % 16 == 15) - printk("\n"); + for (cnt = 0; cnt < size; cnt += 16) { + ql_dbg(level, vha, id, "%04x: ", cnt); + print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, + buf + cnt, min(16U, size - cnt), false); } - if (cnt % 16 != 0) - printk("\n"); } -- GitLab From d32c4dedcc6d989c63a2f31ac736c5f02e81f122 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:25 -0800 Subject: [PATCH 0245/1398] scsi: lpfc: Fix crash during Hardware error recovery on SLI3 adapters [ Upstream commit 5d181531bc6169e19a02a27d202cf0e982db9d0e ] if REG_VPI fails, the driver was incorrectly issuing INIT_VFI (a SLI4 command) on a SLI3 adapter. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_els.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 4df3cdcf88ce..9c9563312a3d 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -8185,11 +8185,17 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) spin_lock_irq(shost->host_lock); vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; spin_unlock_irq(shost->host_lock); - if (vport->port_type == LPFC_PHYSICAL_PORT - && !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) - lpfc_issue_init_vfi(vport); - else + if (mb->mbxStatus == MBX_NOT_FINISHED) + break; + if ((vport->port_type == LPFC_PHYSICAL_PORT) && + !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) { + if (phba->sli_rev == LPFC_SLI_REV4) + lpfc_issue_init_vfi(vport); + else + lpfc_initial_flogi(vport); + } else { lpfc_initial_fdisc(vport); + } break; } } else { -- GitLab From 54e1ae1f21f5206eaf1e2558a93fb9644358dc65 Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Mon, 6 Mar 2017 14:41:06 +0100 Subject: [PATCH 0246/1398] irqchip/crossbar: Fix incorrect type of register size [ Upstream commit 4b9de5da7e120c7f02395da729f0ec77ce7a6044 ] The 'size' variable is unsigned according to the dt-bindings. As this variable is used as integer in other places, create a new variable that allows to fix the following sparse issue (-Wtypesign): drivers/irqchip/irq-crossbar.c:279:52: warning: incorrect type in argument 3 (different signedness) drivers/irqchip/irq-crossbar.c:279:52: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:279:52: got int * Signed-off-by: Franck Demathieu Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-crossbar.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 05bbf171df37..1070b7b959f2 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -199,7 +199,7 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { int i, size, reserved = 0; - u32 max = 0, entry; + u32 max = 0, entry, reg_size; const __be32 *irqsr; int ret = -ENOMEM; @@ -276,9 +276,9 @@ static int __init crossbar_of_init(struct device_node *node) if (!cb->register_offsets) goto err_irq_map; - of_property_read_u32(node, "ti,reg-size", &size); + of_property_read_u32(node, "ti,reg-size", ®_size); - switch (size) { + switch (reg_size) { case 1: cb->write = crossbar_writeb; break; @@ -304,7 +304,7 @@ static int __init crossbar_of_init(struct device_node *node) continue; cb->register_offsets[i] = reserved; - reserved += size; + reserved += reg_size; } of_property_read_u32(node, "ti,irqs-safe-map", &cb->safe_map); -- GitLab From c886f281b9e016f695df0a4d4aa91a2ead93869f Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 6 Mar 2017 04:03:28 -0800 Subject: [PATCH 0247/1398] KVM: nVMX: reset nested_run_pending if the vCPU is going to be reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2f707d97982286b307ef2a9b034e19aabc1abb56 ] Reported by syzkaller: WARNING: CPU: 1 PID: 27742 at arch/x86/kvm/vmx.c:11029 nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029 CPU: 1 PID: 27742 Comm: a.out Not tainted 4.10.0+ #229 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 panic+0x1fb/0x412 kernel/panic.c:179 __warn+0x1c4/0x1e0 kernel/panic.c:540 warn_slowpath_null+0x2c/0x40 kernel/panic.c:583 nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029 vmx_leave_nested arch/x86/kvm/vmx.c:11136 [inline] vmx_set_msr+0x1565/0x1910 arch/x86/kvm/vmx.c:3324 kvm_set_msr+0xd4/0x170 arch/x86/kvm/x86.c:1099 do_set_msr+0x11e/0x190 arch/x86/kvm/x86.c:1128 __msr_io arch/x86/kvm/x86.c:2577 [inline] msr_io+0x24b/0x450 arch/x86/kvm/x86.c:2614 kvm_arch_vcpu_ioctl+0x35b/0x46a0 arch/x86/kvm/x86.c:3497 kvm_vcpu_ioctl+0x232/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2721 vfs_ioctl fs/ioctl.c:43 [inline] do_vfs_ioctl+0x1bf/0x1790 fs/ioctl.c:683 SYSC_ioctl fs/ioctl.c:698 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:689 entry_SYSCALL_64_fastpath+0x1f/0xc2 The syzkaller folks reported a nested_run_pending warning during userspace clear VMX capability which is exposed to L1 before. The warning gets thrown while doing (*(uint32_t*)0x20aecfe8 = (uint32_t)0x1); (*(uint32_t*)0x20aecfec = (uint32_t)0x0); (*(uint32_t*)0x20aecff0 = (uint32_t)0x3a); (*(uint32_t*)0x20aecff4 = (uint32_t)0x0); (*(uint64_t*)0x20aecff8 = (uint64_t)0x0); r[29] = syscall(__NR_ioctl, r[4], 0x4008ae89ul, 0x20aecfe8ul, 0, 0, 0, 0, 0, 0); i.e. KVM_SET_MSR ioctl with struct kvm_msrs { .nmsrs = 1, .pad = 0, .entries = { {.index = MSR_IA32_FEATURE_CONTROL, .reserved = 0, .data = 0} } } The VMLANCH/VMRESUME emulation should be stopped since the CPU is going to reset here. This patch resets the nested_run_pending since the CPU is going to be reset hence there should be nothing pending. Reported-by: Dmitry Vyukov Suggested-by: Radim Krčmář Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Cc: David Hildenbrand Signed-off-by: Wanpeng Li Reviewed-by: David Hildenbrand Reviewed-by: Jim Mattson Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6b15b48d905f..9aa62ab13ae8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10884,8 +10884,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, */ static void vmx_leave_nested(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.nested_run_pending = 0; nested_vmx_vmexit(vcpu, -1, 0, 0); + } free_nested(to_vmx(vcpu)); } -- GitLab From 5e366aaec185c83157a126dad227a75e59899c62 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 20 Feb 2017 12:30:11 +0000 Subject: [PATCH 0248/1398] arm: KVM: Survive unknown traps from guests [ Upstream commit f050fe7a9164945dd1c28be05bf00e8cfb082ccf ] Currently we BUG() if we see a HSR.EC value we don't recognise. As configurable disables/enables are added to the architecture (controlled by RES1/RES0 bits respectively), with associated synchronous exceptions, it may be possible for a guest to trigger exceptions with classes that we don't recognise. While we can't service these exceptions in a manner useful to the guest, we can avoid bringing down the host. Per ARM DDI 0406C.c, all currently unallocated HSR EC encodings are reserved, and per ARM DDI 0487A.k_iss10775, page G6-4395, EC values within the range 0x00 - 0x2c are reserved for future use with synchronous exceptions, and EC values within the range 0x2d - 0x3f may be used for either synchronous or asynchronous exceptions. The patch makes KVM handle any unknown EC by injecting an UNDEFINED exception into the guest, with a corresponding (ratelimited) warning in the host dmesg. We could later improve on this with with a new (opt-in) exit to the host userspace. Cc: Dave Martin Cc: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_arm.h | 1 + arch/arm/kvm/handle_exit.c | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 5f5defdad025..98d6de177b7a 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -208,6 +208,7 @@ #define HSR_EC_IABT_HYP (0x21) #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) +#define HSR_EC_MAX (0x3f) #define HSR_WFI_IS_WFE (_AC(1, UL) << 0) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 066b6d4508ce..42f5daf715d0 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -79,7 +79,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n", + hsr); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... HSR_EC_MAX] = kvm_handle_unknown_ec, [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, @@ -98,13 +110,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x\n", - (unsigned int)kvm_vcpu_get_hsr(vcpu)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; } -- GitLab From aafb72d2ed5579208191cfc3fb9425570c1303ce Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 20 Feb 2017 12:30:12 +0000 Subject: [PATCH 0249/1398] arm64: KVM: Survive unknown traps from guests [ Upstream commit ba4dd156eabdca93501d92a980ba27fa5f4bbd27 ] Currently we BUG() if we see an ESR_EL2.EC value we don't recognise. As configurable disables/enables are added to the architecture (controlled by RES1/RES0 bits respectively), with associated synchronous exceptions, it may be possible for a guest to trigger exceptions with classes that we don't recognise. While we can't service these exceptions in a manner useful to the guest, we can avoid bringing down the host. Per ARM DDI 0487A.k_iss10775, page D7-1937, EC values within the range 0x00 - 0x2c are reserved for future use with synchronous exceptions, and EC values within the range 0x2d - 0x3f may be used for either synchronous or asynchronous exceptions. The patch makes KVM handle any unknown EC by injecting an UNDEFINED exception into the guest, with a corresponding (ratelimited) warning in the host dmesg. We could later improve on this with with a new (opt-in) exit to the host userspace. Cc: Dave Martin Cc: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/handle_exit.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index a204adf29f0a..85baadab02d3 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -125,7 +125,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) return ret; } +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n", + hsr, esr_get_class_string(hsr)); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec, [ESR_ELx_EC_WFx] = kvm_handle_wfx, [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32, [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64, @@ -151,13 +163,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) u32 hsr = kvm_vcpu_get_hsr(vcpu); u8 hsr_ec = ESR_ELx_EC(hsr); - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x -- %s\n", - hsr, esr_get_class_string(hsr)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; } -- GitLab From b1f71147a1883fff0535f1bd12fca3839e54aadc Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 16 Feb 2017 10:41:20 +0000 Subject: [PATCH 0250/1398] KVM: arm/arm64: VGIC: Fix command handling while ITS being disabled [ Upstream commit a5e1e6ca94a8cec51571fd62e3eaec269717969c ] The ITS spec says that ITS commands are only processed when the ITS is enabled (section 8.19.4, Enabled, bit[0]). Our emulation was not taking this into account. Fix this by checking the enabled state before handling CWRITER writes. On the other hand that means that CWRITER could advance while the ITS is disabled, and enabling it would need those commands to be processed. Fix this case as well by refactoring actual command processing and calling this from both the GITS_CWRITER and GITS_CTLR handlers. Reviewed-by: Eric Auger Reviewed-by: Christoffer Dall Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-its.c | 109 +++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 44 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 6af22fe9e082..ebcaf4641d2b 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) return ret; } -static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - u32 reg = 0; - - mutex_lock(&its->cmd_lock); - if (its->creadr == its->cwriter) - reg |= GITS_CTLR_QUIESCENT; - if (its->enabled) - reg |= GITS_CTLR_ENABLE; - mutex_unlock(&its->cmd_lock); - - return reg; -} - -static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) -{ - its->enabled = !!(val & GITS_CTLR_ENABLE); -} - static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, struct vgic_its *its, gpa_t addr, unsigned int len) @@ -1162,33 +1139,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its, #define ITS_CMD_SIZE 32 #define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5)) -/* - * By writing to CWRITER the guest announces new commands to be processed. - * To avoid any races in the first place, we take the its_cmd lock, which - * protects our ring buffer variables, so that there is only one user - * per ITS handling commands at a given time. - */ -static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) +/* Must be called with the cmd_lock held. */ +static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) { gpa_t cbaser; u64 cmd_buf[4]; - u32 reg; - if (!its) - return; - - mutex_lock(&its->cmd_lock); - - reg = update_64bit_reg(its->cwriter, addr & 7, len, val); - reg = ITS_CMD_OFFSET(reg); - if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { - mutex_unlock(&its->cmd_lock); + /* Commands are only processed when the ITS is enabled. */ + if (!its->enabled) return; - } - its->cwriter = reg; cbaser = CBASER_ADDRESS(its->cbaser); while (its->cwriter != its->creadr) { @@ -1208,6 +1168,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser)) its->creadr = 0; } +} + +/* + * By writing to CWRITER the guest announces new commands to be processed. + * To avoid any races in the first place, we take the its_cmd lock, which + * protects our ring buffer variables, so that there is only one user + * per ITS handling commands at a given time. + */ +static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u64 reg; + + if (!its) + return; + + mutex_lock(&its->cmd_lock); + + reg = update_64bit_reg(its->cwriter, addr & 7, len, val); + reg = ITS_CMD_OFFSET(reg); + if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + mutex_unlock(&its->cmd_lock); + return; + } + its->cwriter = reg; + + vgic_its_process_commands(kvm, its); mutex_unlock(&its->cmd_lock); } @@ -1288,6 +1276,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, *regptr = reg; } +static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 reg = 0; + + mutex_lock(&its->cmd_lock); + if (its->creadr == its->cwriter) + reg |= GITS_CTLR_QUIESCENT; + if (its->enabled) + reg |= GITS_CTLR_ENABLE; + mutex_unlock(&its->cmd_lock); + + return reg; +} + +static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + mutex_lock(&its->cmd_lock); + + its->enabled = !!(val & GITS_CTLR_ENABLE); + + /* + * Try to process any pending commands. This function bails out early + * if the ITS is disabled or no commands have been queued. + */ + vgic_its_process_commands(kvm, its); + + mutex_unlock(&its->cmd_lock); +} + #define REGISTER_ITS_DESC(off, rd, wr, length, acc) \ { \ .reg_offset = off, \ -- GitLab From 1ace4dabf62472549744480d27f37d11c1a7cf9a Mon Sep 17 00:00:00 2001 From: "Blomme, Maarten" Date: Thu, 2 Mar 2017 13:08:36 +0100 Subject: [PATCH 0251/1398] spi_ks8995: fix "BUG: key accdaa28 not in .data!" [ Upstream commit 4342696df764ec65dcdfbd0c10d90ea52505f8ba ] Signed-off-by: Maarten Blomme Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/spi_ks8995.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index 93ffedfa2994..aa7209d794f3 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -498,6 +498,7 @@ static int ks8995_probe(struct spi_device *spi) if (err) return err; + sysfs_attr_init(&ks->regs_attr.attr); err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr); if (err) { dev_err(&spi->dev, "unable to create sysfs file, err=%d\n", -- GitLab From c72c7f2442654d68d38d22cc5a36dd2644d2848b Mon Sep 17 00:00:00 2001 From: "Blomme, Maarten" Date: Thu, 2 Mar 2017 13:08:49 +0100 Subject: [PATCH 0252/1398] spi_ks8995: regs_size incorrect for some devices [ Upstream commit 239870f2a0ebf75cc8f6d987dc528c5243f93d69 ] Signed-off-by: Maarten Blomme Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/spi_ks8995.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index aa7209d794f3..1e2d4f1179da 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -491,8 +491,8 @@ static int ks8995_probe(struct spi_device *spi) if (err) return err; - ks->regs_attr.size = ks->chip->regs_size; memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr)); + ks->regs_attr.size = ks->chip->regs_size; err = ks8995_reset(ks); if (err) -- GitLab From ae6b3452faf741c4127288c71d6edd0f23d6b0bb Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:28 +0100 Subject: [PATCH 0253/1398] bnx2x: prevent crash when accessing PTP with interface down [ Upstream commit 466e8bf10ac104d96e1ea813e8126e11cb72ea20 ] It is possible to crash the kernel by accessing a PTP device while its associated bnx2x interface is down. Before the interface is brought up, the timecounter is not initialized, so accessing it results in NULL dereference. Fix it by checking if the interface is up. Use -ENETDOWN as the error code when the interface is down. -EFAULT in bnx2x_ptp_adjfreq() did not seem right. Tested using phc_ctl get/set/adj/freq commands. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 4febe60eadc2..dcc620549f78 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13735,7 +13735,7 @@ static int bnx2x_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) if (!netif_running(bp->dev)) { DP(BNX2X_MSG_PTP, "PTP adjfreq called while the interface is down\n"); - return -EFAULT; + return -ENETDOWN; } if (ppb < 0) { @@ -13794,6 +13794,12 @@ static int bnx2x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); + if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP adjtime called while the interface is down\n"); + return -ENETDOWN; + } + DP(BNX2X_MSG_PTP, "PTP adjtime called, delta = %llx\n", delta); timecounter_adjtime(&bp->timecounter, delta); @@ -13806,6 +13812,12 @@ static int bnx2x_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); u64 ns; + if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP gettime called while the interface is down\n"); + return -ENETDOWN; + } + ns = timecounter_read(&bp->timecounter); DP(BNX2X_MSG_PTP, "PTP gettime called, ns = %llu\n", ns); @@ -13821,6 +13833,12 @@ static int bnx2x_ptp_settime(struct ptp_clock_info *ptp, struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); u64 ns; + if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP settime called while the interface is down\n"); + return -ENETDOWN; + } + ns = timespec64_to_ns(ts); DP(BNX2X_MSG_PTP, "PTP settime called, ns = %llu\n", ns); -- GitLab From f73fcb25f4bba5c8df9de1693ea382f21f8f6818 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:30 +0100 Subject: [PATCH 0254/1398] bnx2x: fix possible overrun of VFPF multicast addresses array [ Upstream commit 22118d861cec5da6ed525aaf12a3de9bfeffc58f ] It is too late to check for the limit of the number of VF multicast addresses after they have already been copied to the req->multicast[] array, possibly overflowing it. Do the check before copying. Also fix the error path to not skip unlocking vf2pf_mutex. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index bfae300cf25f..c2d327d9dff0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -868,7 +868,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) struct bnx2x *bp = netdev_priv(dev); struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters; struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp; - int rc, i = 0; + int rc = 0, i = 0; struct netdev_hw_addr *ha; if (bp->state != BNX2X_STATE_OPEN) { @@ -883,6 +883,15 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) /* Get Rx mode requested */ DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags); + /* We support PFVF_MAX_MULTICAST_PER_VF mcast addresses tops */ + if (netdev_mc_count(dev) > PFVF_MAX_MULTICAST_PER_VF) { + DP(NETIF_MSG_IFUP, + "VF supports not more than %d multicast MAC addresses\n", + PFVF_MAX_MULTICAST_PER_VF); + rc = -EINVAL; + goto out; + } + netdev_for_each_mc_addr(ha, dev) { DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n", bnx2x_mc_addr(ha)); @@ -890,16 +899,6 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) i++; } - /* We support four PFVF_MAX_MULTICAST_PER_VF mcast - * addresses tops - */ - if (i >= PFVF_MAX_MULTICAST_PER_VF) { - DP(NETIF_MSG_IFUP, - "VF supports not more than %d multicast MAC addresses\n", - PFVF_MAX_MULTICAST_PER_VF); - return -EINVAL; - } - req->n_multicast = i; req->flags |= VFPF_SET_Q_FILTERS_MULTICAST_CHANGED; req->vf_qid = 0; @@ -924,7 +923,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) out: bnx2x_vfpf_finalize(bp, &req->first_tlv); - return 0; + return rc; } /* request pf to add a vlan for the vf */ -- GitLab From cdef3be8c7bb2caabfe9886ac88f60b0c1dcd370 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:31 +0100 Subject: [PATCH 0255/1398] bnx2x: fix detection of VLAN filtering feature for VF [ Upstream commit 83bd9eb8fc69cdd5135ed6e1f066adc8841800fd ] VFs are currently missing the VLAN filtering feature, because we were checking the PF's acquire response before actually performing the acquire. Fix it by setting the feature flag later when we have the PF response. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index dcc620549f78..5d958b5bb8b1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13293,17 +13293,15 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA; - /* VF with OLD Hypervisor or old PF do not support filtering */ if (IS_PF(bp)) { if (chip_is_e1x) bp->accept_any_vlan = true; else dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; -#ifdef CONFIG_BNX2X_SRIOV - } else if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) { - dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; -#endif } + /* For VF we'll know whether to enable VLAN filtering after + * getting a response to CHANNEL_TLV_ACQUIRE from PF. + */ dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX; dev->features |= NETIF_F_HIGHDMA; @@ -14006,6 +14004,14 @@ static int bnx2x_init_one(struct pci_dev *pdev, rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count); if (rc) goto init_one_freemem; + +#ifdef CONFIG_BNX2X_SRIOV + /* VF with OLD Hypervisor or old PF do not support filtering */ + if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) { + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + } +#endif } /* Enable SRIOV if capability found in configuration space */ -- GitLab From d8f147ac75bbca8a6efadb0beb451f3717b1dd26 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:32 +0100 Subject: [PATCH 0256/1398] bnx2x: do not rollback VF MAC/VLAN filters we did not configure [ Upstream commit 78d5505432436516456c12abbe705ec8dee7ee2b ] On failure to configure a VF MAC/VLAN filter we should not attempt to rollback filters that we failed to configure with -EEXIST. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 8 +++++++- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 3f77d0863543..c6e059119b22 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -434,7 +434,9 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp, /* Add/Remove the filter */ rc = bnx2x_config_vlan_mac(bp, &ramrod); - if (rc && rc != -EEXIST) { + if (rc == -EEXIST) + return 0; + if (rc) { BNX2X_ERR("Failed to %s %s\n", filter->add ? "add" : "delete", (filter->type == BNX2X_VF_FILTER_VLAN_MAC) ? @@ -444,6 +446,8 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp, return rc; } + filter->applied = true; + return 0; } @@ -471,6 +475,8 @@ int bnx2x_vf_mac_vlan_config_list(struct bnx2x *bp, struct bnx2x_virtf *vf, BNX2X_ERR("Managed only %d/%d filters - rolling back\n", i, filters->count + 1); while (--i >= 0) { + if (!filters->filters[i].applied) + continue; filters->filters[i].add = !filters->filters[i].add; bnx2x_vf_mac_vlan_config(bp, vf, qid, &filters->filters[i], diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h index 7a6d406f4c11..888d0b6632e8 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h @@ -114,6 +114,7 @@ struct bnx2x_vf_mac_vlan_filter { (BNX2X_VF_FILTER_MAC | BNX2X_VF_FILTER_VLAN) /*shortcut*/ bool add; + bool applied; u8 *mac; u16 vid; }; -- GitLab From 742e67d2b7eec559e2dcec47970a3471b49f0b5e Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Mar 2017 08:57:35 -0800 Subject: [PATCH 0257/1398] rds: tcp: Sequence teardown of listen and acceptor sockets to avoid races [ Upstream commit b21dd4506b71bdb9c5a20e759255cd2513ea7ebe ] Commit a93d01f5777e ("RDS: TCP: avoid bad page reference in rds_tcp_listen_data_ready") added the function rds_tcp_listen_sock_def_readable() to handle the case when a partially set-up acceptor socket drops into rds_tcp_listen_data_ready(). However, if the listen socket (rtn->rds_tcp_listen_sock) is itself going through a tear-down via rds_tcp_listen_stop(), the (*ready)() will be null and we would hit a panic of the form BUG: unable to handle kernel NULL pointer dereference at (null) IP: (null) : ? rds_tcp_listen_data_ready+0x59/0xb0 [rds_tcp] tcp_data_queue+0x39d/0x5b0 tcp_rcv_established+0x2e5/0x660 tcp_v4_do_rcv+0x122/0x220 tcp_v4_rcv+0x8b7/0x980 : In the above case, it is not fatal to encounter a NULL value for ready- we should just drop the packet and let the flush of the acceptor thread finish gracefully. In general, the tear-down sequence for listen() and accept() socket that is ensured by this commit is: rtn->rds_tcp_listen_sock = NULL; /* prevent any new accepts */ In rds_tcp_listen_stop(): serialize with, and prevent, further callbacks using lock_sock() flush rds_wq flush acceptor workq sock_release(listen socket) Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/rds/tcp.c | 15 ++++++++++----- net/rds/tcp.h | 2 +- net/rds/tcp_listen.c | 9 +++++++-- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 20e2923dc827..78f976d32018 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -478,9 +478,10 @@ static void __net_exit rds_tcp_exit_net(struct net *net) * we do need to clean up the listen socket here. */ if (rtn->rds_tcp_listen_sock) { - rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); + struct socket *lsock = rtn->rds_tcp_listen_sock; + rtn->rds_tcp_listen_sock = NULL; - flush_work(&rtn->rds_tcp_accept_w); + rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); } } @@ -517,10 +518,10 @@ static void rds_tcp_kill_sock(struct net *net) struct rds_tcp_connection *tc, *_tc; LIST_HEAD(tmp_list); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct socket *lsock = rtn->rds_tcp_listen_sock; - rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); rtn->rds_tcp_listen_sock = NULL; - flush_work(&rtn->rds_tcp_accept_w); + rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); @@ -540,8 +541,12 @@ static void rds_tcp_kill_sock(struct net *net) void *rds_tcp_listen_sock_def_readable(struct net *net) { struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct socket *lsock = rtn->rds_tcp_listen_sock; + + if (!lsock) + return NULL; - return rtn->rds_tcp_listen_sock->sk->sk_user_data; + return lsock->sk->sk_user_data; } static int rds_tcp_dev_event(struct notifier_block *this, diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 9a1cc8906576..56ea6620fcf9 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk); /* tcp_listen.c */ struct socket *rds_tcp_listen_init(struct net *); -void rds_tcp_listen_stop(struct socket *); +void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); void rds_tcp_listen_data_ready(struct sock *sk); int rds_tcp_accept_one(struct socket *sock); int rds_tcp_keepalive(struct socket *sock); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 525b624fec8b..185a56b1e29c 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -227,6 +227,9 @@ void rds_tcp_listen_data_ready(struct sock *sk) * before it has been accepted and the accepter has set up their * data_ready.. we only want to queue listen work for our listening * socket + * + * (*ready)() may be null if we are racing with netns delete, and + * the listen socket is being torn down. */ if (sk->sk_state == TCP_LISTEN) rds_tcp_accept_work(sk); @@ -235,7 +238,8 @@ void rds_tcp_listen_data_ready(struct sock *sk) out: read_unlock_bh(&sk->sk_callback_lock); - ready(sk); + if (ready) + ready(sk); } struct socket *rds_tcp_listen_init(struct net *net) @@ -275,7 +279,7 @@ struct socket *rds_tcp_listen_init(struct net *net) return NULL; } -void rds_tcp_listen_stop(struct socket *sock) +void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor) { struct sock *sk; @@ -296,5 +300,6 @@ void rds_tcp_listen_stop(struct socket *sock) /* wait for accepts to stop and close the socket */ flush_workqueue(rds_wq); + flush_work(acceptor); sock_release(sock); } -- GitLab From f3b1f93ed23b0e3bdd99c8304726041fc68b2055 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Sun, 5 Mar 2017 12:18:41 -0600 Subject: [PATCH 0258/1398] ibmvnic: Fix overflowing firmware/hardware TX queue [ Upstream commit 142c0ac445792c492579cb01f1cfd4e32e6dfcce ] Use a counter to track the number of outstanding transmissions sent that have not received completions. If the counter reaches the maximum number of queue entries, stop transmissions on that queue. As we receive more completions from firmware, wake the queue once the counter reaches an acceptable level. This patch prevents hardware/firmware TX queue from filling up and and generating errors. Since incorporating this fix, internal testing has reported that these firmware errors have stopped. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmvnic.c | 27 ++++++++++++++++++++++++++- drivers/net/ethernet/ibm/ibmvnic.h | 1 + 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b8778e7b1f79..2eec76e88ead 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -705,6 +705,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; struct device *dev = &adapter->vdev->dev; struct ibmvnic_tx_buff *tx_buff = NULL; + struct ibmvnic_sub_crq_queue *tx_scrq; struct ibmvnic_tx_pool *tx_pool; unsigned int tx_send_failed = 0; unsigned int tx_map_failed = 0; @@ -724,6 +725,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) int ret = 0; tx_pool = &adapter->tx_pool[queue_num]; + tx_scrq = adapter->tx_scrq[queue_num]; txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb)); handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + be32_to_cpu(adapter->login_rsp_buf-> @@ -826,6 +828,14 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ret = NETDEV_TX_BUSY; goto out; } + + atomic_inc(&tx_scrq->used); + + if (atomic_read(&tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) { + netdev_info(netdev, "Stopping queue %d\n", queue_num); + netif_stop_subqueue(netdev, queue_num); + } + tx_packets++; tx_bytes += skb->len; txq->trans_start = jiffies; @@ -1220,6 +1230,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter scrq->adapter = adapter; scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs); scrq->cur = 0; + atomic_set(&scrq->used, 0); scrq->rx_skb_top = NULL; spin_lock_init(&scrq->lock); @@ -1368,8 +1379,22 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, DMA_TO_DEVICE); } - if (txbuff->last_frag) + if (txbuff->last_frag) { + atomic_dec(&scrq->used); + + if (atomic_read(&scrq->used) <= + (adapter->req_tx_entries_per_subcrq / 2) && + netif_subqueue_stopped(adapter->netdev, + txbuff->skb)) { + netif_wake_subqueue(adapter->netdev, + scrq->pool_index); + netdev_dbg(adapter->netdev, + "Started queue %d\n", + scrq->pool_index); + } + dev_kfree_skb_any(txbuff->skb); + } adapter->tx_pool[pool].free_map[adapter->tx_pool[pool]. producer_index] = index; diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index dd775d951b73..892eda346e54 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -863,6 +863,7 @@ struct ibmvnic_sub_crq_queue { spinlock_t lock; struct sk_buff *rx_skb_top; struct ibmvnic_adapter *adapter; + atomic_t used; }; struct ibmvnic_long_term_buff { -- GitLab From f191e15e4acdd43d7476b3ad16a3fe58d948be07 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Sun, 5 Mar 2017 12:18:42 -0600 Subject: [PATCH 0259/1398] ibmvnic: Allocate number of rx/tx buffers agreed on by firmware [ Upstream commit 068d9f90a6978c3e3a662d9e85204a7d6be240d2 ] The amount of TX/RX buffers that the vNIC driver currently allocates is different from the amount agreed upon in negotiation with firmware. Correct that by allocating the requested number of buffers confirmed by firmware. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmvnic.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2eec76e88ead..7c6c1468628b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -404,7 +404,7 @@ static int ibmvnic_open(struct net_device *netdev) send_map_query(adapter); for (i = 0; i < rxadd_subcrqs; i++) { init_rx_pool(adapter, &adapter->rx_pool[i], - IBMVNIC_BUFFS_PER_POOL, i, + adapter->req_rx_add_entries_per_subcrq, i, be64_to_cpu(size_array[i]), 1); if (alloc_rx_pool(adapter, &adapter->rx_pool[i])) { dev_err(dev, "Couldn't alloc rx pool\n"); @@ -419,23 +419,23 @@ static int ibmvnic_open(struct net_device *netdev) for (i = 0; i < tx_subcrqs; i++) { tx_pool = &adapter->tx_pool[i]; tx_pool->tx_buff = - kcalloc(adapter->max_tx_entries_per_subcrq, + kcalloc(adapter->req_tx_entries_per_subcrq, sizeof(struct ibmvnic_tx_buff), GFP_KERNEL); if (!tx_pool->tx_buff) goto tx_pool_alloc_failed; if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff, - adapter->max_tx_entries_per_subcrq * + adapter->req_tx_entries_per_subcrq * adapter->req_mtu)) goto tx_ltb_alloc_failed; tx_pool->free_map = - kcalloc(adapter->max_tx_entries_per_subcrq, + kcalloc(adapter->req_tx_entries_per_subcrq, sizeof(int), GFP_KERNEL); if (!tx_pool->free_map) goto tx_fm_alloc_failed; - for (j = 0; j < adapter->max_tx_entries_per_subcrq; j++) + for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++) tx_pool->free_map[j] = j; tx_pool->consumer_index = 0; @@ -746,7 +746,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_pool->consumer_index = (tx_pool->consumer_index + 1) % - adapter->max_tx_entries_per_subcrq; + adapter->req_tx_entries_per_subcrq; tx_buff = &tx_pool->tx_buff[index]; tx_buff->skb = skb; @@ -819,7 +819,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) if (tx_pool->consumer_index == 0) tx_pool->consumer_index = - adapter->max_tx_entries_per_subcrq - 1; + adapter->req_tx_entries_per_subcrq - 1; else tx_pool->consumer_index--; @@ -1400,7 +1400,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, producer_index] = index; adapter->tx_pool[pool].producer_index = (adapter->tx_pool[pool].producer_index + 1) % - adapter->max_tx_entries_per_subcrq; + adapter->req_tx_entries_per_subcrq; } /* remove tx_comp scrq*/ next->tx_comp.first = 0; -- GitLab From 05a59bc2f3c0e3414e45b0cc642e1548b8eb3e75 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 5 Mar 2017 12:34:53 -0800 Subject: [PATCH 0260/1398] ipv6: reorder icmpv6_init() and ip6_mr_init() [ Upstream commit 15e668070a64bb97f102ad9cf3bccbca0545cda8 ] Andrey reported the following kernel crash: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 14446 Comm: syz-executor6 Not tainted 4.10.0+ #82 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88001f311700 task.stack: ffff88001f6e8000 RIP: 0010:ip6mr_sk_done+0x15a/0x3d0 net/ipv6/ip6mr.c:1618 RSP: 0018:ffff88001f6ef418 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 1ffff10003edde8c RCX: ffffc900043ee000 RDX: 0000000000000004 RSI: ffffffff83e3b3f8 RDI: 0000000000000020 RBP: ffff88001f6ef508 R08: fffffbfff0dcc5d8 R09: 0000000000000000 R10: ffffffff86e62ec0 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff88001f6ef4e0 R15: ffff8800380a0040 FS: 00007f7a52cec700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000061c500 CR3: 000000001f1ae000 CR4: 00000000000006f0 DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Call Trace: rawv6_close+0x4c/0x80 net/ipv6/raw.c:1217 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:432 sock_release+0x8d/0x1e0 net/socket.c:597 __sock_create+0x39d/0x880 net/socket.c:1226 sock_create_kern+0x3f/0x50 net/socket.c:1243 inet_ctl_sock_create+0xbb/0x280 net/ipv4/af_inet.c:1526 icmpv6_sk_init+0x163/0x500 net/ipv6/icmp.c:954 ops_init+0x10a/0x550 net/core/net_namespace.c:115 setup_net+0x261/0x660 net/core/net_namespace.c:291 copy_net_ns+0x27e/0x540 net/core/net_namespace.c:396 9pnet_virtio: no channels available for device ./file1 create_new_namespaces+0x437/0x9b0 kernel/nsproxy.c:106 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205 SYSC_unshare kernel/fork.c:2281 [inline] SyS_unshare+0x64e/0x1000 kernel/fork.c:2231 entry_SYSCALL_64_fastpath+0x1f/0xc2 This is because net->ipv6.mr6_tables is not initialized at that point, ip6mr_rules_init() is not called yet, therefore on the error path when we iterator the list, we trigger this oops. Fix this by reordering ip6mr_rules_init() before icmpv6_sk_init(). Reported-by: Andrey Konovalov Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv6/af_inet6.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 46ad699937fd..8285a1c108c9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -909,12 +909,12 @@ static int __init inet6_init(void) err = register_pernet_subsys(&inet6_net_ops); if (err) goto register_pernet_fail; - err = icmpv6_init(); - if (err) - goto icmp_fail; err = ip6_mr_init(); if (err) goto ipmr_fail; + err = icmpv6_init(); + if (err) + goto icmp_fail; err = ndisc_init(); if (err) goto ndisc_fail; @@ -1044,10 +1044,10 @@ static int __init inet6_init(void) ndisc_cleanup(); ndisc_fail: ip6_mr_cleanup(); -ipmr_fail: - icmpv6_cleanup(); icmp_fail: unregister_pernet_subsys(&inet6_net_ops); +ipmr_fail: + icmpv6_cleanup(); register_pernet_fail: sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); -- GitLab From e46126e51ee88600bd8dfb5b6e3710750c1ded2f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 5 Mar 2017 19:14:07 +0200 Subject: [PATCH 0261/1398] crypto: s5p-sss - Fix completing crypto request in IRQ handler [ Upstream commit 07de4bc88ce6a4d898cad9aa4c99c1df7e87702d ] In a regular interrupt handler driver was finishing the crypt/decrypt request by calling complete on crypto request. This is disallowed since converting to skcipher in commit b286d8b1a690 ("crypto: skcipher - Add skcipher walk interface") and causes a warning: WARNING: CPU: 0 PID: 0 at crypto/skcipher.c:430 skcipher_walk_first+0x13c/0x14c The interrupt is marked shared but in fact there are no other users sharing it. Thus the simplest solution seems to be to just use a threaded interrupt handler, after converting it to oneshot. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/s5p-sss.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index dce1af0ce85c..a668286d62cb 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -805,8 +805,9 @@ static int s5p_aes_probe(struct platform_device *pdev) dev_warn(dev, "feed control interrupt is not available.\n"); goto err_irq; } - err = devm_request_irq(dev, pdata->irq_fc, s5p_aes_interrupt, - IRQF_SHARED, pdev->name, pdev); + err = devm_request_threaded_irq(dev, pdata->irq_fc, NULL, + s5p_aes_interrupt, IRQF_ONESHOT, + pdev->name, pdev); if (err < 0) { dev_warn(dev, "feed control interrupt is not available.\n"); goto err_irq; -- GitLab From 567df7459e127ded8e50add7ab938335b2dfd92f Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Mon, 6 Mar 2017 15:20:51 -0500 Subject: [PATCH 0262/1398] i2c: riic: fix restart condition [ Upstream commit 2501c1bb054290679baad0ff7f4f07c714251f4c ] While modifying the driver to use the STOP interrupt, the completion of the intermediate transfers need to wake the driver back up in order to initiate the next transfer (restart condition). Otherwise you get never ending interrupts and only the first transfer sent. Fixes: 71ccea095ea1 ("i2c: riic: correctly finish transfers") Reported-by: Simon Horman Signed-off-by: Chris Brandt Tested-by: Simon Horman Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-riic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c index 8f11d347b3ec..c811af4c8d81 100644 --- a/drivers/i2c/busses/i2c-riic.c +++ b/drivers/i2c/busses/i2c-riic.c @@ -218,8 +218,12 @@ static irqreturn_t riic_tend_isr(int irq, void *data) } if (riic->is_last || riic->err) { - riic_clear_set_bit(riic, 0, ICIER_SPIE, RIIC_ICIER); + riic_clear_set_bit(riic, ICIER_TEIE, ICIER_SPIE, RIIC_ICIER); writeb(ICCR2_SP, riic->base + RIIC_ICCR2); + } else { + /* Transfer is complete, but do not send STOP */ + riic_clear_set_bit(riic, ICIER_TEIE, 0, RIIC_ICIER); + complete(&riic->msg_done); } return IRQ_HANDLED; -- GitLab From bc8859174d984975d10f47c55ade762bbf46b5f1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Feb 2017 18:13:59 +0800 Subject: [PATCH 0263/1398] blk-mq: initialize mq kobjects in blk_mq_init_allocated_queue() [ Upstream commit 737f98cfe7de8df7433a4d846850aa8efa44bd48 ] Both q->mq_kobj and sw queues' kobjects should have been initialized once, instead of doing that each add_disk context. Also this patch removes clearing of ctx in blk_mq_init_cpu_queues() because percpu allocator fills zero to allocated variable. This patch fixes one issue[1] reported from Omar. [1] kernel wearning when doing unbind/bind on one scsi-mq device [ 19.347924] kobject (ffff8800791ea0b8): tried to init an initialized object, something is seriously wrong. [ 19.349781] CPU: 1 PID: 84 Comm: kworker/u8:1 Not tainted 4.10.0-rc7-00210-g53f39eeaa263 #34 [ 19.350686] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-20161122_114906-anatol 04/01/2014 [ 19.350920] Workqueue: events_unbound async_run_entry_fn [ 19.350920] Call Trace: [ 19.350920] dump_stack+0x63/0x83 [ 19.350920] kobject_init+0x77/0x90 [ 19.350920] blk_mq_register_dev+0x40/0x130 [ 19.350920] blk_register_queue+0xb6/0x190 [ 19.350920] device_add_disk+0x1ec/0x4b0 [ 19.350920] sd_probe_async+0x10d/0x1c0 [sd_mod] [ 19.350920] async_run_entry_fn+0x48/0x150 [ 19.350920] process_one_work+0x1d0/0x480 [ 19.350920] worker_thread+0x48/0x4e0 [ 19.350920] kthread+0x101/0x140 [ 19.350920] ? process_one_work+0x480/0x480 [ 19.350920] ? kthread_create_on_node+0x60/0x60 [ 19.350920] ret_from_fork+0x2c/0x40 Cc: Omar Sandoval Signed-off-by: Ming Lei Tested-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/blk-mq-sysfs.c | 4 +--- block/blk-mq.c | 4 +++- block/blk-mq.h | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 01fb455d3377..8c0894e0713b 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -429,7 +429,7 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) kobject_init(&hctx->kobj, &blk_mq_hw_ktype); } -static void blk_mq_sysfs_init(struct request_queue *q) +void blk_mq_sysfs_init(struct request_queue *q) { struct blk_mq_ctx *ctx; int cpu; @@ -449,8 +449,6 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q) blk_mq_disable_hotplug(); - blk_mq_sysfs_init(q); - ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); if (ret < 0) goto out; diff --git a/block/blk-mq.c b/block/blk-mq.c index 7b597ec4e9c5..10f8f94b7f20 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1707,7 +1707,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); struct blk_mq_hw_ctx *hctx; - memset(__ctx, 0, sizeof(*__ctx)); __ctx->cpu = i; spin_lock_init(&__ctx->lock); INIT_LIST_HEAD(&__ctx->rq_list); @@ -1970,6 +1969,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!q->queue_ctx) goto err_exit; + /* init q->mq_kobj and sw queues' kobjects */ + blk_mq_sysfs_init(q); + q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)), GFP_KERNEL, set->numa_node); if (!q->queue_hw_ctx) diff --git a/block/blk-mq.h b/block/blk-mq.h index e5d25249028c..c55bcf67b956 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -50,6 +50,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, /* * sysfs helpers */ +extern void blk_mq_sysfs_init(struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); -- GitLab From 36a14374b5684ad2a09d9f89adef3ab5e114cf0d Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 6 Mar 2017 11:23:35 +0100 Subject: [PATCH 0264/1398] zram: set physical queue limits to avoid array out of bounds accesses [ Upstream commit 0bc315381fe9ed9fb91db8b0e82171b645ac008f ] zram can handle at most SECTORS_PER_PAGE sectors in a bio's bvec. When using the NVMe over Fabrics loopback target which potentially sends a huge bulk of pages attached to the bio's bvec this results in a kernel panic because of array out of bounds accesses in zram_decompress_page(). Signed-off-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Reviewed-by: Sergey Senozhatsky Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/zram/zram_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index c9914d653968..b7c0b69a02f5 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1286,6 +1286,8 @@ static int zram_add(void) blk_queue_io_min(zram->disk->queue, PAGE_SIZE); blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); zram->disk->queue->limits.discard_granularity = PAGE_SIZE; + zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE; + zram->disk->queue->limits.chunk_sectors = 0; blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); /* * zram_bio_discard() will clear all logical blocks if logical block -- GitLab From 8818eb851c2d9c9e503208f5b054e42b4a83fe24 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 3 Mar 2017 21:44:00 +0100 Subject: [PATCH 0265/1398] netfilter: don't track fragmented packets [ Upstream commit 7b4fdf77a450ec0fdcb2f677b080ddbf2c186544 ] Andrey reports syzkaller splat caused by NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); in ipv4 nat. But this assertion (and the comment) are wrong, this function does see fragments when IP_NODEFRAG setsockopt is used. As conntrack doesn't track packets without complete l4 header, only the first fragment is tracked. Because applying nat to first packet but not the rest makes no sense this also turns off tracking of all fragments. Reported-by: Andrey Konovalov Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 ++++ net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 713c09a74b90..0c9ded247ebb 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -158,6 +158,10 @@ static unsigned int ipv4_conntrack_local(void *priv, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; + + if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */ + return NF_ACCEPT; + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index f8aad03d674b..6f5e8d01b876 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -255,11 +255,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, /* maniptype == SRC for postrouting. */ enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook); - /* We never see fragments: conntrack defrags on pre-routing - * and local-out, and nf_nat_out protects post-routing. - */ - NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); - ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would * have dropped it. Hence it's the user's responsibilty to -- GitLab From e8d5b115c6ea552d720315b4d6413ea6650d9966 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Mar 2017 14:56:05 +0100 Subject: [PATCH 0266/1398] axonram: Fix gendisk handling [ Upstream commit 672a2c87c83649fb0167202342ce85af9a3b4f1c ] It is invalid to call del_gendisk() when disk->queue is NULL. Fix error handling in axon_ram_probe() to avoid doing that. Also del_gendisk() does not drop a reference to gendisk allocated by alloc_disk(). That has to be done by put_disk(). Add that call where needed. Reported-by: Dan Carpenter Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/sysdev/axonram.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ada29eaed6e2..f523ac883150 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -274,7 +274,9 @@ static int axon_ram_probe(struct platform_device *device) if (bank->disk->major > 0) unregister_blkdev(bank->disk->major, bank->disk->disk_name); - del_gendisk(bank->disk); + if (bank->disk->flags & GENHD_FL_UP) + del_gendisk(bank->disk); + put_disk(bank->disk); } device->dev.platform_data = NULL; if (bank->io_addr != 0) @@ -299,6 +301,7 @@ axon_ram_remove(struct platform_device *device) device_remove_file(&device->dev, &dev_attr_ecc); free_irq(bank->irq_id, device); del_gendisk(bank->disk); + put_disk(bank->disk); iounmap((void __iomem *) bank->io_addr); kfree(bank); -- GitLab From 92527dc32a69030dc3334e3da0bdb18023f2cefe Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Wed, 1 Mar 2017 15:53:29 +0800 Subject: [PATCH 0267/1398] drm/amd/amdgpu: fix console deadlock if late init failed [ Upstream commit c085bd5119d5d0bdf3ef591a5563566be7dedced ] Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e41d4baebf86..ce9797b6f9c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2020,8 +2020,11 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } r = amdgpu_late_init(adev); - if (r) + if (r) { + if (fbcon) + console_unlock(); return r; + } /* pin cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { -- GitLab From d28faebe8b091bec9bdb287b4ab029bee3a7d716 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 22 Feb 2017 15:43:59 +1100 Subject: [PATCH 0268/1398] powerpc/powernv/ioda2: Gracefully fail if too many TCE levels requested [ Upstream commit 7aafac11e308d37ed3c509829bb43d80c1811ac3 ] The IODA2 specification says that a 64 DMA address cannot use top 4 bits (3 are reserved and one is a "TVE select"); bottom page_shift bits cannot be used for multilevel table addressing either. The existing IODA2 table allocation code aligns the minimum TCE table size to PAGE_SIZE so in the case of 64K system pages and 4K IOMMU pages, we have 64-4-12=48 bits. Since 64K page stores 8192 TCEs, i.e. needs 13 bits, the maximum number of levels is 48/13 = 3 so we physically cannot address more and EEH happens on DMA accesses. This adds a check that too many levels were requested. It is still possible to have 5 levels in the case of 4K system page size. Signed-off-by: Alexey Kardashevskiy Acked-by: Gavin Shan Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/pci-ioda.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index dcdfee0cd4f2..f602307a4386 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2623,6 +2623,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); + if ((level_shift - 3) * levels + page_shift >= 60) + return -EINVAL; + /* Allocate TCE table */ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, levels, tce_table_size, &offset, &total_allocated); -- GitLab From 62c12d671c49e15cd847380b7692e60711f73f09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Wed, 8 Mar 2017 20:18:09 -0500 Subject: [PATCH 0269/1398] EDAC, i5000, i5400: Fix use of MTR_DRAM_WIDTH macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e61555c29c28a4a3b6ba6207f4a0883ee236004d ] The MTR_DRAM_WIDTH macro returns the data width. It is sometimes used as if it returned a boolean true if the width if 8. Fix the tests where MTR_DRAM_WIDTH is misused. Signed-off-by: Jérémy Lefaure Cc: linux-edac Link: http://lkml.kernel.org/r/20170309011809.8340-1-jeremy.lefaure@lse.epita.fr Signed-off-by: Borislav Petkov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/edac/i5000_edac.c | 2 +- drivers/edac/i5400_edac.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 72e07e3cf718..2a09be5f4f86 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1293,7 +1293,7 @@ static int i5000_init_csrows(struct mem_ctl_info *mci) dimm->mtype = MEM_FB_DDR2; /* ask what device type on this row */ - if (MTR_DRAM_WIDTH(mtr)) + if (MTR_DRAM_WIDTH(mtr) == 8) dimm->dtype = DEV_X8; else dimm->dtype = DEV_X4; diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 6ef6ad1ba16e..029dfe07b734 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -1207,13 +1207,14 @@ static int i5400_init_dimms(struct mem_ctl_info *mci) dimm->nr_pages = size_mb << 8; dimm->grain = 8; - dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4; + dimm->dtype = MTR_DRAM_WIDTH(mtr) == 8 ? + DEV_X8 : DEV_X4; dimm->mtype = MEM_FB_DDR2; /* * The eccc mechanism is SDDC (aka SECC), with * is similar to Chipkill. */ - dimm->edac_mode = MTR_DRAM_WIDTH(mtr) ? + dimm->edac_mode = MTR_DRAM_WIDTH(mtr) == 8 ? EDAC_S8ECD8ED : EDAC_S4ECD4ED; ndimms++; } -- GitLab From 7290cfeb4436e62d74f38af4f733a0a35b09c4cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Wed, 28 Jun 2017 20:57:29 -0400 Subject: [PATCH 0270/1398] EDAC, i5000, i5400: Fix definition of NRECMEMB register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a8c8261425649da58bdf08221570e5335ad33a31 ] In the i5000 and i5400 drivers, the NRECMEMB register is defined as a 16-bit value, which results in wrong shifts in the code, as reported by sparse. In the datasheets ([1], section 3.9.22.20 and [2], section 3.9.22.21), this register is a 32-bit register. A u32 value for the register fixes the wrong shifts warnings and matches the datasheet. Also fix the mask to access to the CAS bits [27:16] in the i5000 driver. [1]: https://www.intel.com/content/dam/doc/datasheet/5000p-5000v-5000z-chipset-memory-controller-hub-datasheet.pdf [2]: https://www.intel.se/content/dam/doc/datasheet/5400-chipset-memory-controller-hub-datasheet.pdf Signed-off-by: Jérémy Lefaure Cc: linux-edac Link: http://lkml.kernel.org/r/20170629005729.8478-1-jeremy.lefaure@lse.epita.fr Signed-off-by: Borislav Petkov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/edac/i5000_edac.c | 6 +++--- drivers/edac/i5400_edac.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 2a09be5f4f86..16e0eb523439 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -227,7 +227,7 @@ #define NREC_RDWR(x) (((x)>>11) & 1) #define NREC_RANK(x) (((x)>>8) & 0x7) #define NRECMEMB 0xC0 -#define NREC_CAS(x) (((x)>>16) & 0xFFFFFF) +#define NREC_CAS(x) (((x)>>16) & 0xFFF) #define NREC_RAS(x) ((x) & 0x7FFF) #define NRECFGLOG 0xC4 #define NREEECFBDA 0xC8 @@ -371,7 +371,7 @@ struct i5000_error_info { /* These registers are input ONLY if there was a * Non-Recoverable Error */ u16 nrecmema; /* Non-Recoverable Mem log A */ - u16 nrecmemb; /* Non-Recoverable Mem log B */ + u32 nrecmemb; /* Non-Recoverable Mem log B */ }; @@ -407,7 +407,7 @@ static void i5000_get_error_info(struct mem_ctl_info *mci, NERR_FAT_FBD, &info->nerr_fat_fbd); pci_read_config_word(pvt->branchmap_werrors, NRECMEMA, &info->nrecmema); - pci_read_config_word(pvt->branchmap_werrors, + pci_read_config_dword(pvt->branchmap_werrors, NRECMEMB, &info->nrecmemb); /* Clear the error bits, by writing them back */ diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 029dfe07b734..2ea2f32e608b 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -368,7 +368,7 @@ struct i5400_error_info { /* These registers are input ONLY if there was a Non-Rec Error */ u16 nrecmema; /* Non-Recoverable Mem log A */ - u16 nrecmemb; /* Non-Recoverable Mem log B */ + u32 nrecmemb; /* Non-Recoverable Mem log B */ }; @@ -458,7 +458,7 @@ static void i5400_get_error_info(struct mem_ctl_info *mci, NERR_FAT_FBD, &info->nerr_fat_fbd); pci_read_config_word(pvt->branchmap_werrors, NRECMEMA, &info->nrecmema); - pci_read_config_word(pvt->branchmap_werrors, + pci_read_config_dword(pvt->branchmap_werrors, NRECMEMB, &info->nrecmemb); /* Clear the error bits, by writing them back */ -- GitLab From 55042e28b9854816ee69ef00903b5a3f2339a64f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 15 Nov 2017 18:17:07 +0900 Subject: [PATCH 0271/1398] kbuild: pkg: use --transform option to prefix paths in tar [ Upstream commit 2dbc644ac62bbcb9ee78e84719953f611be0413d ] For rpm-pkg and deb-pkg, a source tar file is created. All paths in the archive must be prefixed with the base name of the tar so that everything is contained in the directory when you extract it. Currently, scripts/package/Makefile uses a symlink for that, and removes it after the tar is created. If you terminate the build during the tar creation, the symlink is left over. Then, at the next package build, you will see a warning like follows: ln: '.' and 'kernel-4.14.0+/.' are the same file It is possible to fix it by adding -n (--no-dereference) option to the "ln" command, but a cleaner way is to use --transform option of "tar" command. This option is GNU extension, but it should not hurt to use it in the Linux build system. The 'S' flag is needed to exclude symlinks from the path fixup. Without it, symlinks in the kernel are broken. Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- scripts/package/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/package/Makefile b/scripts/package/Makefile index 71b4a8af9d4d..7badec3498b8 100644 --- a/scripts/package/Makefile +++ b/scripts/package/Makefile @@ -39,10 +39,9 @@ if test "$(objtree)" != "$(srctree)"; then \ false; \ fi ; \ $(srctree)/scripts/setlocalversion --save-scmversion; \ -ln -sf $(srctree) $(2); \ tar -cz $(RCS_TAR_IGNORE) -f $(2).tar.gz \ - $(addprefix $(2)/,$(TAR_CONTENT) $(3)); \ -rm -f $(2) $(objtree)/.scmversion + --transform 's:^:$(2)/:S' $(TAR_CONTENT) $(3); \ +rm -f $(objtree)/.scmversion # rpm-pkg # --------------------------------------------------------------------------- -- GitLab From b96d06e6d6e0aaef37e0cd79b7b0bfc79745fa50 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 14 Nov 2017 20:38:07 +0900 Subject: [PATCH 0272/1398] coccinelle: fix parallel build with CHECK=scripts/coccicheck [ Upstream commit d7059ca0147adcd495f3c5b41f260e1ac55bb679 ] The command "make -j8 C=1 CHECK=scripts/coccicheck" produces lots of "coccicheck failed" error messages. Julia Lawall explained the Coccinelle behavior as follows: "The problem on the Coccinelle side is that it uses a subdirectory with the name of the semantic patch to store standard output and standard error for the different threads. I didn't want to use a name with the pid, so that one could easily find this information while Coccinelle is running. Normally the subdirectory is cleaned up when Coccinelle completes, so there is only one of them at a time. Maybe it is best to just add the pid. There is the risk that these subdirectories will accumulate if Coccinelle crashes in a way such that they don't get cleaned up, but Coccinelle could print a warning if it detects this case, rather than failing." When scripts/coccicheck is used as CHECK tool and -j option is given to Make, the whole of build process runs in parallel. So, multiple processes try to get access to the same subdirectory. I notice spatch creates the subdirectory only when it runs in parallel (i.e. --jobs is given and is greater than 1). Setting NPROC=1 is a reasonable solution; spatch does not create the subdirectory. Besides, ONLINE=1 mode takes a single file input for each spatch invocation, so there is no reason to parallelize it in the first place. Signed-off-by: Masahiro Yamada Acked-by: Julia Lawall Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- scripts/coccicheck | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/scripts/coccicheck b/scripts/coccicheck index ec487b8e7051..c36b04b41686 100755 --- a/scripts/coccicheck +++ b/scripts/coccicheck @@ -29,12 +29,6 @@ else VERBOSE=0 fi -if [ -z "$J" ]; then - NPROC=$(getconf _NPROCESSORS_ONLN) -else - NPROC="$J" -fi - FLAGS="--very-quiet" # You can use SPFLAGS to append extra arguments to coccicheck or override any @@ -69,6 +63,9 @@ if [ "$C" = "1" -o "$C" = "2" ]; then # Take only the last argument, which is the C file to test shift $(( $# - 1 )) OPTIONS="$COCCIINCLUDE $1" + + # No need to parallelize Coccinelle since this mode takes one input file. + NPROC=1 else ONLINE=0 if [ "$KBUILD_EXTMOD" = "" ] ; then @@ -76,6 +73,12 @@ else else OPTIONS="--dir $KBUILD_EXTMOD $COCCIINCLUDE" fi + + if [ -z "$J" ]; then + NPROC=$(getconf _NPROCESSORS_ONLN) + else + NPROC="$J" + fi fi if [ "$KBUILD_EXTMOD" != "" ] ; then -- GitLab From 0965ed575190d85b191cd8baa076121201d710d1 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 10 Nov 2017 16:12:29 -0800 Subject: [PATCH 0273/1398] x86/mpx/selftests: Fix up weird arrays [ Upstream commit a6400120d042397675fcf694060779d21e9e762d ] The MPX hardware data structurse are defined in a weird way: they define their size in bytes and then union that with the type with which we want to access them. Yes, this is weird, but it does work. But, new GCC's complain that we are accessing the array out of bounds. Just make it a zero-sized array so gcc will stop complaining. There was not really a bug here. Signed-off-by: Dave Hansen Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171111001229.58A7933D@viggo.jf.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/mpx-hw.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h index 093c190178a9..28b3c7c553a4 100644 --- a/tools/testing/selftests/x86/mpx-hw.h +++ b/tools/testing/selftests/x86/mpx-hw.h @@ -51,14 +51,14 @@ struct mpx_bd_entry { union { char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES]; - void *contents[1]; + void *contents[0]; }; } __attribute__((packed)); struct mpx_bt_entry { union { char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES]; - unsigned long contents[1]; + unsigned long contents[0]; }; } __attribute__((packed)); -- GitLab From 6a8ab06660dccd42cdba409c3b1af045304a9ff7 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 10 Nov 2017 18:48:50 +0000 Subject: [PATCH 0274/1398] mac80211_hwsim: Fix memory leak in hwsim_new_radio_nl() [ Upstream commit 67bd52386125ce1159c0581cbcd2740addf33cd4 ] hwsim_new_radio_nl() now copies the name attribute in order to add a null-terminator. mac80211_hwsim_new_radio() (indirectly) copies it again into the net_device structure, so the first copy is not used or freed later. Free the first copy before returning. Fixes: ff4dd73dd2b4 ("mac80211_hwsim: check HWSIM_ATTR_RADIO_NAME length") Signed-off-by: Ben Hutchings Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mac80211_hwsim.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index d2a28a9d3209..4b462dc21c41 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3047,6 +3047,7 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) { struct hwsim_new_radio_params param = { 0 }; const char *hwname = NULL; + int ret; param.reg_strict = info->attrs[HWSIM_ATTR_REG_STRICT_REG]; param.p2p_device = info->attrs[HWSIM_ATTR_SUPPORT_P2P_DEVICE]; @@ -3086,7 +3087,9 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) param.regd = hwsim_world_regdom_custom[idx]; } - return mac80211_hwsim_new_radio(info, ¶m); + ret = mac80211_hwsim_new_radio(info, ¶m); + kfree(hwname); + return ret; } static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) -- GitLab From 2388d52d72561bd364ffbdf9f718fea2281f727d Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Fri, 17 Nov 2017 19:16:17 +0300 Subject: [PATCH 0275/1398] gre6: use log_ecn_error module parameter in ip6_tnl_rcv() [ Upstream commit 981542c526ecd846920bc500e9989da906ee9fb9 ] After commit 308edfdf1563 ("gre6: Cleanup GREv6 receive path, call common GRE functions") it's not used anywhere in the module, but previously was used in ip6gre_rcv(). Fixes: 308edfdf1563 ("gre6: Cleanup GREv6 receive path, call common GRE functions") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e9b14e3493f2..c46066c5dc27 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -461,7 +461,7 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) &ipv6h->saddr, &ipv6h->daddr, tpi->key, tpi->proto); if (tunnel) { - ip6_tnl_rcv(tunnel, skb, tpi, NULL, false); + ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error); return PACKET_RCVD; } -- GitLab From 7c4fa0c1c9cae3890efe867bbf0055ed03da4228 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 17 Nov 2017 14:27:18 +0800 Subject: [PATCH 0276/1398] route: also update fnhe_genid when updating a route cache [ Upstream commit cebe84c6190d741045a322f5343f717139993c08 ] Now when ip route flush cache and it turn out all fnhe_genid != genid. If a redirect/pmtu icmp packet comes and the old fnhe is found and all it's members but fnhe_genid will be updated. Then next time when it looks up route and tries to rebind this fnhe to the new dst, the fnhe will be flushed due to fnhe_genid != genid. It causes this redirect/pmtu icmp packet acutally not to be applied. This patch is to also reset fnhe_genid when updating a route cache. Fixes: 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions") Acked-by: Hannes Frederic Sowa Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6a5b7783932e..d2dbc15c8ec7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -630,9 +630,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; struct rtable *rt; + u32 genid, hval; unsigned int i; int depth; - u32 hval = fnhe_hashfun(daddr); + + genid = fnhe_genid(dev_net(nh->nh_dev)); + hval = fnhe_hashfun(daddr); spin_lock_bh(&fnhe_lock); @@ -655,6 +658,8 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, } if (fnhe) { + if (fnhe->fnhe_genid != genid) + fnhe->fnhe_genid = genid; if (gw) fnhe->fnhe_gw = gw; if (pmtu) { @@ -679,7 +684,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_next = hash->chain; rcu_assign_pointer(hash->chain, fnhe); } - fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev)); + fnhe->fnhe_genid = genid; fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; -- GitLab From 007e20bd0fddd4fee4c6d9f897acbca55066c23b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 17 Nov 2017 14:27:06 +0800 Subject: [PATCH 0277/1398] route: update fnhe_expires for redirect when the fnhe exists [ Upstream commit e39d5246111399dbc6e11cd39fd8580191b86c47 ] Now when creating fnhe for redirect, it sets fnhe_expires for this new route cache. But when updating the exist one, it doesn't do it. It will cause this fnhe never to be expired. Paolo already noticed it before, in Jianlin's test case, it became even worse: When ip route flush cache, the old fnhe is not to be removed, but only clean it's members. When redirect comes again, this fnhe will be found and updated, but never be expired due to fnhe_expires not being set. So fix it by simply updating fnhe_expires even it's for redirect. Fixes: aee06da6726d ("ipv4: use seqlock for nh_exceptions") Reported-by: Jianlin Shi Acked-by: Hannes Frederic Sowa Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d2dbc15c8ec7..7ac319222558 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -662,10 +662,9 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_genid = genid; if (gw) fnhe->fnhe_gw = gw; - if (pmtu) { + if (pmtu) fnhe->fnhe_pmtu = pmtu; - fnhe->fnhe_expires = max(1UL, expires); - } + fnhe->fnhe_expires = max(1UL, expires); /* Update all cached dsts too */ rt = rcu_dereference(fnhe->fnhe_rth_input); if (rt) -- GitLab From e9672477a5f059e88e3c3b886c0a61253447254a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 17 Nov 2017 15:37:57 -0800 Subject: [PATCH 0278/1398] drivers/rapidio/devices/rio_mport_cdev.c: fix resource leak in error handling path in 'rio_dma_transfer()' [ Upstream commit b1402dcb5643b7a27d46a05edd7491d49ba0e248 ] If 'dma_map_sg()', we should branch to the existing error handling path to free some resources before returning. Link: http://lkml.kernel.org/r/61292a4f369229eee03394247385e955027283f8.1505687047.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Reviewed-by: Logan Gunthorpe Cc: Matt Porter Cc: Alexandre Bounine Cc: Lorenzo Stoakes Cc: Jesper Nilsson Cc: Christian K_nig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/rapidio/devices/rio_mport_cdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 9013a585507e..f32fc704cb7e 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -964,7 +964,8 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode, req->sgt.sgl, req->sgt.nents, dir); if (nents == -EFAULT) { rmcd_error("Failed to map SG list"); - return -EFAULT; + ret = -EFAULT; + goto err_pg; } ret = do_dma_request(req, xfer, sync, nents); -- GitLab From a77c116075936faa6e8b8622e22acc9cbf47fe9e Mon Sep 17 00:00:00 2001 From: Stephen Bates Date: Fri, 17 Nov 2017 15:28:16 -0800 Subject: [PATCH 0279/1398] lib/genalloc.c: make the avail variable an atomic_long_t [ Upstream commit 36a3d1dd4e16bcd0d2ddfb4a2ec7092f0ae0d931 ] If the amount of resources allocated to a gen_pool exceeds 2^32 then the avail atomic overflows and this causes problems when clients try and borrow resources from the pool. This is only expected to be an issue on 64 bit systems. Add the header to pull in atomic_long* operations. So that 32 bit systems continue to use atomic32_t but 64 bit systems can use atomic64_t. Link: http://lkml.kernel.org/r/1509033843-25667-1-git-send-email-sbates@raithlin.com Signed-off-by: Stephen Bates Reviewed-by: Logan Gunthorpe Reviewed-by: Mathieu Desnoyers Reviewed-by: Daniel Mentz Cc: Jonathan Corbet Cc: Andrew Morton Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/genalloc.h | 3 ++- lib/genalloc.c | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 29d4385903d4..206fe3bccccc 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -32,6 +32,7 @@ #include #include +#include struct device; struct device_node; @@ -70,7 +71,7 @@ struct gen_pool { */ struct gen_pool_chunk { struct list_head next_chunk; /* next chunk in pool */ - atomic_t avail; + atomic_long_t avail; phys_addr_t phys_addr; /* physical starting address of memory chunk */ unsigned long start_addr; /* start address of memory chunk */ unsigned long end_addr; /* end address of memory chunk (inclusive) */ diff --git a/lib/genalloc.c b/lib/genalloc.c index 144fe6b1a03e..ca06adc4f445 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -194,7 +194,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy chunk->phys_addr = phys; chunk->start_addr = virt; chunk->end_addr = virt + size - 1; - atomic_set(&chunk->avail, size); + atomic_long_set(&chunk->avail, size); spin_lock(&pool->lock); list_add_rcu(&chunk->next_chunk, &pool->chunks); @@ -304,7 +304,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size, nbits = (size + (1UL << order) - 1) >> order; rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { - if (size > atomic_read(&chunk->avail)) + if (size > atomic_long_read(&chunk->avail)) continue; start_bit = 0; @@ -324,7 +324,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size, addr = chunk->start_addr + ((unsigned long)start_bit << order); size = nbits << order; - atomic_sub(size, &chunk->avail); + atomic_long_sub(size, &chunk->avail); break; } rcu_read_unlock(); @@ -390,7 +390,7 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); BUG_ON(remain); size = nbits << order; - atomic_add(size, &chunk->avail); + atomic_long_add(size, &chunk->avail); rcu_read_unlock(); return; } @@ -464,7 +464,7 @@ size_t gen_pool_avail(struct gen_pool *pool) rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) - avail += atomic_read(&chunk->avail); + avail += atomic_long_read(&chunk->avail); rcu_read_unlock(); return avail; } -- GitLab From a780a728475d4d9d2189ae6cfe719a175a5c9231 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Nov 2017 15:27:35 -0800 Subject: [PATCH 0280/1398] dynamic-debug-howto: fix optional/omitted ending line number to be LARGE instead of 0 [ Upstream commit 1f3c790bd5989fcfec9e53ad8fa09f5b740c958f ] line-range is supposed to treat "1-" as "1-endoffile", so handle the special case by setting last_lineno to UINT_MAX. Fixes this error: dynamic_debug:ddebug_parse_query: last-line:0 < 1st-line:1 dynamic_debug:ddebug_exec_query: query parse failed Link: http://lkml.kernel.org/r/10a6a101-e2be-209f-1f41-54637824788e@infradead.org Signed-off-by: Randy Dunlap Acked-by: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index da796e2dc4f5..c7c96bc7654a 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -360,6 +360,10 @@ static int ddebug_parse_query(char *words[], int nwords, if (parse_lineno(last, &query->last_lineno) < 0) return -EINVAL; + /* special case for last lineno not specified */ + if (query->last_lineno == 0) + query->last_lineno = UINT_MAX; + if (query->last_lineno < query->first_lineno) { pr_err("last-line:%d < 1st-line:%d\n", query->last_lineno, -- GitLab From 509e9b805c52d799c9fc44ca0a32fa79fc03befc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:04 -0500 Subject: [PATCH 0281/1398] NFS: Fix a typo in nfs_rename() [ Upstream commit d803224c84be067754db7fa58a93f36f61566493 ] On successful rename, the "old_dentry" is retained and is attached to the "new_dir", so we need to call nfs_set_verifier() accordingly. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 65566d5fcf39..1e5321d1ed22 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2098,7 +2098,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_inode != NULL) nfs_drop_nlink(new_inode); d_move(old_dentry, new_dentry); - nfs_set_verifier(new_dentry, + nfs_set_verifier(old_dentry, nfs_save_change_attribute(new_dir)); } else if (error == -ENOENT) nfs_dentry_handle_enoent(old_dentry); -- GitLab From acbe10423b1aabf04f2c34a153905ee3d54e4ff3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Nov 2017 13:46:06 -0400 Subject: [PATCH 0282/1398] sunrpc: Fix rpc_task_begin trace point [ Upstream commit b2bfe5915d5fe7577221031a39ac722a0a2a1199 ] The rpc_task_begin trace point always display a task ID of zero. Move the trace point call site so that it picks up the new task ID. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/sched.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 5db68b371db2..600eacce653a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -274,10 +274,9 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task) static void rpc_set_active(struct rpc_task *task) { - trace_rpc_task_begin(task->tk_client, task, NULL); - rpc_task_set_debuginfo(task); set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); + trace_rpc_task_begin(task->tk_client, task, NULL); } /* -- GitLab From 1eeb1edfd3c95f1826be3349f7dc0b41c396b469 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 14 Nov 2017 16:34:44 -0800 Subject: [PATCH 0283/1398] xfs: fix forgotten rcu read unlock when skipping inode reclaim [ Upstream commit 962cc1ad6caddb5abbb9f0a43e5abe7131a71f18 ] In commit f2e9ad21 ("xfs: check for race with xfs_reclaim_inode"), we skip an inode if we're racing with freeing the inode via xfs_reclaim_inode, but we forgot to release the rcu read lock when dumping the inode, with the result that we exit to userspace with a lock held. Don't do that; generic/320 with a 1k block size fails this very occasionally. ================================================ WARNING: lock held when returning to user space! 4.14.0-rc6-djwong #4 Tainted: G W ------------------------------------------------ rm/30466 is leaving the kernel with locks still held! 1 lock held by rm/30466: #0: (rcu_read_lock){....}, at: [] xfs_ifree_cluster.isra.17+0x2c3/0x6f0 [xfs] ------------[ cut here ]------------ WARNING: CPU: 1 PID: 30466 at kernel/rcu/tree_plugin.h:329 rcu_note_context_switch+0x71/0x700 Modules linked in: deadline_iosched dm_snapshot dm_bufio ext4 mbcache jbd2 dm_flakey xfs libcrc32c dax_pmem device_dax nd_pmem sch_fq_codel af_packet [last unloaded: scsi_debug] CPU: 1 PID: 30466 Comm: rm Tainted: G W 4.14.0-rc6-djwong #4 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.10.2-1ubuntu1djwong0 04/01/2014 task: ffff880037680000 task.stack: ffffc90001064000 RIP: 0010:rcu_note_context_switch+0x71/0x700 RSP: 0000:ffffc90001067e50 EFLAGS: 00010002 RAX: 0000000000000001 RBX: ffff880037680000 RCX: ffff88003e73d200 RDX: 0000000000000002 RSI: ffffffff819e53e9 RDI: ffffffff819f4375 RBP: 0000000000000000 R08: 0000000000000000 R09: ffff880062c900d0 R10: 0000000000000000 R11: 0000000000000000 R12: ffff880037680000 R13: 0000000000000000 R14: ffffc90001067eb8 R15: ffff880037680690 FS: 00007fa3b8ce8700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f69bf77c000 CR3: 000000002450a000 CR4: 00000000000006e0 Call Trace: __schedule+0xb8/0xb10 schedule+0x40/0x90 exit_to_usermode_loop+0x6b/0xa0 prepare_exit_to_usermode+0x7a/0x90 retint_user+0x8/0x20 RIP: 0033:0x7fa3b87fda87 RSP: 002b:00007ffe41206568 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff02 RAX: 0000000000000000 RBX: 00000000010e88c0 RCX: 00007fa3b87fda87 RDX: 0000000000000000 RSI: 00000000010e89c8 RDI: 0000000000000005 RBP: 0000000000000000 R08: 0000000000000003 R09: 0000000000000000 R10: 000000000000015e R11: 0000000000000246 R12: 00000000010c8060 R13: 00007ffe41206690 R14: 0000000000000000 R15: 0000000000000000 ---[ end trace e88f83bf0cfbd07d ]--- Fixes: f2e9ad212def50bcf4c098c6288779dd97fff0f0 Cc: Omar Sandoval Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Omar Sandoval Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index fe9a9a183b2d..98ca9f1b6a07 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2386,6 +2386,7 @@ xfs_ifree_cluster( */ if (ip->i_ino != inum + i) { xfs_iunlock(ip, XFS_ILOCK_EXCL); + rcu_read_unlock(); continue; } } -- GitLab From c31bfe7de24387dfc0b4c3c1a34f4f8d1554c907 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 9 Nov 2017 18:07:17 +0100 Subject: [PATCH 0284/1398] dt-bindings: usb: fix reg-property port-number range [ Upstream commit f42ae7b0540937e00fe005812997f126aaac4bc2 ] The USB hub port-number range for USB 2.0 is 1-255 and not 1-31 which reflects an arbitrary limit set by the current Linux implementation. Note that for USB 3.1 hubs the valid range is 1-15. Increase the documented valid range in the binding to 255, which is the maximum allowed by the specifications. Signed-off-by: Johan Hovold Signed-off-by: Rob Herring Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/usb-device.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/usb-device.txt b/Documentation/devicetree/bindings/usb/usb-device.txt index 1c35e7b665e1..03ab8f5eab40 100644 --- a/Documentation/devicetree/bindings/usb/usb-device.txt +++ b/Documentation/devicetree/bindings/usb/usb-device.txt @@ -11,7 +11,7 @@ Required properties: be used, but a device adhering to this binding may leave out all except for usbVID,PID. - reg: the port number which this device is connecting to, the range - is 1-31. + is 1-255. Example: -- GitLab From 1a5a4c6e868faf01cda18538ea72c6586dfee647 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 16 Nov 2017 08:08:44 +0800 Subject: [PATCH 0285/1398] block: wake up all tasks blocked in get_request() [ Upstream commit 34d9715ac1edd50285168dd8d80c972739a4f6a4 ] Once blk_set_queue_dying() is done in blk_cleanup_queue(), we call blk_freeze_queue() and wait for q->q_usage_counter becoming zero. But if there are tasks blocked in get_request(), q->q_usage_counter can never become zero. So we have to wake up all these tasks in blk_set_queue_dying() first. Fixes: 3ef28e83ab157997 ("block: generic request_queue reference counting") Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index b1c76aa73492..23daf40be371 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -527,8 +527,8 @@ void blk_set_queue_dying(struct request_queue *q) blk_queue_for_each_rl(rl, q) { if (rl->rq_pool) { - wake_up(&rl->wait[BLK_RW_SYNC]); - wake_up(&rl->wait[BLK_RW_ASYNC]); + wake_up_all(&rl->wait[BLK_RW_SYNC]); + wake_up_all(&rl->wait[BLK_RW_ASYNC]); } } } -- GitLab From 7da67d1d98f609b0f24fb6c1a317523c965898b7 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 15 Nov 2017 17:36:18 -0800 Subject: [PATCH 0286/1398] sparc64/mm: set fields in deferred pages [ Upstream commit 2a20aa171071a334d80c4e5d5af719d8374702fc ] Without deferred struct page feature (CONFIG_DEFERRED_STRUCT_PAGE_INIT), flags and other fields in "struct page"es are never changed prior to first initializing struct pages by going through __init_single_page(). With deferred struct page feature enabled there is a case where we set some fields prior to initializing: mem_init() { register_page_bootmem_info(); free_all_bootmem(); ... } When register_page_bootmem_info() is called only non-deferred struct pages are initialized. But, this function goes through some reserved pages which might be part of the deferred, and thus are not yet initialized. mem_init register_page_bootmem_info register_page_bootmem_info_node get_page_bootmem .. setting fields here .. such as: page->freelist = (void *)type; free_all_bootmem() free_low_memory_core_early() for_each_reserved_mem_region() reserve_bootmem_region() init_reserved_page() <- Only if this is deferred reserved page __init_single_pfn() __init_single_page() memset(0) <-- Loose the set fields here We end up with similar issue as in the previous patch, where currently we do not observe problem as memory is zeroed. But, if flag asserts are changed we can start hitting issues. Also, because in this patch series we will stop zeroing struct page memory during allocation, we must make sure that struct pages are properly initialized prior to using them. The deferred-reserved pages are initialized in free_all_bootmem(). Therefore, the fix is to switch the above calls. Link: http://lkml.kernel.org/r/20171013173214.27300-4-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin Reviewed-by: Steven Sistare Reviewed-by: Daniel Jordan Reviewed-by: Bob Picco Acked-by: David S. Miller Acked-by: Michal Hocko Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dmitry Vyukov Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Mark Rutland Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Sam Ravnborg Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/sparc/mm/init_64.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 57154c638e71..0f183ffe3416 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2391,9 +2391,16 @@ void __init mem_init(void) { high_memory = __va(last_valid_pfn << PAGE_SHIFT); - register_page_bootmem_info(); free_all_bootmem(); + /* + * Must be done after boot memory is put on freelist, because here we + * might set fields in deferred struct pages that have not yet been + * initialized, and free_all_bootmem() initializes all the reserved + * deferred pages for us. + */ + register_page_bootmem_info(); + /* * Set up the zero page, mark it reserved, so that page count * is not manipulated when freeing the page from user ptes. -- GitLab From 1618400444d0130b593ba1218380c0df7c1c7456 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 15 Nov 2017 17:34:03 -0800 Subject: [PATCH 0287/1398] zsmalloc: calling zs_map_object() from irq is a bug [ Upstream commit 1aedcafbf32b3f232c159b14cd0d423fcfe2b861 ] Use BUG_ON(in_interrupt()) in zs_map_object(). This is not a new BUG_ON(), it's always been there, but was recently changed to VM_BUG_ON(). There are several problems there. First, we use use per-CPU mappings both in zsmalloc and in zram, and interrupt may easily corrupt those buffers. Second, and more importantly, we believe it's possible to start leaking sensitive information. Consider the following case: -> process P swap out zram per-cpu mapping CPU1 compress page A -> IRQ swap out zram per-cpu mapping CPU1 compress page B write page from per-cpu mapping CPU1 to zsmalloc pool iret -> process P write page from per-cpu mapping CPU1 to zsmalloc pool [*] return * so we store overwritten data that actually belongs to another page (task) and potentially contains sensitive data. And when process P will page fault it's going to read (swap in) that other task's data. Link: http://lkml.kernel.org/r/20170929045140.4055-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/zsmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 1689bb58e0d1..d3548c48369f 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1407,7 +1407,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, * pools/users, we can't allow mapping in interrupt context * because it can corrupt another users mappings. */ - WARN_ON_ONCE(in_interrupt()); + BUG_ON(in_interrupt()); /* From now on, migration cannot move the object */ pin_tag(handle); -- GitLab From 1158ecd51ec163396d649daedb36440cdaf85443 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 15 Nov 2017 16:55:54 +0800 Subject: [PATCH 0288/1398] sctp: do not free asoc when it is already dead in sctp_sendmsg [ Upstream commit ca3af4dd28cff4e7216e213ba3b671fbf9f84758 ] Now in sctp_sendmsg sctp_wait_for_sndbuf could schedule out without holding sock sk. It means the current asoc can be freed elsewhere, like when receiving an abort packet. If the asoc is just created in sctp_sendmsg and sctp_wait_for_sndbuf returns err, the asoc will be freed again due to new_asoc is not nil. An use-after-free issue would be triggered by this. This patch is to fix it by setting new_asoc with nil if the asoc is already dead when cpu schedules back, so that it will not be freed again in sctp_sendmsg. v1->v2: set new_asoc as nil in sctp_sendmsg instead of sctp_wait_for_sndbuf. Suggested-by: Neil Horman Reported-by: Dmitry Vyukov Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c062ceae19e6..121e204d8e4b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1958,8 +1958,14 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); if (!sctp_wspace(asoc)) { err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); - if (err) + if (err) { + if (err == -ESRCH) { + /* asoc is already dead. */ + new_asoc = NULL; + err = -EPIPE; + } goto out_free; + } } /* If an address is passed with the sendto/sendmsg call, it is used @@ -7457,10 +7463,11 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, for (;;) { prepare_to_wait_exclusive(&asoc->wait, &wait, TASK_INTERRUPTIBLE); + if (asoc->base.dead) + goto do_dead; if (!*timeo_p) goto do_nonblock; - if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING || - asoc->base.dead) + if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING) goto do_error; if (signal_pending(current)) goto do_interrupted; @@ -7485,6 +7492,10 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, return err; +do_dead: + err = -ESRCH; + goto out; + do_error: err = -EPIPE; goto out; -- GitLab From 9904da5a09cec1dbcfc434b79950a2608ea23907 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 15 Nov 2017 16:57:26 +0800 Subject: [PATCH 0289/1398] sctp: use the right sk after waking up from wait_buf sleep [ Upstream commit cea0cc80a6777beb6eb643d4ad53690e1ad1d4ff ] Commit dfcb9f4f99f1 ("sctp: deny peeloff operation on asocs with threads sleeping on it") fixed the race between peeloff and wait sndbuf by checking waitqueue_active(&asoc->wait) in sctp_do_peeloff(). But it actually doesn't work, as even if waitqueue_active returns false the waiting sndbuf thread may still not yet hold sk lock. After asoc is peeled off, sk is not asoc->base.sk any more, then to hold the old sk lock couldn't make assoc safe to access. This patch is to fix this by changing to hold the new sk lock if sk is not asoc->base.sk, meanwhile, also set the sk in sctp_sendmsg with the new sk. With this fix, there is no more race between peeloff and waitbuf, the check 'waitqueue_active' in sctp_do_peeloff can be removed. Thanks Marcelo and Neil for making this clear. v1->v2: fix it by changing to lock the new sock instead of adding a flag in asoc. Suggested-by: Neil Horman Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 121e204d8e4b..c2ab864da50d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -82,8 +82,8 @@ /* Forward declarations for internal helper functions. */ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); -static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p, - size_t msg_len); +static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, + size_t msg_len, struct sock **orig_sk); static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); @@ -1957,7 +1957,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); if (!sctp_wspace(asoc)) { - err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); + /* sk can be changed by peel off when waiting for buf. */ + err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk); if (err) { if (err == -ESRCH) { /* asoc is already dead. */ @@ -4777,12 +4778,6 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) if (!asoc) return -EINVAL; - /* If there is a thread waiting on more sndbuf space for - * sending on this asoc, it cannot be peeled. - */ - if (waitqueue_active(&asoc->wait)) - return -EBUSY; - /* An association cannot be branched off from an already peeled-off * socket, nor is this supported for tcp style sockets. */ @@ -7446,7 +7441,7 @@ void sctp_sock_rfree(struct sk_buff *skb) /* Helper function to wait for space in the sndbuf. */ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, - size_t msg_len) + size_t msg_len, struct sock **orig_sk) { struct sock *sk = asoc->base.sk; int err = 0; @@ -7480,11 +7475,17 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, release_sock(sk); current_timeo = schedule_timeout(current_timeo); lock_sock(sk); + if (sk != asoc->base.sk) { + release_sock(sk); + sk = asoc->base.sk; + lock_sock(sk); + } *timeo_p = current_timeo; } out: + *orig_sk = sk; finish_wait(&asoc->wait, &wait); /* Release the association's refcnt. */ -- GitLab From f45f4f8a7cd89570b987ff101bcc510e9cfc2a9a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Nov 2017 17:15:50 -0800 Subject: [PATCH 0290/1398] bpf: fix lockdep splat [ Upstream commit 89ad2fa3f043a1e8daae193bcb5fe34d5f8caf28 ] pcpu_freelist_pop() needs the same lockdep awareness than pcpu_freelist_populate() to avoid a false positive. [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ] switchto-defaul/12508 [HC0[0]:SC0[6]:HE0:SE0] is trying to acquire: (&htab->buckets[i].lock){......}, at: [] __htab_percpu_map_update_elem+0x1cb/0x300 and this task is already holding: (dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2){+.-...}, at: [] __dev_queue_xmit+0 x868/0x1240 which would create a new lock dependency: (dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2){+.-...} -> (&htab->buckets[i].lock){......} but this new dependency connects a SOFTIRQ-irq-safe lock: (dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2){+.-...} ... which became SOFTIRQ-irq-safe at: [] __lock_acquire+0x42b/0x1f10 [] lock_acquire+0xbc/0x1b0 [] _raw_spin_lock+0x38/0x50 [] __dev_queue_xmit+0x868/0x1240 [] dev_queue_xmit+0x10/0x20 [] ip_finish_output2+0x439/0x590 [] ip_finish_output+0x150/0x2f0 [] ip_output+0x7d/0x260 [] ip_local_out+0x5e/0xe0 [] ip_queue_xmit+0x205/0x620 [] tcp_transmit_skb+0x5a8/0xcb0 [] tcp_write_xmit+0x242/0x1070 [] __tcp_push_pending_frames+0x3c/0xf0 [] tcp_rcv_established+0x312/0x700 [] tcp_v4_do_rcv+0x11c/0x200 [] tcp_v4_rcv+0xaa2/0xc30 [] ip_local_deliver_finish+0xa7/0x240 [] ip_local_deliver+0x66/0x200 [] ip_rcv_finish+0xdd/0x560 [] ip_rcv+0x295/0x510 [] __netif_receive_skb_core+0x988/0x1020 [] __netif_receive_skb+0x21/0x70 [] process_backlog+0x6f/0x230 [] net_rx_action+0x229/0x420 [] __do_softirq+0xd8/0x43d [] do_softirq_own_stack+0x1c/0x30 [] do_softirq+0x55/0x60 [] __local_bh_enable_ip+0xa8/0xb0 [] cpu_startup_entry+0x1c7/0x500 [] start_secondary+0x113/0x140 to a SOFTIRQ-irq-unsafe lock: (&head->lock){+.+...} ... which became SOFTIRQ-irq-unsafe at: ... [] __lock_acquire+0x82f/0x1f10 [] lock_acquire+0xbc/0x1b0 [] _raw_spin_lock+0x38/0x50 [] pcpu_freelist_pop+0x7a/0xb0 [] htab_map_alloc+0x50c/0x5f0 [] SyS_bpf+0x265/0x1200 [] entry_SYSCALL_64_fastpath+0x12/0x17 other info that might help us debug this: Chain exists of: dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2 --> &htab->buckets[i].lock --> &head->lock Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&head->lock); local_irq_disable(); lock(dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2); lock(&htab->buckets[i].lock); lock(dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2); *** DEADLOCK *** Fixes: e19494edab82 ("bpf: introduce percpu_freelist") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/percpu_freelist.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 5c51d1985b51..673fa6fe2d73 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -78,8 +78,10 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) { struct pcpu_freelist_head *head; struct pcpu_freelist_node *node; + unsigned long flags; int orig_cpu, cpu; + local_irq_save(flags); orig_cpu = cpu = raw_smp_processor_id(); while (1) { head = per_cpu_ptr(s->freelist, cpu); @@ -87,14 +89,16 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) node = head->first; if (node) { head->first = node->next; - raw_spin_unlock(&head->lock); + raw_spin_unlock_irqrestore(&head->lock, flags); return node; } raw_spin_unlock(&head->lock); cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = 0; - if (cpu == orig_cpu) + if (cpu == orig_cpu) { + local_irq_restore(flags); return NULL; + } } } -- GitLab From c488c2e141bcce57a02d308df01d6f4172393637 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 5 Oct 2017 11:32:59 +0900 Subject: [PATCH 0291/1398] clk: uniphier: fix DAPLL2 clock rate of Pro5 [ Upstream commit 67affb78a4e4feb837953e3434c8402a5c3b272f ] The parent of DAPLL2 should be DAPLL1. Fix the clock connection. Signed-off-by: Masahiro Yamada Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/uniphier/clk-uniphier-sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/uniphier/clk-uniphier-sys.c b/drivers/clk/uniphier/clk-uniphier-sys.c index 5d029991047d..481225adef87 100644 --- a/drivers/clk/uniphier/clk-uniphier-sys.c +++ b/drivers/clk/uniphier/clk-uniphier-sys.c @@ -98,7 +98,7 @@ const struct uniphier_clk_data uniphier_sld8_sys_clk_data[] = { const struct uniphier_clk_data uniphier_pro5_sys_clk_data[] = { UNIPHIER_CLK_FACTOR("spll", -1, "ref", 120, 1), /* 2400 MHz */ UNIPHIER_CLK_FACTOR("dapll1", -1, "ref", 128, 1), /* 2560 MHz */ - UNIPHIER_CLK_FACTOR("dapll2", -1, "ref", 144, 125), /* 2949.12 MHz */ + UNIPHIER_CLK_FACTOR("dapll2", -1, "dapll1", 144, 125), /* 2949.12 MHz */ UNIPHIER_CLK_FACTOR("uart", 0, "dapll2", 1, 40), UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 48), UNIPHIER_PRO5_SYS_CLK_SD, -- GitLab From 22d2456faefaed01ef3021fb8e6752e8c2f91a74 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 14 Nov 2017 13:42:38 +0530 Subject: [PATCH 0292/1398] atm: horizon: Fix irq release error [ Upstream commit bde533f2ea607cbbbe76ef8738b36243939a7bc2 ] atm_dev_register() can fail here and passed parameters to free irq which is not initialised. Initialization of 'dev->irq' happened after the 'goto out_free_irq'. So using 'irq' insted of 'dev->irq' in free_irq(). Signed-off-by: Arvind Yadav Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/atm/horizon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 5fc81e240c24..e55f418d6ab9 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2802,7 +2802,7 @@ static int hrz_probe(struct pci_dev *pci_dev, return err; out_free_irq: - free_irq(dev->irq, dev); + free_irq(irq, dev); out_free: kfree(dev); out_release: -- GitLab From 74b470ce478a69bb3ce36b38e6c834d0578c3195 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 13 Nov 2017 16:48:47 -0500 Subject: [PATCH 0293/1398] jump_label: Invoke jump_label_test() via early_initcall() [ Upstream commit 92ee46efeb505ead3ab06d3c5ce695637ed5f152 ] Fengguang Wu reported that running the rcuperf test during boot can cause the jump_label_test() to hit a WARN_ON(). The issue is that the core jump label code relies on kernel_text_address() to detect when it can no longer update branches that may be contained in __init sections. The kernel_text_address() in turn assumes that if the system_state variable is greter than or equal to SYSTEM_RUNNING then __init sections are no longer valid (since the assumption is that they have been freed). However, when rcuperf is setup to run in early boot it can call kernel_power_off() which sets the system_state to SYSTEM_POWER_OFF. Since rcuperf initialization is invoked via a module_init(), we can make the dependency of jump_label_test() needing to complete before rcuperf explicit by calling it via early_initcall(). Reported-by: Fengguang Wu Signed-off-by: Jason Baron Acked-by: Paul E. McKenney Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1510609727-2238-1-git-send-email-jbaron@akamai.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/jump_label.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index a9b8cf500591..def4548ea40c 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -612,7 +612,7 @@ static __init int jump_label_test(void) return 0; } -late_initcall(jump_label_test); +early_initcall(jump_label_test); #endif /* STATIC_KEYS_SELFTEST */ #endif /* HAVE_JUMP_LABEL */ -- GitLab From 6ceabde6664331f096462ff8fcb6f79f303a78ce Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 10 Nov 2017 14:14:06 +1100 Subject: [PATCH 0294/1398] xfrm: Copy policy family in clone_policy [ Upstream commit 0e74aa1d79a5bbc663e03a2804399cae418a0321 ] The syzbot found an ancient bug in the IPsec code. When we cloned a socket policy (for example, for a child TCP socket derived from a listening socket), we did not copy the family field. This results in a live policy with a zero family field. This triggers a BUG_ON check in the af_key code when the cloned policy is retrieved. This patch fixes it by copying the family field over. Reported-by: syzbot Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8ce5711ea21b..f19e6a57e118 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1393,6 +1393,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; newp->type = old->type; + newp->family = old->family; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); -- GitLab From 06fea09c095920c01e79eb913f1eea9fc1792ba8 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 2 Nov 2017 15:22:26 +0200 Subject: [PATCH 0295/1398] IB/mlx4: Increase maximal message size under UD QP [ Upstream commit 5f22a1d87c5315a98981ecf93cd8de226cffe6ca ] Maximal message should be used as a limit to the max message payload allowed, without the headers. The ConnectX-3 check is done against this value includes the headers. When the payload is 4K this will cause the NIC to drop packets. Increase maximal message to 8K as workaround, this shouldn't change current behaviour because we continue to set the MTU to 4k. To reproduce; set MTU to 4296 on the corresponding interface, for example: ifconfig eth0 mtu 4296 (both server and client) On server: ib_send_bw -c UD -d mlx4_0 -s 4096 -n 1000000 -i1 -m 4096 On client: ib_send_bw -d mlx4_0 -c UD -s 4096 -n 1000000 -i 1 -m 4096 Fixes: 6e0d733d9215 ("IB/mlx4: Allow 4K messages for UD QPs") Signed-off-by: Mark Bloch Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index c22454383976..709d6491d243 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1669,7 +1669,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->mtu_msgmax = (IB_MTU_4096 << 5) | ilog2(dev->dev->caps.max_gso_sz); else - context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; + context->mtu_msgmax = (IB_MTU_4096 << 5) | 13; } else if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { pr_err("path MTU (%u) is invalid\n", -- GitLab From 9048b2420e32bc66dd19f64a9ccb3329d0eaa1bd Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Mon, 30 Oct 2017 14:23:13 +0200 Subject: [PATCH 0296/1398] IB/mlx5: Assign send CQ and recv CQ of UMR QP [ Upstream commit 31fde034a8bd964a5c7c1a5663fc87a913158db2 ] The UMR's QP is created by calling mlx5_ib_create_qp directly, and therefore the send CQ and the recv CQ on the ibqp weren't assigned. Assign them right after calling the mlx5_ib_create_qp to assure that any access to those pointers will work as expected and won't crash the system as might happen as part of reset flow. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Majd Dibbiny Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 786f640fc462..a2120ff0ef4c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2514,6 +2514,8 @@ static int create_umr_res(struct mlx5_ib_dev *dev) qp->real_qp = qp; qp->uobject = NULL; qp->qp_type = MLX5_IB_QPT_REG_UMR; + qp->send_cq = init_attr->send_cq; + qp->recv_cq = init_attr->recv_cq; attr->qp_state = IB_QPS_INIT; attr->port_num = 1; -- GitLab From 15b175223e53da5597ea9e7c96691518b2ecaeca Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:48 +0000 Subject: [PATCH 0297/1398] afs: Connect up the CB.ProbeUuid [ Upstream commit f4b3526d83c40dd8bf5948b9d7a1b2c340f0dcc8 ] The handler for the CB.ProbeUuid operation in the cache manager is implemented, but isn't listed in the switch-statement of operation selection, so won't be used. Fix this by adding it. Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/cmservice.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index d764236072b1..8d2c5180e015 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -106,6 +106,9 @@ bool afs_cm_incoming_call(struct afs_call *call) case CBProbe: call->type = &afs_SRXCBProbe; return true; + case CBProbeUuid: + call->type = &afs_SRXCBProbeUuid; + return true; case CBTellMeAboutYourself: call->type = &afs_SRXCBTellMeAboutYourself; return true; -- GitLab From c91efc7862b3f3c61cb76e926990cdbff5354b57 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 14 Dec 2017 09:28:24 +0100 Subject: [PATCH 0298/1398] Linux 4.9.69 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dfe17af517b2..8f2819bf8135 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 68 +SUBLEVEL = 69 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 32436bf375b07e9126bcb0d46b7242bdea239286 Mon Sep 17 00:00:00 2001 From: Sebastian Sjoholm Date: Mon, 20 Nov 2017 19:05:17 +0100 Subject: [PATCH 0299/1398] net: qmi_wwan: add Quectel BG96 2c7c:0296 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f9409e7f086fa6c4623769b4b2f4f17a024d8143 ] Quectel BG96 is an Qualcomm MDM9206 based IoT modem, supporting both CAT-M and NB-IoT. Tested hardware is BG96 mounted on Quectel development board (EVB). The USB id is added to qmi_wwan.c to allow QMI communication with the BG96. Signed-off-by: Sebastian Sjoholm Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 9cf11c83993a..97a16e23c19a 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -936,6 +936,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)}, /* SIMCom 7230E */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ + {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ -- GitLab From 20610f5bbd23fd1bafa3c6d91c558ad9b08e346a Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 18 Oct 2017 17:40:17 +0200 Subject: [PATCH 0300/1398] s390/qeth: fix early exit from error path [ Upstream commit 83cf79a2fec3cf499eb6cb9eb608656fc2a82776 ] When the allocation of the addr buffer fails, we need to free our refcount on the inetdevice before returning. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_l3_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 03a2619166ca..204edb0b69a5 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1593,7 +1593,7 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card, addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4); if (!addr) - return; + goto out; spin_lock_bh(&card->ip_lock); @@ -1607,6 +1607,7 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card, spin_unlock_bh(&card->ip_lock); kfree(addr); +out: in_dev_put(in_dev); } @@ -1631,7 +1632,7 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card, addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6); if (!addr) - return; + goto out; spin_lock_bh(&card->ip_lock); @@ -1646,6 +1647,7 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card, spin_unlock_bh(&card->ip_lock); kfree(addr); +out: in6_dev_put(in6_dev); #endif /* CONFIG_QETH_IPV6 */ } -- GitLab From 96b4a8ac9a55dcfd97b03a71776a3ec2b731a8bd Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 4 Dec 2017 22:00:20 +0100 Subject: [PATCH 0301/1398] tipc: fix memory leak in tipc_accept_from_sock() [ Upstream commit a7d5f107b4978e08eeab599ee7449af34d034053 ] When the function tipc_accept_from_sock() fails to create an instance of struct tipc_subscriber it omits to free the already created instance of struct tipc_conn instance before it returns. We fix that with this commit. Reported-by: David S. Miller Signed-off-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/server.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/server.c b/net/tipc/server.c index 3cd6402e812c..f4c1b18c5fb0 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -313,6 +313,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con) newcon->usr_data = s->tipc_conn_new(newcon->conid); if (!newcon->usr_data) { sock_release(newsock); + conn_put(newcon); return -ENOMEM; } -- GitLab From 3259862dd73bfb9d9b7a647ea77cb20ba8b179a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Wed, 6 Dec 2017 17:18:28 +0100 Subject: [PATCH 0302/1398] rds: Fix NULL pointer dereference in __rds_rdma_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f3069c6d33f6ae63a1668737bc78aaaa51bff7ca ] This is a fix for syzkaller719569, where memory registration was attempted without any underlying transport being loaded. Analysis of the case reveals that it is the setsockopt() RDS_GET_MR (2) and RDS_GET_MR_FOR_DEST (7) that are vulnerable. Here is an example stack trace when the bug is hit: BUG: unable to handle kernel NULL pointer dereference at 00000000000000c0 IP: __rds_rdma_map+0x36/0x440 [rds] PGD 2f93d03067 P4D 2f93d03067 PUD 2f93d02067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: bridge stp llc tun rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache rds binfmt_misc sb_edac intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul c rc32_pclmul ghash_clmulni_intel pcbc aesni_intel crypto_simd glue_helper cryptd iTCO_wdt mei_me sg iTCO_vendor_support ipmi_si mei ipmi_devintf nfsd shpchp pcspkr i2c_i801 ioatd ma ipmi_msghandler wmi lpc_ich mfd_core auth_rpcgss nfs_acl lockd grace sunrpc ip_tables ext4 mbcache jbd2 mgag200 i2c_algo_bit drm_kms_helper ixgbe syscopyarea ahci sysfillrect sysimgblt libahci mdio fb_sys_fops ttm ptp libata sd_mod mlx4_core drm crc32c_intel pps_core megaraid_sas i2c_core dca dm_mirror dm_region_hash dm_log dm_mod CPU: 48 PID: 45787 Comm: repro_set2 Not tainted 4.14.2-3.el7uek.x86_64 #2 Hardware name: Oracle Corporation ORACLE SERVER X5-2L/ASM,MOBO TRAY,2U, BIOS 31110000 03/03/2017 task: ffff882f9190db00 task.stack: ffffc9002b994000 RIP: 0010:__rds_rdma_map+0x36/0x440 [rds] RSP: 0018:ffffc9002b997df0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff882fa2182580 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffc9002b997e40 RDI: ffff882fa2182580 RBP: ffffc9002b997e30 R08: 0000000000000000 R09: 0000000000000002 R10: ffff885fb29e3838 R11: 0000000000000000 R12: ffff882fa2182580 R13: ffff882fa2182580 R14: 0000000000000002 R15: 0000000020000ffc FS: 00007fbffa20b700(0000) GS:ffff882fbfb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000c0 CR3: 0000002f98a66006 CR4: 00000000001606e0 Call Trace: rds_get_mr+0x56/0x80 [rds] rds_setsockopt+0x172/0x340 [rds] ? __fget_light+0x25/0x60 ? __fdget+0x13/0x20 SyS_setsockopt+0x80/0xe0 do_syscall_64+0x67/0x1b0 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7fbff9b117f9 RSP: 002b:00007fbffa20aed8 EFLAGS: 00000293 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 00000000000c84a4 RCX: 00007fbff9b117f9 RDX: 0000000000000002 RSI: 0000400000000114 RDI: 000000000000109b RBP: 00007fbffa20af10 R08: 0000000000000020 R09: 00007fbff9dd7860 R10: 0000000020000ffc R11: 0000000000000293 R12: 0000000000000000 R13: 00007fbffa20b9c0 R14: 00007fbffa20b700 R15: 0000000000000021 Code: 41 56 41 55 49 89 fd 41 54 53 48 83 ec 18 8b 87 f0 02 00 00 48 89 55 d0 48 89 4d c8 85 c0 0f 84 2d 03 00 00 48 8b 87 00 03 00 00 <48> 83 b8 c0 00 00 00 00 0f 84 25 03 00 0 0 48 8b 06 48 8b 56 08 The fix is to check the existence of an underlying transport in __rds_rdma_map(). Signed-off-by: Håkon Bugge Reported-by: syzbot Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 60e90f761838..de8496e60735 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -183,7 +183,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, long i; int ret; - if (rs->rs_bound_addr == 0) { + if (rs->rs_bound_addr == 0 || !rs->rs_transport) { ret = -ENOTCONN; /* XXX not a great errno */ goto out; } -- GitLab From 5f218c3fd11a3198fe388603bad351fc3522fc7c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 30 Nov 2017 10:41:14 +0800 Subject: [PATCH 0303/1398] sit: update frag_off info [ Upstream commit f859b4af1c52493ec21173ccc73d0b60029b5b88 ] After parsing the sit netlink change info, we forget to update frag_off in ipip6_tunnel_update(). Fix it by assigning frag_off with new value. Reported-by: Jianlin Shi Signed-off-by: Hangbin Liu Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 40d740572354..db6d437002a6 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1085,6 +1085,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) ipip6_tunnel_link(sitn, t); t->parms.iph.ttl = p->iph.ttl; t->parms.iph.tos = p->iph.tos; + t->parms.iph.frag_off = p->iph.frag_off; if (t->parms.link != p->link) { t->parms.link = p->link; ipip6_tunnel_bind_dev(t->dev); -- GitLab From 30c573affac8a7d93febe8ed6eff862b33beeb56 Mon Sep 17 00:00:00 2001 From: Mike Maloney Date: Tue, 28 Nov 2017 10:44:29 -0500 Subject: [PATCH 0304/1398] packet: fix crash in fanout_demux_rollover() syzkaller found a race condition fanout_demux_rollover() while removing a packet socket from a fanout group. po->rollover is read and operated on during packet_rcv_fanout(), via fanout_demux_rollover(), but the pointer is currently cleared before the synchronization in packet_release(). It is safer to delay the cleanup until after synchronize_net() has been called, ensuring all calls to packet_rcv_fanout() for this socket have finished. To further simplify synchronization around the rollover structure, set po->rollover in fanout_add() only if there are no errors. This removes the need for rcu in the struct and in the call to packet_getsockopt(..., PACKET_ROLLOVER_STATS, ...). Crashing stack trace: fanout_demux_rollover+0xb6/0x4d0 net/packet/af_packet.c:1392 packet_rcv_fanout+0x649/0x7c8 net/packet/af_packet.c:1487 dev_queue_xmit_nit+0x835/0xc10 net/core/dev.c:1953 xmit_one net/core/dev.c:2975 [inline] dev_hard_start_xmit+0x16b/0xac0 net/core/dev.c:2995 __dev_queue_xmit+0x17a4/0x2050 net/core/dev.c:3476 dev_queue_xmit+0x17/0x20 net/core/dev.c:3509 neigh_connected_output+0x489/0x720 net/core/neighbour.c:1379 neigh_output include/net/neighbour.h:482 [inline] ip6_finish_output2+0xad1/0x22a0 net/ipv6/ip6_output.c:120 ip6_finish_output+0x2f9/0x920 net/ipv6/ip6_output.c:146 NF_HOOK_COND include/linux/netfilter.h:239 [inline] ip6_output+0x1f4/0x850 net/ipv6/ip6_output.c:163 dst_output include/net/dst.h:459 [inline] NF_HOOK.constprop.35+0xff/0x630 include/linux/netfilter.h:250 mld_sendpack+0x6a8/0xcc0 net/ipv6/mcast.c:1660 mld_send_initial_cr.part.24+0x103/0x150 net/ipv6/mcast.c:2072 mld_send_initial_cr net/ipv6/mcast.c:2056 [inline] ipv6_mc_dad_complete+0x99/0x130 net/ipv6/mcast.c:2079 addrconf_dad_completed+0x595/0x970 net/ipv6/addrconf.c:4039 addrconf_dad_work+0xac9/0x1160 net/ipv6/addrconf.c:3971 process_one_work+0xbf0/0x1bc0 kernel/workqueue.c:2113 worker_thread+0x223/0x1990 kernel/workqueue.c:2247 kthread+0x35e/0x430 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:432 Fixes: 0648ab70afe6 ("packet: rollover prepare: per-socket state") Fixes: 509c7a1ecc860 ("packet: avoid panic in packet_getsockopt()") Reported-by: syzbot Signed-off-by: Mike Maloney Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 32 ++++++++++---------------------- net/packet/internal.h | 1 - 2 files changed, 10 insertions(+), 23 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e7f6657269e0..036d77a69595 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1661,7 +1661,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) atomic_long_set(&rollover->num, 0); atomic_long_set(&rollover->num_huge, 0); atomic_long_set(&rollover->num_failed, 0); - po->rollover = rollover; } match = NULL; @@ -1706,6 +1705,8 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) { __dev_remove_pack(&po->prot_hook); po->fanout = match; + po->rollover = rollover; + rollover = NULL; atomic_inc(&match->sk_ref); __fanout_link(sk, po); err = 0; @@ -1719,10 +1720,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } out: - if (err && rollover) { - kfree_rcu(rollover, rcu); - po->rollover = NULL; - } + kfree(rollover); mutex_unlock(&fanout_mutex); return err; } @@ -1746,11 +1744,6 @@ static struct packet_fanout *fanout_release(struct sock *sk) list_del(&f->list); else f = NULL; - - if (po->rollover) { - kfree_rcu(po->rollover, rcu); - po->rollover = NULL; - } } mutex_unlock(&fanout_mutex); @@ -3039,6 +3032,7 @@ static int packet_release(struct socket *sock) synchronize_net(); if (f) { + kfree(po->rollover); fanout_release_data(f); kfree(f); } @@ -3853,7 +3847,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, void *data = &val; union tpacket_stats_u st; struct tpacket_rollover_stats rstats; - struct packet_rollover *rollover; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -3932,18 +3925,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, 0); break; case PACKET_ROLLOVER_STATS: - rcu_read_lock(); - rollover = rcu_dereference(po->rollover); - if (rollover) { - rstats.tp_all = atomic_long_read(&rollover->num); - rstats.tp_huge = atomic_long_read(&rollover->num_huge); - rstats.tp_failed = atomic_long_read(&rollover->num_failed); - data = &rstats; - lv = sizeof(rstats); - } - rcu_read_unlock(); - if (!rollover) + if (!po->rollover) return -EINVAL; + rstats.tp_all = atomic_long_read(&po->rollover->num); + rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); + rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); + data = &rstats; + lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: val = po->tp_tx_has_off; diff --git a/net/packet/internal.h b/net/packet/internal.h index 9ee46314b7d7..d55bfc34d6b3 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -92,7 +92,6 @@ struct packet_fanout { struct packet_rollover { int sock; - struct rcu_head rcu; atomic_long_t num; atomic_long_t num_huge; atomic_long_t num_failed; -- GitLab From 5471afeef41388ec08e6cf610640aaf89805d6db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Nov 2017 08:03:30 -0800 Subject: [PATCH 0305/1398] net/packet: fix a race in packet_bind() and packet_notifier() [ Upstream commit 15fe076edea787807a7cdc168df832544b58eba6 ] syzbot reported crashes [1] and provided a C repro easing bug hunting. When/if packet_do_bind() calls __unregister_prot_hook() and releases po->bind_lock, another thread can run packet_notifier() and process an NETDEV_UP event. This calls register_prot_hook() and hooks again the socket right before first thread is able to grab again po->bind_lock. Fixes this issue by temporarily setting po->num to 0, as suggested by David Miller. [1] dev_remove_pack: ffff8801bf16fa80 not found ------------[ cut here ]------------ kernel BUG at net/core/dev.c:7945! ( BUG_ON(!list_empty(&dev->ptype_all)); ) invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: device syz0 entered promiscuous mode CPU: 0 PID: 3161 Comm: syzkaller404108 Not tainted 4.14.0+ #190 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801cc57a500 task.stack: ffff8801cc588000 RIP: 0010:netdev_run_todo+0x772/0xae0 net/core/dev.c:7945 RSP: 0018:ffff8801cc58f598 EFLAGS: 00010293 RAX: ffff8801cc57a500 RBX: dffffc0000000000 RCX: ffffffff841f75b2 RDX: 0000000000000000 RSI: 1ffff100398b1ede RDI: ffff8801bf1f8810 device syz0 entered promiscuous mode RBP: ffff8801cc58f898 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801bf1f8cd8 R13: ffff8801cc58f870 R14: ffff8801bf1f8780 R15: ffff8801cc58f7f0 FS: 0000000001716880(0000) GS:ffff8801db400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020b13000 CR3: 0000000005e25000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:106 tun_detach drivers/net/tun.c:670 [inline] tun_chr_close+0x49/0x60 drivers/net/tun.c:2845 __fput+0x333/0x7f0 fs/file_table.c:210 ____fput+0x15/0x20 fs/file_table.c:244 task_work_run+0x199/0x270 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x9bb/0x1ae0 kernel/exit.c:865 do_group_exit+0x149/0x400 kernel/exit.c:968 SYSC_exit_group kernel/exit.c:979 [inline] SyS_exit_group+0x1d/0x20 kernel/exit.c:977 entry_SYSCALL_64_fastpath+0x1f/0x96 RIP: 0033:0x44ad19 Fixes: 30f7ea1c2b5f ("packet: race condition in packet_bind") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Francesco Ruggeri Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 036d77a69595..267db0d603bc 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3101,6 +3101,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, if (need_rehook) { if (po->running) { rcu_read_unlock(); + /* prevents packet_notifier() from calling + * register_prot_hook() + */ + po->num = 0; __unregister_prot_hook(sk, true); rcu_read_lock(); dev_curr = po->prot_hook.dev; @@ -3109,6 +3113,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, dev->ifindex); } + BUG_ON(po->running); po->num = proto; po->prot_hook.type = proto; -- GitLab From 80ad5bd1b45f5d64fba26046556882fa989b775e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 6 Dec 2017 20:21:24 +0100 Subject: [PATCH 0306/1398] usbnet: fix alignment for frames with no ethernet header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a4abd7a80addb4a9547f7dfc7812566b60ec505c ] The qmi_wwan minidriver support a 'raw-ip' mode where frames are received without any ethernet header. This causes alignment issues because the skbs allocated by usbnet are "IP aligned". Fix by allowing minidrivers to disable the additional alignment offset. This is implemented using a per-device flag, since the same minidriver also supports 'ethernet' mode. Fixes: 32f7adf633b9 ("net: qmi_wwan: support "raw IP" mode") Reported-and-tested-by: Jay Foster Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 2 ++ drivers/net/usb/usbnet.c | 5 ++++- include/linux/usb/usbnet.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 97a16e23c19a..62725655d8e4 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -74,9 +74,11 @@ static void qmi_wwan_netdev_setup(struct net_device *net) net->hard_header_len = 0; net->addr_len = 0; net->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + set_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: raw IP\n"); } else if (!net->header_ops) { /* don't bother if already set */ ether_setup(net); + clear_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: Ethernet\n"); } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index d5071e364d40..4ab82b998a0f 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -485,7 +485,10 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) return -ENOLINK; } - skb = __netdev_alloc_skb_ip_align(dev->net, size, flags); + if (test_bit(EVENT_NO_IP_ALIGN, &dev->flags)) + skb = __netdev_alloc_skb(dev->net, size, flags); + else + skb = __netdev_alloc_skb_ip_align(dev->net, size, flags); if (!skb) { netif_dbg(dev, rx_err, dev->net, "no rx skb\n"); usbnet_defer_kevent (dev, EVENT_RX_MEMORY); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 6e0ce8c7b8cb..fde7550754df 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -79,6 +79,7 @@ struct usbnet { # define EVENT_RX_KILL 10 # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 +# define EVENT_NO_IP_ALIGN 13 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) -- GitLab From 564fe3e0e95e0b800ca376ec5c0f20c578494234 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Dec 2017 12:45:56 -0800 Subject: [PATCH 0307/1398] net: remove hlist_nulls_add_tail_rcu() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d7efc6c11b277d9d80b99b1334a78bfe7d7edf10 ] Alexander Potapenko reported use of uninitialized memory [1] This happens when inserting a request socket into TCP ehash, in __sk_nulls_add_node_rcu(), since sk_reuseport is not initialized. Bug was added by commit d894ba18d4e4 ("soreuseport: fix ordering for mixed v4/v6 sockets") Note that d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix") missed the opportunity to get rid of hlist_nulls_add_tail_rcu() : Both UDP sockets and TCP/DCCP listeners no longer use __sk_nulls_add_node_rcu() for their hash insertion. Since all other sockets have unique 4-tuple, the reuseport status has no special meaning, so we can always use hlist_nulls_add_head_rcu() for them and save few cycles/instructions. [1] ================================================================== BUG: KMSAN: use of uninitialized memory in inet_ehash_insert+0xd40/0x1050 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.13.0+ #3288 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace:    __dump_stack lib/dump_stack.c:16  dump_stack+0x185/0x1d0 lib/dump_stack.c:52  kmsan_report+0x13f/0x1c0 mm/kmsan/kmsan.c:1016  __msan_warning_32+0x69/0xb0 mm/kmsan/kmsan_instr.c:766  __sk_nulls_add_node_rcu ./include/net/sock.h:684  inet_ehash_insert+0xd40/0x1050 net/ipv4/inet_hashtables.c:413  reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:754  inet_csk_reqsk_queue_hash_add+0x1cc/0x300 net/ipv4/inet_connection_sock.c:765  tcp_conn_request+0x31e7/0x36f0 net/ipv4/tcp_input.c:6414  tcp_v4_conn_request+0x16d/0x220 net/ipv4/tcp_ipv4.c:1314  tcp_rcv_state_process+0x42a/0x7210 net/ipv4/tcp_input.c:5917  tcp_v4_do_rcv+0xa6a/0xcd0 net/ipv4/tcp_ipv4.c:1483  tcp_v4_rcv+0x3de0/0x4ab0 net/ipv4/tcp_ipv4.c:1763  ip_local_deliver_finish+0x6bb/0xcb0 net/ipv4/ip_input.c:216  NF_HOOK ./include/linux/netfilter.h:248  ip_local_deliver+0x3fa/0x480 net/ipv4/ip_input.c:257  dst_input ./include/net/dst.h:477  ip_rcv_finish+0x6fb/0x1540 net/ipv4/ip_input.c:397  NF_HOOK ./include/linux/netfilter.h:248  ip_rcv+0x10f6/0x15c0 net/ipv4/ip_input.c:488  __netif_receive_skb_core+0x36f6/0x3f60 net/core/dev.c:4298  __netif_receive_skb net/core/dev.c:4336  netif_receive_skb_internal+0x63c/0x19c0 net/core/dev.c:4497  napi_skb_finish net/core/dev.c:4858  napi_gro_receive+0x629/0xa50 net/core/dev.c:4889  e1000_receive_skb drivers/net/ethernet/intel/e1000/e1000_main.c:4018  e1000_clean_rx_irq+0x1492/0x1d30 drivers/net/ethernet/intel/e1000/e1000_main.c:4474  e1000_clean+0x43aa/0x5970 drivers/net/ethernet/intel/e1000/e1000_main.c:3819  napi_poll net/core/dev.c:5500  net_rx_action+0x73c/0x1820 net/core/dev.c:5566  __do_softirq+0x4b4/0x8dd kernel/softirq.c:284  invoke_softirq kernel/softirq.c:364  irq_exit+0x203/0x240 kernel/softirq.c:405  exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:638  do_IRQ+0x15e/0x1a0 arch/x86/kernel/irq.c:263  common_interrupt+0x86/0x86 Fixes: d894ba18d4e4 ("soreuseport: fix ordering for mixed v4/v6 sockets") Fixes: d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix") Signed-off-by: Eric Dumazet Reported-by: Alexander Potapenko Acked-by: Craig Gallek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/rculist_nulls.h | 38 ----------------------------------- include/net/sock.h | 6 +----- 2 files changed, 1 insertion(+), 43 deletions(-) diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 4ae95f7e8597..6224a0ab0b1e 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -99,44 +99,6 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, first->pprev = &n->next; } -/** - * hlist_nulls_add_tail_rcu - * @n: the element to add to the hash list. - * @h: the list to add to. - * - * Description: - * Adds the specified element to the end of the specified hlist_nulls, - * while permitting racing traversals. NOTE: tail insertion requires - * list traversal. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() - * or hlist_nulls_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. Regardless of the type of CPU, the - * list-traversal primitive must be guarded by rcu_read_lock(). - */ -static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, - struct hlist_nulls_head *h) -{ - struct hlist_nulls_node *i, *last = NULL; - - for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i); - i = hlist_nulls_next_rcu(i)) - last = i; - - if (last) { - n->next = last->next; - n->pprev = &last->next; - rcu_assign_pointer(hlist_nulls_next_rcu(last), n); - } else { - hlist_nulls_add_head_rcu(n, h); - } -} - /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. diff --git a/include/net/sock.h b/include/net/sock.h index 92b269709b9a..6d42ed883bf9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -648,11 +648,7 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { - if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && - sk->sk_family == AF_INET6) - hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); - else - hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) -- GitLab From 30985e3beb739c37b8879fe4e1cd4791260b94ce Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Fri, 1 Dec 2017 11:12:44 +0100 Subject: [PATCH 0308/1398] stmmac: reset last TSO segment size after device open [ Upstream commit 45ab4b13e46325d00f4acdb365d406e941a15f81 ] The mss variable tracks the last max segment size sent to the TSO engine. We do not update the hardware as long as we receive skb:s with the same value in gso_size. During a network device down/up cycle (mapped to stmmac_release() and stmmac_open() callbacks) we issue a reset to the hardware and it forgets the setting for mss. However we did not zero out our mss variable so the next transmission of a gso packet happens with an undefined hardware setting. This triggers a hang in the TSO engine and eventuelly the netdev watchdog will bark. Fixes: f748be531d70 ("stmmac: support new GMAC4") Signed-off-by: Lars Persson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ef6bff820cf6..adf61a7b1b01 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1795,6 +1795,7 @@ static int stmmac_open(struct net_device *dev) priv->dma_buf_sz = STMMAC_ALIGN(buf_sz); priv->rx_copybreak = STMMAC_RX_COPYBREAK; + priv->mss = 0; ret = alloc_dma_desc_resources(priv); if (ret < 0) { -- GitLab From aa0080f1ad0880d1c68108b38579c32756838f51 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Dec 2017 10:06:56 -0800 Subject: [PATCH 0309/1398] tcp/dccp: block bh before arming time_wait timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cfac7f836a715b91f08c851df915d401a4d52783 ] Maciej Żenczykowski reported some panics in tcp_twsk_destructor() that might be caused by the following bug. timewait timer is pinned to the cpu, because we want to transition timwewait refcount from 0 to 4 in one go, once everything has been initialized. At the time commit ed2e92394589 ("tcp/dccp: fix timewait races in timer handling") was merged, TCP was always running from BH habdler. After commit 5413d1babe8f ("net: do not block BH while processing socket backlog") we definitely can run tcp_time_wait() from process context. We need to block BH in the critical section so that the pinned timer has still its purpose. This bug is more likely to happen under stress and when very small RTO are used in datacenter flows. Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog") Signed-off-by: Eric Dumazet Reported-by: Maciej Żenczykowski Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/minisocks.c | 6 ++++++ net/ipv4/tcp_minisocks.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 39e7e2bca8db..62522b8d2f97 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) if (state == DCCP_TIME_WAIT) timeo = DCCP_TIMEWAIT_LEN; + /* tw_timer is pinned, so we need to make sure BH are disabled + * in following section, otherwise timer handler could run before + * we complete the initialization. + */ + local_bh_disable(); inet_twsk_schedule(tw, timeo); /* Linkage updates. */ __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); inet_twsk_put(tw); + local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 64e1ba49c3e2..830a5645d8c1 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -328,10 +328,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) timeo = TCP_TIMEWAIT_LEN; } + /* tw_timer is pinned, so we need to make sure BH are disabled + * in following section, otherwise timer handler could run before + * we complete the initialization. + */ + local_bh_disable(); inet_twsk_schedule(tw, timeo); /* Linkage updates. */ __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); inet_twsk_put(tw); + local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than -- GitLab From fbf0dfe7ad9f75e2b09f4f4c84219ed0d0b9b05f Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 1 Dec 2017 10:14:51 +0100 Subject: [PATCH 0310/1398] s390/qeth: build max size GSO skbs on L2 devices [ Upstream commit 0cbff6d4546613330a1c5f139f5c368e4ce33ca1 ] The current GSO skb size limit was copy&pasted over from the L3 path, where it is needed due to a TSO limitation. As L2 devices don't offer TSO support (and thus all GSO skbs are segmented before they reach the driver), there's no reason to restrict the stack in how large it may build the GSO skbs. Fixes: d52aec97e5bc ("qeth: enable scatter/gather in layer 2 mode") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_l2_main.c | 2 -- drivers/s390/net/qeth_l3_main.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 8530477caab8..8ac76074c9ec 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1140,8 +1140,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) } card->info.broadcast_capable = 1; qeth_l2_request_initial_mac(card); - card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * - PAGE_SIZE; SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_l2_poll, QETH_NAPI_WEIGHT); netif_carrier_off(card->dev); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 204edb0b69a5..50a2dda53b8a 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3147,8 +3147,8 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; netif_keep_dst(card->dev); - card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * - PAGE_SIZE; + netif_set_gso_max_size(card->dev, (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * + PAGE_SIZE); SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_l3_poll, QETH_NAPI_WEIGHT); -- GitLab From 1d55222b14bd13724eae360976d4df430ec09495 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 1 Dec 2017 10:14:50 +0100 Subject: [PATCH 0311/1398] s390/qeth: fix GSO throughput regression [ Upstream commit 6d69b1f1eb7a2edf8a3547f361c61f2538e054bb ] Using GSO with small MTUs currently results in a substantial throughput regression - which is caused by how qeth needs to map non-linear skbs into its IO buffer elements: compared to a linear skb, each GSO-segmented skb effectively consumes twice as many buffer elements (ie two instead of one) due to the additional header-only part. This causes the Output Queue to be congested with low-utilized IO buffers. Fix this as follows: If the MSS is low enough so that a non-SG GSO segmentation produces order-0 skbs (currently ~3500 byte), opt out from NETIF_F_SG. This is where we anticipate the biggest savings, since an SG-enabled GSO segmentation produces skbs that always consume at least two buffer elements. Larger MSS values continue to get a SG-enabled GSO segmentation, since 1) the relative overhead of the additional header-only buffer element becomes less noticeable, and 2) the linearization overhead increases. With the throughput regression fixed, re-enable NETIF_F_SG by default to reap the significant CPU savings of GSO. Fixes: 5722963a8e83 ("qeth: do not turn on SG per default") Reported-by: Nils Hoppmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core.h | 3 +++ drivers/s390/net/qeth_core_main.c | 31 +++++++++++++++++++++++++++++++ drivers/s390/net/qeth_l2_main.c | 2 ++ drivers/s390/net/qeth_l3_main.c | 2 ++ 4 files changed, 38 insertions(+) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index d55e6438bb5e..e2bd2ad01b15 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -1004,6 +1004,9 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *, int qeth_set_features(struct net_device *, netdev_features_t); int qeth_recover_features(struct net_device *); netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); +netdev_features_t qeth_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features); /* exports for OSN */ int qeth_osn_assist(struct net_device *, void *, int); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 21ef8023430f..b5fa6bb56b29 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -19,6 +19,11 @@ #include #include #include +#include +#include +#include +#include + #include #include @@ -6240,6 +6245,32 @@ netdev_features_t qeth_fix_features(struct net_device *dev, } EXPORT_SYMBOL_GPL(qeth_fix_features); +netdev_features_t qeth_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + /* GSO segmentation builds skbs with + * a (small) linear part for the headers, and + * page frags for the data. + * Compared to a linear skb, the header-only part consumes an + * additional buffer element. This reduces buffer utilization, and + * hurts throughput. So compress small segments into one element. + */ + if (netif_needs_gso(skb, features)) { + /* match skb_segment(): */ + unsigned int doffset = skb->data - skb_mac_header(skb); + unsigned int hsize = skb_shinfo(skb)->gso_size; + unsigned int hroom = skb_headroom(skb); + + /* linearize only if resulting skb allocations are order-0: */ + if (SKB_DATA_ALIGN(hroom + doffset + hsize) <= SKB_MAX_HEAD(0)) + features &= ~NETIF_F_SG; + } + + return vlan_features_check(skb, features); +} +EXPORT_SYMBOL_GPL(qeth_features_check); + static int __init qeth_core_init(void) { int rc; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 8ac76074c9ec..ac33f6c999b1 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1084,6 +1084,7 @@ static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_stop = qeth_l2_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l2_hard_start_xmit, + .ndo_features_check = qeth_features_check, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l2_set_rx_mode, .ndo_do_ioctl = qeth_l2_do_ioctl, @@ -1128,6 +1129,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) { card->dev->hw_features = NETIF_F_SG; card->dev->vlan_features = NETIF_F_SG; + card->dev->features |= NETIF_F_SG; /* OSA 3S and earlier has no RX/TX support */ if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) { card->dev->hw_features |= NETIF_F_IP_CSUM; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 50a2dda53b8a..07555a24fade 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3066,6 +3066,7 @@ static const struct net_device_ops qeth_l3_netdev_ops = { .ndo_stop = qeth_l3_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l3_hard_start_xmit, + .ndo_features_check = qeth_features_check, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l3_set_multicast_list, .ndo_do_ioctl = qeth_l3_do_ioctl, @@ -3122,6 +3123,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->vlan_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO; + card->dev->features |= NETIF_F_SG; } } } else if (card->info.type == QETH_CARD_TYPE_IQD) { -- GitLab From 0cfe6df9383481ab5bda053e2333cf29a471b2d6 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 1 Dec 2017 10:14:49 +0100 Subject: [PATCH 0312/1398] s390/qeth: fix thinko in IPv4 multicast address tracking [ Upsteam commit bc3ab70584696cb798b9e1e0ac8e6ced5fd4c3b8 ] Commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") reworked how secondary addresses are managed for qeth devices. Instead of dropping & subsequently re-adding all addresses on every ndo_set_rx_mode() call, qeth now keeps track of the addresses that are currently registered with the HW. On a ndo_set_rx_mode(), we thus only need to do (de-)registration requests for the addresses that have actually changed. On L3 devices, the lookup for IPv4 Multicast addresses checks the wrong hashtable - and thus never finds a match. As a result, we first delete *all* such addresses, and then re-add them again. So each set_rx_mode() causes a short period where the IPv4 Multicast addresses are not registered, and the card stops forwarding inbound traffic for them. Fix this by setting the ->is_multicast flag on the lookup object, thus enabling qeth_l3_ip_from_hash() to search the correct hashtable and find a match there. Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_l3_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 07555a24fade..5735fc3be6c7 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1416,6 +1416,7 @@ qeth_l3_add_mc_to_hash(struct qeth_card *card, struct in_device *in4_dev) tmp->u.a4.addr = im4->multiaddr; memcpy(tmp->mac, buf, sizeof(tmp->mac)); + tmp->is_multicast = 1; ipm = qeth_l3_ip_from_hash(card, tmp); if (ipm) { -- GitLab From cf00fd3d526cdabaf558e911d71d0a2c303cf0e7 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Wed, 29 Nov 2017 12:48:42 +0200 Subject: [PATCH 0313/1398] tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv() [ Upstream commit c7799c067c2ae33e348508c8afec354f3257ff25 ] Remove the second tipc_rcv() call in tipc_udp_recv(). We have just checked that the bearer is not up, and calling tipc_rcv() with a bearer that is not up leads to a TIPC div-by-zero crash in tipc_node_calculate_timer(). The crash is rare in practice, but can happen like this: We're enabling a bearer, but it's not yet up and fully initialized. At the same time we receive a discovery packet, and in tipc_udp_recv() we end up calling tipc_rcv() with the not-yet-initialized bearer, causing later the div-by-zero crash in tipc_node_calculate_timer(). Jon Maloy explains the impact of removing the second tipc_rcv() call: "link setup in the worst case will be delayed until the next arriving discovery messages, 1 sec later, and this is an acceptable delay." As the tipc_rcv() call is removed, just leave the function via the rcu_out label, so that we will kfree_skb(). [ 12.590450] Own node address <1.1.1>, network identity 1 [ 12.668088] divide error: 0000 [#1] SMP [ 12.676952] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.14.2-dirty #1 [ 12.679225] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 [ 12.682095] task: ffff8c2a761edb80 task.stack: ffffa41cc0cac000 [ 12.684087] RIP: 0010:tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] [ 12.686486] RSP: 0018:ffff8c2a7fc838a0 EFLAGS: 00010246 [ 12.688451] RAX: 0000000000000000 RBX: ffff8c2a5b382600 RCX: 0000000000000000 [ 12.691197] RDX: 0000000000000000 RSI: ffff8c2a5b382600 RDI: ffff8c2a5b382600 [ 12.693945] RBP: ffff8c2a7fc838b0 R08: 0000000000000001 R09: 0000000000000001 [ 12.696632] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8c2a5d8949d8 [ 12.699491] R13: ffffffff95ede400 R14: 0000000000000000 R15: ffff8c2a5d894800 [ 12.702338] FS: 0000000000000000(0000) GS:ffff8c2a7fc80000(0000) knlGS:0000000000000000 [ 12.705099] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 12.706776] CR2: 0000000001bb9440 CR3: 00000000bd009001 CR4: 00000000003606e0 [ 12.708847] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 12.711016] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 12.712627] Call Trace: [ 12.713390] [ 12.714011] tipc_node_check_dest+0x2e8/0x350 [tipc] [ 12.715286] tipc_disc_rcv+0x14d/0x1d0 [tipc] [ 12.716370] tipc_rcv+0x8b0/0xd40 [tipc] [ 12.717396] ? minmax_running_min+0x2f/0x60 [ 12.718248] ? dst_alloc+0x4c/0xa0 [ 12.718964] ? tcp_ack+0xaf1/0x10b0 [ 12.719658] ? tipc_udp_is_known_peer+0xa0/0xa0 [tipc] [ 12.720634] tipc_udp_recv+0x71/0x1d0 [tipc] [ 12.721459] ? dst_alloc+0x4c/0xa0 [ 12.722130] udp_queue_rcv_skb+0x264/0x490 [ 12.722924] __udp4_lib_rcv+0x21e/0x990 [ 12.723670] ? ip_route_input_rcu+0x2dd/0xbf0 [ 12.724442] ? tcp_v4_rcv+0x958/0xa40 [ 12.725039] udp_rcv+0x1a/0x20 [ 12.725587] ip_local_deliver_finish+0x97/0x1d0 [ 12.726323] ip_local_deliver+0xaf/0xc0 [ 12.726959] ? ip_route_input_noref+0x19/0x20 [ 12.727689] ip_rcv_finish+0xdd/0x3b0 [ 12.728307] ip_rcv+0x2ac/0x360 [ 12.728839] __netif_receive_skb_core+0x6fb/0xa90 [ 12.729580] ? udp4_gro_receive+0x1a7/0x2c0 [ 12.730274] __netif_receive_skb+0x1d/0x60 [ 12.730953] ? __netif_receive_skb+0x1d/0x60 [ 12.731637] netif_receive_skb_internal+0x37/0xd0 [ 12.732371] napi_gro_receive+0xc7/0xf0 [ 12.732920] receive_buf+0x3c3/0xd40 [ 12.733441] virtnet_poll+0xb1/0x250 [ 12.733944] net_rx_action+0x23e/0x370 [ 12.734476] __do_softirq+0xc5/0x2f8 [ 12.734922] irq_exit+0xfa/0x100 [ 12.735315] do_IRQ+0x4f/0xd0 [ 12.735680] common_interrupt+0xa2/0xa2 [ 12.736126] [ 12.736416] RIP: 0010:native_safe_halt+0x6/0x10 [ 12.736925] RSP: 0018:ffffa41cc0cafe90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff4d [ 12.737756] RAX: 0000000000000000 RBX: ffff8c2a761edb80 RCX: 0000000000000000 [ 12.738504] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 12.739258] RBP: ffffa41cc0cafe90 R08: 0000014b5b9795e5 R09: ffffa41cc12c7e88 [ 12.740118] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000002 [ 12.740964] R13: ffff8c2a761edb80 R14: 0000000000000000 R15: 0000000000000000 [ 12.741831] default_idle+0x2a/0x100 [ 12.742323] arch_cpu_idle+0xf/0x20 [ 12.742796] default_idle_call+0x28/0x40 [ 12.743312] do_idle+0x179/0x1f0 [ 12.743761] cpu_startup_entry+0x1d/0x20 [ 12.744291] start_secondary+0x112/0x120 [ 12.744816] secondary_startup_64+0xa5/0xa5 [ 12.745367] Code: b9 f4 01 00 00 48 89 c2 48 c1 ea 02 48 3d d3 07 00 00 48 0f 47 d1 49 8b 0c 24 48 39 d1 76 07 49 89 14 24 48 89 d1 31 d2 48 89 df <48> f7 f1 89 c6 e8 81 6e ff ff 5b 41 5c 5d c3 66 90 66 2e 0f 1f [ 12.747527] RIP: tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] RSP: ffff8c2a7fc838a0 [ 12.748555] ---[ end trace 1399ab83390650fd ]--- [ 12.749296] Kernel panic - not syncing: Fatal exception in interrupt [ 12.750123] Kernel Offset: 0x13200000 from 0xffffffff82000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 12.751215] Rebooting in 60 seconds.. Fixes: c9b64d492b1f ("tipc: add replicast peer discovery") Signed-off-by: Tommi Rantala Cc: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/udp_media.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index b58dc95f3d35..107375d80c70 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) goto rcu_out; } - tipc_rcv(sock_net(sk), skb, b); - rcu_read_unlock(); - return 0; - rcu_out: rcu_read_unlock(); out: -- GitLab From 0cab694ab7bca62eb42dbfae209c08edc8d229c4 Mon Sep 17 00:00:00 2001 From: Debabrata Banerjee Date: Wed, 13 Dec 2017 15:33:37 -0500 Subject: [PATCH 0314/1398] Fix handling of verdicts after NF_QUEUE [This fix is only needed for v4.9 stable since v4.10+ does not have the issue] A verdict of NF_STOLEN after NF_QUEUE will cause an incorrect return value and a potential kernel panic via double free of skb's This was broken by commit 7034b566a4e7 ("netfilter: fix nf_queue handling") and subsequently fixed in v4.10 by commit c63cbc460419 ("netfilter: use switch() to handle verdict cases from nf_hook_slow()"). However that commit cannot be cleanly cherry-picked to v4.9 Signed-off-by: Debabrata Banerjee Acked-by: Pablo Neira Ayuso --- net/netfilter/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 004af030ef1a..d869ea50623e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -364,6 +364,11 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) ret = nf_queue(skb, state, &entry, verdict); if (ret == 1 && entry) goto next_hook; + } else { + /* Implicit handling for NF_STOLEN, as well as any other + * non conventional verdicts. + */ + ret = 0; } return ret; } -- GitLab From f8dac5bfbd8e6ff0de933a33975abd7e38818ddd Mon Sep 17 00:00:00 2001 From: Masamitsu Yamazaki Date: Wed, 15 Nov 2017 07:33:14 +0000 Subject: [PATCH 0315/1398] ipmi: Stop timers before cleaning up the module commit 4f7f5551a760eb0124267be65763008169db7087 upstream. System may crash after unloading ipmi_si.ko module because a timer may remain and fire after the module cleaned up resources. cleanup_one_si() contains the following processing. /* * Make sure that interrupts, the timer and the thread are * stopped and will not run again. */ if (to_clean->irq_cleanup) to_clean->irq_cleanup(to_clean); wait_for_timer_and_thread(to_clean); /* * Timeouts are stopped, now make sure the interrupts are off * in the BMC. Note that timers and CPU interrupts are off, * so no need for locks. */ while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) { poll(to_clean); schedule_timeout_uninterruptible(1); } si_state changes as following in the while loop calling poll(to_clean). SI_GETTING_MESSAGES => SI_CHECKING_ENABLES => SI_SETTING_ENABLES => SI_GETTING_EVENTS => SI_NORMAL As written in the code comments above, timers are expected to stop before the polling loop and not to run again. But the timer is set again in the following process when si_state becomes SI_SETTING_ENABLES. => poll => smi_event_handler => handle_transaction_done // smi_info->si_state == SI_SETTING_ENABLES => start_getting_events => start_new_msg => smi_mod_timer => mod_timer As a result, before the timer set in start_new_msg() expires, the polling loop may see si_state becoming SI_NORMAL and the module clean-up finishes. For example, hard LOCKUP and panic occurred as following. smi_timeout was called after smi_event_handler, kcs_event and hangs at port_inb() trying to access I/O port after release. [exception RIP: port_inb+19] RIP: ffffffffc0473053 RSP: ffff88069fdc3d80 RFLAGS: 00000006 RAX: ffff8806800f8e00 RBX: ffff880682bd9400 RCX: 0000000000000000 RDX: 0000000000000ca3 RSI: 0000000000000ca3 RDI: ffff8806800f8e40 RBP: ffff88069fdc3d80 R8: ffffffff81d86dfc R9: ffffffff81e36426 R10: 00000000000509f0 R11: 0000000000100000 R12: 0000000000]:000000 R13: 0000000000000000 R14: 0000000000000246 R15: ffff8806800f8e00 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0000 --- --- To fix the problem I defined a flag, timer_can_start, as member of struct smi_info. The flag is enabled immediately after initializing the timer and disabled immediately before waiting for timer deletion. Fixes: 0cfec916e86d ("ipmi: Start the timer and thread on internal msgs") Signed-off-by: Yamazaki Masamitsu [Adjusted for recent changes in the driver.] [Some fairly major changes went into the IPMI driver in 4.15, so this required a backport as the code had changed and moved to a different file. The 4.14 version of this patch moved some code under an if statement causing it to not apply to 4.7-4.13.] Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_si_intf.c | 44 +++++++++++++++++--------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index a112c0146012..e0a53156b782 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -241,6 +241,9 @@ struct smi_info { /* The timer for this si. */ struct timer_list si_timer; + /* This flag is set, if the timer can be set */ + bool timer_can_start; + /* This flag is set, if the timer is running (timer_pending() isn't enough) */ bool timer_running; @@ -416,6 +419,8 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info) static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val) { + if (!smi_info->timer_can_start) + return; smi_info->last_timeout_jiffies = jiffies; mod_timer(&smi_info->si_timer, new_val); smi_info->timer_running = true; @@ -435,21 +440,18 @@ static void start_new_msg(struct smi_info *smi_info, unsigned char *msg, smi_info->handlers->start_transaction(smi_info->si_sm, msg, size); } -static void start_check_enables(struct smi_info *smi_info, bool start_timer) +static void start_check_enables(struct smi_info *smi_info) { unsigned char msg[2]; msg[0] = (IPMI_NETFN_APP_REQUEST << 2); msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD; - if (start_timer) - start_new_msg(smi_info, msg, 2); - else - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); + start_new_msg(smi_info, msg, 2); smi_info->si_state = SI_CHECKING_ENABLES; } -static void start_clear_flags(struct smi_info *smi_info, bool start_timer) +static void start_clear_flags(struct smi_info *smi_info) { unsigned char msg[3]; @@ -458,10 +460,7 @@ static void start_clear_flags(struct smi_info *smi_info, bool start_timer) msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD; msg[2] = WDT_PRE_TIMEOUT_INT; - if (start_timer) - start_new_msg(smi_info, msg, 3); - else - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3); + start_new_msg(smi_info, msg, 3); smi_info->si_state = SI_CLEARING_FLAGS; } @@ -496,11 +495,11 @@ static void start_getting_events(struct smi_info *smi_info) * Note that we cannot just use disable_irq(), since the interrupt may * be shared. */ -static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer) +static inline bool disable_si_irq(struct smi_info *smi_info) { if ((smi_info->irq) && (!smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = true; - start_check_enables(smi_info, start_timer); + start_check_enables(smi_info); return true; } return false; @@ -510,7 +509,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info) { if ((smi_info->irq) && (smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = false; - start_check_enables(smi_info, true); + start_check_enables(smi_info); return true; } return false; @@ -528,7 +527,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info) msg = ipmi_alloc_smi_msg(); if (!msg) { - if (!disable_si_irq(smi_info, true)) + if (!disable_si_irq(smi_info)) smi_info->si_state = SI_NORMAL; } else if (enable_si_irq(smi_info)) { ipmi_free_smi_msg(msg); @@ -544,7 +543,7 @@ static void handle_flags(struct smi_info *smi_info) /* Watchdog pre-timeout */ smi_inc_stat(smi_info, watchdog_pretimeouts); - start_clear_flags(smi_info, true); + start_clear_flags(smi_info); smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT; if (smi_info->intf) ipmi_smi_watchdog_pretimeout(smi_info->intf); @@ -927,7 +926,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, * disable and messages disabled. */ if (smi_info->supports_event_msg_buff || smi_info->irq) { - start_check_enables(smi_info, true); + start_check_enables(smi_info); } else { smi_info->curr_msg = alloc_msg_handle_irq(smi_info); if (!smi_info->curr_msg) @@ -1234,6 +1233,7 @@ static int smi_start_processing(void *send_info, /* Set up the timer that drives the interface. */ setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi); + new_smi->timer_can_start = true; smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES); /* Try to claim any interrupts. */ @@ -3448,10 +3448,12 @@ static void check_for_broken_irqs(struct smi_info *smi_info) check_set_rcv_irq(smi_info); } -static inline void wait_for_timer_and_thread(struct smi_info *smi_info) +static inline void stop_timer_and_thread(struct smi_info *smi_info) { if (smi_info->thread != NULL) kthread_stop(smi_info->thread); + + smi_info->timer_can_start = false; if (smi_info->timer_running) del_timer_sync(&smi_info->si_timer); } @@ -3593,7 +3595,7 @@ static int try_smi_init(struct smi_info *new_smi) * Start clearing the flags before we enable interrupts or the * timer to avoid racing with the timer. */ - start_clear_flags(new_smi, false); + start_clear_flags(new_smi); /* * IRQ is defined to be set when non-zero. req_events will @@ -3671,7 +3673,7 @@ static int try_smi_init(struct smi_info *new_smi) return 0; out_err_stop_timer: - wait_for_timer_and_thread(new_smi); + stop_timer_and_thread(new_smi); out_err: new_smi->interrupt_disabled = true; @@ -3865,7 +3867,7 @@ static void cleanup_one_si(struct smi_info *to_clean) */ if (to_clean->irq_cleanup) to_clean->irq_cleanup(to_clean); - wait_for_timer_and_thread(to_clean); + stop_timer_and_thread(to_clean); /* * Timeouts are stopped, now make sure the interrupts are off @@ -3876,7 +3878,7 @@ static void cleanup_one_si(struct smi_info *to_clean) poll(to_clean); schedule_timeout_uninterruptible(1); } - disable_si_irq(to_clean, false); + disable_si_irq(to_clean); while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) { poll(to_clean); schedule_timeout_uninterruptible(1); -- GitLab From 47273f0d398d8cc50b63d203230d7f9fb3df4ac0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Nov 2017 12:38:44 +0100 Subject: [PATCH 0316/1398] s390: always save and restore all registers on context switch commit fbbd7f1a51965b50dd12924841da0d478f3da71b upstream. The switch_to() macro has an optimization to avoid saving and restoring register contents that aren't needed for kernel threads. There is however the possibility that a kernel thread execve's a user space program. In such a case the execve'd process can partially see the contents of the previous process, which shouldn't be allowed. To avoid this, simply always save and restore register contents on context switch. Fixes: fdb6d070effba ("switch_to: dont restore/save access & fpu regs for kernel threads") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/switch_to.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index dde6b52359c5..ff2fbdafe689 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -29,17 +29,16 @@ static inline void restore_access_regs(unsigned int *acrs) } #define switch_to(prev,next,last) do { \ - if (prev->mm) { \ - save_fpu_regs(); \ - save_access_regs(&prev->thread.acrs[0]); \ - save_ri_cb(prev->thread.ri_cb); \ - } \ + /* save_fpu_regs() sets the CIF_FPU flag, which enforces \ + * a restore of the floating point / vector registers as \ + * soon as the next task returns to user space \ + */ \ + save_fpu_regs(); \ + save_access_regs(&prev->thread.acrs[0]); \ + save_ri_cb(prev->thread.ri_cb); \ update_cr_regs(next); \ - if (next->mm) { \ - set_cpu_flag(CIF_FPU); \ - restore_access_regs(&next->thread.acrs[0]); \ - restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ - } \ + restore_access_regs(&next->thread.acrs[0]); \ + restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ prev = __switch_to(prev,next); \ } while (0) -- GitLab From 16648cbcd3326d3abaa3e4239dfda9c7b39e644f Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Sun, 26 Nov 2017 06:52:53 +0000 Subject: [PATCH 0317/1398] usb: gadget: ffs: Forbid usb_ep_alloc_request from sleeping commit 30bf90ccdec1da9c8198b161ecbff39ce4e5a9ba upstream. Found using DEBUG_ATOMIC_SLEEP while submitting an AIO read operation: [ 100.853642] BUG: sleeping function called from invalid context at mm/slab.h:421 [ 100.861148] in_atomic(): 1, irqs_disabled(): 1, pid: 1880, name: python [ 100.867954] 2 locks held by python/1880: [ 100.867961] #0: (&epfile->mutex){....}, at: [] ffs_mutex_lock+0x27/0x30 [usb_f_fs] [ 100.868020] #1: (&(&ffs->eps_lock)->rlock){....}, at: [] ffs_epfile_io.isra.17+0x24b/0x590 [usb_f_fs] [ 100.868076] CPU: 1 PID: 1880 Comm: python Not tainted 4.14.0-edison+ #118 [ 100.868085] Hardware name: Intel Corporation Merrifield/BODEGA BAY, BIOS 542 2015.01.21:18.19.48 [ 100.868093] Call Trace: [ 100.868122] dump_stack+0x47/0x62 [ 100.868156] ___might_sleep+0xfd/0x110 [ 100.868182] __might_sleep+0x68/0x70 [ 100.868217] kmem_cache_alloc_trace+0x4b/0x200 [ 100.868248] ? dwc3_gadget_ep_alloc_request+0x24/0xe0 [dwc3] [ 100.868302] dwc3_gadget_ep_alloc_request+0x24/0xe0 [dwc3] [ 100.868343] usb_ep_alloc_request+0x16/0xc0 [udc_core] [ 100.868386] ffs_epfile_io.isra.17+0x444/0x590 [usb_f_fs] [ 100.868424] ? _raw_spin_unlock_irqrestore+0x27/0x40 [ 100.868457] ? kiocb_set_cancel_fn+0x57/0x60 [ 100.868477] ? ffs_ep0_poll+0xc0/0xc0 [usb_f_fs] [ 100.868512] ffs_epfile_read_iter+0xfe/0x157 [usb_f_fs] [ 100.868551] ? security_file_permission+0x9c/0xd0 [ 100.868587] ? rw_verify_area+0xac/0x120 [ 100.868633] aio_read+0x9d/0x100 [ 100.868692] ? __fget+0xa2/0xd0 [ 100.868727] ? __might_sleep+0x68/0x70 [ 100.868763] SyS_io_submit+0x471/0x680 [ 100.868878] do_int80_syscall_32+0x4e/0xd0 [ 100.868921] entry_INT80_32+0x2a/0x2a [ 100.868932] EIP: 0xb7fbb676 [ 100.868941] EFLAGS: 00000292 CPU: 1 [ 100.868951] EAX: ffffffda EBX: b7aa2000 ECX: 00000002 EDX: b7af8368 [ 100.868961] ESI: b7fbb660 EDI: b7aab000 EBP: bfb6c658 ESP: bfb6c638 [ 100.868973] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b Signed-off-by: Vincent Pelletier Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 346a630cebd5..7b107e43b1c4 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1015,7 +1015,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) else ret = ep->status; goto error_mutex; - } else if (!(req = usb_ep_alloc_request(ep->ep, GFP_KERNEL))) { + } else if (!(req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC))) { ret = -ENOMEM; } else { req->buf = data; -- GitLab From 80c0f4777fd6d092537790903800dd352e5d89e5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 Dec 2017 23:27:57 +0000 Subject: [PATCH 0318/1398] fix kcm_clone() commit a5739435b5a3b8c449f8844ecd71a3b1e89f0a33 upstream. 1) it's fput() or sock_release(), not both 2) don't do fd_install() until the last failure exit. 3) not a bug per se, but... don't attach socket to struct file until it's set up. Take reserving descriptor into the caller, move fd_install() to the caller, sanitize failure exits and calling conventions. Acked-by: Tom Herbert Signed-off-by: Al Viro Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/kcm/kcmsock.c | 71 +++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 46 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 7eb0e8fe3ca8..22785dc03051 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1624,60 +1624,35 @@ static struct proto kcm_proto = { }; /* Clone a kcm socket. */ -static int kcm_clone(struct socket *osock, struct kcm_clone *info, - struct socket **newsockp) +static struct file *kcm_clone(struct socket *osock) { struct socket *newsock; struct sock *newsk; - struct file *newfile; - int err, newfd; + struct file *file; - err = -ENFILE; newsock = sock_alloc(); if (!newsock) - goto out; + return ERR_PTR(-ENFILE); newsock->type = osock->type; newsock->ops = osock->ops; __module_get(newsock->ops->owner); - newfd = get_unused_fd_flags(0); - if (unlikely(newfd < 0)) { - err = newfd; - goto out_fd_fail; - } - - newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); - if (unlikely(IS_ERR(newfile))) { - err = PTR_ERR(newfile); - goto out_sock_alloc_fail; - } - newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, &kcm_proto, true); if (!newsk) { - err = -ENOMEM; - goto out_sk_alloc_fail; + sock_release(newsock); + return ERR_PTR(-ENOMEM); } - sock_init_data(newsock, newsk); init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); - fd_install(newfd, newfile); - *newsockp = newsock; - info->fd = newfd; - - return 0; + file = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); + if (IS_ERR(file)) + sock_release(newsock); -out_sk_alloc_fail: - fput(newfile); -out_sock_alloc_fail: - put_unused_fd(newfd); -out_fd_fail: - sock_release(newsock); -out: - return err; + return file; } static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) @@ -1707,21 +1682,25 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } case SIOCKCMCLONE: { struct kcm_clone info; - struct socket *newsock = NULL; + struct file *file; - if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - return -EFAULT; + info.fd = get_unused_fd_flags(0); + if (unlikely(info.fd < 0)) + return info.fd; - err = kcm_clone(sock, &info, &newsock); - - if (!err) { - if (copy_to_user((void __user *)arg, &info, - sizeof(info))) { - err = -EFAULT; - sys_close(info.fd); - } + file = kcm_clone(sock); + if (IS_ERR(file)) { + put_unused_fd(info.fd); + return PTR_ERR(file); } - + if (copy_to_user((void __user *)arg, &info, + sizeof(info))) { + put_unused_fd(info.fd); + fput(file); + return -EFAULT; + } + fd_install(info.fd, file); + err = 0; break; } default: -- GitLab From 9414a6309c7205c38ee3559a871ef165dd44c657 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Nov 2017 17:58:17 +0000 Subject: [PATCH 0319/1398] KVM: arm/arm64: vgic-its: Preserve the revious read from the pending table commit 64afe6e9eb4841f35317da4393de21a047a883b3 upstream. The current pending table parsing code assumes that we keep the previous read of the pending bits, but keep that variable in the current block, making sure it is discarded on each loop. We end-up using whatever is on the stack. Who knows, it might just be the right thing... Fixes: 33d3bc9556a7d ("KVM: arm64: vgic-its: Read initial LPI pending table") Cc: stable@vger.kernel.org # 4.8 Reported-by: AKASHI Takahiro Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index ebcaf4641d2b..31f562507915 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -322,6 +322,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) int ret = 0; u32 *intids; int nr_irqs, i; + u8 pendmask; nr_irqs = vgic_copy_lpi_list(vcpu->kvm, &intids); if (nr_irqs < 0) @@ -329,7 +330,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) for (i = 0; i < nr_irqs; i++) { int byte_offset, bit_nr; - u8 pendmask; byte_offset = intids[i] / BITS_PER_BYTE; bit_nr = intids[i] % BITS_PER_BYTE; -- GitLab From eae3f3ab7fb34732fbdd86fad79410aec48faf1d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 3 Nov 2016 16:10:55 +1100 Subject: [PATCH 0320/1398] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold commit b492f7e4e07a28e706db26cf4943bb0911435426 upstream. These functions compute an IP checksum by computing a 64-bit sum and folding it to 32 bits (the "nofold" in their names refers to folding down to 16 bits). However, doing (u32) (s + (s >> 32)) is not sufficient to fold a 64-bit sum to 32 bits correctly. The addition can produce a carry out from bit 31, which needs to be added in to the sum to produce the correct result. To fix this, we copy the from64to32() function from lib/checksum.c and use that. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/checksum.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index a67bb09585f4..430d038eb2a4 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -53,17 +53,25 @@ static inline __sum16 csum_fold(__wsum sum) return (__force __sum16)(~((__force u32)sum + tmp) >> 16); } +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. */ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { #ifdef __powerpc64__ - unsigned long s = (__force u32)sum; + u64 s = (__force u32)sum; s += (__force u32)saddr; s += (__force u32)daddr; s += proto + len; - s += (s >> 32); - return (__force __wsum) s; + return (__force __wsum) from64to32(s); #else __asm__("\n\ addc %0,%0,%1 \n\ @@ -123,8 +131,7 @@ static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl) for (i = 0; i < ihl - 1; i++, ptr++) s += *ptr; - s += (s >> 32); - return (__force __wsum)s; + return (__force __wsum)from64to32(s); #else __wsum sum, tmp; -- GitLab From 97c6687021262fc63d5482cb76b226ebb46feffd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Oct 2017 18:22:25 +0900 Subject: [PATCH 0321/1398] kbuild: do not call cc-option before KBUILD_CFLAGS initialization [ Upstream commit 433dc2ebe7d17dd21cba7ad5c362d37323592236 ] Some $(call cc-option,...) are invoked very early, even before KBUILD_CFLAGS, etc. are initialized. The returned string from $(call cc-option,...) depends on KBUILD_CPPFLAGS, KBUILD_CFLAGS, and GCC_PLUGINS_CFLAGS. Since they are exported, they are not empty when the top Makefile is recursively invoked. The recursion occurs in several places. For example, the top Makefile invokes itself for silentoldconfig. "make tinyconfig", "make rpm-pkg" are the cases, too. In those cases, the second call of cc-option from the same line runs a different shell command due to non-pristine KBUILD_CFLAGS. To get the same result all the time, KBUILD_* and GCC_PLUGINS_CFLAGS must be initialized before any call of cc-option. This avoids garbage data in the .cache.mk file. Move all calls of cc-option below the config targets because target compiler flags are unnecessary for Kconfig. Signed-off-by: Masahiro Yamada Reviewed-by: Douglas Anderson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- Makefile | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 8f2819bf8135..b208e550b585 100644 --- a/Makefile +++ b/Makefile @@ -370,9 +370,6 @@ LDFLAGS_MODULE = CFLAGS_KERNEL = AFLAGS_KERNEL = LDFLAGS_vmlinux = -CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,) -CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) - # Use USERINCLUDE when you must reference the UAPI directories only. USERINCLUDE := \ @@ -393,21 +390,19 @@ LINUXINCLUDE := \ LINUXINCLUDE += $(filter-out $(LINUXINCLUDE),$(USERINCLUDE)) -KBUILD_CPPFLAGS := -D__KERNEL__ - +KBUILD_AFLAGS := -D__ASSEMBLY__ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -std=gnu89 $(call cc-option,-fno-PIE) - - + -std=gnu89 +KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := -KBUILD_AFLAGS := -D__ASSEMBLY__ $(call cc-option,-fno-PIE) KBUILD_AFLAGS_MODULE := -DMODULE KBUILD_CFLAGS_MODULE := -DMODULE KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds +GCC_PLUGINS_CFLAGS := # Read KERNELRELEASE from include/config/kernel.release (if it exists) KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null) @@ -420,7 +415,7 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS -export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KCOV CFLAGS_KASAN CFLAGS_UBSAN +export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_KASAN CFLAGS_UBSAN export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL @@ -620,6 +615,12 @@ endif # Defaults to vmlinux, but the arch makefile usually adds further targets all: vmlinux +KBUILD_CFLAGS += $(call cc-option,-fno-PIE) +KBUILD_AFLAGS += $(call cc-option,-fno-PIE) +CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,) +CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) +export CFLAGS_GCOV CFLAGS_KCOV + # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default # values of the respective KBUILD_* variables ARCH_CPPFLAGS := -- GitLab From a625a16c8aea00aeff6dd95aebe384bf309b261a Mon Sep 17 00:00:00 2001 From: Keefe Liu Date: Thu, 9 Nov 2017 20:09:31 +0800 Subject: [PATCH 0322/1398] ipvlan: fix ipv6 outbound device [ Upstream commit ca29fd7cce5a6444d57fb86517589a1a31c759e1 ] When process the outbound packet of ipv6, we should assign the master device to output device other than input device. Signed-off-by: Keefe Liu Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index b4e990743e1d..980e38524418 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -404,7 +404,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) struct dst_entry *dst; int err, ret = NET_XMIT_DROP; struct flowi6 fl6 = { - .flowi6_iif = dev->ifindex, + .flowi6_oif = dev->ifindex, .daddr = ip6h->daddr, .saddr = ip6h->saddr, .flowi6_flags = FLOWI_FLAG_ANYSRC, -- GitLab From 93dedcf5a17779c73a7609b9306b519555b9edb8 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 1 Sep 2017 09:44:34 -0400 Subject: [PATCH 0323/1398] audit: ensure that 'audit=1' actually enables audit for PID 1 [ Upstream commit 173743dd99a49c956b124a74c8aacb0384739a4c ] Prior to this patch we enabled audit in audit_init(), which is too late for PID 1 as the standard initcalls are run after the PID 1 task is forked. This means that we never allocate an audit_context (see audit_alloc()) for PID 1 and therefore miss a lot of audit events generated by PID 1. This patch enables audit as early as possible to help ensure that when PID 1 is forked it can allocate an audit_context if required. Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/audit.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index f1ca11613379..da4e7c0e36f7 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -79,13 +79,13 @@ static int audit_initialized; #define AUDIT_OFF 0 #define AUDIT_ON 1 #define AUDIT_LOCKED 2 -u32 audit_enabled; -u32 audit_ever_enabled; +u32 audit_enabled = AUDIT_OFF; +u32 audit_ever_enabled = !!AUDIT_OFF; EXPORT_SYMBOL_GPL(audit_enabled); /* Default state when kernel boots without any parameters. */ -static u32 audit_default; +static u32 audit_default = AUDIT_OFF; /* If auditing cannot proceed, audit_failure selects what happens. */ static u32 audit_failure = AUDIT_FAIL_PRINTK; @@ -1199,8 +1199,6 @@ static int __init audit_init(void) skb_queue_head_init(&audit_skb_queue); skb_queue_head_init(&audit_skb_hold_queue); audit_initialized = AUDIT_INITIALIZED; - audit_enabled = audit_default; - audit_ever_enabled |= !!audit_default; audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); @@ -1217,6 +1215,8 @@ static int __init audit_enable(char *str) audit_default = !!simple_strtol(str, NULL, 0); if (!audit_default) audit_initialized = AUDIT_DISABLED; + audit_enabled = audit_default; + audit_ever_enabled = !!audit_enabled; pr_info("%s\n", audit_default ? "enabled (after initialization)" : "disabled (until reboot)"); -- GitLab From b7d3f2b5dca97de98dddbd4992dbe49d5a7723fa Mon Sep 17 00:00:00 2001 From: Zdenek Kabelac Date: Wed, 8 Nov 2017 13:44:56 +0100 Subject: [PATCH 0324/1398] md: free unused memory after bitmap resize [ Upstream commit 0868b99c214a3d55486c700de7c3f770b7243e7c ] When bitmap is resized, the old kalloced chunks just are not released once the resized bitmap starts to use new space. This fixes in particular kmemleak reports like this one: unreferenced object 0xffff8f4311e9c000 (size 4096): comm "lvm", pid 19333, jiffies 4295263268 (age 528.265s) hex dump (first 32 bytes): 02 80 02 80 02 80 02 80 02 80 02 80 02 80 02 80 ................ 02 80 02 80 02 80 02 80 02 80 02 80 02 80 02 80 ................ backtrace: [] kmemleak_alloc+0x4a/0xa0 [] kmem_cache_alloc_trace+0x14e/0x2e0 [] bitmap_checkpage+0x7c/0x110 [] bitmap_get_counter+0x45/0xd0 [] bitmap_set_memory_bits+0x43/0xe0 [] bitmap_init_from_disk+0x23c/0x530 [] bitmap_load+0xbe/0x160 [] raid_preresume+0x203/0x2f0 [dm_raid] [] dm_table_resume_targets+0x4f/0xe0 [] dm_resume+0x122/0x140 [] dev_suspend+0x18f/0x290 [] ctl_ioctl+0x287/0x560 [] dm_ctl_ioctl+0x13/0x20 [] do_vfs_ioctl+0xa6/0x750 [] SyS_ioctl+0x79/0x90 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 Signed-off-by: Zdenek Kabelac Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/bitmap.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index fb02c3979bf4..f7ff408567ad 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -2084,6 +2084,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, for (k = 0; k < page; k++) { kfree(new_bp[k].map); } + kfree(new_bp); /* restore some fields from old_counts */ bitmap->counts.bp = old_counts.bp; @@ -2134,6 +2135,14 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, block += old_blocks; } + if (bitmap->counts.bp != old_counts.bp) { + unsigned long k; + for (k = 0; k < old_counts.pages; k++) + if (!old_counts.bp[k].hijacked) + kfree(old_counts.bp[k].map); + kfree(old_counts.bp); + } + if (!init) { int i; while (block < (chunks << chunkshift)) { -- GitLab From 349130bb039149f0c30a8f5528c483b0804023cd Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 25 Oct 2017 23:10:19 +0300 Subject: [PATCH 0325/1398] RDMA/cxgb4: Annotate r2 and stag as __be32 [ Upstream commit 7d7d065a5eec7e218174d5c64a9f53f99ffdb119 ] Chelsio cxgb4 HW is big-endian, hence there is need to properly annotate r2 and stag fields as __be32 and not __u32 to fix the following sparse warnings. drivers/infiniband/hw/cxgb4/qp.c:614:16: warning: incorrect type in assignment (different base types) expected unsigned int [unsigned] [usertype] r2 got restricted __be32 [usertype] drivers/infiniband/hw/cxgb4/qp.c:615:18: warning: incorrect type in assignment (different base types) expected unsigned int [unsigned] [usertype] stag got restricted __be32 [usertype] Cc: Steve Wise Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 010c709ba3bb..58c531db4f4a 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -675,8 +675,8 @@ struct fw_ri_fr_nsmr_tpte_wr { __u16 wrid; __u8 r1[3]; __u8 len16; - __u32 r2; - __u32 stag; + __be32 r2; + __be32 stag; struct fw_ri_tpte tpte; __u64 pbl[2]; }; -- GitLab From ee52d08d2e09539154f397c8a412c68189c4d6a0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 16 Dec 2017 16:26:04 +0100 Subject: [PATCH 0326/1398] Linux 4.9.70 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b208e550b585..7ad3271a1a1d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 69 +SUBLEVEL = 70 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 522e13a01752e565c6158d836a3e8df5c469baf7 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 12 Dec 2016 16:44:53 -0800 Subject: [PATCH 0327/1398] UPSTREAM: kasan: support panic_on_warn If user sets panic_on_warn, he wants kernel to panic if there is anything barely wrong with the kernel. KASAN-detected errors are definitely not less benign than an arbitrary kernel WARNING. Panic after KASAN errors if panic_on_warn is set. We use this for continuous fuzzing where we want kernel to stop and reboot on any error. Link: http://lkml.kernel.org/r/1476694764-31986-1-git-send-email-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 5c5c1f36cedfb51ec291181e71817f7fe7e03ee2) Change-Id: Iee7cbc4ffbce8eb8d827447fdf960a6520d10b00 Signed-off-by: Paul Lawrence --- mm/kasan/report.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 8ca412aebcf1..f479365530b6 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -137,6 +137,8 @@ static void kasan_end_report(unsigned long *flags) pr_err("==================================================================\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irqrestore(&report_lock, *flags); + if (panic_on_warn) + panic("panic_on_warn set ...\n"); kasan_enable_current(); } -- GitLab From b0b36118b2e3d5ed554582a09e862f779ffebbc4 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 12 Dec 2016 16:44:56 -0800 Subject: [PATCH 0328/1398] UPSTREAM: kasan: eliminate long stalls during quarantine reduction Currently we dedicate 1/32 of RAM for quarantine and then reduce it by 1/4 of total quarantine size. This can be a significant amount of memory. For example, with 4GB of RAM total quarantine size is 128MB and it is reduced by 32MB at a time. With 128GB of RAM total quarantine size is 4GB and it is reduced by 1GB. This leads to several problems: - freeing 1GB can take tens of seconds, causes rcu stall warnings and just introduces unexpected long delays at random places - if kmalloc() is called under a mutex, other threads stall on that mutex while a thread reduces quarantine - threads wait on quarantine_lock while one thread grabs a large batch of objects to evict - we walk the uncached list of object to free twice which makes all of the above worse - when a thread frees objects, they are already not accounted against global_quarantine.bytes; as the result we can have quarantine_size bytes in quarantine + unbounded amount of memory in large batches in threads that are in process of freeing Reduce size of quarantine in smaller batches to reduce the delays. The only reason to reduce it in batches is amortization of overheads, the new batch size of 1MB should be well enough to amortize spinlock lock/unlock and few function calls. Plus organize quarantine as a FIFO array of batches. This allows to not walk the list in quarantine_reduce() under quarantine_lock, which in turn reduces contention and is just faster. This improves performance of heavy load (syzkaller fuzzing) by ~20% with 4 CPUs and 32GB of RAM. Also this eliminates frequent (every 5 sec) drops of CPU consumption from ~400% to ~100% (one thread reduces quarantine while others are waiting on a mutex). Some reference numbers: 1. Machine with 4 CPUs and 4GB of memory. Quarantine size 128MB. Currently we free 32MB at at time. With new code we free 1MB at a time (1024 batches, ~128 are used). 2. Machine with 32 CPUs and 128GB of memory. Quarantine size 4GB. Currently we free 1GB at at time. With new code we free 8MB at a time (1024 batches, ~512 are used). 3. Machine with 4096 CPUs and 1TB of memory. Quarantine size 32GB. Currently we free 8GB at at time. With new code we free 4MB at a time (16K batches, ~8K are used). Link: http://lkml.kernel.org/r/1478756952-18695-1-git-send-email-dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Eric Dumazet Cc: Greg Thelen Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andrey Konovalov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 64abdcb24351a27bed6e2b6a3c27348fe532c73f) Change-Id: Idf73cb292453ceffc437121b7a5e152cde1901ff Signed-off-by: Paul Lawrence --- mm/kasan/quarantine.c | 94 ++++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 46 deletions(-) diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index baabaad4a4aa..dae929c02bbb 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -86,24 +86,9 @@ static void qlist_move_all(struct qlist_head *from, struct qlist_head *to) qlist_init(from); } -static void qlist_move(struct qlist_head *from, struct qlist_node *last, - struct qlist_head *to, size_t size) -{ - if (unlikely(last == from->tail)) { - qlist_move_all(from, to); - return; - } - if (qlist_empty(to)) - to->head = from->head; - else - to->tail->next = from->head; - to->tail = last; - from->head = last->next; - last->next = NULL; - from->bytes -= size; - to->bytes += size; -} - +#define QUARANTINE_PERCPU_SIZE (1 << 20) +#define QUARANTINE_BATCHES \ + (1024 > 4 * CONFIG_NR_CPUS ? 1024 : 4 * CONFIG_NR_CPUS) /* * The object quarantine consists of per-cpu queues and a global queue, @@ -111,11 +96,22 @@ static void qlist_move(struct qlist_head *from, struct qlist_node *last, */ static DEFINE_PER_CPU(struct qlist_head, cpu_quarantine); -static struct qlist_head global_quarantine; +/* Round-robin FIFO array of batches. */ +static struct qlist_head global_quarantine[QUARANTINE_BATCHES]; +static int quarantine_head; +static int quarantine_tail; +/* Total size of all objects in global_quarantine across all batches. */ +static unsigned long quarantine_size; static DEFINE_SPINLOCK(quarantine_lock); /* Maximum size of the global queue. */ -static unsigned long quarantine_size; +static unsigned long quarantine_max_size; + +/* + * Target size of a batch in global_quarantine. + * Usually equal to QUARANTINE_PERCPU_SIZE unless we have too much RAM. + */ +static unsigned long quarantine_batch_size; /* * The fraction of physical memory the quarantine is allowed to occupy. @@ -124,9 +120,6 @@ static unsigned long quarantine_size; */ #define QUARANTINE_FRACTION 32 -#define QUARANTINE_LOW_SIZE (READ_ONCE(quarantine_size) * 3 / 4) -#define QUARANTINE_PERCPU_SIZE (1 << 20) - static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink) { return virt_to_head_page(qlink)->slab_cache; @@ -191,21 +184,30 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) if (unlikely(!qlist_empty(&temp))) { spin_lock_irqsave(&quarantine_lock, flags); - qlist_move_all(&temp, &global_quarantine); + WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes); + qlist_move_all(&temp, &global_quarantine[quarantine_tail]); + if (global_quarantine[quarantine_tail].bytes >= + READ_ONCE(quarantine_batch_size)) { + int new_tail; + + new_tail = quarantine_tail + 1; + if (new_tail == QUARANTINE_BATCHES) + new_tail = 0; + if (new_tail != quarantine_head) + quarantine_tail = new_tail; + } spin_unlock_irqrestore(&quarantine_lock, flags); } } void quarantine_reduce(void) { - size_t new_quarantine_size, percpu_quarantines; + size_t total_size, new_quarantine_size, percpu_quarantines; unsigned long flags; struct qlist_head to_free = QLIST_INIT; - size_t size_to_free = 0; - struct qlist_node *last; - if (likely(READ_ONCE(global_quarantine.bytes) <= - READ_ONCE(quarantine_size))) + if (likely(READ_ONCE(quarantine_size) <= + READ_ONCE(quarantine_max_size))) return; spin_lock_irqsave(&quarantine_lock, flags); @@ -214,24 +216,23 @@ void quarantine_reduce(void) * Update quarantine size in case of hotplug. Allocate a fraction of * the installed memory to quarantine minus per-cpu queue limits. */ - new_quarantine_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) / + total_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) / QUARANTINE_FRACTION; percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus(); - new_quarantine_size = (new_quarantine_size < percpu_quarantines) ? - 0 : new_quarantine_size - percpu_quarantines; - WRITE_ONCE(quarantine_size, new_quarantine_size); - - last = global_quarantine.head; - while (last) { - struct kmem_cache *cache = qlink_to_cache(last); - - size_to_free += cache->size; - if (!last->next || size_to_free > - global_quarantine.bytes - QUARANTINE_LOW_SIZE) - break; - last = last->next; + new_quarantine_size = (total_size < percpu_quarantines) ? + 0 : total_size - percpu_quarantines; + WRITE_ONCE(quarantine_max_size, new_quarantine_size); + /* Aim at consuming at most 1/2 of slots in quarantine. */ + WRITE_ONCE(quarantine_batch_size, max((size_t)QUARANTINE_PERCPU_SIZE, + 2 * total_size / QUARANTINE_BATCHES)); + + if (likely(quarantine_size > quarantine_max_size)) { + qlist_move_all(&global_quarantine[quarantine_head], &to_free); + WRITE_ONCE(quarantine_size, quarantine_size - to_free.bytes); + quarantine_head++; + if (quarantine_head == QUARANTINE_BATCHES) + quarantine_head = 0; } - qlist_move(&global_quarantine, last, &to_free, size_to_free); spin_unlock_irqrestore(&quarantine_lock, flags); @@ -275,13 +276,14 @@ static void per_cpu_remove_cache(void *arg) void quarantine_remove_cache(struct kmem_cache *cache) { - unsigned long flags; + unsigned long flags, i; struct qlist_head to_free = QLIST_INIT; on_each_cpu(per_cpu_remove_cache, cache, 1); spin_lock_irqsave(&quarantine_lock, flags); - qlist_move_cache(&global_quarantine, &to_free, cache); + for (i = 0; i < QUARANTINE_BATCHES; i++) + qlist_move_cache(&global_quarantine[i], &to_free, cache); spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, cache); -- GitLab From cb61d5ab325f8f81b7673290a35c0f3e352f28cc Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 24 Feb 2017 15:00:05 -0800 Subject: [PATCH 0329/1398] UPSTREAM: kasan: drain quarantine of memcg slab objects Per memcg slab accounting and kasan have a problem with kmem_cache destruction. - kmem_cache_create() allocates a kmem_cache, which is used for allocations from processes running in root (top) memcg. - Processes running in non root memcg and allocating with either __GFP_ACCOUNT or from a SLAB_ACCOUNT cache use a per memcg kmem_cache. - Kasan catches use-after-free by having kfree() and kmem_cache_free() defer freeing of objects. Objects are placed in a quarantine. - kmem_cache_destroy() destroys root and non root kmem_caches. It takes care to drain the quarantine of objects from the root memcg's kmem_cache, but ignores objects associated with non root memcg. This causes leaks because quarantined per memcg objects refer to per memcg kmem cache being destroyed. To see the problem: 1) create a slab cache with kmem_cache_create(,,,SLAB_ACCOUNT,) 2) from non root memcg, allocate and free a few objects from cache 3) dispose of the cache with kmem_cache_destroy() kmem_cache_destroy() will trigger a "Slab cache still has objects" warning indicating that the per memcg kmem_cache structure was leaked. Fix the leak by draining kasan quarantined objects allocated from non root memcg. Racing memcg deletion is tricky, but handled. kmem_cache_destroy() => shutdown_memcg_caches() => __shutdown_memcg_cache() => shutdown_cache() flushes per memcg quarantined objects, even if that memcg has been rmdir'd and gone through memcg_deactivate_kmem_caches(). This leak only affects destroyed SLAB_ACCOUNT kmem caches when kasan is enabled. So I don't think it's worth patching stable kernels. Link: http://lkml.kernel.org/r/1482257462-36948-1-git-send-email-gthelen@google.com Signed-off-by: Greg Thelen Reviewed-by: Vladimir Davydov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from f9fa1d919c696e90c887d8742198023e7639d139) Change-Id: Ie054d9cde7fb1ce62e65776bff5a70f72925d037 Signed-off-by: Paul Lawrence --- include/linux/kasan.h | 4 ++-- mm/kasan/kasan.c | 2 +- mm/kasan/quarantine.c | 1 + mm/slab_common.c | 4 +++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 820c0ad54a01..c908b25bf5a5 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -52,7 +52,7 @@ void kasan_free_pages(struct page *page, unsigned int order); void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags); void kasan_cache_shrink(struct kmem_cache *cache); -void kasan_cache_destroy(struct kmem_cache *cache); +void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); @@ -98,7 +98,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags) {} static inline void kasan_cache_shrink(struct kmem_cache *cache) {} -static inline void kasan_cache_destroy(struct kmem_cache *cache) {} +static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 0e9505f66ec1..8d020ad5b74a 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -428,7 +428,7 @@ void kasan_cache_shrink(struct kmem_cache *cache) quarantine_remove_cache(cache); } -void kasan_cache_destroy(struct kmem_cache *cache) +void kasan_cache_shutdown(struct kmem_cache *cache) { quarantine_remove_cache(cache); } diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index dae929c02bbb..6f1ed1630873 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -274,6 +274,7 @@ static void per_cpu_remove_cache(void *arg) qlist_free_all(&to_free, cache); } +/* Free all quarantined objects belonging to cache. */ void quarantine_remove_cache(struct kmem_cache *cache) { unsigned long flags, i; diff --git a/mm/slab_common.c b/mm/slab_common.c index 622f6b6ae844..0dc614d1d6f9 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -458,6 +458,9 @@ EXPORT_SYMBOL(kmem_cache_create); static int shutdown_cache(struct kmem_cache *s, struct list_head *release, bool *need_rcu_barrier) { + /* free asan quarantined objects */ + kasan_cache_shutdown(s); + if (__kmem_cache_shutdown(s) != 0) return -EBUSY; @@ -741,7 +744,6 @@ void kmem_cache_destroy(struct kmem_cache *s) get_online_cpus(); get_online_mems(); - kasan_cache_destroy(s); mutex_lock(&slab_mutex); s->refcount--; -- GitLab From 46ae3e4faa4cfc3e0882ca0cb2f2c10782e374a7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 09:57:00 +0100 Subject: [PATCH 0330/1398] UPSTREAM: kasan, sched/headers: Uninline kasan_enable/disable_current() is a low level header that is included early in affected kernel headers. But it includes which complicates the cleanup of sched.h dependencies. But kasan.h has almost no need for sched.h: its only use of scheduler functionality is in two inline functions which are not used very frequently - so uninline kasan_enable_current() and kasan_disable_current(). Also add a dependency to a .c file that depended on kasan.h including it. This paves the way to remove the include from kasan.h. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Bug: 64145065 (cherry-picked from af8601ad420f6afa6445c927ad9f36d9700d96d6) Change-Id: I13fd2d3927f663d694ea0d5bf44f18e2c62ae013 Signed-off-by: Paul Lawrence --- include/linux/kasan.h | 10 ++-------- mm/kasan/kasan.c | 10 ++++++++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index c908b25bf5a5..7793036eac80 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -30,16 +30,10 @@ static inline void *kasan_mem_to_shadow(const void *addr) } /* Enable reporting bugs after kasan_disable_current() */ -static inline void kasan_enable_current(void) -{ - current->kasan_depth++; -} +extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ -static inline void kasan_disable_current(void) -{ - current->kasan_depth--; -} +extern void kasan_disable_current(void); void kasan_unpoison_shadow(const void *address, size_t size); diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 8d020ad5b74a..59bb048f8282 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -39,6 +39,16 @@ #include "kasan.h" #include "../slab.h" +void kasan_enable_current(void) +{ + current->kasan_depth++; +} + +void kasan_disable_current(void) +{ + current->kasan_depth--; +} + /* * Poisons the shadow memory for 'size' bytes starting from 'addr'. * Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE. -- GitLab From 650117f92967f00c46a4ca6d14e8d5dec01fed85 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 9 Mar 2017 16:17:28 -0800 Subject: [PATCH 0331/1398] UPSTREAM: kasan: resched in quarantine_remove_cache() We see reported stalls/lockups in quarantine_remove_cache() on machines with large amounts of RAM. quarantine_remove_cache() needs to scan whole quarantine in order to take out all objects belonging to the cache. Quarantine is currently 1/32-th of RAM, e.g. on a machine with 256GB of memory that will be 8GB. Moreover quarantine scanning is a walk over uncached linked list, which is slow. Add cond_resched() after scanning of each non-empty batch of objects. Batches are specifically kept of reasonable size for quarantine_put(). On a machine with 256GB of RAM we should have ~512 non-empty batches, each with 16MB of objects. Link: http://lkml.kernel.org/r/20170308154239.25440-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Greg Thelen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 68fd814a3391c7e017ae6ace8855788748edd981) Change-Id: I8a38466a9b9544bb303202c94bfba6201251e3f3 Signed-off-by: Paul Lawrence --- mm/kasan/quarantine.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 6f1ed1630873..4ac39f20757a 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -283,8 +283,15 @@ void quarantine_remove_cache(struct kmem_cache *cache) on_each_cpu(per_cpu_remove_cache, cache, 1); spin_lock_irqsave(&quarantine_lock, flags); - for (i = 0; i < QUARANTINE_BATCHES; i++) + for (i = 0; i < QUARANTINE_BATCHES; i++) { + if (qlist_empty(&global_quarantine[i])) + continue; qlist_move_cache(&global_quarantine[i], &to_free, cache); + /* Scanning whole quarantine can take a while. */ + spin_unlock_irqrestore(&quarantine_lock, flags); + cond_resched(); + spin_lock_irqsave(&quarantine_lock, flags); + } spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, cache); -- GitLab From e9166aa87bb021f4f28a23da49f2d22b9aaa2d06 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 9 Mar 2017 16:17:32 -0800 Subject: [PATCH 0332/1398] UPSTREAM: kasan: fix races in quarantine_remove_cache() quarantine_remove_cache() frees all pending objects that belong to the cache, before we destroy the cache itself. However there are currently two possibilities how it can fail to do so. First, another thread can hold some of the objects from the cache in temp list in quarantine_put(). quarantine_put() has a windows of enabled interrupts, and on_each_cpu() in quarantine_remove_cache() can finish right in that window. These objects will be later freed into the destroyed cache. Then, quarantine_reduce() has the same problem. It grabs a batch of objects from the global quarantine, then unlocks quarantine_lock and then frees the batch. quarantine_remove_cache() can finish while some objects from the cache are still in the local to_free list in quarantine_reduce(). Fix the race with quarantine_put() by disabling interrupts for the whole duration of quarantine_put(). In combination with on_each_cpu() in quarantine_remove_cache() it ensures that quarantine_remove_cache() either sees the objects in the per-cpu list or in the global list. Fix the race with quarantine_reduce() by protecting quarantine_reduce() with srcu critical section and then doing synchronize_srcu() at the end of quarantine_remove_cache(). I've done some assessment of how good synchronize_srcu() works in this case. And on a 4 CPU VM I see that it blocks waiting for pending read critical sections in about 2-3% of cases. Which looks good to me. I suspect that these races are the root cause of some GPFs that I episodically hit. Previously I did not have any explanation for them. BUG: unable to handle kernel NULL pointer dereference at 00000000000000c8 IP: qlist_free_all+0x2e/0xc0 mm/kasan/quarantine.c:155 PGD 6aeea067 PUD 60ed7067 PMD 0 Oops: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 13667 Comm: syz-executor2 Not tainted 4.10.0+ #60 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88005f948040 task.stack: ffff880069818000 RIP: 0010:qlist_free_all+0x2e/0xc0 mm/kasan/quarantine.c:155 RSP: 0018:ffff88006981f298 EFLAGS: 00010246 RAX: ffffea0000ffff00 RBX: 0000000000000000 RCX: ffffea0000ffff1f RDX: 0000000000000000 RSI: ffff88003fffc3e0 RDI: 0000000000000000 RBP: ffff88006981f2c0 R08: ffff88002fed7bd8 R09: 00000001001f000d R10: 00000000001f000d R11: ffff88006981f000 R12: ffff88003fffc3e0 R13: ffff88006981f2d0 R14: ffffffff81877fae R15: 0000000080000000 FS: 00007fb911a2d700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000c8 CR3: 0000000060ed6000 CR4: 00000000000006f0 Call Trace: quarantine_reduce+0x10e/0x120 mm/kasan/quarantine.c:239 kasan_kmalloc+0xca/0xe0 mm/kasan/kasan.c:590 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544 slab_post_alloc_hook mm/slab.h:456 [inline] slab_alloc_node mm/slub.c:2718 [inline] kmem_cache_alloc_node+0x1d3/0x280 mm/slub.c:2754 __alloc_skb+0x10f/0x770 net/core/skbuff.c:219 alloc_skb include/linux/skbuff.h:932 [inline] _sctp_make_chunk+0x3b/0x260 net/sctp/sm_make_chunk.c:1388 sctp_make_data net/sctp/sm_make_chunk.c:1420 [inline] sctp_make_datafrag_empty+0x208/0x360 net/sctp/sm_make_chunk.c:746 sctp_datamsg_from_user+0x7e8/0x11d0 net/sctp/chunk.c:266 sctp_sendmsg+0x2611/0x3970 net/sctp/socket.c:1962 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 SYSC_sendto+0x660/0x810 net/socket.c:1685 SyS_sendto+0x40/0x50 net/socket.c:1653 I am not sure about backporting. The bug is quite hard to trigger, I've seen it few times during our massive continuous testing (however, it could be cause of some other episodic stray crashes as it leads to memory corruption...). If it is triggered, the consequences are very bad -- almost definite bad memory corruption. The fix is non trivial and has chances of introducing new bugs. I am also not sure how actively people use KASAN on older releases. [dvyukov@google.com: - sorted includes[ Link: http://lkml.kernel.org/r/20170309094028.51088-1-dvyukov@google.com Link: http://lkml.kernel.org/r/20170308151532.5070-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Greg Thelen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from ce5bec54bb5debbbe51b40270d8f209a23cadae4) Change-Id: I9199861f005d7932c37397b3ae23a123a4cff89b Signed-off-by: Paul Lawrence --- mm/kasan/quarantine.c | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 4ac39f20757a..3a8ddf8baf7d 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -103,6 +104,7 @@ static int quarantine_tail; /* Total size of all objects in global_quarantine across all batches. */ static unsigned long quarantine_size; static DEFINE_SPINLOCK(quarantine_lock); +DEFINE_STATIC_SRCU(remove_cache_srcu); /* Maximum size of the global queue. */ static unsigned long quarantine_max_size; @@ -173,17 +175,22 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) struct qlist_head *q; struct qlist_head temp = QLIST_INIT; + /* + * Note: irq must be disabled until after we move the batch to the + * global quarantine. Otherwise quarantine_remove_cache() can miss + * some objects belonging to the cache if they are in our local temp + * list. quarantine_remove_cache() executes on_each_cpu() at the + * beginning which ensures that it either sees the objects in per-cpu + * lists or in the global quarantine. + */ local_irq_save(flags); q = this_cpu_ptr(&cpu_quarantine); qlist_put(q, &info->quarantine_link, cache->size); - if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) + if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) { qlist_move_all(q, &temp); - local_irq_restore(flags); - - if (unlikely(!qlist_empty(&temp))) { - spin_lock_irqsave(&quarantine_lock, flags); + spin_lock(&quarantine_lock); WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes); qlist_move_all(&temp, &global_quarantine[quarantine_tail]); if (global_quarantine[quarantine_tail].bytes >= @@ -196,20 +203,33 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) if (new_tail != quarantine_head) quarantine_tail = new_tail; } - spin_unlock_irqrestore(&quarantine_lock, flags); + spin_unlock(&quarantine_lock); } + + local_irq_restore(flags); } void quarantine_reduce(void) { size_t total_size, new_quarantine_size, percpu_quarantines; unsigned long flags; + int srcu_idx; struct qlist_head to_free = QLIST_INIT; if (likely(READ_ONCE(quarantine_size) <= READ_ONCE(quarantine_max_size))) return; + /* + * srcu critical section ensures that quarantine_remove_cache() + * will not miss objects belonging to the cache while they are in our + * local to_free list. srcu is chosen because (1) it gives us private + * grace period domain that does not interfere with anything else, + * and (2) it allows synchronize_srcu() to return without waiting + * if there are no pending read critical sections (which is the + * expected case). + */ + srcu_idx = srcu_read_lock(&remove_cache_srcu); spin_lock_irqsave(&quarantine_lock, flags); /* @@ -237,6 +257,7 @@ void quarantine_reduce(void) spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, NULL); + srcu_read_unlock(&remove_cache_srcu, srcu_idx); } static void qlist_move_cache(struct qlist_head *from, @@ -280,6 +301,13 @@ void quarantine_remove_cache(struct kmem_cache *cache) unsigned long flags, i; struct qlist_head to_free = QLIST_INIT; + /* + * Must be careful to not miss any objects that are being moved from + * per-cpu list to the global quarantine in quarantine_put(), + * nor objects being freed in quarantine_reduce(). on_each_cpu() + * achieves the first goal, while synchronize_srcu() achieves the + * second. + */ on_each_cpu(per_cpu_remove_cache, cache, 1); spin_lock_irqsave(&quarantine_lock, flags); @@ -295,4 +323,6 @@ void quarantine_remove_cache(struct kmem_cache *cache) spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, cache); + + synchronize_srcu(&remove_cache_srcu); } -- GitLab From 586b2bdb273c54af7584366afc92d59192060683 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 31 Mar 2017 15:12:04 -0700 Subject: [PATCH 0333/1398] BACKPORT: kasan: report only the first error by default Disable kasan after the first report. There are several reasons for this: - Single bug quite often has multiple invalid memory accesses causing storm in the dmesg. - Write OOB access might corrupt metadata so the next report will print bogus alloc/free stacktraces. - Reports after the first easily could be not bugs by itself but just side effects of the first one. Given that multiple reports usually only do harm, it makes sense to disable kasan after the first one. If user wants to see all the reports, the boot-time parameter kasan_multi_shot must be used. [aryabinin@virtuozzo.com: wrote changelog and doc, added missing include] Link: http://lkml.kernel.org/r/20170323154416.30257-1-aryabinin@virtuozzo.com Signed-off-by: Mark Rutland Signed-off-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from b0845ce58379d11dcad4cdb6824a6410de260216) Change-Id: Ia8c6d40dd0d4f5b944bf3501c08d7a825070b116 Signed-off-by: Paul Lawrence --- Documentation/kernel-parameters.txt | 6 +++++ include/linux/kasan.h | 3 +++ lib/test_kasan.c | 10 ++++++++ mm/kasan/kasan.h | 5 ---- mm/kasan/report.c | 36 +++++++++++++++++++++++++++++ 5 files changed, 55 insertions(+), 5 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 92e629781279..5a572328a2e1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1909,6 +1909,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. kernel and module base offset ASLR (Address Space Layout Randomization). + kasan_multi_shot + [KNL] Enforce KASAN (Kernel Address Sanitizer) to print + report on every invalid memory access. Without this + parameter KASAN will print report only for the first + invalid access. + keepinitrd [HW,ARM] kernelcore= [KNL,X86,IA-64,PPC] diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 7793036eac80..b37afd197eed 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -75,6 +75,9 @@ size_t ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } size_t kasan_metadata_size(struct kmem_cache *cache); +bool kasan_save_enable_multi_shot(void); +void kasan_restore_multi_shot(bool enabled); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} diff --git a/lib/test_kasan.c b/lib/test_kasan.c index fbdf87920093..0e70ecc12fe2 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -19,6 +19,7 @@ #include #include #include +#include /* * Note: test functions are marked noinline so that their names appear in @@ -441,6 +442,12 @@ static noinline void __init use_after_scope_test(void) static int __init kmalloc_tests_init(void) { + /* + * Temporarily enable multi-shot mode. Otherwise, we'd only get a + * report for the first case. + */ + bool multishot = kasan_save_enable_multi_shot(); + kmalloc_oob_right(); kmalloc_oob_left(); kmalloc_node_oob_right(); @@ -465,6 +472,9 @@ static int __init kmalloc_tests_init(void) ksize_unpoisons_memory(); copy_user_test(); use_after_scope_test(); + + kasan_restore_multi_shot(multishot); + return -EAGAIN; } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 1c260e6b3b3c..dd2dea8eb077 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -96,11 +96,6 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) << KASAN_SHADOW_SCALE_SHIFT); } -static inline bool kasan_report_enabled(void) -{ - return !current->kasan_depth; -} - void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); void kasan_report_double_free(struct kmem_cache *cache, void *object, diff --git a/mm/kasan/report.c b/mm/kasan/report.c index f479365530b6..ab42a0803f16 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -13,7 +13,9 @@ * */ +#include #include +#include #include #include #include @@ -293,6 +295,40 @@ static void kasan_report_error(struct kasan_access_info *info) kasan_end_report(&flags); } +static unsigned long kasan_flags; + +#define KASAN_BIT_REPORTED 0 +#define KASAN_BIT_MULTI_SHOT 1 + +bool kasan_save_enable_multi_shot(void) +{ + return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); +} +EXPORT_SYMBOL_GPL(kasan_save_enable_multi_shot); + +void kasan_restore_multi_shot(bool enabled) +{ + if (!enabled) + clear_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); +} +EXPORT_SYMBOL_GPL(kasan_restore_multi_shot); + +static int __init kasan_set_multi_shot(char *str) +{ + set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); + return 1; +} +__setup("kasan_multi_shot", kasan_set_multi_shot); + +static inline bool kasan_report_enabled(void) +{ + if (current->kasan_depth) + return false; + if (test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) + return true; + return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags); +} + void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip) { -- GitLab From 828a17c94a6f3b3370a70f0f900dc834fb0ce067 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 3 May 2017 14:56:25 -0700 Subject: [PATCH 0334/1398] UPSTREAM: kasan: introduce helper functions for determining bug type Patch series "kasan: improve error reports", v2. This patchset improves KASAN reports by making them easier to read and a little more detailed. Also improves mm/kasan/report.c readability. Effectively changes a use-after-free report to: ================================================================== BUG: KASAN: use-after-free in kmalloc_uaf+0xaa/0xb6 [test_kasan] Write of size 1 at addr ffff88006aa59da8 by task insmod/3951 CPU: 1 PID: 3951 Comm: insmod Tainted: G B 4.10.0+ #84 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x292/0x398 print_address_description+0x73/0x280 kasan_report.part.2+0x207/0x2f0 __asan_report_store1_noabort+0x2c/0x30 kmalloc_uaf+0xaa/0xb6 [test_kasan] kmalloc_tests_init+0x4f/0xa48 [test_kasan] do_one_initcall+0xf3/0x390 do_init_module+0x215/0x5d0 load_module+0x54de/0x82b0 SYSC_init_module+0x3be/0x430 SyS_init_module+0x9/0x10 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x7f22cfd0b9da RSP: 002b:00007ffe69118a78 EFLAGS: 00000206 ORIG_RAX: 00000000000000af RAX: ffffffffffffffda RBX: 0000555671242090 RCX: 00007f22cfd0b9da RDX: 00007f22cffcaf88 RSI: 000000000004df7e RDI: 00007f22d0399000 RBP: 00007f22cffcaf88 R08: 0000000000000003 R09: 0000000000000000 R10: 00007f22cfd07d0a R11: 0000000000000206 R12: 0000555671243190 R13: 000000000001fe81 R14: 0000000000000000 R15: 0000000000000004 Allocated by task 3951: save_stack_trace+0x16/0x20 save_stack+0x43/0xd0 kasan_kmalloc+0xad/0xe0 kmem_cache_alloc_trace+0x82/0x270 kmalloc_uaf+0x56/0xb6 [test_kasan] kmalloc_tests_init+0x4f/0xa48 [test_kasan] do_one_initcall+0xf3/0x390 do_init_module+0x215/0x5d0 load_module+0x54de/0x82b0 SYSC_init_module+0x3be/0x430 SyS_init_module+0x9/0x10 entry_SYSCALL_64_fastpath+0x1f/0xc2 Freed by task 3951: save_stack_trace+0x16/0x20 save_stack+0x43/0xd0 kasan_slab_free+0x72/0xc0 kfree+0xe8/0x2b0 kmalloc_uaf+0x85/0xb6 [test_kasan] kmalloc_tests_init+0x4f/0xa48 [test_kasan] do_one_initcall+0xf3/0x390 do_init_module+0x215/0x5d0 load_module+0x54de/0x82b0 SYSC_init_module+0x3be/0x430 SyS_init_module+0x9/0x10 entry_SYSCALL_64_fastpath+0x1f/0xc The buggy address belongs to the object at ffff88006aa59da0 which belongs to the cache kmalloc-16 of size 16 The buggy address is located 8 bytes inside of 16-byte region [ffff88006aa59da0, ffff88006aa59db0) The buggy address belongs to the page: page:ffffea0001aa9640 count:1 mapcount:0 mapping: (null) index:0x0 flags: 0x100000000000100(slab) raw: 0100000000000100 0000000000000000 0000000000000000 0000000180800080 raw: ffffea0001abe380 0000000700000007 ffff88006c401b40 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88006aa59c80: 00 00 fc fc 00 00 fc fc 00 00 fc fc 00 00 fc fc ffff88006aa59d00: 00 00 fc fc 00 00 fc fc 00 00 fc fc 00 00 fc fc >ffff88006aa59d80: fb fb fc fc fb fb fc fc fb fb fc fc fb fb fc fc ^ ffff88006aa59e00: fb fb fc fc fb fb fc fc fb fb fc fc fb fb fc fc ffff88006aa59e80: fb fb fc fc 00 00 fc fc 00 00 fc fc 00 00 fc fc ================================================================== from: ================================================================== BUG: KASAN: use-after-free in kmalloc_uaf+0xaa/0xb6 [test_kasan] at addr ffff88006c4dcb28 Write of size 1 by task insmod/3984 CPU: 1 PID: 3984 Comm: insmod Tainted: G B 4.10.0+ #83 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x292/0x398 kasan_object_err+0x1c/0x70 kasan_report.part.1+0x20e/0x4e0 __asan_report_store1_noabort+0x2c/0x30 kmalloc_uaf+0xaa/0xb6 [test_kasan] kmalloc_tests_init+0x4f/0xa48 [test_kasan] do_one_initcall+0xf3/0x390 do_init_module+0x215/0x5d0 load_module+0x54de/0x82b0 SYSC_init_module+0x3be/0x430 SyS_init_module+0x9/0x10 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x7feca0f779da RSP: 002b:00007ffdfeae5218 EFLAGS: 00000206 ORIG_RAX: 00000000000000af RAX: ffffffffffffffda RBX: 000055a064c13090 RCX: 00007feca0f779da RDX: 00007feca1236f88 RSI: 000000000004df7e RDI: 00007feca1605000 RBP: 00007feca1236f88 R08: 0000000000000003 R09: 0000000000000000 R10: 00007feca0f73d0a R11: 0000000000000206 R12: 000055a064c14190 R13: 000000000001fe81 R14: 0000000000000000 R15: 0000000000000004 Object at ffff88006c4dcb20, in cache kmalloc-16 size: 16 Allocated: PID = 3984 save_stack_trace+0x16/0x20 save_stack+0x43/0xd0 kasan_kmalloc+0xad/0xe0 kmem_cache_alloc_trace+0x82/0x270 kmalloc_uaf+0x56/0xb6 [test_kasan] kmalloc_tests_init+0x4f/0xa48 [test_kasan] do_one_initcall+0xf3/0x390 do_init_module+0x215/0x5d0 load_module+0x54de/0x82b0 SYSC_init_module+0x3be/0x430 SyS_init_module+0x9/0x10 entry_SYSCALL_64_fastpath+0x1f/0xc2 Freed: PID = 3984 save_stack_trace+0x16/0x20 save_stack+0x43/0xd0 kasan_slab_free+0x73/0xc0 kfree+0xe8/0x2b0 kmalloc_uaf+0x85/0xb6 [test_kasan] kmalloc_tests_init+0x4f/0xa48 [test_kasan] do_one_initcall+0xf3/0x390 do_init_module+0x215/0x5d0 load_module+0x54de/0x82b0 SYSC_init_module+0x3be/0x430 SyS_init_module+0x9/0x10 entry_SYSCALL_64_fastpath+0x1f/0xc2 Memory state around the buggy address: ffff88006c4dca00: fb fb fc fc fb fb fc fc fb fb fc fc fb fb fc fc ffff88006c4dca80: fb fb fc fc fb fb fc fc fb fb fc fc fb fb fc fc >ffff88006c4dcb00: fb fb fc fc fb fb fc fc fb fb fc fc fb fb fc fc ^ ffff88006c4dcb80: fb fb fc fc 00 00 fc fc fb fb fc fc fb fb fc fc ffff88006c4dcc00: fb fb fc fc fb fb fc fc fb fb fc fc fb fb fc fc ================================================================== This patch (of 9): Introduce get_shadow_bug_type() function, which determines bug type based on the shadow value for a particular kernel address. Introduce get_wild_bug_type() function, which determines bug type for addresses which don't have a corresponding shadow value. Link: http://lkml.kernel.org/r/20170302134851.101218-2-andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 5e82cd120382ad7bbcc82298e34a034538b4384c) Change-Id: I3359775858891c9c66d11d2a520831e329993ae9 Signed-off-by: Paul Lawrence --- mm/kasan/report.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index ab42a0803f16..c2bc08b1b5e0 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -51,7 +51,13 @@ static const void *find_first_bad_addr(const void *addr, size_t size) return first_bad_addr; } -static void print_error_description(struct kasan_access_info *info) +static bool addr_has_shadow(struct kasan_access_info *info) +{ + return (info->access_addr >= + kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); +} + +static const char *get_shadow_bug_type(struct kasan_access_info *info) { const char *bug_type = "unknown-crash"; u8 *shadow_addr; @@ -98,6 +104,27 @@ static void print_error_description(struct kasan_access_info *info) break; } + return bug_type; +} + +const char *get_wild_bug_type(struct kasan_access_info *info) +{ + const char *bug_type = "unknown-crash"; + + if ((unsigned long)info->access_addr < PAGE_SIZE) + bug_type = "null-ptr-deref"; + else if ((unsigned long)info->access_addr < TASK_SIZE) + bug_type = "user-memory-access"; + else + bug_type = "wild-memory-access"; + + return bug_type; +} + +static void print_error_description(struct kasan_access_info *info) +{ + const char *bug_type = get_shadow_bug_type(info); + pr_err("BUG: KASAN: %s in %pS at addr %p\n", bug_type, (void *)info->ip, info->access_addr); @@ -267,18 +294,11 @@ static void print_shadow_for_address(const void *addr) static void kasan_report_error(struct kasan_access_info *info) { unsigned long flags; - const char *bug_type; kasan_start_report(&flags); - if (info->access_addr < - kasan_shadow_to_mem((void *)KASAN_SHADOW_START)) { - if ((unsigned long)info->access_addr < PAGE_SIZE) - bug_type = "null-ptr-deref"; - else if ((unsigned long)info->access_addr < TASK_SIZE) - bug_type = "user-memory-access"; - else - bug_type = "wild-memory-access"; + if (!addr_has_shadow(info)) { + const char *bug_type = get_wild_bug_type(info); pr_err("BUG: KASAN: %s on address %p\n", bug_type, info->access_addr); pr_err("%s of size %zu by task %s/%d\n", -- GitLab From 00c37f35d412583cc2d19785216095babd093b11 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 3 May 2017 14:56:28 -0700 Subject: [PATCH 0335/1398] UPSTREAM: kasan: unify report headers Unify KASAN report header format for different kinds of bad memory accesses. Makes the code simpler. Link: http://lkml.kernel.org/r/20170302134851.101218-3-andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 7d418f7b0d3407b93ec70f3b380cc5beafa1fa68) Change-Id: I81577ad4617e8c4624fc0701f45a197d211f12a6 Signed-off-by: Paul Lawrence --- mm/kasan/report.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index c2bc08b1b5e0..d6b6ec77c56a 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -121,16 +121,22 @@ const char *get_wild_bug_type(struct kasan_access_info *info) return bug_type; } +static const char *get_bug_type(struct kasan_access_info *info) +{ + if (addr_has_shadow(info)) + return get_shadow_bug_type(info); + return get_wild_bug_type(info); +} + static void print_error_description(struct kasan_access_info *info) { - const char *bug_type = get_shadow_bug_type(info); + const char *bug_type = get_bug_type(info); pr_err("BUG: KASAN: %s in %pS at addr %p\n", - bug_type, (void *)info->ip, - info->access_addr); + bug_type, (void *)info->ip, info->access_addr); pr_err("%s of size %zu by task %s/%d\n", - info->is_write ? "Write" : "Read", - info->access_size, current->comm, task_pid_nr(current)); + info->is_write ? "Write" : "Read", info->access_size, + current->comm, task_pid_nr(current)); } static inline bool kernel_or_module_addr(const void *addr) @@ -297,17 +303,11 @@ static void kasan_report_error(struct kasan_access_info *info) kasan_start_report(&flags); + print_error_description(info); + if (!addr_has_shadow(info)) { - const char *bug_type = get_wild_bug_type(info); - pr_err("BUG: KASAN: %s on address %p\n", - bug_type, info->access_addr); - pr_err("%s of size %zu by task %s/%d\n", - info->is_write ? "Write" : "Read", - info->access_size, current->comm, - task_pid_nr(current)); dump_stack(); } else { - print_error_description(info); print_address_description(info); print_shadow_for_address(info->first_bad_addr); } -- GitLab From 9592c76d84c35c434ea879980e8842665d28fdce Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 3 May 2017 14:56:31 -0700 Subject: [PATCH 0336/1398] UPSTREAM: kasan: change allocation and freeing stack traces headers Change stack traces headers from: Allocated: PID = 42 to: Allocated by task 42: Makes the report one line shorter and look better. Link: http://lkml.kernel.org/r/20170302134851.101218-4-andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from b6b72f4919c121bee5890732e0b8de2ab99c8dbc) Change-Id: Iab66777f16016b5a3a8ce85f7cc62d4572fcf5b0 Signed-off-by: Paul Lawrence --- mm/kasan/report.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index d6b6ec77c56a..7d24363edd66 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -177,9 +177,9 @@ static void kasan_end_report(unsigned long *flags) kasan_enable_current(); } -static void print_track(struct kasan_track *track) +static void print_track(struct kasan_track *track, const char *prefix) { - pr_err("PID = %u\n", track->pid); + pr_err("%s by task %u:\n", prefix, track->pid); if (track->stack) { struct stack_trace trace; @@ -201,10 +201,8 @@ static void kasan_object_err(struct kmem_cache *cache, void *object) if (!(cache->flags & SLAB_KASAN)) return; - pr_err("Allocated:\n"); - print_track(&alloc_info->alloc_track); - pr_err("Freed:\n"); - print_track(&alloc_info->free_track); + print_track(&alloc_info->alloc_track, "Allocated"); + print_track(&alloc_info->free_track, "Freed"); } void kasan_report_double_free(struct kmem_cache *cache, void *object, -- GitLab From 74f1d22f17eef3ddb589971903a31ce863e9717e Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 3 May 2017 14:56:34 -0700 Subject: [PATCH 0337/1398] UPSTREAM: kasan: simplify address description logic Simplify logic for describing a memory address. Add addr_to_page() helper function. Makes the code easier to follow. Link: http://lkml.kernel.org/r/20170302134851.101218-5-andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from db429f16e0b472292000fd53b63ebd7221a9856e) Change-Id: Ie688a8fe0da5d1012e64bdbd26b5e7cb2ed43ff8 Signed-off-by: Paul Lawrence --- mm/kasan/report.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 7d24363edd66..a82d6896062b 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -190,11 +190,18 @@ static void print_track(struct kasan_track *track, const char *prefix) } } -static void kasan_object_err(struct kmem_cache *cache, void *object) +static struct page *addr_to_page(const void *addr) +{ + if ((addr >= (void *)PAGE_OFFSET) && + (addr < high_memory)) + return virt_to_head_page(addr); + return NULL; +} + +static void describe_object(struct kmem_cache *cache, void *object) { struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object); - dump_stack(); pr_err("Object at %p, in cache %s size: %d\n", object, cache->name, cache->object_size); @@ -213,34 +220,32 @@ void kasan_report_double_free(struct kmem_cache *cache, void *object, kasan_start_report(&flags); pr_err("BUG: Double free or freeing an invalid pointer\n"); pr_err("Unexpected shadow byte: 0x%hhX\n", shadow); - kasan_object_err(cache, object); + dump_stack(); + describe_object(cache, object); kasan_end_report(&flags); } static void print_address_description(struct kasan_access_info *info) { const void *addr = info->access_addr; + struct page *page = addr_to_page(addr); - if ((addr >= (void *)PAGE_OFFSET) && - (addr < high_memory)) { - struct page *page = virt_to_head_page(addr); - - if (PageSlab(page)) { - void *object; - struct kmem_cache *cache = page->slab_cache; - object = nearest_obj(cache, page, - (void *)info->access_addr); - kasan_object_err(cache, object); - return; - } + if (page) dump_page(page, "kasan: bad access detected"); + + dump_stack(); + + if (page && PageSlab(page)) { + struct kmem_cache *cache = page->slab_cache; + void *object = nearest_obj(cache, page, (void *)addr); + + describe_object(cache, object); } if (kernel_or_module_addr(addr)) { if (!init_task_stack_addr(addr)) pr_err("Address belongs to variable %pS\n", addr); } - dump_stack(); } static bool row_is_guilty(const void *row, const void *guilty) -- GitLab From 5e2a11589323ae2d024a0f67476f9d60cd2c63b5 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 3 May 2017 14:56:38 -0700 Subject: [PATCH 0338/1398] UPSTREAM: kasan: change report header Change report header format from: BUG: KASAN: use-after-free in unwind_get_return_address+0x28a/0x2c0 at addr ffff880069437950 Read of size 8 by task insmod/3925 to: BUG: KASAN: use-after-free in unwind_get_return_address+0x28a/0x2c0 Read of size 8 at addr ffff880069437950 by task insmod/3925 The exact access address is not usually important, so move it to the second line. This also makes the header look visually balanced. Link: http://lkml.kernel.org/r/20170302134851.101218-6-andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 7f0a84c23b1dede3e76a7b2ebbde45a506252005) Change-Id: If9cacce637c317538d813b05ef2647707300d310 Signed-off-by: Paul Lawrence --- mm/kasan/report.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index a82d6896062b..8efc69473a37 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -132,11 +132,11 @@ static void print_error_description(struct kasan_access_info *info) { const char *bug_type = get_bug_type(info); - pr_err("BUG: KASAN: %s in %pS at addr %p\n", - bug_type, (void *)info->ip, info->access_addr); - pr_err("%s of size %zu by task %s/%d\n", + pr_err("BUG: KASAN: %s in %pS\n", + bug_type, (void *)info->ip); + pr_err("%s of size %zu at addr %p by task %s/%d\n", info->is_write ? "Write" : "Read", info->access_size, - current->comm, task_pid_nr(current)); + info->access_addr, current->comm, task_pid_nr(current)); } static inline bool kernel_or_module_addr(const void *addr) -- GitLab From c13ce42db1071fefff6916c4b5e5391c8b9c932e Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 3 May 2017 14:56:41 -0700 Subject: [PATCH 0339/1398] UPSTREAM: kasan: improve slab object description Changes slab object description from: Object at ffff880068388540, in cache kmalloc-128 size: 128 to: The buggy address belongs to the object at ffff880068388540 which belongs to the cache kmalloc-128 of size 128 The buggy address is located 123 bytes inside of 128-byte region [ffff880068388540, ffff8800683885c0) Makes it more explanatory and adds information about relative offset of the accessed address to the start of the object. Link: http://lkml.kernel.org/r/20170302134851.101218-7-andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 0c06f1f86c87b1eb93420effe0c0457b30911360) Change-Id: I23928984dbe5a614b84c57e42b20ec13e7c739a4 Signed-off-by: Paul Lawrence --- mm/kasan/report.c | 53 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 8efc69473a37..a1b1ef7a19f5 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -198,18 +198,49 @@ static struct page *addr_to_page(const void *addr) return NULL; } -static void describe_object(struct kmem_cache *cache, void *object) +static void describe_object_addr(struct kmem_cache *cache, void *object, + const void *addr) { - struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object); + unsigned long access_addr = (unsigned long)addr; + unsigned long object_addr = (unsigned long)object; + const char *rel_type; + int rel_bytes; - pr_err("Object at %p, in cache %s size: %d\n", object, cache->name, - cache->object_size); + pr_err("The buggy address belongs to the object at %p\n" + " which belongs to the cache %s of size %d\n", + object, cache->name, cache->object_size); - if (!(cache->flags & SLAB_KASAN)) + if (!addr) return; - print_track(&alloc_info->alloc_track, "Allocated"); - print_track(&alloc_info->free_track, "Freed"); + if (access_addr < object_addr) { + rel_type = "to the left"; + rel_bytes = object_addr - access_addr; + } else if (access_addr >= object_addr + cache->object_size) { + rel_type = "to the right"; + rel_bytes = access_addr - (object_addr + cache->object_size); + } else { + rel_type = "inside"; + rel_bytes = access_addr - object_addr; + } + + pr_err("The buggy address is located %d bytes %s of\n" + " %d-byte region [%p, %p)\n", + rel_bytes, rel_type, cache->object_size, (void *)object_addr, + (void *)(object_addr + cache->object_size)); +} + +static void describe_object(struct kmem_cache *cache, void *object, + const void *addr) +{ + struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object); + + if (cache->flags & SLAB_KASAN) { + print_track(&alloc_info->alloc_track, "Allocated"); + print_track(&alloc_info->free_track, "Freed"); + } + + describe_object_addr(cache, object, addr); } void kasan_report_double_free(struct kmem_cache *cache, void *object, @@ -221,13 +252,13 @@ void kasan_report_double_free(struct kmem_cache *cache, void *object, pr_err("BUG: Double free or freeing an invalid pointer\n"); pr_err("Unexpected shadow byte: 0x%hhX\n", shadow); dump_stack(); - describe_object(cache, object); + describe_object(cache, object, NULL); kasan_end_report(&flags); } static void print_address_description(struct kasan_access_info *info) { - const void *addr = info->access_addr; + void *addr = (void *)info->access_addr; struct page *page = addr_to_page(addr); if (page) @@ -237,9 +268,9 @@ static void print_address_description(struct kasan_access_info *info) if (page && PageSlab(page)) { struct kmem_cache *cache = page->slab_cache; - void *object = nearest_obj(cache, page, (void *)addr); + void *object = nearest_obj(cache, page, addr); - describe_object(cache, object); + describe_object(cache, object, addr); } if (kernel_or_module_addr(addr)) { -- GitLab From ba2d145e7ff724274a6f79c12e5c87a3b4d668c8 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 3 May 2017 14:56:44 -0700 Subject: [PATCH 0340/1398] UPSTREAM: kasan: print page description after stacks Moves page description after the stacks since it's less important. Link: http://lkml.kernel.org/r/20170302134851.101218-8-andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 430a05f91d6051705a6ddbe207735ca62e39bb80) Change-Id: Ia20b7d6cf5602531072e9bd4fd478737f8623db1 Signed-off-by: Paul Lawrence --- mm/kasan/report.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index a1b1ef7a19f5..b015acc80876 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -261,9 +261,6 @@ static void print_address_description(struct kasan_access_info *info) void *addr = (void *)info->access_addr; struct page *page = addr_to_page(addr); - if (page) - dump_page(page, "kasan: bad access detected"); - dump_stack(); if (page && PageSlab(page)) { @@ -273,9 +270,14 @@ static void print_address_description(struct kasan_access_info *info) describe_object(cache, object, addr); } - if (kernel_or_module_addr(addr)) { - if (!init_task_stack_addr(addr)) - pr_err("Address belongs to variable %pS\n", addr); + if (kernel_or_module_addr(addr) && !init_task_stack_addr(addr)) { + pr_err("The buggy address belongs to the variable:\n"); + pr_err(" %pS\n", addr); + } + + if (page) { + pr_err("The buggy address belongs to the page:\n"); + dump_page(page, "kasan: bad access detected"); } } -- GitLab From bef4eb3c7083aa7b7009d36848aa2e58e125c7ae Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 3 May 2017 14:56:47 -0700 Subject: [PATCH 0341/1398] UPSTREAM: kasan: improve double-free report format Changes double-free report header from BUG: Double free or freeing an invalid pointer Unexpected shadow byte: 0xFB to BUG: KASAN: double-free or invalid-free in kmalloc_oob_left+0xe5/0xef This makes a bug uniquely identifiable by the first report line. To account for removing of the unexpected shadow value, print shadow bytes at the end of the report as in reports for other kinds of bugs. Link: http://lkml.kernel.org/r/20170302134851.101218-9-andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 5ab6d91ac998158d04f9563335aa5f1409eda971) Change-Id: I02dee92190216601d65866eb1c27f7381a22b0da Signed-off-by: Paul Lawrence --- mm/kasan/kasan.c | 3 ++- mm/kasan/kasan.h | 2 +- mm/kasan/report.c | 30 ++++++++++++++---------------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 59bb048f8282..7a6d1a1f9894 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -569,7 +569,8 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object) shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object)); if (shadow_byte < 0 || shadow_byte >= KASAN_SHADOW_SCALE_SIZE) { - kasan_report_double_free(cache, object, shadow_byte); + kasan_report_double_free(cache, object, + __builtin_return_address(1)); return true; } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index dd2dea8eb077..1229298cce64 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -99,7 +99,7 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); void kasan_report_double_free(struct kmem_cache *cache, void *object, - s8 shadow); + void *ip); #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB) void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index b015acc80876..7d3d9670e233 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -243,22 +243,8 @@ static void describe_object(struct kmem_cache *cache, void *object, describe_object_addr(cache, object, addr); } -void kasan_report_double_free(struct kmem_cache *cache, void *object, - s8 shadow) -{ - unsigned long flags; - - kasan_start_report(&flags); - pr_err("BUG: Double free or freeing an invalid pointer\n"); - pr_err("Unexpected shadow byte: 0x%hhX\n", shadow); - dump_stack(); - describe_object(cache, object, NULL); - kasan_end_report(&flags); -} - -static void print_address_description(struct kasan_access_info *info) +static void print_address_description(void *addr) { - void *addr = (void *)info->access_addr; struct page *page = addr_to_page(addr); dump_stack(); @@ -333,6 +319,18 @@ static void print_shadow_for_address(const void *addr) } } +void kasan_report_double_free(struct kmem_cache *cache, void *object, + void *ip) +{ + unsigned long flags; + + kasan_start_report(&flags); + pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", ip); + print_address_description(object); + print_shadow_for_address(object); + kasan_end_report(&flags); +} + static void kasan_report_error(struct kasan_access_info *info) { unsigned long flags; @@ -344,7 +342,7 @@ static void kasan_report_error(struct kasan_access_info *info) if (!addr_has_shadow(info)) { dump_stack(); } else { - print_address_description(info); + print_address_description((void *)info->access_addr); print_shadow_for_address(info->first_bad_addr); } -- GitLab From 37070c19d5e3e6b2675da65d687de15d0b6662c9 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 3 May 2017 14:56:50 -0700 Subject: [PATCH 0342/1398] UPSTREAM: kasan: separate report parts by empty lines Makes the report easier to read. Link: http://lkml.kernel.org/r/20170302134851.101218-10-andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from b19385993623c1a18a686b6b271cd24d5aa96f52) Change-Id: I8cc010a73e257cb08c7a2537d7aabd3c9c2c8116 Signed-off-by: Paul Lawrence --- mm/kasan/report.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 7d3d9670e233..beee0e980e2d 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -237,7 +237,9 @@ static void describe_object(struct kmem_cache *cache, void *object, if (cache->flags & SLAB_KASAN) { print_track(&alloc_info->alloc_track, "Allocated"); + pr_err("\n"); print_track(&alloc_info->free_track, "Freed"); + pr_err("\n"); } describe_object_addr(cache, object, addr); @@ -248,6 +250,7 @@ static void print_address_description(void *addr) struct page *page = addr_to_page(addr); dump_stack(); + pr_err("\n"); if (page && PageSlab(page)) { struct kmem_cache *cache = page->slab_cache; @@ -326,7 +329,9 @@ void kasan_report_double_free(struct kmem_cache *cache, void *object, kasan_start_report(&flags); pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", ip); + pr_err("\n"); print_address_description(object); + pr_err("\n"); print_shadow_for_address(object); kasan_end_report(&flags); } @@ -338,11 +343,13 @@ static void kasan_report_error(struct kasan_access_info *info) kasan_start_report(&flags); print_error_description(info); + pr_err("\n"); if (!addr_has_shadow(info)) { dump_stack(); } else { print_address_description((void *)info->access_addr); + pr_err("\n"); print_shadow_for_address(info->first_bad_addr); } -- GitLab From 77b062e3083d190f1f9d9cc47330693dabe01f7e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 10 Jul 2017 15:50:40 -0700 Subject: [PATCH 0343/1398] UPSTREAM: kasan: make get_wild_bug_type() static The helper function get_wild_bug_type() does not need to be in global scope, so make it static. Cleans up sparse warning: "symbol 'get_wild_bug_type' was not declared. Should it be static?" Link: http://lkml.kernel.org/r/20170622090049.10658-1-colin.king@canonical.com Signed-off-by: Colin Ian King Acked-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 822d5ec25884b4e4436c819d03035fc0dd689309) Change-Id: If89c8ba8ee3bdb0db7ecb67e773bfbf3179514f3 Signed-off-by: Paul Lawrence --- mm/kasan/report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index beee0e980e2d..04bb1d3eb9ec 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -107,7 +107,7 @@ static const char *get_shadow_bug_type(struct kasan_access_info *info) return bug_type; } -const char *get_wild_bug_type(struct kasan_access_info *info) +static const char *get_wild_bug_type(struct kasan_access_info *info) { const char *bug_type = "unknown-crash"; -- GitLab From d4952eba915fb3d6db2407e588bcac8db4ac2bb6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Aug 2017 11:38:21 +0900 Subject: [PATCH 0344/1398] BACKPORT: irq: Make the irqentry text section unconditional Generate irqentry and softirqentry text sections without any Kconfig dependencies. This will add extra sections, but there should be no performace impact. Suggested-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Chris Zankel Cc: David S . Miller Cc: Francis Deslauriers Cc: Jesper Nilsson Cc: Linus Torvalds Cc: Max Filippov Cc: Mikael Starvik Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-cris-kernel@axis.com Cc: mathieu.desnoyers@efficios.com Link: http://lkml.kernel.org/r/150172789110.27216.3955739126693102122.stgit@devbox Signed-off-by: Ingo Molnar Bug: 64145065 (cherry-picked from 229a71860547ec856b156179a9c6bef2de426f66) Change-Id: I8f10ad59f16d637834a9dcacebdf087a028e995d Signed-off-by: Paul Lawrence --- arch/arm/include/asm/traps.h | 7 ------- arch/arm64/include/asm/traps.h | 7 ------- arch/x86/entry/entry_64.S | 9 ++------- include/asm-generic/sections.h | 4 ++++ include/asm-generic/vmlinux.lds.h | 8 -------- include/linux/interrupt.h | 14 +------------- 6 files changed, 7 insertions(+), 42 deletions(-) diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h index f555bb3664dc..683d9230984a 100644 --- a/arch/arm/include/asm/traps.h +++ b/arch/arm/include/asm/traps.h @@ -18,7 +18,6 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER static inline int __in_irqentry_text(unsigned long ptr) { extern char __irqentry_text_start[]; @@ -27,12 +26,6 @@ static inline int __in_irqentry_text(unsigned long ptr) return ptr >= (unsigned long)&__irqentry_text_start && ptr < (unsigned long)&__irqentry_text_end; } -#else -static inline int __in_irqentry_text(unsigned long ptr) -{ - return 0; -} -#endif static inline int in_exception_text(unsigned long ptr) { diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 02e9035b0685..47a9066f7c86 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -37,18 +37,11 @@ void unregister_undef_hook(struct undef_hook *hook); void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER static inline int __in_irqentry_text(unsigned long ptr) { return ptr >= (unsigned long)&__irqentry_text_start && ptr < (unsigned long)&__irqentry_text_end; } -#else -static inline int __in_irqentry_text(unsigned long ptr) -{ - return 0; -} -#endif static inline int in_exception_text(unsigned long ptr) { diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e7b0e7ff4c58..072e9013e8e5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -682,13 +682,8 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym) #endif /* Make sure APIC interrupt handlers end up in the irqentry section: */ -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) -# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax" -# define POP_SECTION_IRQENTRY .popsection -#else -# define PUSH_SECTION_IRQENTRY -# define POP_SECTION_IRQENTRY -#endif +#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax" +#define POP_SECTION_IRQENTRY .popsection .macro apicinterrupt num sym do_sym PUSH_SECTION_IRQENTRY diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 4df64a1fc09e..e02a3d968f12 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -27,6 +27,8 @@ * __kprobes_text_start, __kprobes_text_end * __entry_text_start, __entry_text_end * __ctors_start, __ctors_end + * __irqentry_text_start, __irqentry_text_end + * __softirqentry_text_start, __softirqentry_text_end */ extern char _text[], _stext[], _etext[]; extern char _data[], _sdata[], _edata[]; @@ -39,6 +41,8 @@ extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __entry_text_start[], __entry_text_end[]; extern char __start_rodata[], __end_rodata[]; +extern char __irqentry_text_start[], __irqentry_text_end[]; +extern char __softirqentry_text_start[], __softirqentry_text_end[]; /* Start and end of .ctors section - used for constructor calls. */ extern char __ctors_start[], __ctors_end[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index dc81e5287ebf..d8b416e1a2f8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -499,25 +499,17 @@ *(.entry.text) \ VMLINUX_SYMBOL(__entry_text_end) = .; -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) #define IRQENTRY_TEXT \ ALIGN_FUNCTION(); \ VMLINUX_SYMBOL(__irqentry_text_start) = .; \ *(.irqentry.text) \ VMLINUX_SYMBOL(__irqentry_text_end) = .; -#else -#define IRQENTRY_TEXT -#endif -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) #define SOFTIRQENTRY_TEXT \ ALIGN_FUNCTION(); \ VMLINUX_SYMBOL(__softirqentry_text_start) = .; \ *(.softirqentry.text) \ VMLINUX_SYMBOL(__softirqentry_text_end) = .; -#else -#define SOFTIRQENTRY_TEXT -#endif /* Section used for early init (in .S files) */ #define HEAD_TEXT *(.head.text) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 72f0721f75e7..999b7c374645 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -699,7 +700,6 @@ extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) /* * We want to know which function is an entrypoint of a hardirq or a softirq. */ @@ -707,16 +707,4 @@ extern int arch_early_irq_init(void); #define __softirq_entry \ __attribute__((__section__(".softirqentry.text"))) -/* Limits of hardirq entrypoints */ -extern char __irqentry_text_start[]; -extern char __irqentry_text_end[]; -/* Limits of softirq entrypoints */ -extern char __softirqentry_text_start[]; -extern char __softirqentry_text_end[]; - -#else -#define __irq_entry -#define __softirq_entry -#endif - #endif -- GitLab From 7d9726f06e4760bde8d45993835122c6498d901a Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 14 Dec 2016 15:05:46 -0800 Subject: [PATCH 0345/1398] UPSTREAM: kcov: add more missing includes It is fragile that some definitions acquired via transitive dependencies, as shown in below: atomic_* () ENOMEM/EN* () EXPORT_SYMBOL () device_initcall () preempt_* () Include them to prevent possible issues. Link: http://lkml.kernel.org/r/1481163221-40170-1-git-send-email-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Suggested-by: Mark Rutland Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Mark Rutland Cc: James Morse Cc: Kefeng Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from db862358a4a96f52d3b0c713c703828f90d97de9) Change-Id: Ia529631d2072cc795c46ae0276e51592318cd40f Signed-off-by: Paul Lawrence --- kernel/kcov.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/kcov.c b/kernel/kcov.c index 3cbb0c879705..cc2fa35ca480 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -1,11 +1,16 @@ #define pr_fmt(fmt) "kcov: " fmt #define DISABLE_BRANCH_PROFILING +#include #include +#include +#include #include #include #include +#include #include +#include #include #include #include -- GitLab From 5fc77d000bb3b28a6d6b56f67ae633fa7c66a91f Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Mon, 19 Dec 2016 16:23:09 -0800 Subject: [PATCH 0346/1398] UPSTREAM: kcov: make kcov work properly with KASLR enabled Subtract KASLR offset from the kernel addresses reported by kcov. Tested on x86_64 and AArch64 (Hikey LeMaker). Link: http://lkml.kernel.org/r/1481417456-28826-3-git-send-email-alex.popov@linux.com Signed-off-by: Alexander Popov Cc: Catalin Marinas Cc: Will Deacon Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Rob Herring Cc: Kefeng Wang Cc: AKASHI Takahiro Cc: Jon Masters Cc: David Daney Cc: Ganapatrao Kulkarni Cc: Dmitry Vyukov Cc: Nicolai Stange Cc: James Morse Cc: Andrey Ryabinin Cc: Andrey Konovalov Cc: Alexander Popov Cc: syzkaller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 4983f0ab7ffaad1e534b21975367429736475205) Change-Id: Ib19d7fb559f7db28314cd13a3e33e061d1dfdec9 Signed-off-by: Paul Lawrence --- kernel/kcov.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index cc2fa35ca480..85e5546cd791 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -19,6 +19,7 @@ #include #include #include +#include /* * kcov descriptor (one per opened debugfs file). @@ -73,6 +74,11 @@ void notrace __sanitizer_cov_trace_pc(void) if (mode == KCOV_MODE_TRACE) { unsigned long *area; unsigned long pos; + unsigned long ip = _RET_IP_; + +#ifdef CONFIG_RANDOMIZE_BASE + ip -= kaslr_offset(); +#endif /* * There is some code that runs in interrupts but for which @@ -86,7 +92,7 @@ void notrace __sanitizer_cov_trace_pc(void) /* The first word is number of subsequent PCs. */ pos = READ_ONCE(area[0]) + 1; if (likely(pos < t->kcov_size)) { - area[pos] = _RET_IP_; + area[pos] = ip; WRITE_ONCE(area[0], pos); } } -- GitLab From 99463c8134ee225d065bf089920f66da8a176b19 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 8 May 2017 15:56:48 -0700 Subject: [PATCH 0347/1398] UPSTREAM: kcov: simplify interrupt check in_interrupt() semantics are confusing and wrong for most users as it also returns true when bh is disabled. Thus we open coded a proper check for interrupts in __sanitizer_cov_trace_pc() with a lengthy explanatory comment. Use the new in_task() predicate instead. Link: http://lkml.kernel.org/r/20170321091026.139655-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Kefeng Wang Cc: James Morse Cc: Alexander Popov Cc: Andrey Konovalov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from f61e869d519c0c11a8d80a503cfdfb4897df855a) Change-Id: Ice260535314238c8f82ddc578ecaeea6177d28fc Signed-off-by: Paul Lawrence --- kernel/kcov.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 85e5546cd791..cd771993f96f 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -60,15 +60,8 @@ void notrace __sanitizer_cov_trace_pc(void) /* * We are interested in code coverage as a function of a syscall inputs, * so we ignore code executed in interrupts. - * The checks for whether we are in an interrupt are open-coded, because - * 1. We can't use in_interrupt() here, since it also returns true - * when we are inside local_bh_disable() section. - * 2. We don't want to use (in_irq() | in_serving_softirq() | in_nmi()), - * since that leads to slower generated code (three separate tests, - * one for each of the flags). */ - if (!t || (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET - | NMI_MASK))) + if (!t || !in_task()) return; mode = READ_ONCE(t->kcov_mode); if (mode == KCOV_MODE_TRACE) { -- GitLab From 87a77ded9b98c21eccd91be334aced5fe8732228 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 8 Sep 2017 16:17:35 -0700 Subject: [PATCH 0348/1398] UPSTREAM: kcov: support compat processes Support compat processes in KCOV by providing compat_ioctl callback. Compat mode uses the same ioctl callback: we have 2 commands that do not use the argument and 1 that already checks that the arg does not overflow INT_MAX. This allows to use KCOV-guided fuzzing in compat processes. Link: http://lkml.kernel.org/r/20170823100553.55812-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 7483e5d420d9d5aa1732c5efb0da59e095a8b24e) Change-Id:I74b62f01941091649ce6e88b3130e4ca4274a8de Signed-off-by: Paul Lawrence --- kernel/kcov.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/kcov.c b/kernel/kcov.c index cd771993f96f..3f693a0f6f3e 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -270,6 +270,7 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) static const struct file_operations kcov_fops = { .open = kcov_open, .unlocked_ioctl = kcov_ioctl, + .compat_ioctl = kcov_ioctl, .mmap = kcov_mmap, .release = kcov_close, }; -- GitLab From 9dd90d685197874bfdb1493d1308a5c4ca99a4d6 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 17 Nov 2017 15:30:42 -0800 Subject: [PATCH 0349/1398] UPSTREAM: kcov: remove pointless current != NULL check __sanitizer_cov_trace_pc() is a hot code, so it's worth to remove pointless '!current' check. Current is never NULL. Link: http://lkml.kernel.org/r/20170929162221.32500-1-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Acked-by: Dmitry Vyukov Acked-by: Mark Rutland Cc: Andrey Konovalov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from fcf4edac049a8bca41658970292e2dfdbc9d5f62) Change-Id: Ia76e8c6cc0dc3fb796d8e8b92430fcf659b52eee Signed-off-by: Paul Lawrence --- kernel/kcov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 3f693a0f6f3e..d9a4ee1f3423 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -61,7 +61,7 @@ void notrace __sanitizer_cov_trace_pc(void) * We are interested in code coverage as a function of a syscall inputs, * so we ignore code executed in interrupts. */ - if (!t || !in_task()) + if (!in_task()) return; mode = READ_ONCE(t->kcov_mode); if (mode == KCOV_MODE_TRACE) { -- GitLab From 94126098662f340edc7e47d06dd4b1b52fbcdd0d Mon Sep 17 00:00:00 2001 From: Victor Chibotaru Date: Fri, 17 Nov 2017 15:30:46 -0800 Subject: [PATCH 0350/1398] UPSTREAM: kcov: support comparison operands collection Enables kcov to collect comparison operands from instrumented code. This is done by using Clang's -fsanitize=trace-cmp instrumentation (currently not available for GCC). The comparison operands help a lot in fuzz testing. E.g. they are used in Syzkaller to cover the interiors of conditional statements with way less attempts and thus make previously unreachable code reachable. To allow separate collection of coverage and comparison operands two different work modes are implemented. Mode selection is now done via a KCOV_ENABLE ioctl call with corresponding argument value. Link: http://lkml.kernel.org/r/20171011095459.70721-1-glider@google.com Signed-off-by: Victor Chibotaru Signed-off-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Mark Rutland Cc: Alexander Popov Cc: Andrey Ryabinin Cc: Kees Cook Cc: Vegard Nossum Cc: Quentin Casasnovas Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from ded97d2c2b2c5f1dcced0bc57133f7753b037dfc) Change-Id: Iaba700a3f4786048be14a5e764ccabceae114eb7 Signed-off-by: Paul Lawrence --- include/linux/kcov.h | 12 ++- include/uapi/linux/kcov.h | 24 +++++ kernel/kcov.c | 214 +++++++++++++++++++++++++++++++------- 3 files changed, 211 insertions(+), 39 deletions(-) diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 2883ac98c280..87e2a44f1bab 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -7,19 +7,23 @@ struct task_struct; #ifdef CONFIG_KCOV -void kcov_task_init(struct task_struct *t); -void kcov_task_exit(struct task_struct *t); - enum kcov_mode { /* Coverage collection is not enabled yet. */ KCOV_MODE_DISABLED = 0, + /* KCOV was initialized, but tracing mode hasn't been chosen yet. */ + KCOV_MODE_INIT = 1, /* * Tracing coverage collection mode. * Covered PCs are collected in a per-task buffer. */ - KCOV_MODE_TRACE = 1, + KCOV_MODE_TRACE_PC = 2, + /* Collecting comparison operands mode. */ + KCOV_MODE_TRACE_CMP = 3, }; +void kcov_task_init(struct task_struct *t); +void kcov_task_exit(struct task_struct *t); + #else static inline void kcov_task_init(struct task_struct *t) {} diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h index 574e22ec640d..33b826b9946e 100644 --- a/include/uapi/linux/kcov.h +++ b/include/uapi/linux/kcov.h @@ -7,4 +7,28 @@ #define KCOV_ENABLE _IO('c', 100) #define KCOV_DISABLE _IO('c', 101) +enum { + /* + * Tracing coverage collection mode. + * Covered PCs are collected in a per-task buffer. + * In new KCOV version the mode is chosen by calling + * ioctl(fd, KCOV_ENABLE, mode). In older versions the mode argument + * was supposed to be 0 in such a call. So, for reasons of backward + * compatibility, we have chosen the value KCOV_TRACE_PC to be 0. + */ + KCOV_TRACE_PC = 0, + /* Collecting comparison operands mode. */ + KCOV_TRACE_CMP = 1, +}; + +/* + * The format for the types of collected comparisons. + * + * Bit 0 shows whether one of the arguments is a compile-time constant. + * Bits 1 & 2 contain log2 of the argument size, up to 8 bytes. + */ +#define KCOV_CMP_CONST (1 << 0) +#define KCOV_CMP_SIZE(n) ((n) << 1) +#define KCOV_CMP_MASK KCOV_CMP_SIZE(3) + #endif /* _LINUX_KCOV_IOCTLS_H */ diff --git a/kernel/kcov.c b/kernel/kcov.c index d9a4ee1f3423..461c55e09c69 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -21,13 +21,21 @@ #include #include +/* Number of 64-bit words written per one comparison: */ +#define KCOV_WORDS_PER_CMP 4 + /* * kcov descriptor (one per opened debugfs file). * State transitions of the descriptor: * - initial state after open() * - then there must be a single ioctl(KCOV_INIT_TRACE) call * - then, mmap() call (several calls are allowed but not useful) - * - then, repeated enable/disable for a task (only one task a time allowed) + * - then, ioctl(KCOV_ENABLE, arg), where arg is + * KCOV_TRACE_PC - to trace only the PCs + * or + * KCOV_TRACE_CMP - to trace only the comparison operands + * - then, ioctl(KCOV_DISABLE) to disable the task. + * Enabling/disabling ioctls can be repeated (only one task a time allowed). */ struct kcov { /* @@ -47,51 +55,176 @@ struct kcov { struct task_struct *t; }; -/* - * Entry point from instrumented code. - * This is called once per basic-block/edge. - */ -void notrace __sanitizer_cov_trace_pc(void) +static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { - struct task_struct *t; enum kcov_mode mode; - t = current; /* * We are interested in code coverage as a function of a syscall inputs, * so we ignore code executed in interrupts. */ if (!in_task()) - return; + return false; mode = READ_ONCE(t->kcov_mode); - if (mode == KCOV_MODE_TRACE) { - unsigned long *area; - unsigned long pos; - unsigned long ip = _RET_IP_; + /* + * There is some code that runs in interrupts but for which + * in_interrupt() returns false (e.g. preempt_schedule_irq()). + * READ_ONCE()/barrier() effectively provides load-acquire wrt + * interrupts, there are paired barrier()/WRITE_ONCE() in + * kcov_ioctl_locked(). + */ + barrier(); + return mode == needed_mode; +} +static unsigned long canonicalize_ip(unsigned long ip) +{ #ifdef CONFIG_RANDOMIZE_BASE - ip -= kaslr_offset(); + ip -= kaslr_offset(); #endif + return ip; +} - /* - * There is some code that runs in interrupts but for which - * in_interrupt() returns false (e.g. preempt_schedule_irq()). - * READ_ONCE()/barrier() effectively provides load-acquire wrt - * interrupts, there are paired barrier()/WRITE_ONCE() in - * kcov_ioctl_locked(). - */ - barrier(); - area = t->kcov_area; - /* The first word is number of subsequent PCs. */ - pos = READ_ONCE(area[0]) + 1; - if (likely(pos < t->kcov_size)) { - area[pos] = ip; - WRITE_ONCE(area[0], pos); - } +/* + * Entry point from instrumented code. + * This is called once per basic-block/edge. + */ +void notrace __sanitizer_cov_trace_pc(void) +{ + struct task_struct *t; + unsigned long *area; + unsigned long ip = canonicalize_ip(_RET_IP_); + unsigned long pos; + + t = current; + if (!check_kcov_mode(KCOV_MODE_TRACE_PC, t)) + return; + + area = t->kcov_area; + /* The first 64-bit word is the number of subsequent PCs. */ + pos = READ_ONCE(area[0]) + 1; + if (likely(pos < t->kcov_size)) { + area[pos] = ip; + WRITE_ONCE(area[0], pos); } } EXPORT_SYMBOL(__sanitizer_cov_trace_pc); +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS +static void write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip) +{ + struct task_struct *t; + u64 *area; + u64 count, start_index, end_pos, max_pos; + + t = current; + if (!check_kcov_mode(KCOV_MODE_TRACE_CMP, t)) + return; + + ip = canonicalize_ip(ip); + + /* + * We write all comparison arguments and types as u64. + * The buffer was allocated for t->kcov_size unsigned longs. + */ + area = (u64 *)t->kcov_area; + max_pos = t->kcov_size * sizeof(unsigned long); + + count = READ_ONCE(area[0]); + + /* Every record is KCOV_WORDS_PER_CMP 64-bit words. */ + start_index = 1 + count * KCOV_WORDS_PER_CMP; + end_pos = (start_index + KCOV_WORDS_PER_CMP) * sizeof(u64); + if (likely(end_pos <= max_pos)) { + area[start_index] = type; + area[start_index + 1] = arg1; + area[start_index + 2] = arg2; + area[start_index + 3] = ip; + WRITE_ONCE(area[0], count + 1); + } +} + +void notrace __sanitizer_cov_trace_cmp1(u8 arg1, u8 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(0), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp1); + +void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(1), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2); + +void notrace __sanitizer_cov_trace_cmp4(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp4); + +void notrace __sanitizer_cov_trace_cmp8(u64 arg1, u64 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(3), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp8); + +void notrace __sanitizer_cov_trace_const_cmp1(u8 arg1, u8 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(0) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp1); + +void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(1) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2); + +void notrace __sanitizer_cov_trace_const_cmp4(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp4); + +void notrace __sanitizer_cov_trace_const_cmp8(u64 arg1, u64 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(3) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp8); + +void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases) +{ + u64 i; + u64 count = cases[0]; + u64 size = cases[1]; + u64 type = KCOV_CMP_CONST; + + switch (size) { + case 8: + type |= KCOV_CMP_SIZE(0); + break; + case 16: + type |= KCOV_CMP_SIZE(1); + break; + case 32: + type |= KCOV_CMP_SIZE(2); + break; + case 64: + type |= KCOV_CMP_SIZE(3); + break; + default: + return; + } + for (i = 0; i < count; i++) + write_comp_data(type, cases[i + 2], val, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_switch); +#endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */ + static void kcov_get(struct kcov *kcov) { atomic_inc(&kcov->refcount); @@ -128,6 +261,7 @@ void kcov_task_exit(struct task_struct *t) /* Just to not leave dangling references behind. */ kcov_task_init(t); kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; spin_unlock(&kcov->lock); kcov_put(kcov); } @@ -146,7 +280,7 @@ static int kcov_mmap(struct file *filep, struct vm_area_struct *vma) spin_lock(&kcov->lock); size = kcov->size * sizeof(unsigned long); - if (kcov->mode == KCOV_MODE_DISABLED || vma->vm_pgoff != 0 || + if (kcov->mode != KCOV_MODE_INIT || vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != size) { res = -EINVAL; goto exit; @@ -175,6 +309,7 @@ static int kcov_open(struct inode *inode, struct file *filep) kcov = kzalloc(sizeof(*kcov), GFP_KERNEL); if (!kcov) return -ENOMEM; + kcov->mode = KCOV_MODE_DISABLED; atomic_set(&kcov->refcount, 1); spin_lock_init(&kcov->lock); filep->private_data = kcov; @@ -210,7 +345,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, if (size < 2 || size > INT_MAX / sizeof(unsigned long)) return -EINVAL; kcov->size = size; - kcov->mode = KCOV_MODE_TRACE; + kcov->mode = KCOV_MODE_INIT; return 0; case KCOV_ENABLE: /* @@ -220,17 +355,25 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, * at task exit or voluntary by KCOV_DISABLE. After that it can * be enabled for another task. */ - unused = arg; - if (unused != 0 || kcov->mode == KCOV_MODE_DISABLED || - kcov->area == NULL) + if (kcov->mode != KCOV_MODE_INIT || !kcov->area) return -EINVAL; if (kcov->t != NULL) return -EBUSY; + if (arg == KCOV_TRACE_PC) + kcov->mode = KCOV_MODE_TRACE_PC; + else if (arg == KCOV_TRACE_CMP) +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS + kcov->mode = KCOV_MODE_TRACE_CMP; +#else + return -ENOTSUPP; +#endif + else + return -EINVAL; t = current; /* Cache in task struct for performance. */ t->kcov_size = kcov->size; t->kcov_area = kcov->area; - /* See comment in __sanitizer_cov_trace_pc(). */ + /* See comment in check_kcov_mode(). */ barrier(); WRITE_ONCE(t->kcov_mode, kcov->mode); t->kcov = kcov; @@ -248,6 +391,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, return -EINVAL; kcov_task_init(t); kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; kcov_put(kcov); return 0; default: -- GitLab From 9b3b5d1f251566ecf5519ad19518d636645dee9c Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 8 Dec 2017 02:01:35 +0000 Subject: [PATCH 0351/1398] UPSTREAM: kcov: fix comparison callback signature Fix a silly copy-paste bug. We truncated u32 args to u16. Link: http://lkml.kernel.org/r/20171207101134.107168-1-dvyukov@google.com Fixes: ded97d2c2b2c ("kcov: support comparison operands collection") Signed-off-by: Dmitry Vyukov Cc: syzkaller@googlegroups.com Cc: Alexander Potapenko Cc: Vegard Nossum Cc: Quentin Casasnovas Signed-off-by: Andrew Morton Bug: 64145065 (cherry-picked from 8c0431ec452de79ef3fe998c1fbb1e3d3ac13ddd) Change-Id: Ic3872c33d03a456640dd6fdcce3b0795765dc1c0 Signed-off-by: Paul Lawrence --- kernel/kcov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 461c55e09c69..f4988f5a72d2 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -156,7 +156,7 @@ void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2) } EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2); -void notrace __sanitizer_cov_trace_cmp4(u16 arg1, u16 arg2) +void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2) { write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_); } @@ -182,7 +182,7 @@ void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2) } EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2); -void notrace __sanitizer_cov_trace_const_cmp4(u16 arg1, u16 arg2) +void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2) { write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2, _RET_IP_); -- GitLab From c14c7b37e933db5f748d544b4a6c8c929ffd9565 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Mon, 19 Dec 2016 16:23:06 -0800 Subject: [PATCH 0352/1398] UPSTREAM: arm64: setup: introduce kaslr_offset() Introduce kaslr_offset() similar to x86_64 to fix kcov. [ Updated by Will Deacon ] Link: http://lkml.kernel.org/r/1481417456-28826-2-git-send-email-alex.popov@linux.com Signed-off-by: Alexander Popov Cc: Catalin Marinas Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Rob Herring Cc: Kefeng Wang Cc: AKASHI Takahiro Cc: Jon Masters Cc: David Daney Cc: Ganapatrao Kulkarni Cc: Dmitry Vyukov Cc: Nicolai Stange Cc: James Morse Cc: Andrey Ryabinin Cc: Andrey Konovalov Cc: Alexander Popov Cc: syzkaller Signed-off-by: Andrew Morton Signed-off-by: Will Deacon Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 7ede8665f27cde7da69e8b2fbeaa1ed0664879c5) Change-Id: I9f013afed7f60d2280bb36ce3ba14d8c5515ddb1 Signed-off-by: Paul Lawrence --- arch/arm64/include/asm/memory.h | 5 +++++ arch/arm64/kernel/setup.c | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f43680fb2292..30cd69729aab 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -165,6 +165,11 @@ extern u64 kimage_vaddr; /* the offset between the kernel virtual and physical mappings */ extern u64 kimage_voffset; +static inline unsigned long kaslr_offset(void) +{ + return kimage_vaddr - KIMAGE_VADDR; +} + /* * Allow all memory at the discovery stage. We will clip it later. */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index fe014d62e2c6..6a9dc51da64c 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -339,11 +339,11 @@ subsys_initcall(topology_init); static int dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) { - u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR; + const unsigned long offset = kaslr_offset(); - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) { - pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n", - kaslr_offset, KIMAGE_VADDR); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && offset > 0) { + pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n", + offset, KIMAGE_VADDR); } else { pr_emerg("Kernel Offset: disabled\n"); } -- GitLab From 1fb73eae9624e2d3583581cd80bf408cc9b3916d Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Tue, 17 Oct 2017 22:53:08 +0200 Subject: [PATCH 0353/1398] mfd: fsl-imx25: Clean up irq settings during removal commit 18f77393796848e68909e65d692c1d1436f06e06 upstream. When fsl-imx25-tsadc is compiled as a module, loading, unloading and reloading the module will lead to a crash. Unable to handle kernel paging request at virtual address bf005430 [] (irq_find_matching_fwspec) from [] (of_irq_get+0x58/0x74) [] (of_irq_get) from [] (platform_get_irq+0x48/0xc8) [] (platform_get_irq) from [] (mx25_tsadc_probe+0x220/0x2f4 [fsl_imx25_tsadc]) irq_find_matching_fwspec() loops over all registered irq domains. The irq domain is still registered from last time the module was loaded but the pointer to its operations is invalid after the module was unloaded. Add a removal function which clears the irq handler and removes the irq domain. With this cleanup in place, it's possible to unload and reload the module. Signed-off-by: Martin Kaiser Reviewed-by: Lucas Stach Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/fsl-imx25-tsadc.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/mfd/fsl-imx25-tsadc.c b/drivers/mfd/fsl-imx25-tsadc.c index 77b2675cf8f5..92e176009ffe 100644 --- a/drivers/mfd/fsl-imx25-tsadc.c +++ b/drivers/mfd/fsl-imx25-tsadc.c @@ -183,6 +183,19 @@ static int mx25_tsadc_probe(struct platform_device *pdev) return 0; } +static int mx25_tsadc_remove(struct platform_device *pdev) +{ + struct mx25_tsadc *tsadc = platform_get_drvdata(pdev); + int irq = platform_get_irq(pdev, 0); + + if (irq) { + irq_set_chained_handler_and_data(irq, NULL, NULL); + irq_domain_remove(tsadc->domain); + } + + return 0; +} + static const struct of_device_id mx25_tsadc_ids[] = { { .compatible = "fsl,imx25-tsadc" }, { /* Sentinel */ } @@ -194,6 +207,7 @@ static struct platform_driver mx25_tsadc_driver = { .of_match_table = of_match_ptr(mx25_tsadc_ids), }, .probe = mx25_tsadc_probe, + .remove = mx25_tsadc_remove, }; module_platform_driver(mx25_tsadc_driver); -- GitLab From cd9b59861f9cad2f1b36eace975060cfc3bc46e4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 26 Nov 2017 23:16:49 -0800 Subject: [PATCH 0354/1398] crypto: rsa - fix buffer overread when stripping leading zeroes commit d2890c3778b164fde587bc16583f3a1c87233ec5 upstream. In rsa_get_n(), if the buffer contained all 0's and "FIPS mode" is enabled, we would read one byte past the end of the buffer while scanning the leading zeroes. Fix it by checking 'n_sz' before '!*ptr'. This bug was reachable by adding a specially crafted key of type "asymmetric" (requires CONFIG_RSA and CONFIG_X509_CERTIFICATE_PARSER). KASAN report: BUG: KASAN: slab-out-of-bounds in rsa_get_n+0x19e/0x1d0 crypto/rsa_helper.c:33 Read of size 1 at addr ffff88003501a708 by task keyctl/196 CPU: 1 PID: 196 Comm: keyctl Not tainted 4.14.0-09238-g1d3b78bbc6e9 #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 Call Trace: rsa_get_n+0x19e/0x1d0 crypto/rsa_helper.c:33 asn1_ber_decoder+0x82a/0x1fd0 lib/asn1_decoder.c:328 rsa_set_pub_key+0xd3/0x320 crypto/rsa.c:278 crypto_akcipher_set_pub_key ./include/crypto/akcipher.h:364 [inline] pkcs1pad_set_pub_key+0xae/0x200 crypto/rsa-pkcs1pad.c:117 crypto_akcipher_set_pub_key ./include/crypto/akcipher.h:364 [inline] public_key_verify_signature+0x270/0x9d0 crypto/asymmetric_keys/public_key.c:106 x509_check_for_self_signed+0x2ea/0x480 crypto/asymmetric_keys/x509_public_key.c:141 x509_cert_parse+0x46a/0x620 crypto/asymmetric_keys/x509_cert_parser.c:129 x509_key_preparse+0x61/0x750 crypto/asymmetric_keys/x509_public_key.c:174 asymmetric_key_preparse+0xa4/0x150 crypto/asymmetric_keys/asymmetric_type.c:388 key_create_or_update+0x4d4/0x10a0 security/keys/key.c:850 SYSC_add_key security/keys/keyctl.c:122 [inline] SyS_add_key+0xe8/0x290 security/keys/keyctl.c:62 entry_SYSCALL_64_fastpath+0x1f/0x96 Allocated by task 196: __do_kmalloc mm/slab.c:3711 [inline] __kmalloc_track_caller+0x118/0x2e0 mm/slab.c:3726 kmemdup+0x17/0x40 mm/util.c:118 kmemdup ./include/linux/string.h:414 [inline] x509_cert_parse+0x2cb/0x620 crypto/asymmetric_keys/x509_cert_parser.c:106 x509_key_preparse+0x61/0x750 crypto/asymmetric_keys/x509_public_key.c:174 asymmetric_key_preparse+0xa4/0x150 crypto/asymmetric_keys/asymmetric_type.c:388 key_create_or_update+0x4d4/0x10a0 security/keys/key.c:850 SYSC_add_key security/keys/keyctl.c:122 [inline] SyS_add_key+0xe8/0x290 security/keys/keyctl.c:62 entry_SYSCALL_64_fastpath+0x1f/0x96 Fixes: 5a7de97309f5 ("crypto: rsa - return raw integers for the ASN.1 parser") Cc: Tudor Ambarus Signed-off-by: Eric Biggers Reviewed-by: James Morris Reviewed-by: David Howells Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/rsa_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/rsa_helper.c b/crypto/rsa_helper.c index 0b66dc824606..cad395d70d78 100644 --- a/crypto/rsa_helper.c +++ b/crypto/rsa_helper.c @@ -30,7 +30,7 @@ int rsa_get_n(void *context, size_t hdrlen, unsigned char tag, return -EINVAL; if (fips_enabled) { - while (!*ptr && n_sz) { + while (n_sz && !*ptr) { ptr++; n_sz--; } -- GitLab From 43259d07fceb8cc1f5ba7e8003ae19023e0620f5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 28 Nov 2017 18:01:38 -0800 Subject: [PATCH 0355/1398] crypto: hmac - require that the underlying hash algorithm is unkeyed commit af3ff8045bbf3e32f1a448542e73abb4c8ceb6f1 upstream. Because the HMAC template didn't check that its underlying hash algorithm is unkeyed, trying to use "hmac(hmac(sha3-512-generic))" through AF_ALG or through KEYCTL_DH_COMPUTE resulted in the inner HMAC being used without having been keyed, resulting in sha3_update() being called without sha3_init(), causing a stack buffer overflow. This is a very old bug, but it seems to have only started causing real problems when SHA-3 support was added (requires CONFIG_CRYPTO_SHA3) because the innermost hash's state is ->import()ed from a zeroed buffer, and it just so happens that other hash algorithms are fine with that, but SHA-3 is not. However, there could be arch or hardware-dependent hash algorithms also affected; I couldn't test everything. Fix the bug by introducing a function crypto_shash_alg_has_setkey() which tests whether a shash algorithm is keyed. Then update the HMAC template to require that its underlying hash algorithm is unkeyed. Here is a reproducer: #include #include int main() { int algfd; struct sockaddr_alg addr = { .salg_type = "hash", .salg_name = "hmac(hmac(sha3-512-generic))", }; char key[4096] = { 0 }; algfd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(algfd, (const struct sockaddr *)&addr, sizeof(addr)); setsockopt(algfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key)); } Here was the KASAN report from syzbot: BUG: KASAN: stack-out-of-bounds in memcpy include/linux/string.h:341 [inline] BUG: KASAN: stack-out-of-bounds in sha3_update+0xdf/0x2e0 crypto/sha3_generic.c:161 Write of size 4096 at addr ffff8801cca07c40 by task syzkaller076574/3044 CPU: 1 PID: 3044 Comm: syzkaller076574 Not tainted 4.14.0-mm1+ #25 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x25b/0x340 mm/kasan/report.c:409 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x137/0x190 mm/kasan/kasan.c:267 memcpy+0x37/0x50 mm/kasan/kasan.c:303 memcpy include/linux/string.h:341 [inline] sha3_update+0xdf/0x2e0 crypto/sha3_generic.c:161 crypto_shash_update+0xcb/0x220 crypto/shash.c:109 shash_finup_unaligned+0x2a/0x60 crypto/shash.c:151 crypto_shash_finup+0xc4/0x120 crypto/shash.c:165 hmac_finup+0x182/0x330 crypto/hmac.c:152 crypto_shash_finup+0xc4/0x120 crypto/shash.c:165 shash_digest_unaligned+0x9e/0xd0 crypto/shash.c:172 crypto_shash_digest+0xc4/0x120 crypto/shash.c:186 hmac_setkey+0x36a/0x690 crypto/hmac.c:66 crypto_shash_setkey+0xad/0x190 crypto/shash.c:64 shash_async_setkey+0x47/0x60 crypto/shash.c:207 crypto_ahash_setkey+0xaf/0x180 crypto/ahash.c:200 hash_setkey+0x40/0x90 crypto/algif_hash.c:446 alg_setkey crypto/af_alg.c:221 [inline] alg_setsockopt+0x2a1/0x350 crypto/af_alg.c:254 SYSC_setsockopt net/socket.c:1851 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1830 entry_SYSCALL_64_fastpath+0x1f/0x96 Reported-by: syzbot Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/hmac.c | 6 +++++- crypto/shash.c | 5 +++-- include/crypto/internal/hash.h | 8 ++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/crypto/hmac.c b/crypto/hmac.c index 72e38c098bb3..ba07fb6221ae 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -194,11 +194,15 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb) salg = shash_attr_alg(tb[1], 0, 0); if (IS_ERR(salg)) return PTR_ERR(salg); + alg = &salg->base; + /* The underlying hash algorithm must be unkeyed */ err = -EINVAL; + if (crypto_shash_alg_has_setkey(salg)) + goto out_put_alg; + ds = salg->digestsize; ss = salg->statesize; - alg = &salg->base; if (ds > alg->cra_blocksize || ss < alg->cra_blocksize) goto out_put_alg; diff --git a/crypto/shash.c b/crypto/shash.c index 4d8a671d1614..9bd5044d467b 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -24,11 +24,12 @@ static const struct crypto_type crypto_shash_type; -static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) +int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) { return -ENOSYS; } +EXPORT_SYMBOL_GPL(shash_no_setkey); static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index f6d9af3efa45..cac57358f7af 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -80,6 +80,14 @@ int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); void ahash_free_instance(struct crypto_instance *inst); +int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen); + +static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg) +{ + return alg->setkey != shash_no_setkey; +} + int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn, struct hash_alg_common *alg, struct crypto_instance *inst); -- GitLab From c32e053a11f231376f0899ef906fd43f8fc8dbd0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 28 Nov 2017 20:56:59 -0800 Subject: [PATCH 0356/1398] crypto: salsa20 - fix blkcipher_walk API usage commit ecaaab5649781c5a0effdaf298a925063020500e upstream. When asked to encrypt or decrypt 0 bytes, both the generic and x86 implementations of Salsa20 crash in blkcipher_walk_done(), either when doing 'kfree(walk->buffer)' or 'free_page((unsigned long)walk->page)', because walk->buffer and walk->page have not been initialized. The bug is that Salsa20 is calling blkcipher_walk_done() even when nothing is in 'walk.nbytes'. But blkcipher_walk_done() is only meant to be called when a nonzero number of bytes have been provided. The broken code is part of an optimization that tries to make only one call to salsa20_encrypt_bytes() to process inputs that are not evenly divisible by 64 bytes. To fix the bug, just remove this "optimization" and use the blkcipher_walk API the same way all the other users do. Reproducer: #include #include #include int main() { int algfd, reqfd; struct sockaddr_alg addr = { .salg_type = "skcipher", .salg_name = "salsa20", }; char key[16] = { 0 }; algfd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(algfd, (void *)&addr, sizeof(addr)); reqfd = accept(algfd, 0, 0); setsockopt(algfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key)); read(reqfd, key, sizeof(key)); } Reported-by: syzbot Fixes: eb6f13eb9f81 ("[CRYPTO] salsa20_generic: Fix multi-page processing") Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/salsa20_glue.c | 7 ------- crypto/salsa20_generic.c | 7 ------- 2 files changed, 14 deletions(-) diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index 399a29d067d6..cb91a64a99e7 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc, salsa20_ivsetup(ctx, walk.iv); - if (likely(walk.nbytes == nbytes)) - { - salsa20_encrypt_bytes(ctx, walk.src.virt.addr, - walk.dst.virt.addr, nbytes); - return blkcipher_walk_done(desc, &walk, 0); - } - while (walk.nbytes >= 64) { salsa20_encrypt_bytes(ctx, walk.src.virt.addr, walk.dst.virt.addr, diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c index f550b5d94630..d7da0eea5622 100644 --- a/crypto/salsa20_generic.c +++ b/crypto/salsa20_generic.c @@ -188,13 +188,6 @@ static int encrypt(struct blkcipher_desc *desc, salsa20_ivsetup(ctx, walk.iv); - if (likely(walk.nbytes == nbytes)) - { - salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, - walk.src.virt.addr, nbytes); - return blkcipher_walk_done(desc, &walk, 0); - } - while (walk.nbytes >= 64) { salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, walk.src.virt.addr, -- GitLab From d1175423ce67bd1903c4f876ba6774e3a48abdc1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 14 Dec 2017 15:32:38 -0800 Subject: [PATCH 0357/1398] autofs: fix careless error in recent commit commit 302ec300ef8a545a7fc7f667e5fd743b091c2eeb upstream. Commit ecc0c469f277 ("autofs: don't fail mount for transient error") was meant to replace an 'if' with a 'switch', but instead added the 'switch' leaving the case in place. Link: http://lkml.kernel.org/r/87zi6wstmw.fsf@notabene.neil.brown.name Fixes: ecc0c469f277 ("autofs: don't fail mount for transient error") Reported-by: Ben Hutchings Signed-off-by: NeilBrown Cc: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/autofs4/waitq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 4c71dba90120..0ea31a53fd5b 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -176,7 +176,6 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, mutex_unlock(&sbi->wq_mutex); - if (autofs4_write(sbi, pipe, &pkt, pktsz)) switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) { case 0: break; -- GitLab From d760f903419509d933b27d387854eb1e35f88cdf Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 30 Nov 2017 11:39:43 +0800 Subject: [PATCH 0358/1398] tracing: Allocate mask_str buffer dynamically commit 90e406f96f630c07d631a021fd4af10aac913e77 upstream. The default NR_CPUS can be very large, but actual possible nr_cpu_ids usually is very small. For my x86 distribution, the NR_CPUS is 8192 and nr_cpu_ids is 4. About 2 pages are wasted. Most machines don't have so many CPUs, so define a array with NR_CPUS just wastes memory. So let's allocate the buffer dynamically when need. With this change, the mutext tracing_cpumask_update_lock also can be removed now, which was used to protect mask_str. Link: http://lkml.kernel.org/r/1512013183-19107-1-git-send-email-changbin.du@intel.com Fixes: 36dfe9252bd4c ("ftrace: make use of tracing_cpumask") Signed-off-by: Changbin Du Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c1e50cc0d7b0..4214cd960b8e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3727,37 +3727,30 @@ static const struct file_operations show_traces_fops = { .llseek = seq_lseek, }; -/* - * The tracer itself will not take this lock, but still we want - * to provide a consistent cpumask to user-space: - */ -static DEFINE_MUTEX(tracing_cpumask_update_lock); - -/* - * Temporary storage for the character representation of the - * CPU bitmask (and one more byte for the newline): - */ -static char mask_str[NR_CPUS + 1]; - static ssize_t tracing_cpumask_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { struct trace_array *tr = file_inode(filp)->i_private; + char *mask_str; int len; - mutex_lock(&tracing_cpumask_update_lock); + len = snprintf(NULL, 0, "%*pb\n", + cpumask_pr_args(tr->tracing_cpumask)) + 1; + mask_str = kmalloc(len, GFP_KERNEL); + if (!mask_str) + return -ENOMEM; - len = snprintf(mask_str, count, "%*pb\n", + len = snprintf(mask_str, len, "%*pb\n", cpumask_pr_args(tr->tracing_cpumask)); if (len >= count) { count = -EINVAL; goto out_err; } - count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); + count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); out_err: - mutex_unlock(&tracing_cpumask_update_lock); + kfree(mask_str); return count; } @@ -3777,8 +3770,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, if (err) goto err_unlock; - mutex_lock(&tracing_cpumask_update_lock); - local_irq_disable(); arch_spin_lock(&tr->max_lock); for_each_tracing_cpu(cpu) { @@ -3801,8 +3792,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, local_irq_enable(); cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); - - mutex_unlock(&tracing_cpumask_update_lock); free_cpumask_var(tracing_cpumask_new); return count; -- GitLab From 0d29ae4f5033453ea86d60a736bed58fa4c15007 Mon Sep 17 00:00:00 2001 From: David Kozub Date: Tue, 5 Dec 2017 22:40:04 +0100 Subject: [PATCH 0359/1398] USB: uas and storage: Add US_FL_BROKEN_FUA for another JMicron JMS567 ID commit 62354454625741f0569c2cbe45b2d192f8fd258e upstream. There is another JMS567-based USB3 UAS enclosure (152d:0578) that fails with the following error: [sda] tag#0 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [sda] tag#0 Sense Key : Illegal Request [current] [sda] tag#0 Add. Sense: Invalid field in cdb The issue occurs both with UAS (occasionally) and mass storage (immediately after mounting a FS on a disk in the enclosure). Enabling US_FL_BROKEN_FUA quirk solves this issue. This patch adds an UNUSUAL_DEV with US_FL_BROKEN_FUA for the enclosure for both UAS and mass storage. Signed-off-by: David Kozub Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ drivers/usb/storage/unusual_uas.h | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 2572fd5cd2bb..b605115eb47a 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2113,6 +2113,13 @@ UNUSUAL_DEV( 0x152d, 0x0567, 0x0114, 0x0116, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA ), +/* Reported by David Kozub */ +UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999, + "JMicron", + "JMS567", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_BROKEN_FUA), + /* * Reported by Alexandre Oliva * JMicron responds to USN and several other SCSI ioctls with a diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index cde115359793..9f356f7cf7d5 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -142,6 +142,13 @@ UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA | US_FL_NO_REPORT_OPCODES), +/* Reported-by: David Kozub */ +UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999, + "JMicron", + "JMS567", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_BROKEN_FUA), + /* Reported-by: Hans de Goede */ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999, "VIA", -- GitLab From 99542e468b76ae180675566692e0528c4c712661 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 12 Dec 2017 14:25:13 -0500 Subject: [PATCH 0360/1398] USB: core: prevent malicious bNumInterfaces overflow commit 48a4ff1c7bb5a32d2e396b03132d20d552c0eca7 upstream. A malicious USB device with crafted descriptors can cause the kernel to access unallocated memory by setting the bNumInterfaces value too high in a configuration descriptor. Although the value is adjusted during parsing, this adjustment is skipped in one of the error return paths. This patch prevents the problem by setting bNumInterfaces to 0 initially. The existing code already sets it to the proper value after parsing is complete. Signed-off-by: Alan Stern Reported-by: Andrey Konovalov Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 5ebe04d3598b..ba9b29bc441f 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -550,6 +550,9 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, unsigned iad_num = 0; memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE); + nintf = nintf_orig = config->desc.bNumInterfaces; + config->desc.bNumInterfaces = 0; // Adjusted later + if (config->desc.bDescriptorType != USB_DT_CONFIG || config->desc.bLength < USB_DT_CONFIG_SIZE || config->desc.bLength > size) { @@ -563,7 +566,6 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, buffer += config->desc.bLength; size -= config->desc.bLength; - nintf = nintf_orig = config->desc.bNumInterfaces; if (nintf > USB_MAXINTERFACES) { dev_warn(ddev, "config %d has too many interfaces: %d, " "using maximum allowed: %d\n", -- GitLab From 20e825cdf7a1627f92371d45a5322ccecdebcb3b Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 7 Dec 2017 14:16:47 -0700 Subject: [PATCH 0361/1398] usbip: fix stub_rx: get_pipe() to validate endpoint number commit 635f545a7e8be7596b9b2b6a43cab6bbd5a88e43 upstream. get_pipe() routine doesn't validate the input endpoint number and uses to reference ep_in and ep_out arrays. Invalid endpoint number can trigger BUG(). Range check the epnum and returning error instead of calling BUG(). Change caller stub_recv_cmd_submit() to handle the get_pipe() error return. Reported-by: Secunia Research Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_rx.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 191b176ffedf..4f03c611c16d 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -342,15 +342,15 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir) struct usb_host_endpoint *ep; struct usb_endpoint_descriptor *epd = NULL; + if (epnum < 0 || epnum > 15) + goto err_ret; + if (dir == USBIP_DIR_IN) ep = udev->ep_in[epnum & 0x7f]; else ep = udev->ep_out[epnum & 0x7f]; - if (!ep) { - dev_err(&sdev->udev->dev, "no such endpoint?, %d\n", - epnum); - BUG(); - } + if (!ep) + goto err_ret; epd = &ep->desc; if (usb_endpoint_xfer_control(epd)) { @@ -381,9 +381,10 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir) return usb_rcvisocpipe(udev, epnum); } +err_ret: /* NOT REACHED */ - dev_err(&sdev->udev->dev, "get pipe, epnum %d\n", epnum); - return 0; + dev_err(&sdev->udev->dev, "get pipe() invalid epnum %d\n", epnum); + return -1; } static void masking_bogus_flags(struct urb *urb) @@ -449,6 +450,9 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, struct usb_device *udev = sdev->udev; int pipe = get_pipe(sdev, pdu->base.ep, pdu->base.direction); + if (pipe == -1) + return; + priv = stub_priv_alloc(sdev, pdu); if (!priv) return; -- GitLab From b6dbace92ed744a80af75036e93615324cd27aec Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 26 Sep 2016 10:51:18 +0300 Subject: [PATCH 0362/1398] usb: add helper to extract bits 12:11 of wMaxPacketSize commit 541b6fe63023f3059cf85d47ff2767a3e42a8e44 upstream. According to USB Specification 2.0 table 9-4, wMaxPacketSize is a bitfield. Endpoint's maxpacket is laid out in bits 10:0. For high-speed, high-bandwidth isochronous endpoints, bits 12:11 contain a multiplier to tell us how many transactions we want to try per uframe. This means that if we want an isochronous endpoint to issue 3 transfers of 1024 bytes per uframe, wMaxPacketSize should contain the value: 1024 | (2 << 11) or 5120 (0x1400). In order to make Host and Peripheral controller drivers' life easier, we're adding a helper which returns bits 12:11. Note that no care is made WRT to checking endpoint type and gadget's speed. That's left for drivers to handle. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/ch9.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index ab1dadba9923..33c603dd7cd3 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -423,6 +423,11 @@ struct usb_endpoint_descriptor { #define USB_ENDPOINT_XFER_INT 3 #define USB_ENDPOINT_MAX_ADJUSTABLE 0x80 +#define USB_EP_MAXP_MULT_SHIFT 11 +#define USB_EP_MAXP_MULT_MASK (3 << USB_EP_MAXP_MULT_SHIFT) +#define USB_EP_MAXP_MULT(m) \ + (((m) & USB_EP_MAXP_MULT_MASK) >> USB_EP_MAXP_MULT_SHIFT) + /* The USB 3.0 spec redefines bits 5:4 of bmAttributes as interrupt ep type. */ #define USB_ENDPOINT_INTRTYPE 0x30 #define USB_ENDPOINT_INTR_PERIODIC (0 << 4) @@ -630,6 +635,20 @@ static inline int usb_endpoint_maxp(const struct usb_endpoint_descriptor *epd) return __le16_to_cpu(epd->wMaxPacketSize); } +/** + * usb_endpoint_maxp_mult - get endpoint's transactional opportunities + * @epd: endpoint to be checked + * + * Return @epd's wMaxPacketSize[12:11] + 1 + */ +static inline int +usb_endpoint_maxp_mult(const struct usb_endpoint_descriptor *epd) +{ + int maxp = __le16_to_cpu(epd->wMaxPacketSize); + + return USB_EP_MAXP_MULT(maxp) + 1; +} + static inline int usb_endpoint_interrupt_type( const struct usb_endpoint_descriptor *epd) { -- GitLab From f3e957266ae56c200fb13a42309c50f84576c64a Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 7 Dec 2017 14:16:48 -0700 Subject: [PATCH 0363/1398] usbip: fix stub_rx: harden CMD_SUBMIT path to handle malicious input commit c6688ef9f29762e65bce325ef4acd6c675806366 upstream. Harden CMD_SUBMIT path to handle malicious input that could trigger large memory allocations. Add checks to validate transfer_buffer_length and number_of_packets to protect against bad input requesting for unbounded memory allocations. Validate early in get_pipe() and return failure. Reported-by: Secunia Research Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_rx.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 4f03c611c16d..283a9be77a22 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -336,11 +336,13 @@ static struct stub_priv *stub_priv_alloc(struct stub_device *sdev, return priv; } -static int get_pipe(struct stub_device *sdev, int epnum, int dir) +static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu) { struct usb_device *udev = sdev->udev; struct usb_host_endpoint *ep; struct usb_endpoint_descriptor *epd = NULL; + int epnum = pdu->base.ep; + int dir = pdu->base.direction; if (epnum < 0 || epnum > 15) goto err_ret; @@ -353,6 +355,15 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir) goto err_ret; epd = &ep->desc; + + /* validate transfer_buffer_length */ + if (pdu->u.cmd_submit.transfer_buffer_length > INT_MAX) { + dev_err(&sdev->udev->dev, + "CMD_SUBMIT: -EMSGSIZE transfer_buffer_length %d\n", + pdu->u.cmd_submit.transfer_buffer_length); + return -1; + } + if (usb_endpoint_xfer_control(epd)) { if (dir == USBIP_DIR_OUT) return usb_sndctrlpipe(udev, epnum); @@ -375,6 +386,21 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir) } if (usb_endpoint_xfer_isoc(epd)) { + /* validate packet size and number of packets */ + unsigned int maxp, packets, bytes; + + maxp = usb_endpoint_maxp(epd); + maxp *= usb_endpoint_maxp_mult(epd); + bytes = pdu->u.cmd_submit.transfer_buffer_length; + packets = DIV_ROUND_UP(bytes, maxp); + + if (pdu->u.cmd_submit.number_of_packets < 0 || + pdu->u.cmd_submit.number_of_packets > packets) { + dev_err(&sdev->udev->dev, + "CMD_SUBMIT: isoc invalid num packets %d\n", + pdu->u.cmd_submit.number_of_packets); + return -1; + } if (dir == USBIP_DIR_OUT) return usb_sndisocpipe(udev, epnum); else @@ -383,7 +409,7 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir) err_ret: /* NOT REACHED */ - dev_err(&sdev->udev->dev, "get pipe() invalid epnum %d\n", epnum); + dev_err(&sdev->udev->dev, "CMD_SUBMIT: invalid epnum %d\n", epnum); return -1; } @@ -448,7 +474,7 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, struct stub_priv *priv; struct usbip_device *ud = &sdev->ud; struct usb_device *udev = sdev->udev; - int pipe = get_pipe(sdev, pdu->base.ep, pdu->base.direction); + int pipe = get_pipe(sdev, pdu); if (pipe == -1) return; @@ -470,7 +496,8 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, } /* allocate urb transfer buffer, if needed */ - if (pdu->u.cmd_submit.transfer_buffer_length > 0) { + if (pdu->u.cmd_submit.transfer_buffer_length > 0 && + pdu->u.cmd_submit.transfer_buffer_length <= INT_MAX) { priv->urb->transfer_buffer = kzalloc(pdu->u.cmd_submit.transfer_buffer_length, GFP_KERNEL); -- GitLab From 14513e49c43cd3149a03ff9e1c223c3d5803ad09 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 7 Dec 2017 14:16:50 -0700 Subject: [PATCH 0364/1398] usbip: fix stub_send_ret_submit() vulnerability to null transfer_buffer commit be6123df1ea8f01ee2f896a16c2b7be3e4557a5a upstream. stub_send_ret_submit() handles urb with a potential null transfer_buffer, when it replays a packet with potential malicious data that could contain a null buffer. Add a check for the condition when actual_length > 0 and transfer_buffer is null. Reported-by: Secunia Research Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_tx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c index be50cef645d8..87ff94be4235 100644 --- a/drivers/usb/usbip/stub_tx.c +++ b/drivers/usb/usbip/stub_tx.c @@ -181,6 +181,13 @@ static int stub_send_ret_submit(struct stub_device *sdev) memset(&pdu_header, 0, sizeof(pdu_header)); memset(&msg, 0, sizeof(msg)); + if (urb->actual_length > 0 && !urb->transfer_buffer) { + dev_err(&sdev->udev->dev, + "urb: actual_length %d transfer_buffer null\n", + urb->actual_length); + return -1; + } + if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) iovnum = 2 + urb->number_of_packets; else -- GitLab From e081bd0d70bddbf912f6f7f0f82e3e1ac2c52ae6 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 30 Nov 2017 11:59:22 +0800 Subject: [PATCH 0365/1398] ceph: drop negative child dentries before try pruning inode's alias commit 040d786032bf59002d374b86d75b04d97624005c upstream. Negative child dentry holds reference on inode's alias, it makes d_prune_aliases() do nothing. Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/mds_client.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c0f52c443c34..3d2639c30018 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1396,6 +1396,29 @@ static int __close_session(struct ceph_mds_client *mdsc, return request_close_session(mdsc, session); } +static bool drop_negative_children(struct dentry *dentry) +{ + struct dentry *child; + bool all_negative = true; + + if (!d_is_dir(dentry)) + goto out; + + spin_lock(&dentry->d_lock); + list_for_each_entry(child, &dentry->d_subdirs, d_child) { + if (d_really_is_positive(child)) { + all_negative = false; + break; + } + } + spin_unlock(&dentry->d_lock); + + if (all_negative) + shrink_dcache_parent(dentry); +out: + return all_negative; +} + /* * Trim old(er) caps. * @@ -1441,16 +1464,27 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) if ((used | wanted) & ~oissued & mine) goto out; /* we need these caps */ - session->s_trim_caps--; if (oissued) { /* we aren't the only cap.. just remove us */ __ceph_remove_cap(cap, true); + session->s_trim_caps--; } else { + struct dentry *dentry; /* try dropping referring dentries */ spin_unlock(&ci->i_ceph_lock); - d_prune_aliases(inode); - dout("trim_caps_cb %p cap %p pruned, count now %d\n", - inode, cap, atomic_read(&inode->i_count)); + dentry = d_find_any_alias(inode); + if (dentry && drop_negative_children(dentry)) { + int count; + dput(dentry); + d_prune_aliases(inode); + count = atomic_read(&inode->i_count); + if (count == 1) + session->s_trim_caps--; + dout("trim_caps_cb %p cap %p pruned, count now %d\n", + inode, cap, count); + } else { + dput(dentry); + } return 0; } -- GitLab From cdfe4c0091a8fa5e0a25d82fa0d4684382ac880d Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Fri, 8 Dec 2017 18:10:06 +0200 Subject: [PATCH 0366/1398] usb: xhci: fix TDS for MTK xHCI1.1 commit 72b663a99c074a8d073e7ecdae446cfb024ef551 upstream. For MTK's xHCI 1.0 or latter, TD size is the number of max packet sized packets remaining in the TD, not including this TRB (following spec). For MTK's xHCI 0.96 and older, TD size is the number of max packet sized packets remaining in the TD, including this TRB (not following spec). Signed-off-by: Chunfeng Yun Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 63735b5310bb..89a14d5f6ad8 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3132,7 +3132,7 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred, { u32 maxp, total_packet_count; - /* MTK xHCI is mostly 0.97 but contains some features from 1.0 */ + /* MTK xHCI 0.96 contains some features from 1.0 */ if (xhci->hci_version < 0x100 && !(xhci->quirks & XHCI_MTK_HOST)) return ((td_total_len - transferred) >> 10); @@ -3141,8 +3141,8 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred, trb_buff_len == td_total_len) return 0; - /* for MTK xHCI, TD size doesn't include this TRB */ - if (xhci->quirks & XHCI_MTK_HOST) + /* for MTK xHCI 0.96, TD size include this TRB, but not in 1.x */ + if ((xhci->quirks & XHCI_MTK_HOST) && (xhci->hci_version < 0x100)) trb_buff_len = 0; maxp = GET_MAX_PACKET(usb_endpoint_maxp(&urb->ep->desc)); -- GitLab From 7336f5481f6cf913a2d29d98c6e11f4bbe19d3b2 Mon Sep 17 00:00:00 2001 From: Sukumar Ghorai Date: Wed, 16 Aug 2017 14:46:55 -0700 Subject: [PATCH 0367/1398] Bluetooth: btusb: driver to enable the usb-wakeup feature commit a0085f2510e8976614ad8f766b209448b385492f upstream. BT-Controller connected as platform non-root-hub device and usb-driver initialize such device with wakeup disabled, Ref. usb_new_device(). At present wakeup-capability get enabled by hid-input device from usb function driver(e.g. BT HID device) at runtime. Again some functional driver does not set usb-wakeup capability(e.g LE HID device implement as HID-over-GATT), and can't wakeup the host on USB. Most of the device operation (such as mass storage) initiated from host (except HID) and USB wakeup aligned with host resume procedure. For BT device, usb-wakeup capability need to enable form btusc driver as a generic solution for multiple profile use case and required for USB remote wakeup (in-bus wakeup) while host is suspended. Also usb-wakeup feature need to enable/disable with HCI interface up and down. Signed-off-by: Sukumar Ghorai Signed-off-by: Amit K Bag Acked-by: Oliver Neukum Signed-off-by: Marcel Holtmann Cc: Matthias Kaehlcke Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 693028659ccc..3ae950c82922 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1059,6 +1059,10 @@ static int btusb_open(struct hci_dev *hdev) } data->intf->needs_remote_wakeup = 1; + /* device specific wakeup source enabled and required for USB + * remote wakeup while host is suspended + */ + device_wakeup_enable(&data->udev->dev); if (test_and_set_bit(BTUSB_INTR_RUNNING, &data->flags)) goto done; @@ -1122,6 +1126,7 @@ static int btusb_close(struct hci_dev *hdev) goto failed; data->intf->needs_remote_wakeup = 0; + device_wakeup_disable(&data->udev->dev); usb_autopm_put_interface(data->intf); failed: -- GitLab From 3bdb508d686e4943bfb761d78dc915dd825f811f Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 8 Dec 2017 18:10:05 +0200 Subject: [PATCH 0368/1398] xhci: Don't add a virt_dev to the devs array before it's fully allocated commit 5d9b70f7d52eb14bb37861c663bae44de9521c35 upstream. Avoid null pointer dereference if some function is walking through the devs array accessing members of a new virt_dev that is mid allocation. Add the virt_dev to xhci->devs[i] _after_ the virt_device and all its members are properly allocated. issue found by KASAN: null-ptr-deref in xhci_find_slot_id_by_port "Quick analysis suggests that xhci_alloc_virt_device() is not mutex protected. If so, there is a time frame where xhci->devs[slot_id] is set but not fully initialized. Specifically, xhci->devs[i]->udev can be NULL." Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index a3ecd8bd5324..82eea55a7b5c 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1032,10 +1032,9 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, return 0; } - xhci->devs[slot_id] = kzalloc(sizeof(*xhci->devs[slot_id]), flags); - if (!xhci->devs[slot_id]) + dev = kzalloc(sizeof(*dev), flags); + if (!dev) return 0; - dev = xhci->devs[slot_id]; /* Allocate the (output) device context that will be used in the HC. */ dev->out_ctx = xhci_alloc_container_ctx(xhci, XHCI_CTX_TYPE_DEVICE, flags); @@ -1083,9 +1082,17 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, &xhci->dcbaa->dev_context_ptrs[slot_id], le64_to_cpu(xhci->dcbaa->dev_context_ptrs[slot_id])); + xhci->devs[slot_id] = dev; + return 1; fail: - xhci_free_virt_device(xhci, slot_id); + + if (dev->in_ctx) + xhci_free_container_ctx(xhci, dev->in_ctx); + if (dev->out_ctx) + xhci_free_container_ctx(xhci, dev->out_ctx); + kfree(dev); + return 0; } -- GitLab From 9c537f06d61ada88ccc8080be79213ffce1ebf15 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 8 Dec 2017 16:00:12 -0500 Subject: [PATCH 0369/1398] nfs: don't wait on commit in nfs_commit_inode() if there were no commit requests commit dc4fd9ab01ab379ae5af522b3efd4187a7c30a31 upstream. If there were no commit requests, then nfs_commit_inode() should not wait on the commit or mark the inode dirty, otherwise the following BUG_ON can be triggered: [ 1917.130762] kernel BUG at fs/inode.c:578! [ 1917.130766] Oops: Exception in kernel mode, sig: 5 [#1] [ 1917.130768] SMP NR_CPUS=2048 NUMA pSeries [ 1917.130772] Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi blocklayoutdriver rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache sunrpc sg nx_crypto pseries_rng ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic crct10dif_common ibmvscsi scsi_transport_srp ibmveth scsi_tgt dm_mirror dm_region_hash dm_log dm_mod [ 1917.130805] CPU: 2 PID: 14923 Comm: umount.nfs4 Tainted: G ------------ T 3.10.0-768.el7.ppc64 #1 [ 1917.130810] task: c0000005ecd88040 ti: c00000004cea0000 task.ti: c00000004cea0000 [ 1917.130813] NIP: c000000000354178 LR: c000000000354160 CTR: c00000000012db80 [ 1917.130816] REGS: c00000004cea3720 TRAP: 0700 Tainted: G ------------ T (3.10.0-768.el7.ppc64) [ 1917.130820] MSR: 8000000100029032 CR: 22002822 XER: 20000000 [ 1917.130828] CFAR: c00000000011f594 SOFTE: 1 GPR00: c000000000354160 c00000004cea39a0 c0000000014c4700 c0000000018cc750 GPR04: 000000000000c750 80c0000000000000 0600000000000000 04eeb76bea749a03 GPR08: 0000000000000034 c0000000018cc758 0000000000000001 d000000005e619e8 GPR12: c00000000012db80 c000000007b31200 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 c000000000dfc3ec 0000000000000000 c0000005eefc02c0 GPR28: d0000000079dbd50 c0000005b94a02c0 c0000005b94a0250 c0000005b94a01c8 [ 1917.130867] NIP [c000000000354178] .evict+0x1c8/0x350 [ 1917.130871] LR [c000000000354160] .evict+0x1b0/0x350 [ 1917.130873] Call Trace: [ 1917.130876] [c00000004cea39a0] [c000000000354160] .evict+0x1b0/0x350 (unreliable) [ 1917.130880] [c00000004cea3a30] [c0000000003558cc] .evict_inodes+0x13c/0x270 [ 1917.130884] [c00000004cea3af0] [c000000000327d20] .kill_anon_super+0x70/0x1e0 [ 1917.130896] [c00000004cea3b80] [d000000005e43e30] .nfs_kill_super+0x20/0x60 [nfs] [ 1917.130900] [c00000004cea3c00] [c000000000328a20] .deactivate_locked_super+0xa0/0x1b0 [ 1917.130903] [c00000004cea3c80] [c00000000035ba54] .cleanup_mnt+0xd4/0x180 [ 1917.130907] [c00000004cea3d10] [c000000000119034] .task_work_run+0x114/0x150 [ 1917.130912] [c00000004cea3db0] [c00000000001ba6c] .do_notify_resume+0xcc/0x100 [ 1917.130916] [c00000004cea3e30] [c00000000000a7b0] .ret_from_except_lite+0x5c/0x60 [ 1917.130919] Instruction dump: [ 1917.130921] 7fc3f378 486734b5 60000000 387f00a0 38800003 4bdcb365 60000000 e95f00a0 [ 1917.130927] 694a0060 7d4a0074 794ad182 694a0001 <0b0a0000> 892d02a4 2f890000 40de0134 Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/write.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e4772a8340f8..9905735463a4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1859,6 +1859,8 @@ int nfs_commit_inode(struct inode *inode, int how) if (res) error = nfs_generic_commit_list(inode, &head, how, &cinfo); nfs_commit_end(cinfo.mds); + if (res == 0) + return res; if (error < 0) goto out_error; if (!may_wait) -- GitLab From d266817f5028fef8ff521a77777ef1c4b8de890e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 2 Dec 2017 13:04:54 -0500 Subject: [PATCH 0370/1398] sched/rt: Do not pull from current CPU if only one CPU to pull commit f73c52a5bcd1710994e53fbccc378c42b97a06b6 upstream. Daniel Wagner reported a crash on the BeagleBone Black SoC. This is a single CPU architecture, and does not have a functional arch_send_call_function_single_ipi() implementation which can crash the kernel if that is called. As it only has one CPU, it shouldn't be called, but if the kernel is compiled for SMP, the push/pull RT scheduling logic now calls it for irq_work if the one CPU is overloaded, it can use that function to call itself and crash the kernel. Ideally, we should disable the SCHED_FEAT(RT_PUSH_IPI) if the system only has a single CPU. But SCHED_FEAT is a constant if sched debugging is turned off. Another fix can also be used, and this should also help with normal SMP machines. That is, do not initiate the pull code if there's only one RT overloaded CPU, and that CPU happens to be the current CPU that is scheduling in a lower priority task. Even on a system with many CPUs, if there's many RT tasks waiting to run on a single CPU, and that CPU schedules in another RT task of lower priority, it will initiate the PULL logic in case there's a higher priority RT task on another CPU that is waiting to run. But if there is no other CPU with waiting RT tasks, it will initiate the RT pull logic on itself (as it still has RT tasks waiting to run). This is a wasted effort. Not only does this help with SMP code where the current CPU is the only one with RT overloaded tasks, it should also solve the issue that Daniel encountered, because it will prevent the PULL logic from executing, as there's only one CPU on the system, and the check added here will cause it to exit the RT pull code. Reported-by: Daniel Wagner Signed-off-by: Steven Rostedt (VMware) Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: linux-rt-users Fixes: 4bdced5c9 ("sched/rt: Simplify the IPI based RT balancing logic") Link: http://lkml.kernel.org/r/20171202130454.4cbbfe8d@vmware.local.home Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/rt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 9c131168d933..7a360d6f6798 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2022,8 +2022,9 @@ static void pull_rt_task(struct rq *this_rq) bool resched = false; struct task_struct *p; struct rq *src_rq; + int rt_overload_count = rt_overloaded(this_rq); - if (likely(!rt_overloaded(this_rq))) + if (likely(!rt_overload_count)) return; /* @@ -2032,6 +2033,11 @@ static void pull_rt_task(struct rq *this_rq) */ smp_rmb(); + /* If we are the only overloaded CPU do nothing */ + if (rt_overload_count == 1 && + cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask)) + return; + #ifdef HAVE_RT_PUSH_IPI if (sched_feat(RT_PUSH_IPI)) { tell_cpu_to_push(this_rq); -- GitLab From 744cb5ab337218e7a258d2a82c6f5d7afc72cd16 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Sun, 3 Dec 2017 19:54:41 -0600 Subject: [PATCH 0371/1398] eeprom: at24: change nvmem stride to 1 commit 7f6d2ecd3d7acaf205ea7b3e96f9ffc55b92298b upstream. Trying to read the MAC address from an eeprom that has an offset that is not a multiple of 4 causes an error currently. Fix it by changing the nvmem stride to 1. Signed-off-by: David Lechner [Bartosz: tweaked the commit message] Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/at24.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 19c10dc56513..d8a485f1798b 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -783,7 +783,7 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) at24->nvmem_config.reg_read = at24_read; at24->nvmem_config.reg_write = at24_write; at24->nvmem_config.priv = at24; - at24->nvmem_config.stride = 4; + at24->nvmem_config.stride = 1; at24->nvmem_config.word_size = 1; at24->nvmem_config.size = chip.byte_len; -- GitLab From 679dbeac0b6bb551e1f3b95673695b22b2ac953d Mon Sep 17 00:00:00 2001 From: Adam Wallis Date: Mon, 27 Nov 2017 10:45:01 -0500 Subject: [PATCH 0372/1398] dmaengine: dmatest: move callback wait queue to thread context commit 6f6a23a213be51728502b88741ba6a10cda2441d upstream. Commit adfa543e7314 ("dmatest: don't use set_freezable_with_signal()") introduced a bug (that is in fact documented by the patch commit text) that leaves behind a dangling pointer. Since the done_wait structure is allocated on the stack, future invocations to the DMATEST can produce undesirable results (e.g., corrupted spinlocks). Commit a9df21e34b42 ("dmaengine: dmatest: warn user when dma test times out") attempted to WARN the user that the stack was likely corrupted but did not fix the actual issue. This patch fixes the issue by pushing the wait queue and callback structs into the the thread structure. If a failure occurs due to time, dmaengine_terminate_all will force the callback to safely call wake_up_all() without possibility of using a freed pointer. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=197605 Fixes: adfa543e7314 ("dmatest: don't use set_freezable_with_signal()") Reviewed-by: Sinan Kaya Suggested-by: Shunyong Yang Signed-off-by: Adam Wallis Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dmatest.c | 55 ++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index fbb75514dfb4..e0bd578a253a 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -158,6 +158,12 @@ MODULE_PARM_DESC(run, "Run the test (default: false)"); #define PATTERN_OVERWRITE 0x20 #define PATTERN_COUNT_MASK 0x1f +/* poor man's completion - we want to use wait_event_freezable() on it */ +struct dmatest_done { + bool done; + wait_queue_head_t *wait; +}; + struct dmatest_thread { struct list_head node; struct dmatest_info *info; @@ -166,6 +172,8 @@ struct dmatest_thread { u8 **srcs; u8 **dsts; enum dma_transaction_type type; + wait_queue_head_t done_wait; + struct dmatest_done test_done; bool done; }; @@ -326,18 +334,25 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start, return error_count; } -/* poor man's completion - we want to use wait_event_freezable() on it */ -struct dmatest_done { - bool done; - wait_queue_head_t *wait; -}; static void dmatest_callback(void *arg) { struct dmatest_done *done = arg; - - done->done = true; - wake_up_all(done->wait); + struct dmatest_thread *thread = + container_of(arg, struct dmatest_thread, done_wait); + if (!thread->done) { + done->done = true; + wake_up_all(done->wait); + } else { + /* + * If thread->done, it means that this callback occurred + * after the parent thread has cleaned up. This can + * happen in the case that driver doesn't implement + * the terminate_all() functionality and a dma operation + * did not occur within the timeout period + */ + WARN(1, "dmatest: Kernel memory may be corrupted!!\n"); + } } static unsigned int min_odd(unsigned int x, unsigned int y) @@ -408,9 +423,8 @@ static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len) */ static int dmatest_func(void *data) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait); struct dmatest_thread *thread = data; - struct dmatest_done done = { .wait = &done_wait }; + struct dmatest_done *done = &thread->test_done; struct dmatest_info *info; struct dmatest_params *params; struct dma_chan *chan; @@ -637,9 +651,9 @@ static int dmatest_func(void *data) continue; } - done.done = false; + done->done = false; tx->callback = dmatest_callback; - tx->callback_param = &done; + tx->callback_param = done; cookie = tx->tx_submit(tx); if (dma_submit_error(cookie)) { @@ -652,21 +666,12 @@ static int dmatest_func(void *data) } dma_async_issue_pending(chan); - wait_event_freezable_timeout(done_wait, done.done, + wait_event_freezable_timeout(thread->done_wait, done->done, msecs_to_jiffies(params->timeout)); status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); - if (!done.done) { - /* - * We're leaving the timed out dma operation with - * dangling pointer to done_wait. To make this - * correct, we'll need to allocate wait_done for - * each test iteration and perform "who's gonna - * free it this time?" dancing. For now, just - * leave it dangling. - */ - WARN(1, "dmatest: Kernel stack may be corrupted!!\n"); + if (!done->done) { dmaengine_unmap_put(um); result("test timed out", total_tests, src_off, dst_off, len, 0); @@ -747,7 +752,7 @@ static int dmatest_func(void *data) dmatest_KBs(runtime, total_len), ret); /* terminate all transfers on specified channels */ - if (ret) + if (ret || failed_tests) dmaengine_terminate_all(chan); thread->done = true; @@ -807,6 +812,8 @@ static int dmatest_add_threads(struct dmatest_info *info, thread->info = info; thread->chan = dtc->chan; thread->type = type; + thread->test_done.wait = &thread->done_wait; + init_waitqueue_head(&thread->done_wait); smp_wmb(); thread->task = kthread_create(dmatest_func, thread, "%s-%s%u", dma_chan_name(chan), op, i); -- GitLab From 6a851bb99e5c6569e51eb798d6a38b759fdb3302 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Sun, 3 Dec 2017 22:52:51 -0500 Subject: [PATCH 0373/1398] ext4: fix fdatasync(2) after fallocate(2) operation commit c894aa97577e47d3066b27b32499ecf899bfa8b0 upstream. Currently, fallocate(2) with KEEP_SIZE followed by a fdatasync(2) then crash, we'll see wrong allocated block number (stat -c %b), the blocks allocated beyond EOF are all lost. fstests generic/468 exposes this bug. Commit 67a7d5f561f4 ("ext4: fix fdatasync(2) after extent manipulation operations") fixed all the other extent manipulation operation paths such as hole punch, zero range, collapse range etc., but forgot the fallocate case. So similarly, fix it by recording the correct journal tid in ext4 inode in fallocate(2) path, so that ext4_sync_file() will wait for the right tid to be committed on fdatasync(2). This addresses the test failure in xfstests test generic/468. Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a77cbc5b657b..1a0c57100f28 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4731,6 +4731,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, EXT4_INODE_EOFBLOCKS); } ext4_mark_inode_dirty(handle, inode); + ext4_update_inode_fsync_trans(handle, inode, 1); ret2 = ext4_journal_stop(handle); if (ret2) break; -- GitLab From 32e2ae03283b4047f5af9fd1178fe24f38e96601 Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Mon, 11 Dec 2017 15:00:57 -0500 Subject: [PATCH 0374/1398] ext4: fix crash when a directory's i_size is too small commit 9d5afec6b8bd46d6ed821aa1579634437f58ef1f upstream. On a ppc64 machine, when mounting a fuzzed ext2 image (generated by fsfuzzer) the following call trace is seen, VFS: brelse: Trying to free free buffer WARNING: CPU: 1 PID: 6913 at /root/repos/linux/fs/buffer.c:1165 .__brelse.part.6+0x24/0x40 .__brelse.part.6+0x20/0x40 (unreliable) .ext4_find_entry+0x384/0x4f0 .ext4_lookup+0x84/0x250 .lookup_slow+0xdc/0x230 .walk_component+0x268/0x400 .path_lookupat+0xec/0x2d0 .filename_lookup+0x9c/0x1d0 .vfs_statx+0x98/0x140 .SyS_newfstatat+0x48/0x80 system_call+0x58/0x6c This happens because the directory that ext4_find_entry() looks up has inode->i_size that is less than the block size of the filesystem. This causes 'nblocks' to have a value of zero. ext4_bread_batch() ends up not reading any of the directory file's blocks. This renders the entries in bh_use[] array to continue to have garbage data. buffer_uptodate() on bh_use[0] can then return a zero value upon which brelse() function is invoked. This commit fixes the bug by returning -ENOENT when the directory file has no associated blocks. Reported-by: Abdul Haleem Signed-off-by: Chandan Rajendra Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4438b93f6fd6..b1766a67d2eb 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1417,6 +1417,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, "falling back\n")); } nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); + if (!nblocks) { + ret = NULL; + goto cleanup_and_exit; + } start = EXT4_I(dir)->i_dir_start_lookup; if (start >= nblocks) start = 0; -- GitLab From bf864220a59c51a676717cbee7f346a400c5ab47 Mon Sep 17 00:00:00 2001 From: Ilan peer Date: Mon, 26 Dec 2016 18:17:36 +0200 Subject: [PATCH 0375/1398] mac80211: Fix addition of mesh configuration element MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 57629915d568c522ac1422df7bba4bee5b5c7a7c upstream. The code was setting the capabilities byte to zero, after it was already properly set previously. Fix it. The bug was found while debugging hwsim mesh tests failures that happened since the commit mentioned below. Fixes: 76f43b4c0a93 ("mac80211: Remove invalid flag operations in mesh TSF synchronization") Signed-off-by: Ilan Peer Reviewed-by: Masashi Honma Signed-off-by: Johannes Berg Cc: Richard Schütz Cc: Mathias Kretschmer Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mesh.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 5c67a696e046..b4b3fe078868 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -279,8 +279,6 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, /* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */ *pos |= ifmsh->ps_peers_deep_sleep ? IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00; - *pos++ = 0x00; - return 0; } -- GitLab From 50fc2d4152fb03bfbdec186403ce5c49fd0ff77e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 22 Feb 2017 15:23:22 -0300 Subject: [PATCH 0376/1398] usb: phy: isp1301: Add OF device ID table [ Upstream commit fd567653bdb908009b650f079bfd4b63169e2ac4 ] The driver doesn't have a struct of_device_id table but supported devices are registered via Device Trees. This is working on the assumption that a I2C device registered via OF will always match a legacy I2C device ID and that the MODALIAS reported will always be of the form i2c:. But this could change in the future so the correct approach is to have an OF device ID table if the devices are registered via OF. Signed-off-by: Javier Martinez Canillas Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-isp1301.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/phy/phy-isp1301.c b/drivers/usb/phy/phy-isp1301.c index db68156568e6..b3b33cf7ddf6 100644 --- a/drivers/usb/phy/phy-isp1301.c +++ b/drivers/usb/phy/phy-isp1301.c @@ -33,6 +33,12 @@ static const struct i2c_device_id isp1301_id[] = { }; MODULE_DEVICE_TABLE(i2c, isp1301_id); +static const struct of_device_id isp1301_of_match[] = { + {.compatible = "nxp,isp1301" }, + { }, +}; +MODULE_DEVICE_TABLE(of, isp1301_of_match); + static struct i2c_client *isp1301_i2c_client; static int __isp1301_write(struct isp1301 *isp, u8 reg, u8 value, u8 clear) @@ -130,6 +136,7 @@ static int isp1301_remove(struct i2c_client *client) static struct i2c_driver isp1301_driver = { .driver = { .name = DRV_NAME, + .of_match_table = of_match_ptr(isp1301_of_match), }, .probe = isp1301_probe, .remove = isp1301_remove, -- GitLab From 215df1f35542081485cff549830ff01af98c5279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 7 Mar 2017 17:51:49 +0100 Subject: [PATCH 0377/1398] KVM: nVMX: do not warn when MSR bitmap address is not backed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 05d8d34611139f8435af90ac54b65eb31e82e388 ] Before trying to do nested_get_page() in nested_vmx_merge_msr_bitmap(), we have already checked that the MSR bitmap address is valid (4k aligned and within physical limits). SDM doesn't specify what happens if the there is no memory mapped at the valid address, but Intel CPUs treat the situation as if the bitmap was configured to trap all MSRs. KVM already does that by returning false and a correct handling doesn't need the guest-trigerrable warning that was reported by syzkaller: (The warning was originally there to catch some possible bugs in nVMX.) ------------[ cut here ]------------ WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709 nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline] WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709 nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 7832 Comm: syz-executor1 Not tainted 4.10.0+ #229 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 panic+0x1fb/0x412 kernel/panic.c:179 __warn+0x1c4/0x1e0 kernel/panic.c:540 warn_slowpath_null+0x2c/0x40 kernel/panic.c:583 nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline] nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640 enter_vmx_non_root_mode arch/x86/kvm/vmx.c:10471 [inline] nested_vmx_run+0x6186/0xaab0 arch/x86/kvm/vmx.c:10561 handle_vmlaunch+0x1a/0x20 arch/x86/kvm/vmx.c:7312 vmx_handle_exit+0xfc0/0x3f00 arch/x86/kvm/vmx.c:8526 vcpu_enter_guest arch/x86/kvm/x86.c:6982 [inline] vcpu_run arch/x86/kvm/x86.c:7044 [inline] kvm_arch_vcpu_ioctl_run+0x1418/0x4840 arch/x86/kvm/x86.c:7205 kvm_vcpu_ioctl+0x673/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2570 Reported-by: Dmitry Vyukov Reviewed-by: Jim Mattson [Jim Mattson explained the bare metal behavior: "I believe this behavior would be documented in the chipset data sheet rather than the SDM, since the chipset returns all 1s for an unclaimed read."] Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9aa62ab13ae8..a929ca03b7ed 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9543,10 +9543,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, return false; page = nested_get_page(vcpu, vmcs12->msr_bitmap); - if (!page) { - WARN_ON(1); + if (!page) return false; - } msr_bitmap_l1 = (unsigned long *)kmap(page); if (!msr_bitmap_l1) { nested_release_page_clean(page); -- GitLab From 9841d7b08ff6285123663acddd0ec7aa292402b7 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 9 Mar 2017 15:39:34 +0200 Subject: [PATCH 0378/1398] usb: xhci-mtk: check hcc_params after adding primary hcd [ Upstream commit 94a631d91ad341b3b4bdac72d1104d9f090e0ca9 ] hcc_params is set in xhci_gen_setup() called from usb_add_hcd(), so checks the Maximum Primary Stream Array Size in the hcc_params register after adding primary hcd. Signed-off-by: Chunfeng Yun Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index f2365a47fa4a..ce9e457e60c3 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -632,13 +632,13 @@ static int xhci_mtk_probe(struct platform_device *pdev) goto power_off_phys; } - if (HCC_MAX_PSA(xhci->hcc_params) >= 4) - xhci->shared_hcd->can_do_streams = 1; - ret = usb_add_hcd(hcd, irq, IRQF_SHARED); if (ret) goto put_usb3_hcd; + if (HCC_MAX_PSA(xhci->hcc_params) >= 4) + xhci->shared_hcd->can_do_streams = 1; + ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED); if (ret) goto dealloc_usb2_hcd; -- GitLab From 9bcd15bdfb6151e0418fa9321d4dce2b2eb0cb16 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 24 Feb 2017 11:15:12 +0800 Subject: [PATCH 0379/1398] md-cluster: free md_cluster_info if node leave cluster [ Upstream commit 9c8043f337f14d1743006dfc59c03e80a42e3884 ] To avoid memory leak, we need to free the cinfo which is allocated when node join cluster. Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/md-cluster.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 2b13117fb918..ba7edcdd09ce 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -974,6 +974,7 @@ static int leave(struct mddev *mddev) lockres_free(cinfo->bitmap_lockres); unlock_all_bitmaps(mddev); dlm_release_lockspace(cinfo->lockspace, 2); + kfree(cinfo); return 0; } -- GitLab From 275314e90c5e84c972be2da278cee60279e07fc7 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:16:28 -0800 Subject: [PATCH 0380/1398] userfaultfd: shmem: __do_fault requires VM_FAULT_NOPAGE [ Upstream commit 6bbc4a4144b1a69743022ac68dfaf6e7d993abb9 ] __do_fault assumes vmf->page has been initialized and is valid if VM_FAULT_NOPAGE is not returned by vma->vm_ops->fault(vma, vmf). handle_userfault() in turn should return VM_FAULT_NOPAGE if it doesn't return VM_FAULT_SIGBUS or VM_FAULT_RETRY (the other two possibilities). This VM_FAULT_NOPAGE case is only invoked when signal are pending and it didn't matter for anonymous memory before. It only started to matter since shmem was introduced. hugetlbfs also takes a different path and doesn't exercise __do_fault. Link: http://lkml.kernel.org/r/20170228154201.GH5816@redhat.com Signed-off-by: Andrea Arcangeli Reported-by: Dmitry Vyukov Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/userfaultfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index b86054cc41db..784d667475ae 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -419,7 +419,7 @@ int handle_userfault(struct fault_env *fe, unsigned long reason) * in such case. */ down_read(&mm->mmap_sem); - ret = 0; + ret = VM_FAULT_NOPAGE; } } -- GitLab From 6783015096dc66dee0035baeec410f16b6ee2d84 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:17:14 -0800 Subject: [PATCH 0381/1398] userfaultfd: selftest: vm: allow to build in vm/ directory [ Upstream commit 46aa6a302b53f543f8e8b8e1714dc5e449ad36a6 ] linux/tools/testing/selftests/vm $ make gcc -Wall -I ../../../../usr/include compaction_test.c -lrt -o /compaction_test /usr/lib/gcc/x86_64-pc-linux-gnu/4.9.4/../../../../x86_64-pc-linux-gnu/bin/ld: cannot open output file /compaction_test: Permission denied collect2: error: ld returned 1 exit status make: *** [../lib.mk:54: /compaction_test] Error 1 Since commit a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT") selftests/vm build fails if run from the "selftests/vm" directory, but it works in the selftests/ directory. It's quicker to be able to do a local vm-only build after a tree wipe and this patch allows for it again. Link: http://lkml.kernel.org/r/20170302173738.18994-4-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Cc: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/vm/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index bbab7f4664ac..d116a19477a7 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,5 +1,9 @@ # Makefile for vm selftests +ifndef OUTPUT + OUTPUT := $(shell pwd) +endif + CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) BINARIES = compaction_test BINARIES += hugepage-mmap -- GitLab From ae0ebdba96674c278674d2b12eedde0681e42f91 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 8 Mar 2017 18:08:16 +0100 Subject: [PATCH 0382/1398] net: initialize msg.msg_flags in recvfrom [ Upstream commit 9f138fa609c47403374a862a08a41394be53d461 ] KMSAN reports a use of uninitialized memory in put_cmsg() because msg.msg_flags in recvfrom haven't been initialized properly. The flag values don't affect the result on this path, but it's still a good idea to initialize them explicitly. Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/socket.c b/net/socket.c index 6bbccf05854f..05f13b24572c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1702,6 +1702,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, /* We assume all kernel code knows the size of sockaddr_storage */ msg.msg_namelen = 0; msg.msg_iocb = NULL; + msg.msg_flags = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, flags); -- GitLab From 72cd0c3f66165cce2c91012382a57800f54e3bff Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 8 Mar 2017 18:44:35 -0500 Subject: [PATCH 0383/1398] bnxt_en: Ignore 0 value in autoneg supported speed from firmware. [ Upstream commit 520ad89a54edea84496695d528f73ddcf4a52ea4 ] In some situations, the firmware will return 0 for autoneg supported speed. This may happen if the firmware detects no SFP module, for example. The driver should ignore this so that we don't end up with an invalid autoneg setting with nothing advertised. When SFP module is inserted, we'll get the updated settings from firmware at that time. Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5d2cf56aed0e..0b894d76aa41 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5132,8 +5132,9 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp) bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) & PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK; } - link_info->support_auto_speeds = - le16_to_cpu(resp->supported_speeds_auto_mode); + if (resp->supported_speeds_auto_mode) + link_info->support_auto_speeds = + le16_to_cpu(resp->supported_speeds_auto_mode); hwrm_phy_qcaps_exit: mutex_unlock(&bp->hwrm_cmd_lock); -- GitLab From bb5c42a5b1d2b484939c1a1d18306853db8fcba9 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:43 -0800 Subject: [PATCH 0384/1398] net: bcmgenet: correct the RBUF_OVFL_CNT and RBUF_ERR_CNT MIB values [ Upstream commit ffff71328a3c321f7c14cc1edd33577717037744 ] The location of the RBUF overflow and error counters has moved between different version of the GENET MAC. This commit corrects the driver to read from the correct locations depending on the version of the GENET MAC. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/broadcom/genet/bcmgenet.c | 60 ++++++++++++++++--- .../net/ethernet/broadcom/genet/bcmgenet.h | 10 +++- 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 0975af2903ef..a751761d6121 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1,7 +1,7 @@ /* * Broadcom GENET (Gigabit Ethernet) controller driver * - * Copyright (c) 2014 Broadcom Corporation + * Copyright (c) 2014-2017 Broadcom * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -778,8 +778,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { STAT_GENET_RUNT("rx_runt_bytes", mib.rx_runt_bytes), /* Misc UniMAC counters */ STAT_GENET_MISC("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt, - UMAC_RBUF_OVFL_CNT), - STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT), + UMAC_RBUF_OVFL_CNT_V1), + STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, + UMAC_RBUF_ERR_CNT_V1), STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT), STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed), @@ -821,6 +822,45 @@ static void bcmgenet_get_strings(struct net_device *dev, u32 stringset, } } +static u32 bcmgenet_update_stat_misc(struct bcmgenet_priv *priv, u16 offset) +{ + u16 new_offset; + u32 val; + + switch (offset) { + case UMAC_RBUF_OVFL_CNT_V1: + if (GENET_IS_V2(priv)) + new_offset = RBUF_OVFL_CNT_V2; + else + new_offset = RBUF_OVFL_CNT_V3PLUS; + + val = bcmgenet_rbuf_readl(priv, new_offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_rbuf_writel(priv, 0, new_offset); + break; + case UMAC_RBUF_ERR_CNT_V1: + if (GENET_IS_V2(priv)) + new_offset = RBUF_ERR_CNT_V2; + else + new_offset = RBUF_ERR_CNT_V3PLUS; + + val = bcmgenet_rbuf_readl(priv, new_offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_rbuf_writel(priv, 0, new_offset); + break; + default: + val = bcmgenet_umac_readl(priv, offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_umac_writel(priv, 0, offset); + break; + } + + return val; +} + static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) { int i, j = 0; @@ -845,10 +885,16 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) UMAC_MIB_START + j + offset); break; case BCMGENET_STAT_MISC: - val = bcmgenet_umac_readl(priv, s->reg_offset); - /* clear if overflowed */ - if (val == ~0) - bcmgenet_umac_writel(priv, 0, s->reg_offset); + if (GENET_IS_V1(priv)) { + val = bcmgenet_umac_readl(priv, s->reg_offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_umac_writel(priv, 0, + s->reg_offset); + } else { + val = bcmgenet_update_stat_misc(priv, + s->reg_offset); + } break; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 1e2dc34d331a..d5fb0d772dcd 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Broadcom Corporation + * Copyright (c) 2014-2017 Broadcom * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -214,7 +214,9 @@ struct bcmgenet_mib_counters { #define MDIO_REG_SHIFT 16 #define MDIO_REG_MASK 0x1F -#define UMAC_RBUF_OVFL_CNT 0x61C +#define UMAC_RBUF_OVFL_CNT_V1 0x61C +#define RBUF_OVFL_CNT_V2 0x80 +#define RBUF_OVFL_CNT_V3PLUS 0x94 #define UMAC_MPD_CTRL 0x620 #define MPD_EN (1 << 0) @@ -224,7 +226,9 @@ struct bcmgenet_mib_counters { #define UMAC_MPD_PW_MS 0x624 #define UMAC_MPD_PW_LS 0x628 -#define UMAC_RBUF_ERR_CNT 0x634 +#define UMAC_RBUF_ERR_CNT_V1 0x634 +#define RBUF_ERR_CNT_V2 0x84 +#define RBUF_ERR_CNT_V3PLUS 0x98 #define UMAC_MDF_ERR_CNT 0x638 #define UMAC_MDF_CTRL 0x650 #define UMAC_MDF_ADDR 0x654 -- GitLab From dc8d63c43af042d5740c29f23906788ffbb6eaf4 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:44 -0800 Subject: [PATCH 0385/1398] net: bcmgenet: correct MIB access of UniMAC RUNT counters [ Upstream commit 1ad3d225e5a40ca6c586989b4baaca710544c15a ] The gap between the Tx status counters and the Rx RUNT counters is now being added to allow correct reporting of the registers. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index a751761d6121..2e5e206e906e 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -876,13 +876,16 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) case BCMGENET_STAT_NETDEV: case BCMGENET_STAT_SOFT: continue; - case BCMGENET_STAT_MIB_RX: - case BCMGENET_STAT_MIB_TX: case BCMGENET_STAT_RUNT: - if (s->type != BCMGENET_STAT_MIB_RX) - offset = BCMGENET_STAT_OFFSET; + offset += BCMGENET_STAT_OFFSET; + /* fall through */ + case BCMGENET_STAT_MIB_TX: + offset += BCMGENET_STAT_OFFSET; + /* fall through */ + case BCMGENET_STAT_MIB_RX: val = bcmgenet_umac_readl(priv, UMAC_MIB_START + j + offset); + offset = 0; /* Reset Offset */ break; case BCMGENET_STAT_MISC: if (GENET_IS_V1(priv)) { -- GitLab From 66e522ab02cc7ae47cb8f1247e7b74efdc16072e Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:45 -0800 Subject: [PATCH 0386/1398] net: bcmgenet: reserved phy revisions must be checked first [ Upstream commit eca4bad73409aedc6ff22f823c18b67a4f08c851 ] The reserved gphy_rev value of 0x01ff must be tested before the old or new scheme for GPHY major versioning are tested, otherwise it will be treated as 0xff00 according to the old scheme. Fixes: b04a2f5b9ff5 ("net: bcmgenet: add support for new GENET PHY revision scheme") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 2e5e206e906e..1eac98e42c1d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3233,6 +3233,12 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) */ gphy_rev = reg & 0xffff; + /* This is reserved so should require special treatment */ + if (gphy_rev == 0 || gphy_rev == 0x01ff) { + pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev); + return; + } + /* This is the good old scheme, just GPHY major, no minor nor patch */ if ((gphy_rev & 0xf0) != 0) priv->gphy_rev = gphy_rev << 8; @@ -3241,12 +3247,6 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) else if ((gphy_rev & 0xff00) != 0) priv->gphy_rev = gphy_rev; - /* This is reserved so should require special treatment */ - else if (gphy_rev == 0 || gphy_rev == 0x01ff) { - pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev); - return; - } - #ifdef CONFIG_PHYS_ADDR_T_64BIT if (!(params->flags & GENET_HAS_40BITS)) pr_warn("GENET does not support 40-bits PA\n"); -- GitLab From 4c3727f6add57b03197b25af01dd1aa26090ed00 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:46 -0800 Subject: [PATCH 0387/1398] net: bcmgenet: power down internal phy if open or resume fails [ Upstream commit 7627409cc4970e8c8b9de6945ad86a575290a94e ] Since the internal PHY is powered up during the open and resume functions it should be powered back down if the functions fail. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 1eac98e42c1d..b5fb74c463d1 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2857,6 +2857,8 @@ static int bcmgenet_open(struct net_device *dev) err_fini_dma: bcmgenet_fini_dma(priv); err_clk_disable: + if (priv->internal_phy) + bcmgenet_power_down(priv, GENET_POWER_PASSIVE); clk_disable_unprepare(priv->clk); return ret; } @@ -3560,6 +3562,8 @@ static int bcmgenet_resume(struct device *d) return 0; out_clk_disable: + if (priv->internal_phy) + bcmgenet_power_down(priv, GENET_POWER_PASSIVE); clk_disable_unprepare(priv->clk); return ret; } -- GitLab From f9ac24794f2e688336c64686129df6d2133c9c0b Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:47 -0800 Subject: [PATCH 0388/1398] net: bcmgenet: synchronize irq0 status between the isr and task [ Upstream commit 07c52d6a0b955a8a28834f9354793cfc4b81d0e9 ] Add a spinlock to ensure that irq0_stat is not unintentionally altered as the result of preemption. Also removed unserviced irq0 interrupts and removed irq1_stat since there is no bottom half service for those interrupts. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/broadcom/genet/bcmgenet.c | 73 ++++++++++--------- .../net/ethernet/broadcom/genet/bcmgenet.h | 6 +- 2 files changed, 44 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index b5fb74c463d1..b7be36e201d7 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2513,24 +2513,28 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) /* Interrupt bottom half */ static void bcmgenet_irq_task(struct work_struct *work) { + unsigned long flags; + unsigned int status; struct bcmgenet_priv *priv = container_of( work, struct bcmgenet_priv, bcmgenet_irq_work); netif_dbg(priv, intr, priv->dev, "%s\n", __func__); - if (priv->irq0_stat & UMAC_IRQ_MPD_R) { - priv->irq0_stat &= ~UMAC_IRQ_MPD_R; + spin_lock_irqsave(&priv->lock, flags); + status = priv->irq0_stat; + priv->irq0_stat = 0; + spin_unlock_irqrestore(&priv->lock, flags); + + if (status & UMAC_IRQ_MPD_R) { netif_dbg(priv, wol, priv->dev, "magic packet detected, waking up\n"); bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC); } /* Link UP/DOWN event */ - if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) { + if (status & UMAC_IRQ_LINK_EVENT) phy_mac_interrupt(priv->phydev, - !!(priv->irq0_stat & UMAC_IRQ_LINK_UP)); - priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT; - } + !!(status & UMAC_IRQ_LINK_UP)); } /* bcmgenet_isr1: handle Rx and Tx priority queues */ @@ -2539,22 +2543,21 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) struct bcmgenet_priv *priv = dev_id; struct bcmgenet_rx_ring *rx_ring; struct bcmgenet_tx_ring *tx_ring; - unsigned int index; + unsigned int index, status; - /* Save irq status for bottom-half processing. */ - priv->irq1_stat = - bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & + /* Read irq status */ + status = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS); /* clear interrupts */ - bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR); + bcmgenet_intrl2_1_writel(priv, status, INTRL2_CPU_CLEAR); netif_dbg(priv, intr, priv->dev, - "%s: IRQ=0x%x\n", __func__, priv->irq1_stat); + "%s: IRQ=0x%x\n", __func__, status); /* Check Rx priority queue interrupts */ for (index = 0; index < priv->hw_params->rx_queues; index++) { - if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index))) + if (!(status & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index))) continue; rx_ring = &priv->rx_rings[index]; @@ -2567,7 +2570,7 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) /* Check Tx priority queue interrupts */ for (index = 0; index < priv->hw_params->tx_queues; index++) { - if (!(priv->irq1_stat & BIT(index))) + if (!(status & BIT(index))) continue; tx_ring = &priv->tx_rings[index]; @@ -2587,19 +2590,20 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) struct bcmgenet_priv *priv = dev_id; struct bcmgenet_rx_ring *rx_ring; struct bcmgenet_tx_ring *tx_ring; + unsigned int status; + unsigned long flags; - /* Save irq status for bottom-half processing. */ - priv->irq0_stat = - bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) & + /* Read irq status */ + status = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) & ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); /* clear interrupts */ - bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR); + bcmgenet_intrl2_0_writel(priv, status, INTRL2_CPU_CLEAR); netif_dbg(priv, intr, priv->dev, - "IRQ=0x%x\n", priv->irq0_stat); + "IRQ=0x%x\n", status); - if (priv->irq0_stat & UMAC_IRQ_RXDMA_DONE) { + if (status & UMAC_IRQ_RXDMA_DONE) { rx_ring = &priv->rx_rings[DESC_INDEX]; if (likely(napi_schedule_prep(&rx_ring->napi))) { @@ -2608,7 +2612,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } } - if (priv->irq0_stat & UMAC_IRQ_TXDMA_DONE) { + if (status & UMAC_IRQ_TXDMA_DONE) { tx_ring = &priv->tx_rings[DESC_INDEX]; if (likely(napi_schedule_prep(&tx_ring->napi))) { @@ -2617,22 +2621,23 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } } - if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R | - UMAC_IRQ_PHY_DET_F | - UMAC_IRQ_LINK_EVENT | - UMAC_IRQ_HFB_SM | - UMAC_IRQ_HFB_MM | - UMAC_IRQ_MPD_R)) { - /* all other interested interrupts handled in bottom half */ - schedule_work(&priv->bcmgenet_irq_work); - } - if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) && - priv->irq0_stat & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) { - priv->irq0_stat &= ~(UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR); + status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) { wake_up(&priv->wq); } + /* all other interested interrupts handled in bottom half */ + status &= (UMAC_IRQ_LINK_EVENT | + UMAC_IRQ_MPD_R); + if (status) { + /* Save irq status for bottom-half processing. */ + spin_lock_irqsave(&priv->lock, flags); + priv->irq0_stat |= status; + spin_unlock_irqrestore(&priv->lock, flags); + + schedule_work(&priv->bcmgenet_irq_work); + } + return IRQ_HANDLED; } @@ -3334,6 +3339,8 @@ static int bcmgenet_probe(struct platform_device *pdev) goto err; } + spin_lock_init(&priv->lock); + SET_NETDEV_DEV(dev, &pdev->dev); dev_set_drvdata(&pdev->dev, dev); ether_addr_copy(dev->dev_addr, macaddr); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index d5fb0d772dcd..db7f289d65ae 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -623,11 +623,13 @@ struct bcmgenet_priv { struct work_struct bcmgenet_irq_work; int irq0; int irq1; - unsigned int irq0_stat; - unsigned int irq1_stat; int wol_irq; bool wol_irq_disabled; + /* shared status */ + spinlock_t lock; + unsigned int irq0_stat; + /* HW descriptors/checksum variables */ bool desc_64b_en; bool desc_rxchk_en; -- GitLab From e85b9bc29b04bbde927c2c556e0832327bc62279 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:48 -0800 Subject: [PATCH 0389/1398] net: bcmgenet: Power up the internal PHY before probing the MII [ Upstream commit 6be371b053dc86f11465cc1abce2e99bda0a0574 ] When using the internal PHY it must be powered up when the MII is probed or the PHY will not be detected. Since the PHY is powered up at reset this has not been a problem. However, when the kernel is restarted with kexec the PHY will likely be powered down when the kernel starts so it will not be detected and the Ethernet link will not be established. This commit explicitly powers up the internal PHY when the GENET driver is probed to correct this behavior. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index b7be36e201d7..3480b3078775 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3296,6 +3296,7 @@ static int bcmgenet_probe(struct platform_device *pdev) const void *macaddr; struct resource *r; int err = -EIO; + const char *phy_mode_str; /* Up to GENET_MAX_MQ_CNT + 1 TX queues and RX queues */ dev = alloc_etherdev_mqs(sizeof(*priv), GENET_MAX_MQ_CNT + 1, @@ -3403,6 +3404,13 @@ static int bcmgenet_probe(struct platform_device *pdev) priv->clk_eee = NULL; } + /* If this is an internal GPHY, power it on now, before UniMAC is + * brought out of reset as absolutely no UniMAC activity is allowed + */ + if (dn && !of_property_read_string(dn, "phy-mode", &phy_mode_str) && + !strcasecmp(phy_mode_str, "internal")) + bcmgenet_power_up(priv, GENET_POWER_PASSIVE); + err = reset_umac(priv); if (err) goto err_clk_disable; -- GitLab From 515d78dc0a89a14c10ce3b3f007c99508aa65e61 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 10 Mar 2017 07:48:49 +0000 Subject: [PATCH 0390/1398] rxrpc: Wake up the transmitter if Rx window size increases on the peer [ Upstream commit 702f2ac87a9a8da23bf8506466bc70175fc970b2 ] The RxRPC ACK packet may contain an extension that includes the peer's current Rx window size for this call. We adjust the local Tx window size to match. However, the transmitter can stall if the receive window is reduced to 0 by the peer and then reopened. This is because the normal way that the transmitter is re-energised is by dropping something out of our Tx queue and thus making space. When a single gap is made, the transmitter is woken up. However, because there's nothing in the Tx queue at this point, this doesn't happen. To fix this, perform a wake_up() any time we see the peer's Rx window size increasing. The observable symptom is that calls start failing on ETIMEDOUT and the following: kAFS: SERVER DEAD state=-62 appears in dmesg. Signed-off-by: David Howells Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/input.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 44fb8d893c7d..1060d14d4e6a 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -649,6 +649,7 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_peer *peer; unsigned int mtu; + bool wake = false; u32 rwind = ntohl(ackinfo->rwind); _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", @@ -656,9 +657,14 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), rwind, ntohl(ackinfo->jumbo_max)); - if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) - rwind = RXRPC_RXTX_BUFF_SIZE - 1; - call->tx_winsize = rwind; + if (call->tx_winsize != rwind) { + if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) + rwind = RXRPC_RXTX_BUFF_SIZE - 1; + if (rwind > call->tx_winsize) + wake = true; + call->tx_winsize = rwind; + } + if (call->cong_ssthresh > rwind) call->cong_ssthresh = rwind; @@ -672,6 +678,9 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, spin_unlock_bh(&peer->lock); _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); } + + if (wake) + wake_up(&call->waitq); } /* -- GitLab From 248cbd97be813adfa675d48e1e702f9d5e7ffca8 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Fri, 10 Mar 2017 14:33:01 +0200 Subject: [PATCH 0391/1398] net/mlx5: Fix create autogroup prev initializer [ Upstream commit af36370569eb37420e1e78a2e60c277b781fcd00 ] The autogroups list is a list of non overlapping group boundaries sorted by their start index. If the autogroups list wasn't empty and an empty group slot was found at the start of the list, the new group was added to the end of the list instead of the beginning, as the prev initializer was incorrect. When this was repeated, it caused multiple groups to have overlapping boundaries. Fixed that by correctly initializing the prev pointer to the start of the list. Fixes: eccec8da3b4e ('net/mlx5: Keep autogroups list ordered') Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 4de3c28b0547..331a6ca4856d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1015,7 +1015,7 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, u32 *match_criteria) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct list_head *prev = ft->node.children.prev; + struct list_head *prev = &ft->node.children; unsigned int candidate_index = 0; struct mlx5_flow_group *fg; void *match_criteria_addr; -- GitLab From 721c136ac2780e519cd44940ba47f08cdeafec9d Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Fri, 10 Mar 2017 14:33:02 +0200 Subject: [PATCH 0392/1398] net/mlx5: Don't save PCI state when PCI error is detected [ Upstream commit 5d47f6c89d568ab61712d8c40676fbb020b68752 ] When a PCI error is detected the PCI state could be corrupt, don't save it in that flow. Save the state after initialization. After restoring the PCI state during slot reset save it again, restoring the state destroys the previously saved state info. Fixes: 05ac2c0b7438 ('net/mlx5: Fix race between PCI error handlers and health work') Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b3309f2ed7dc..981cd1d84a5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1283,6 +1283,7 @@ static int init_one(struct pci_dev *pdev, if (err) goto clean_load; + pci_save_state(pdev); return 0; clean_load: @@ -1331,9 +1332,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - /* In case of kernel call save the pci state and drain the health wq */ + /* In case of kernel call drain the health wq */ if (state) { - pci_save_state(pdev); mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); } @@ -1385,6 +1385,7 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) pci_set_master(pdev); pci_restore_state(pdev); + pci_save_state(pdev); if (wait_vital(pdev)) { dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); -- GitLab From 3d40364d333e3de4cc2e49c80ad8ddb242c46315 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Mon, 27 Feb 2017 14:30:26 +0200 Subject: [PATCH 0393/1398] iommu/io-pgtable-arm-v7s: Check for leaf entry before dereferencing it [ Upstream commit a03849e7210277fa212779b7cd9c30e1ab6194b2 ] Do a check for already installed leaf entry at the current level before dereferencing it in order to avoid walking the page table down with wrong pointer to the next level. Signed-off-by: Oleksandr Tyshchenko CC: Will Deacon CC: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/io-pgtable-arm-v7s.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index f50e51c1a9c8..d68a552cfe8d 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -418,8 +418,12 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, pte |= ARM_V7S_ATTR_NS_TABLE; __arm_v7s_set_pte(ptep, pte, 1, cfg); - } else { + } else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { cptep = iopte_deref(pte, lvl); + } else { + /* We require an unmap first */ + WARN_ON(!selftest_running); + return -EEXIST; } /* Rinse, repeat */ -- GitLab From 063c753ef78bee13265bf02df185704e8fdff1f9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 10 Mar 2017 12:13:04 +1000 Subject: [PATCH 0394/1398] drm/amdgpu: fix parser init error path to avoid crash in parser fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 607523d19c9d67ba4cf7bdaced644f11ed04992c ] If we don't reset the chunk info in the error path, the subsequent fini path will double free. Reviewed-by: Christian König Signed-off-by: Dave Airlie Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index bfb4b91869e7..f26d1fd53bef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -240,6 +240,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) for (; i >= 0; i--) drm_free_large(p->chunks[i].kdata); kfree(p->chunks); + p->chunks = NULL; + p->nchunks = 0; put_ctx: amdgpu_ctx_put(p->ctx); free_chunk: -- GitLab From 0154269f9c122440b841e06c2c16cde10c26bf7b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 10 Mar 2017 11:36:39 +1100 Subject: [PATCH 0395/1398] NFSD: fix nfsd_minorversion(.., NFSD_AVAIL) [ Upstream commit 928c6fb3a9bfd6c5b287aa3465226add551c13c0 ] Current code will return 1 if the version is supported, and -1 if it isn't. This is confusing and inconsistent with the one place where this is used. So change to return 1 if it is supported, and zero if not. i.e. an error is never returned. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfssvc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 1645b977c9c6..0b752e98d153 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -155,7 +155,8 @@ int nfsd_vers(int vers, enum vers_op change) int nfsd_minorversion(u32 minorversion, enum vers_op change) { - if (minorversion > NFSD_SUPPORTED_MINOR_VERSION) + if (minorversion > NFSD_SUPPORTED_MINOR_VERSION && + change != NFSD_AVAIL) return -1; switch(change) { case NFSD_SET: -- GitLab From 817f60ccf72cf7d98c4a96d408660bbbcb495489 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 10 Mar 2017 11:36:39 +1100 Subject: [PATCH 0396/1398] NFSD: fix nfsd_reset_versions for NFSv4. [ Upstream commit 800a938f0bf9130c8256116649c0cc5806bfb2fd ] If you write "-2 -3 -4" to the "versions" file, it will notice that no versions are enabled, and nfsd_reset_versions() is called. This enables all major versions, not no minor versions. So we lose the invariant that NFSv4 is only advertised when at least one minor is enabled. Fix the code to explicitly enable minor versions for v4, change it to use nfsd_vers() to test and set, and simplify the code. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfssvc.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 0b752e98d153..5c4800626f13 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -400,23 +400,20 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) void nfsd_reset_versions(void) { - int found_one = 0; int i; - for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) { - if (nfsd_program.pg_vers[i]) - found_one = 1; - } - - if (!found_one) { - for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) - nfsd_program.pg_vers[i] = nfsd_version[i]; -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) - for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) - nfsd_acl_program.pg_vers[i] = - nfsd_acl_version[i]; -#endif - } + for (i = 0; i < NFSD_NRVERS; i++) + if (nfsd_vers(i, NFSD_TEST)) + return; + + for (i = 0; i < NFSD_NRVERS; i++) + if (i != 4) + nfsd_vers(i, NFSD_SET); + else { + int minor = 0; + while (nfsd_minorversion(minor, NFSD_SET) >= 0) + minor++; + } } /* -- GitLab From 8fb782bbd21214a211e8e05dd0230ddd9b255e5e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 28 Feb 2017 17:14:41 -0800 Subject: [PATCH 0397/1398] Input: i8042 - add TUXEDO BU1406 (N24_25BU) to the nomux list [ Upstream commit a4c2a13129f7c5bcf81704c06851601593303fd5 ] TUXEDO BU1406 does not implement active multiplexing mode properly, and takes around 550 ms in i8042_set_mux_mode(). Given that the device does not have external AUX port, there is no downside in disabling the MUX mode. Reported-by: Paul Menzel Suggested-by: Vojtech Pavlik Reviewed-by: Marcos Paulo de Souza Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index dbf09836ff30..d1051e3ce819 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -520,6 +520,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "IC4I"), }, }, + { + /* TUXEDO BU1406 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), + DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"), + }, + }, { } }; -- GitLab From 38780b9ae45a40f0f39f0279c381397fb910b072 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 28 Feb 2017 10:11:45 +0200 Subject: [PATCH 0398/1398] drm/omap: fix dmabuf mmap for dma_alloc'ed buffers [ Upstream commit 9fa1d7537242bd580ffa99c4725a0407096aad26 ] omap_gem_dmabuf_mmap() returns an error (with a WARN) when called for a buffer which is allocated with dma_alloc_*(). This prevents dmabuf mmap from working on SoCs without DMM, e.g. AM4 and OMAP3. I could not find any reason for omap_gem_dmabuf_mmap() rejecting such buffers, and just removing the if() fixes the limitation. Signed-off-by: Tomi Valkeinen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c index af267c35d813..ee5883f59be5 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c +++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c @@ -147,9 +147,6 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer, struct drm_gem_object *obj = buffer->priv; int ret = 0; - if (WARN_ON(!obj->filp)) - return -EINVAL; - ret = drm_gem_mmap_obj(obj, omap_gem_mmap_size(obj), vma); if (ret < 0) return ret; -- GitLab From 97b75dad9dd1bc352e199d15516978edb06792fd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Mar 2017 23:22:30 +0100 Subject: [PATCH 0399/1398] netfilter: bridge: honor frag_max_size when refragmenting [ Upstream commit 4ca60d08cbe65f501baad64af50fceba79c19fbb ] consider a bridge with mtu 9000, but end host sending smaller packets to another host with mtu < 9000. In this case, after reassembly, bridge+defrag would refragment, and then attempt to send the reassembled packet as long as it was below 9k. Instead we have to cap by the largest fragment size seen. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_netfilter_hooks.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index aa1df1a10dd7..82ce5713f744 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -706,18 +706,20 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge; - unsigned int mtu_reserved; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + unsigned int mtu, mtu_reserved; mtu_reserved = nf_bridge_mtu_reduction(skb); + mtu = skb->dev->mtu; + + if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) + mtu = nf_bridge->frag_max_size; - if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { + if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); } - nf_bridge = nf_bridge_info_get(skb); - /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ -- GitLab From 143d13d1e6c0a9ea7e6e86a0279907b174a42b4d Mon Sep 17 00:00:00 2001 From: Hiroyuki Yokoyama Date: Wed, 1 Mar 2017 03:51:00 +0000 Subject: [PATCH 0400/1398] ASoC: rsnd: fix sound route path when using SRC6/SRC9 [ Upstream commit a1c2ff53726907aff5feb37e4cfd45c1ff626431 ] This patch fixes the problem that the missing value of the route path setting table and incorrect values are set in the CMD_ROUTE_SELECT register. Signed-off-by: Hiroyuki Yokoyama [Kuninori: shared data on MIX and non-MIX case] Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/cmd.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/sound/soc/sh/rcar/cmd.c b/sound/soc/sh/rcar/cmd.c index abb5eaac854a..7d92a24b7cfa 100644 --- a/sound/soc/sh/rcar/cmd.c +++ b/sound/soc/sh/rcar/cmd.c @@ -31,23 +31,24 @@ static int rsnd_cmd_init(struct rsnd_mod *mod, struct rsnd_mod *mix = rsnd_io_to_mod_mix(io); struct device *dev = rsnd_priv_to_dev(priv); u32 data; + u32 path[] = { + [1] = 1 << 0, + [5] = 1 << 8, + [6] = 1 << 12, + [9] = 1 << 15, + }; if (!mix && !dvc) return 0; + if (ARRAY_SIZE(path) < rsnd_mod_id(mod) + 1) + return -ENXIO; + if (mix) { struct rsnd_dai *rdai; struct rsnd_mod *src; struct rsnd_dai_stream *tio; int i; - u32 path[] = { - [0] = 0, - [1] = 1 << 0, - [2] = 0, - [3] = 0, - [4] = 0, - [5] = 1 << 8 - }; /* * it is assuming that integrater is well understanding about @@ -70,16 +71,19 @@ static int rsnd_cmd_init(struct rsnd_mod *mod, } else { struct rsnd_mod *src = rsnd_io_to_mod_src(io); - u32 path[] = { - [0] = 0x30000, - [1] = 0x30001, - [2] = 0x40000, - [3] = 0x10000, - [4] = 0x20000, - [5] = 0x40100 + u8 cmd_case[] = { + [0] = 0x3, + [1] = 0x3, + [2] = 0x4, + [3] = 0x1, + [4] = 0x2, + [5] = 0x4, + [6] = 0x1, + [9] = 0x2, }; - data = path[rsnd_mod_id(src)]; + data = path[rsnd_mod_id(src)] | + cmd_case[rsnd_mod_id(src)] << 16; } dev_dbg(dev, "ctu/mix path = 0x%08x", data); -- GitLab From d28046fb8c435fc63f51dca30cdeb423d8ab4479 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 13 Mar 2017 16:10:11 +0200 Subject: [PATCH 0401/1398] blk-mq: Fix tagset reinit in the presence of cpu hot-unplug [ Upstream commit 0067d4b020ea07a58540acb2c5fcd3364bf326e0 ] In case cpu was unplugged, we need to make sure not to assume that the tags for that cpu are still allocated. so check for null tags when reinitializing a tagset. Reported-by: Yi Zhang Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/blk-mq-tag.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index dcf5ce3ba4bf..4bc701b32ce2 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -311,6 +311,9 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) for (i = 0; i < set->nr_hw_queues; i++) { struct blk_mq_tags *tags = set->tags[i]; + if (!tags) + continue; + for (j = 0; j < tags->nr_tags; j++) { if (!tags->rqs[j]) continue; -- GitLab From 2e70c4d5de8751df5d14a578459a58ef0471e1ef Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Fri, 10 Mar 2017 12:09:49 -0800 Subject: [PATCH 0402/1398] writeback: fix memory leak in wb_queue_work() [ Upstream commit 4a3a485b1ed0e109718cc8c9d094fa0f552de9b2 ] When WB_registered flag is not set, wb_queue_work() skips queuing the work, but does not perform the necessary clean up. In particular, if work->auto_free is true, it should free the memory. The leak condition can be reprouced by following these steps: mount /dev/sdb /mnt/sdb /* In qemu console: device_del sdb */ umount /dev/sdb Above will result in a wb_queue_work() call on an unregistered wb and thus leak memory. Reported-by: John Sperbeck Signed-off-by: Tahsin Erdogan Reviewed-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/fs-writeback.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 05713a5da083..0703a1179847 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -173,19 +173,33 @@ static void wb_wakeup(struct bdi_writeback *wb) spin_unlock_bh(&wb->work_lock); } +static void finish_writeback_work(struct bdi_writeback *wb, + struct wb_writeback_work *work) +{ + struct wb_completion *done = work->done; + + if (work->auto_free) + kfree(work); + if (done && atomic_dec_and_test(&done->cnt)) + wake_up_all(&wb->bdi->wb_waitq); +} + static void wb_queue_work(struct bdi_writeback *wb, struct wb_writeback_work *work) { trace_writeback_queue(wb, work); - spin_lock_bh(&wb->work_lock); - if (!test_bit(WB_registered, &wb->state)) - goto out_unlock; if (work->done) atomic_inc(&work->done->cnt); - list_add_tail(&work->list, &wb->work_list); - mod_delayed_work(bdi_wq, &wb->dwork, 0); -out_unlock: + + spin_lock_bh(&wb->work_lock); + + if (test_bit(WB_registered, &wb->state)) { + list_add_tail(&work->list, &wb->work_list); + mod_delayed_work(bdi_wq, &wb->dwork, 0); + } else + finish_writeback_work(wb, work); + spin_unlock_bh(&wb->work_lock); } @@ -1875,16 +1889,9 @@ static long wb_do_writeback(struct bdi_writeback *wb) set_bit(WB_writeback_running, &wb->state); while ((work = get_next_work_item(wb)) != NULL) { - struct wb_completion *done = work->done; - trace_writeback_exec(wb, work); - wrote += wb_writeback(wb, work); - - if (work->auto_free) - kfree(work); - if (done && atomic_dec_and_test(&done->cnt)) - wake_up_all(&wb->bdi->wb_waitq); + finish_writeback_work(wb, work); } /* -- GitLab From 46cbe3f51c511b648de0220b14ab4f5832f00020 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:42:03 +0100 Subject: [PATCH 0403/1398] net: wimax/i2400m: fix NULL-deref at probe [ Upstream commit 6e526fdff7be4f13b24f929a04c0e9ae6761291e ] Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory beyond the endpoint array should a malicious device lack the expected endpoints. The endpoints are specifically dereferenced in the i2400m_bootrom_init path during probe (e.g. in i2400mu_tx_bulk_out). Fixes: f398e4240fce ("i2400m/USB: probe/disconnect, dev init/shutdown and reset backends") Cc: Inaky Perez-Gonzalez Signed-off-by: Johan Hovold Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wimax/i2400m/usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index e7f5910a6519..f8eb66ef2944 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -467,6 +467,9 @@ int i2400mu_probe(struct usb_interface *iface, struct i2400mu *i2400mu; struct usb_device *usb_dev = interface_to_usbdev(iface); + if (iface->cur_altsetting->desc.bNumEndpoints < 4) + return -ENODEV; + if (usb_dev->speed != USB_SPEED_HIGH) dev_err(dev, "device not connected as high speed\n"); -- GitLab From e30ccb5f1c7e00ee189a0991e633b3034801899c Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 13 Mar 2017 14:30:29 -0700 Subject: [PATCH 0404/1398] dmaengine: Fix array index out of bounds warning in __get_unmap_pool() [ Upstream commit 23f963e91fd81f44f6b316b1c24db563354c6be8 ] This fixes the following warning when building with clang and CONFIG_DMA_ENGINE_RAID=n : drivers/dma/dmaengine.c:1102:11: error: array index 2 is past the end of the array (which contains 1 element) [-Werror,-Warray-bounds] return &unmap_pool[2]; ^ ~ drivers/dma/dmaengine.c:1083:1: note: array 'unmap_pool' declared here static struct dmaengine_unmap_pool unmap_pool[] = { ^ drivers/dma/dmaengine.c:1104:11: error: array index 3 is past the end of the array (which contains 1 element) [-Werror,-Warray-bounds] return &unmap_pool[3]; ^ ~ drivers/dma/dmaengine.c:1083:1: note: array 'unmap_pool' declared here static struct dmaengine_unmap_pool unmap_pool[] = { Signed-off-by: Matthias Kaehlcke Reviewed-by: Dan Williams Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dmaengine.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 6b535262ac5d..3db94e81bc14 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1107,12 +1107,14 @@ static struct dmaengine_unmap_pool *__get_unmap_pool(int nr) switch (order) { case 0 ... 1: return &unmap_pool[0]; +#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID) case 2 ... 4: return &unmap_pool[1]; case 5 ... 7: return &unmap_pool[2]; case 8: return &unmap_pool[3]; +#endif default: BUG(); return NULL; -- GitLab From 889163d75fe5e0cb4ead674851e08c4e706d645c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 13:54:12 +0100 Subject: [PATCH 0405/1398] irqchip/mvebu-odmi: Select GENERIC_MSI_IRQ_DOMAIN [ Upstream commit fa23b9d1b89fdc34f296f02e496a20aeff5736be ] This driver uses the MSI domain but has no strict dependency on PCI_MSI, so we may run into a build failure when CONFIG_GENERIC_MSI_IRQ_DOMAIN is disabled: drivers/irqchip/irq-mvebu-odmi.c:152:15: error: variable 'odmi_msi_ops' has initializer but incomplete type static struct msi_domain_ops odmi_msi_ops = { ^~~~~~~~~~~~~~ drivers/irqchip/irq-mvebu-odmi.c:155:15: error: variable 'odmi_msi_domain_info' has initializer but incomplete type static struct msi_domain_info odmi_msi_domain_info = { ^~~~~~~~~~~~~~~ drivers/irqchip/irq-mvebu-odmi.c:156:3: error: 'struct msi_domain_info' has no member named 'flags' .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS), ^~~~~ drivers/irqchip/irq-mvebu-odmi.c:156:12: error: 'MSI_FLAG_USE_DEF_DOM_OPS' undeclared here (not in a function) .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS), ^~~~~~~~~~~~~~~~~~~~~~~~ drivers/irqchip/irq-mvebu-odmi.c:156:39: error: 'MSI_FLAG_USE_DEF_CHIP_OPS' undeclared here (not in a function); did you mean 'MSI_FLAG_USE_DEF_DOM_OPS'? Selecting the option from this driver seems to solve this nicely, though I could not find any other instance of this in irqchip drivers. Signed-off-by: Arnd Bergmann Acked-by: Thomas Petazzoni Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index bc0af3307bbf..910cb5e23371 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -258,6 +258,7 @@ config IRQ_MXS config MVEBU_ODMI bool + select GENERIC_MSI_IRQ_DOMAIN config MVEBU_PIC bool -- GitLab From 6c548e90a0bcfc8beb29ccb0de5d7d40efb41da9 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 14 Mar 2017 08:58:08 -0400 Subject: [PATCH 0406/1398] net: Resend IGMP memberships upon peer notification. [ Upstream commit 37c343b4f4e70e9dc328ab04903c0ec8d154c1a4 ] When we notify peers of potential changes, it's also good to update IGMP memberships. For example, during VM migration, updating IGMP memberships will redirect existing multicast streams to the VM at the new location. Signed-off-by: Vladislav Yasevich Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev.c b/net/core/dev.c index c37891828e4e..09007a71c8dd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1304,6 +1304,7 @@ void netdev_notify_peers(struct net_device *dev) { rtnl_lock(); call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); + call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev); rtnl_unlock(); } EXPORT_SYMBOL(netdev_notify_peers); -- GitLab From 4c8b4e60b5755b65c39db2adcb3cc5ba293e118e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 14 Mar 2017 14:00:00 +0100 Subject: [PATCH 0407/1398] mlxsw: reg: Fix SPVM max record count [ Upstream commit f004ec065b4879d6bc9ba0211af2169b3ce3097f ] The num_rec field is 8 bit, so the maximal count number is 255. This fixes vlans not being enabled for wider ranges than 255. Fixes: b2e345f9a454 ("mlxsw: reg: Add Switch Port VID and Switch Port VLAN Membership registers definitions") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 6460c7256f2b..cb6f82d27297 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -788,7 +788,7 @@ static inline void mlxsw_reg_spvid_pack(char *payload, u8 local_port, u16 pvid) #define MLXSW_REG_SPVM_ID 0x200F #define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */ #define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */ -#define MLXSW_REG_SPVM_REC_MAX_COUNT 256 +#define MLXSW_REG_SPVM_REC_MAX_COUNT 255 #define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN + \ MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT) -- GitLab From fddc3df7647e796c8947215a281db95109d17ac1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 14 Mar 2017 14:00:01 +0100 Subject: [PATCH 0408/1398] mlxsw: reg: Fix SPVMLR max record count [ Upstream commit e9093b1183bbac462d2caef3eac165778c0b1bf1 ] The num_rec field is 8 bit, so the maximal count number is 255. This fixes vlans learning not being enabled for wider ranges than 255. Fixes: a4feea74cd7a ("mlxsw: reg: Add Switch Port VLAN MAC Learning register definition") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index cb6f82d27297..a01e6c0d0cd1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1757,7 +1757,7 @@ static inline void mlxsw_reg_sfmr_pack(char *payload, #define MLXSW_REG_SPVMLR_ID 0x2020 #define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */ #define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */ -#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 256 +#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255 #define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \ MLXSW_REG_SPVMLR_REC_LEN * \ MLXSW_REG_SPVMLR_REC_MAX_COUNT) -- GitLab From 8de6d7b28d2f135838275258b715a52db27fdb1c Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 14 Mar 2017 15:25:58 +0200 Subject: [PATCH 0409/1398] qed: Align CIDs according to DORQ requirement [ Upstream commit f3e48119b97f56fb09310c95d49da122a27003d7 ] The Doorbell HW block can be configured at a granularity of 16 x CIDs, so we need to make sure that the actual number of CIDs configured would be a multiplication of 16. Today, when RoCE is enabled - given that the number is unaligned, doorbelling the higher CIDs would fail to reach the firmware and would eventually timeout. Fixes: dbb799c39717 ("qed: Initialize hardware for new protocols") Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 0c42c240b5cf..ed014bdbbabd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -373,8 +373,9 @@ static void qed_cxt_set_proto_cid_count(struct qed_hwfn *p_hwfn, u32 page_sz = p_mgr->clients[ILT_CLI_CDUC].p_size.val; u32 cxt_size = CONN_CXT_SIZE(p_hwfn); u32 elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size; + u32 align = elems_per_page * DQ_RANGE_ALIGN; - p_conn->cid_count = roundup(p_conn->cid_count, elems_per_page); + p_conn->cid_count = roundup(p_conn->cid_count, align); } } -- GitLab From ac04ab9624b5337628a8a5b96eee81a6fee704e5 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 14 Mar 2017 15:26:00 +0200 Subject: [PATCH 0410/1398] qed: Fix mapping leak on LL2 rx flow [ Upstream commit 752ecb2da11124a948567076b60767dc8034cfa5 ] When receiving an Rx LL2 packet, qed fails to unmap the previous buffer. Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support"); Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 62ae55bd81b8..7f00c7b065cb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -187,6 +187,8 @@ static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn, /* If need to reuse or there's no replacement buffer, repost this */ if (rc) goto out_post; + dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, + cdev->ll2->rx_size, DMA_FROM_DEVICE); skb = build_skb(buffer->data, 0); if (!skb) { -- GitLab From 4c9c0971271dfcc5f0e74232446f938e2bed717b Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 14 Mar 2017 15:26:02 +0200 Subject: [PATCH 0411/1398] qed: Fix interrupt flags on Rx LL2 [ Upstream commit 1df2adedcce17ad4a39fba74f0e2b611f797fe10 ] Before iterating over the the LL2 Rx ring, the ring's spinlock is taken via spin_lock_irqsave(). The actual processing of the packet [including handling by the protocol driver] is done without said lock, so qed releases the spinlock and re-claims it afterwards. Problem is that the final spin_lock_irqrestore() at the end of the iteration uses the original flags saved from the initial irqsave() instead of the flags from the most recent irqsave(). So it's possible that the interrupt status would be incorrect at the end of the processing. Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support"); CC: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 7f00c7b065cb..a3360cbdb30b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -443,7 +443,7 @@ qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn, static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn, union core_rx_cqe_union *p_cqe, - unsigned long lock_flags, + unsigned long *p_lock_flags, bool b_last_cqe) { struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; @@ -464,10 +464,10 @@ static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn, "Mismatch between active_descq and the LL2 Rx chain\n"); list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); - spin_unlock_irqrestore(&p_rx->lock, lock_flags); + spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags); qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id, p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe); - spin_lock_irqsave(&p_rx->lock, lock_flags); + spin_lock_irqsave(&p_rx->lock, *p_lock_flags); return 0; } @@ -507,7 +507,8 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) break; case CORE_RX_CQE_TYPE_REGULAR: rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn, - cqe, flags, b_last_cqe); + cqe, &flags, + b_last_cqe); break; default: rc = -EIO; -- GitLab From 3544f57578a6110f14e837c2a002a8eadad510cf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 22:27:11 +0100 Subject: [PATCH 0412/1398] drm: amd: remove broken include path [ Upstream commit 655d9ca9ac075da1ef2a45012ba48a39f6eb1f58 ] The AMD ACP driver adds "-I../acp -I../acp/include" to the gcc command line, which makes no sense, since these are evaluated relative to the build directory. When we build with "make W=1", they instead cause a warning: cc1: error: ../acp/: No such file or directory [-Werror=missing-include-dirs] cc1: error: ../acp/include: No such file or directory [-Werror=missing-include-dirs] cc1: all warnings being treated as errors ../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.o' failed ../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.o' failed ../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_kms.o' failed This removes the subdir-ccflags variable that evidently did not serve any purpose here. Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/acp/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile index 8363cb57915b..8a08e81ee90d 100644 --- a/drivers/gpu/drm/amd/acp/Makefile +++ b/drivers/gpu/drm/amd/acp/Makefile @@ -3,6 +3,4 @@ # of AMDSOC/AMDGPU drm driver. # It provides the HW control for ACP related functionalities. -subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include - AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o -- GitLab From 18b39b61b2c66556396f8584f0769f221d3383f6 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 30 Jun 2016 16:10:51 +0300 Subject: [PATCH 0413/1398] intel_th: pci: Add Gemini Lake support [ Upstream commit 340837f985c2cb87ca0868d4aa9ce42b0fab3a21 ] This adds Intel(R) Trace Hub PCI ID for Gemini Lake SOC. Signed-off-by: Alexander Shishkin Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 63b5db4e4070..e0f3244505d3 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -95,6 +95,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9da6), .driver_data = (kernel_ulong_t)0, }, + { + /* Gemini Lake */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x318e), + .driver_data = (kernel_ulong_t)0, + }, { 0 }, }; -- GitLab From fd2530a4ee62c17bc8e8715cc6f449c1a1f7f212 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 13 Mar 2017 07:44:45 +0900 Subject: [PATCH 0414/1398] openrisc: fix issue handling 8 byte get_user calls [ Upstream commit 154e67cd8e8f964809d0e75e44bb121b169c75b3 ] Was getting the following error with allmodconfig: ERROR: "__get_user_bad" [lib/test_user_copy.ko] undefined! This was simply a missing break statement, causing an unwanted fall through. Signed-off-by: Stafford Horne Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/openrisc/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 140faa16685a..1311e6b13991 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -211,7 +211,7 @@ do { \ case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break; \ case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \ case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \ - case 8: __get_user_asm2(x, ptr, retval); \ + case 8: __get_user_asm2(x, ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ } while (0) -- GitLab From 8652baa5a31d9f65f93464d27d9a330e3dee6191 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 14 Mar 2017 09:34:49 +0900 Subject: [PATCH 0415/1398] ASoC: rcar: clear DE bit only in PDMACHCR when it stops [ Upstream commit 62a10498afb27370ec6018e9d802b74850fd8d9a ] R-Car datasheet indicates "Clear DE in PDMACHCR" for transfer stop, but current code clears all bits in PDMACHCR. Because of this, DE bit might never been cleared, and it causes CMD overflow. This patch fixes this issue. Signed-off-by: Kuninori Morimoto Tested-by: Hiroyuki Yokoyama Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/dma.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index 6bc93cbb3049..edeb74a13c0f 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -361,6 +361,20 @@ static u32 rsnd_dmapp_read(struct rsnd_dma *dma, u32 reg) return ioread32(rsnd_dmapp_addr(dmac, dma, reg)); } +static void rsnd_dmapp_bset(struct rsnd_dma *dma, u32 data, u32 mask, u32 reg) +{ + struct rsnd_mod *mod = rsnd_mod_get(dma); + struct rsnd_priv *priv = rsnd_mod_to_priv(mod); + struct rsnd_dma_ctrl *dmac = rsnd_priv_to_dmac(priv); + volatile void __iomem *addr = rsnd_dmapp_addr(dmac, dma, reg); + u32 val = ioread32(addr); + + val &= ~mask; + val |= (data & mask); + + iowrite32(val, addr); +} + static int rsnd_dmapp_stop(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv) @@ -368,10 +382,10 @@ static int rsnd_dmapp_stop(struct rsnd_mod *mod, struct rsnd_dma *dma = rsnd_mod_to_dma(mod); int i; - rsnd_dmapp_write(dma, 0, PDMACHCR); + rsnd_dmapp_bset(dma, 0, PDMACHCR_DE, PDMACHCR); for (i = 0; i < 1024; i++) { - if (0 == rsnd_dmapp_read(dma, PDMACHCR)) + if (0 == (rsnd_dmapp_read(dma, PDMACHCR) & PDMACHCR_DE)) return 0; udelay(1); } -- GitLab From c81410a4353d45766564ba6af9a067d33d1525d7 Mon Sep 17 00:00:00 2001 From: Don Brace Date: Fri, 10 Mar 2017 14:35:11 -0600 Subject: [PATCH 0416/1398] scsi: hpsa: update check for logical volume status [ Upstream commit 85b29008d8af6d94a0723aaa8d93cfb6e041158b ] - Add in a new case for volume offline. Resolves internal testing bug for multilun array management. - Return correct status for failed TURs. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hpsa.c | 35 ++++++++++++++++------------------- drivers/scsi/hpsa_cmd.h | 2 ++ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index a1d6ab76a514..3c5a3fb07d56 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -3686,7 +3686,7 @@ static int hpsa_get_volume_status(struct ctlr_info *h, * # (integer code indicating one of several NOT READY states * describing why a volume is to be kept offline) */ -static int hpsa_volume_offline(struct ctlr_info *h, +static unsigned char hpsa_volume_offline(struct ctlr_info *h, unsigned char scsi3addr[]) { struct CommandList *c; @@ -3707,7 +3707,7 @@ static int hpsa_volume_offline(struct ctlr_info *h, DEFAULT_TIMEOUT); if (rc) { cmd_free(h, c); - return 0; + return HPSA_VPD_LV_STATUS_UNSUPPORTED; } sense = c->err_info->SenseInfo; if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo)) @@ -3718,19 +3718,13 @@ static int hpsa_volume_offline(struct ctlr_info *h, cmd_status = c->err_info->CommandStatus; scsi_status = c->err_info->ScsiStatus; cmd_free(h, c); - /* Is the volume 'not ready'? */ - if (cmd_status != CMD_TARGET_STATUS || - scsi_status != SAM_STAT_CHECK_CONDITION || - sense_key != NOT_READY || - asc != ASC_LUN_NOT_READY) { - return 0; - } /* Determine the reason for not ready state */ ldstat = hpsa_get_volume_status(h, scsi3addr); /* Keep volume offline in certain cases: */ switch (ldstat) { + case HPSA_LV_FAILED: case HPSA_LV_UNDERGOING_ERASE: case HPSA_LV_NOT_AVAILABLE: case HPSA_LV_UNDERGOING_RPI: @@ -3752,7 +3746,7 @@ static int hpsa_volume_offline(struct ctlr_info *h, default: break; } - return 0; + return HPSA_LV_OK; } /* @@ -3825,10 +3819,10 @@ static int hpsa_update_device_info(struct ctlr_info *h, /* Do an inquiry to the device to see what it is. */ if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff, (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { - /* Inquiry failed (msg printed already) */ dev_err(&h->pdev->dev, - "hpsa_update_device_info: inquiry failed\n"); - rc = -EIO; + "%s: inquiry failed, device will be skipped.\n", + __func__); + rc = HPSA_INQUIRY_FAILED; goto bail_out; } @@ -3857,15 +3851,19 @@ static int hpsa_update_device_info(struct ctlr_info *h, if ((this_device->devtype == TYPE_DISK || this_device->devtype == TYPE_ZBC) && is_logical_dev_addr_mode(scsi3addr)) { - int volume_offline; + unsigned char volume_offline; hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level); if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) hpsa_get_ioaccel_status(h, scsi3addr, this_device); volume_offline = hpsa_volume_offline(h, scsi3addr); - if (volume_offline < 0 || volume_offline > 0xff) - volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED; - this_device->volume_offline = volume_offline & 0xff; + if (volume_offline == HPSA_LV_FAILED) { + rc = HPSA_LV_FAILED; + dev_err(&h->pdev->dev, + "%s: LV failed, device will be skipped.\n", + __func__); + goto bail_out; + } } else { this_device->raid_level = RAID_UNKNOWN; this_device->offload_config = 0; @@ -4353,8 +4351,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h) goto out; } if (rc) { - dev_warn(&h->pdev->dev, - "Inquiry failed, skipping device.\n"); + h->drv_req_rescan = 1; continue; } diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h index a584cdf07058..5961705eef76 100644 --- a/drivers/scsi/hpsa_cmd.h +++ b/drivers/scsi/hpsa_cmd.h @@ -156,6 +156,7 @@ #define CFGTBL_BusType_Fibre2G 0x00000200l /* VPD Inquiry types */ +#define HPSA_INQUIRY_FAILED 0x02 #define HPSA_VPD_SUPPORTED_PAGES 0x00 #define HPSA_VPD_LV_DEVICE_ID 0x83 #define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1 @@ -166,6 +167,7 @@ /* Logical volume states */ #define HPSA_VPD_LV_STATUS_UNSUPPORTED 0xff #define HPSA_LV_OK 0x0 +#define HPSA_LV_FAILED 0x01 #define HPSA_LV_NOT_AVAILABLE 0x0b #define HPSA_LV_UNDERGOING_ERASE 0x0F #define HPSA_LV_UNDERGOING_RPI 0x12 -- GitLab From 0f07e7611184cd78a26dd1fbf13513264896cdac Mon Sep 17 00:00:00 2001 From: Don Brace Date: Fri, 10 Mar 2017 14:35:17 -0600 Subject: [PATCH 0417/1398] scsi: hpsa: limit outstanding rescans [ Upstream commit 87b9e6aa87d9411f1059aa245c0c79976bc557ac ] Avoid rescan storms. No need to queue another if one is pending. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Tomas Henzl Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hpsa.c | 16 +++++++++++++++- drivers/scsi/hpsa.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 3c5a3fb07d56..20e4278bdcdc 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -5529,7 +5529,7 @@ static void hpsa_scan_complete(struct ctlr_info *h) spin_lock_irqsave(&h->scan_lock, flags); h->scan_finished = 1; - wake_up_all(&h->scan_wait_queue); + wake_up(&h->scan_wait_queue); spin_unlock_irqrestore(&h->scan_lock, flags); } @@ -5547,11 +5547,23 @@ static void hpsa_scan_start(struct Scsi_Host *sh) if (unlikely(lockup_detected(h))) return hpsa_scan_complete(h); + /* + * If a scan is already waiting to run, no need to add another + */ + spin_lock_irqsave(&h->scan_lock, flags); + if (h->scan_waiting) { + spin_unlock_irqrestore(&h->scan_lock, flags); + return; + } + + spin_unlock_irqrestore(&h->scan_lock, flags); + /* wait until any scan already in progress is finished. */ while (1) { spin_lock_irqsave(&h->scan_lock, flags); if (h->scan_finished) break; + h->scan_waiting = 1; spin_unlock_irqrestore(&h->scan_lock, flags); wait_event(h->scan_wait_queue, h->scan_finished); /* Note: We don't need to worry about a race between this @@ -5561,6 +5573,7 @@ static void hpsa_scan_start(struct Scsi_Host *sh) */ } h->scan_finished = 0; /* mark scan as in progress */ + h->scan_waiting = 0; spin_unlock_irqrestore(&h->scan_lock, flags); if (unlikely(lockup_detected(h))) @@ -8799,6 +8812,7 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) init_waitqueue_head(&h->event_sync_wait_queue); mutex_init(&h->reset_mutex); h->scan_finished = 1; /* no scan currently in progress */ + h->scan_waiting = 0; pci_set_drvdata(pdev, h); h->ndevices = 0; diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index 9ea162de80dc..e16f2945f6ac 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -203,6 +203,7 @@ struct ctlr_info { dma_addr_t errinfo_pool_dhandle; unsigned long *cmd_pool_bits; int scan_finished; + u8 scan_waiting : 1; spinlock_t scan_lock; wait_queue_head_t scan_wait_queue; -- GitLab From 91510a623baa8c3be2033e42634ebc837e819635 Mon Sep 17 00:00:00 2001 From: Don Brace Date: Fri, 10 Mar 2017 14:35:23 -0600 Subject: [PATCH 0418/1398] scsi: hpsa: do not timeout reset operations [ Upstream commit 2ef2884980873081a4edae92f9d88dd580c85f6e ] Resets can take longer than DEFAULT_TIMEOUT. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Tomas Henzl Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hpsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 20e4278bdcdc..11a9ffcf9113 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -2951,7 +2951,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr, /* fill_cmd can't fail here, no data buffer to map. */ (void) fill_cmd(c, reset_type, h, NULL, 0, 0, scsi3addr, TYPE_MSG); - rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, DEFAULT_TIMEOUT); + rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT); if (rc) { dev_warn(&h->pdev->dev, "Failed to send reset command\n"); goto out; -- GitLab From 1867eb805091fbcd654161dbb15acfe3b92f1599 Mon Sep 17 00:00:00 2001 From: Taku Izumi Date: Wed, 15 Mar 2017 13:47:50 +0900 Subject: [PATCH 0419/1398] fjes: Fix wrong netdevice feature flags [ Upstream commit fe8daf5fa715f7214952f06a387e4b7de818c5be ] This patch fixes netdev->features for Extended Socket network device. Currently Extended Socket network device's netdev->feature claims NETIF_F_HW_CSUM, however this is completely wrong. There's no feature of checksum offloading. That causes invalid TCP/UDP checksum and packet rejection when IP forwarding from Extended Socket network device to other network device. NETIF_F_HW_CSUM should be omitted. Signed-off-by: Taku Izumi Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/fjes/fjes_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index e46b1ebbbff4..7ea8ead4fd1c 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1277,7 +1277,7 @@ static void fjes_netdev_setup(struct net_device *netdev) fjes_set_ethtool_ops(netdev); netdev->mtu = fjes_support_mtu[3]; netdev->flags |= IFF_BROADCAST; - netdev->features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER; + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; } static void fjes_irq_watch_task(struct work_struct *work) -- GitLab From da626b13ce329cb2629897251226ca8be54ab919 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Mar 2017 14:42:03 -0400 Subject: [PATCH 0420/1398] drm/radeon/si: add dpm quirk for Oland MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0f424de1fd9bc4ab24bd1fe5430ab5618e803e31 ] OLAND 0x1002:0x6604 0x1028:0x066F 0x00 seems to have problems with higher sclks. Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 13ba73fd9b68..56109b57b776 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3029,6 +3029,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, max_sclk = 75000; max_mclk = 80000; } + } else if (rdev->family == CHIP_OLAND) { + if ((rdev->pdev->device == 0x6604) && + (rdev->pdev->subsystem_vendor == 0x1028) && + (rdev->pdev->subsystem_device == 0x066F)) { + max_sclk = 75000; + } } /* Apply dpm quirks */ while (p && p->chip_device != 0) { -- GitLab From a524bb57dd367c299b6c61ff39df6328ae49ed08 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 4 Mar 2017 18:13:59 -0700 Subject: [PATCH 0421/1398] Drivers: hv: util: move waiting for release to hv_utils_transport itself [ Upstream commit e9c18ae6eb2b312f16c63e34b43ea23926daa398 ] Waiting for release_event in all three drivers introduced issues on release as on_reset() hook is not always called. E.g. if the device was never opened we will never get the completion. Move the waiting code to hvutil_transport_destroy() and make sure it is only called when the device is open. hvt->lock serialization should guarantee the absence of races. Fixes: 5a66fecbf6aa ("Drivers: hv: util: kvp: Fix a rescind processing issue") Fixes: 20951c7535b5 ("Drivers: hv: util: Fcopy: Fix a rescind processing issue") Fixes: d77044d142e9 ("Drivers: hv: util: Backup: Fix a rescind processing issue") Reported-by: Dexuan Cui Tested-by: Dexuan Cui Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_fcopy.c | 4 ---- drivers/hv/hv_kvp.c | 4 ---- drivers/hv/hv_snapshot.c | 4 ---- drivers/hv/hv_utils_transport.c | 12 ++++++++---- drivers/hv/hv_utils_transport.h | 1 + 5 files changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index 75126e4e3f05..44420073edda 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -61,7 +61,6 @@ static DECLARE_WORK(fcopy_send_work, fcopy_send_data); static const char fcopy_devname[] = "vmbus/hv_fcopy"; static u8 *recv_buffer; static struct hvutil_transport *hvt; -static struct completion release_event; /* * This state maintains the version number registered by the daemon. */ @@ -322,7 +321,6 @@ static void fcopy_on_reset(void) if (cancel_delayed_work_sync(&fcopy_timeout_work)) fcopy_respond_to_host(HV_E_FAIL); - complete(&release_event); } int hv_fcopy_init(struct hv_util_service *srv) @@ -330,7 +328,6 @@ int hv_fcopy_init(struct hv_util_service *srv) recv_buffer = srv->recv_buffer; fcopy_transaction.recv_channel = srv->channel; - init_completion(&release_event); /* * When this driver loads, the user level daemon that * processes the host requests may not yet be running. @@ -352,5 +349,4 @@ void hv_fcopy_deinit(void) fcopy_transaction.state = HVUTIL_DEVICE_DYING; cancel_delayed_work_sync(&fcopy_timeout_work); hvutil_transport_destroy(hvt); - wait_for_completion(&release_event); } diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 3abfc5983c97..5e1fdc8d32ab 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -88,7 +88,6 @@ static DECLARE_WORK(kvp_sendkey_work, kvp_send_key); static const char kvp_devname[] = "vmbus/hv_kvp"; static u8 *recv_buffer; static struct hvutil_transport *hvt; -static struct completion release_event; /* * Register the kernel component with the user-level daemon. * As part of this registration, pass the LIC version number. @@ -717,7 +716,6 @@ static void kvp_on_reset(void) if (cancel_delayed_work_sync(&kvp_timeout_work)) kvp_respond_to_host(NULL, HV_E_FAIL); kvp_transaction.state = HVUTIL_DEVICE_INIT; - complete(&release_event); } int @@ -726,7 +724,6 @@ hv_kvp_init(struct hv_util_service *srv) recv_buffer = srv->recv_buffer; kvp_transaction.recv_channel = srv->channel; - init_completion(&release_event); /* * When this driver loads, the user level daemon that * processes the host requests may not yet be running. @@ -750,5 +747,4 @@ void hv_kvp_deinit(void) cancel_delayed_work_sync(&kvp_timeout_work); cancel_work_sync(&kvp_sendkey_work); hvutil_transport_destroy(hvt); - wait_for_completion(&release_event); } diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index a76e3db0d01f..a6707133c297 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -66,7 +66,6 @@ static int dm_reg_value; static const char vss_devname[] = "vmbus/hv_vss"; static __u8 *recv_buffer; static struct hvutil_transport *hvt; -static struct completion release_event; static void vss_timeout_func(struct work_struct *dummy); static void vss_handle_request(struct work_struct *dummy); @@ -331,13 +330,11 @@ static void vss_on_reset(void) if (cancel_delayed_work_sync(&vss_timeout_work)) vss_respond_to_host(HV_E_FAIL); vss_transaction.state = HVUTIL_DEVICE_INIT; - complete(&release_event); } int hv_vss_init(struct hv_util_service *srv) { - init_completion(&release_event); if (vmbus_proto_version < VERSION_WIN8_1) { pr_warn("Integration service 'Backup (volume snapshot)'" " not supported on this host version.\n"); @@ -368,5 +365,4 @@ void hv_vss_deinit(void) cancel_delayed_work_sync(&vss_timeout_work); cancel_work_sync(&vss_handle_request_work); hvutil_transport_destroy(hvt); - wait_for_completion(&release_event); } diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c index c235a9515267..4402a71e23f7 100644 --- a/drivers/hv/hv_utils_transport.c +++ b/drivers/hv/hv_utils_transport.c @@ -182,10 +182,11 @@ static int hvt_op_release(struct inode *inode, struct file *file) * connects back. */ hvt_reset(hvt); - mutex_unlock(&hvt->lock); if (mode_old == HVUTIL_TRANSPORT_DESTROY) - hvt_transport_free(hvt); + complete(&hvt->release); + + mutex_unlock(&hvt->lock); return 0; } @@ -304,6 +305,7 @@ struct hvutil_transport *hvutil_transport_init(const char *name, init_waitqueue_head(&hvt->outmsg_q); mutex_init(&hvt->lock); + init_completion(&hvt->release); spin_lock(&hvt_list_lock); list_add(&hvt->list, &hvt_list); @@ -351,6 +353,8 @@ void hvutil_transport_destroy(struct hvutil_transport *hvt) if (hvt->cn_id.idx > 0 && hvt->cn_id.val > 0) cn_del_callback(&hvt->cn_id); - if (mode_old != HVUTIL_TRANSPORT_CHARDEV) - hvt_transport_free(hvt); + if (mode_old == HVUTIL_TRANSPORT_CHARDEV) + wait_for_completion(&hvt->release); + + hvt_transport_free(hvt); } diff --git a/drivers/hv/hv_utils_transport.h b/drivers/hv/hv_utils_transport.h index d98f5225c3e6..79afb626e166 100644 --- a/drivers/hv/hv_utils_transport.h +++ b/drivers/hv/hv_utils_transport.h @@ -41,6 +41,7 @@ struct hvutil_transport { int outmsg_len; /* its length */ wait_queue_head_t outmsg_q; /* poll/read wait queue */ struct mutex lock; /* protects struct members */ + struct completion release; /* synchronize with fd release */ }; struct hvutil_transport *hvutil_transport_init(const char *name, -- GitLab From 8d3a318194ec6a3c52384cbc63894e8dfcc15c9b Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 14 Mar 2017 09:50:35 +0200 Subject: [PATCH 0422/1398] iwlwifi: mvm: cleanup pending frames in DQA mode [ Upstream commit 9a3fcf912ef7f5c6e18f9af6875dd13f7311f7aa ] When a station is asleep, the fw will set it as "asleep". All queues that are used only by one station will be stopped by the fw. In pre-DQA mode this was relevant for aggregation queues. However, in DQA mode a queue is owned by one station only, so all queues will be stopped. As a result, we don't expect to get filtered frames back to mac80211 and don't have to maintain the entire pending_frames state logic, the same way as we do in aggregations. The correct behavior is to align DQA behavior with the aggregation queue behaviour pre-DQA: - Don't count pending frames. - Let mac80211 know we have frames in these queues so that it can properly handle trigger frames. When a trigger frame is received, mac80211 tells the driver to send frames from the queues using release_buffered_frames. The driver will tell the fw to let frames out even if the station is asleep. This is done by iwl_mvm_sta_modify_sleep_tx_count. Reported-and-tested-by: Jens Axboe Reported-by: Linus Torvalds Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 5 ++- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 11 ++--- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 41 ++++++++----------- 4 files changed, 28 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 9789f3c5a785..f1231c0ea336 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2320,7 +2320,7 @@ iwl_mvm_mac_release_buffered_frames(struct ieee80211_hw *hw, { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); - /* Called when we need to transmit (a) frame(s) from agg queue */ + /* Called when we need to transmit (a) frame(s) from agg or dqa queue */ iwl_mvm_sta_modify_sleep_tx_count(mvm, sta, reason, num_frames, tids, more_data, true); @@ -2340,7 +2340,8 @@ static void iwl_mvm_mac_sta_notify(struct ieee80211_hw *hw, for (tid = 0; tid < IWL_MAX_TID_COUNT; tid++) { struct iwl_mvm_tid_data *tid_data = &mvmsta->tid_data[tid]; - if (tid_data->state != IWL_AGG_ON && + if (!iwl_mvm_is_dqa_supported(mvm) && + tid_data->state != IWL_AGG_ON && tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA) continue; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index e64aeb4a2204..bdd1deed55a4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -3032,7 +3032,7 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, struct ieee80211_sta *sta, enum ieee80211_frame_release_type reason, u16 cnt, u16 tids, bool more_data, - bool agg) + bool single_sta_queue) { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); struct iwl_mvm_add_sta_cmd cmd = { @@ -3052,14 +3052,14 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, for_each_set_bit(tid, &_tids, IWL_MAX_TID_COUNT) cmd.awake_acs |= BIT(tid_to_ucode_ac[tid]); - /* If we're releasing frames from aggregation queues then check if the - * all queues combined that we're releasing frames from have + /* If we're releasing frames from aggregation or dqa queues then check + * if all the queues that we're releasing frames from, combined, have: * - more frames than the service period, in which case more_data * needs to be set * - fewer than 'cnt' frames, in which case we need to adjust the * firmware command (but do that unconditionally) */ - if (agg) { + if (single_sta_queue) { int remaining = cnt; int sleep_tx_count; @@ -3069,7 +3069,8 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, u16 n_queued; tid_data = &mvmsta->tid_data[tid]; - if (WARN(tid_data->state != IWL_AGG_ON && + if (WARN(!iwl_mvm_is_dqa_supported(mvm) && + tid_data->state != IWL_AGG_ON && tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA, "TID %d state is %d\n", tid, tid_data->state)) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index e068d5355865..f65950e91ed5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -545,7 +545,7 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, struct ieee80211_sta *sta, enum ieee80211_frame_release_type reason, u16 cnt, u16 tids, bool more_data, - bool agg); + bool single_sta_queue); int iwl_mvm_drain_sta(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta, bool drain); void iwl_mvm_sta_modify_disable_tx(struct iwl_mvm *mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 092ae0024f22..7465d4db136f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -7,7 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH - * Copyright(c) 2016 Intel Deutschland GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -34,6 +34,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -621,8 +622,10 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) * values. * Note that we don't need to make sure it isn't agg'd, since we're * TXing non-sta + * For DQA mode - we shouldn't increase it though */ - atomic_inc(&mvm->pending_frames[sta_id]); + if (!iwl_mvm_is_dqa_supported(mvm)) + atomic_inc(&mvm->pending_frames[sta_id]); return 0; } @@ -1009,11 +1012,8 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, spin_unlock(&mvmsta->lock); - /* Increase pending frames count if this isn't AMPDU */ - if ((iwl_mvm_is_dqa_supported(mvm) && - mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_ON && - mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_STARTING) || - (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu)) + /* Increase pending frames count if this isn't AMPDU or DQA queue */ + if (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu) atomic_inc(&mvm->pending_frames[mvmsta->sta_id]); return 0; @@ -1083,12 +1083,13 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm, lockdep_assert_held(&mvmsta->lock); if ((tid_data->state == IWL_AGG_ON || - tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA) && + tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA || + iwl_mvm_is_dqa_supported(mvm)) && iwl_mvm_tid_queued(tid_data) == 0) { /* - * Now that this aggregation queue is empty tell mac80211 so it - * knows we no longer have frames buffered for the station on - * this TID (for the TIM bitmap calculation.) + * Now that this aggregation or DQA queue is empty tell + * mac80211 so it knows we no longer have frames buffered for + * the station on this TID (for the TIM bitmap calculation.) */ ieee80211_sta_set_buffered(sta, tid, false); } @@ -1261,7 +1262,6 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, u8 skb_freed = 0; u16 next_reclaimed, seq_ctl; bool is_ndp = false; - bool txq_agg = false; /* Is this TXQ aggregated */ __skb_queue_head_init(&skbs); @@ -1287,6 +1287,10 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, info->flags |= IEEE80211_TX_STAT_ACK; break; case TX_STATUS_FAIL_DEST_PS: + /* In DQA, the FW should have stopped the queue and not + * return this status + */ + WARN_ON(iwl_mvm_is_dqa_supported(mvm)); info->flags |= IEEE80211_TX_STAT_TX_FILTERED; break; default: @@ -1391,15 +1395,6 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, bool send_eosp_ndp = false; spin_lock_bh(&mvmsta->lock); - if (iwl_mvm_is_dqa_supported(mvm)) { - enum iwl_mvm_agg_state state; - - state = mvmsta->tid_data[tid].state; - txq_agg = (state == IWL_AGG_ON || - state == IWL_EMPTYING_HW_QUEUE_DELBA); - } else { - txq_agg = txq_id >= mvm->first_agg_queue; - } if (!is_ndp) { tid_data->next_reclaimed = next_reclaimed; @@ -1456,11 +1451,11 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, * If the txq is not an AMPDU queue, there is no chance we freed * several skbs. Check that out... */ - if (txq_agg) + if (iwl_mvm_is_dqa_supported(mvm) || txq_id >= mvm->first_agg_queue) goto out; /* We can't free more than one frame at once on a shared queue */ - WARN_ON(!iwl_mvm_is_dqa_supported(mvm) && (skb_freed > 1)); + WARN_ON(skb_freed > 1); /* If we have still frames for this STA nothing to do here */ if (!atomic_sub_and_test(skb_freed, &mvm->pending_frames[sta_id])) -- GitLab From 0a4d4dac5e342889331a88e70ee24f29c7b64873 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 6 Mar 2017 21:51:28 -0800 Subject: [PATCH 0423/1398] sched/deadline: Add missing update_rq_clock() in dl_task_timer() [ Upstream commit dcc3b5ffe1b32771c9a22e2c916fb94c4fcf5b79 ] The following warning can be triggered by hot-unplugging the CPU on which an active SCHED_DEADLINE task is running on: ------------[ cut here ]------------ WARNING: CPU: 7 PID: 0 at kernel/sched/sched.h:833 replenish_dl_entity+0x71e/0xc40 rq->clock_update_flags < RQCF_ACT_SKIP CPU: 7 PID: 0 Comm: swapper/7 Tainted: G B 4.11.0-rc1+ #24 Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY, BIOS FBKTC1AUS 02/16/2016 Call Trace: dump_stack+0x85/0xc4 __warn+0x172/0x1b0 warn_slowpath_fmt+0xb4/0xf0 ? __warn+0x1b0/0x1b0 ? debug_check_no_locks_freed+0x2c0/0x2c0 ? cpudl_set+0x3d/0x2b0 replenish_dl_entity+0x71e/0xc40 enqueue_task_dl+0x2ea/0x12e0 ? dl_task_timer+0x777/0x990 ? __hrtimer_run_queues+0x270/0xa50 dl_task_timer+0x316/0x990 ? enqueue_task_dl+0x12e0/0x12e0 ? enqueue_task_dl+0x12e0/0x12e0 __hrtimer_run_queues+0x270/0xa50 ? hrtimer_cancel+0x20/0x20 ? hrtimer_interrupt+0x119/0x600 hrtimer_interrupt+0x19c/0x600 ? trace_hardirqs_off+0xd/0x10 local_apic_timer_interrupt+0x74/0xe0 smp_apic_timer_interrupt+0x76/0xa0 apic_timer_interrupt+0x93/0xa0 The DL task will be migrated to a suitable later deadline rq once the DL timer fires and currnet rq is offline. The rq clock of the new rq should be updated. This patch fixes it by updating the rq clock after holding the new rq's rq lock. Signed-off-by: Wanpeng Li Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Matt Fleming Cc: Juri Lelli Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1488865888-15894-1-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/sched/deadline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index c95c5122b105..b0598b2bad88 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -638,6 +638,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) lockdep_unpin_lock(&rq->lock, rf.cookie); rq = dl_task_offline_migration(rq, p); rf.cookie = lockdep_pin_lock(&rq->lock); + update_rq_clock(rq); /* * Now that the task has been migrated to the new RQ and we -- GitLab From 9cc56a00eab7604f2c99ce27665dbb4fa9fa3280 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 2 Mar 2017 15:10:57 +0100 Subject: [PATCH 0424/1398] sched/deadline: Make sure the replenishment timer fires in the next period [ Upstream commit 5ac69d37784b237707a7b15d199cdb6c6fdb6780 ] Currently, the replenishment timer is set to fire at the deadline of a task. Although that works for implicit deadline tasks because the deadline is equals to the begin of the next period, that is not correct for constrained deadline tasks (deadline < period). For instance: f.c: --------------- %< --------------- int main (void) { for(;;); } --------------- >% --------------- # gcc -o f f.c # trace-cmd record -e sched:sched_switch \ -e syscalls:sys_exit_sched_setattr \ chrt -d --sched-runtime 490000000 \ --sched-deadline 500000000 \ --sched-period 1000000000 0 ./f # trace-cmd report | grep "{pid of ./f}" After setting parameters, the task is replenished and continue running until being throttled: f-11295 [003] 13322.113776: sys_exit_sched_setattr: 0x0 The task is throttled after running 492318 ms, as expected: f-11295 [003] 13322.606094: sched_switch: f:11295 [-1] R ==> watchdog/3:32 [0] But then, the task is replenished 500719 ms after the first replenishment: -0 [003] 13322.614495: sched_switch: swapper/3:0 [120] R ==> f:11295 [-1] Running for 490277 ms: f-11295 [003] 13323.104772: sched_switch: f:11295 [-1] R ==> swapper/3:0 [120] Hence, in the first period, the task runs 2 * runtime, and that is a bug. During the first replenishment, the next deadline is set one period away. So the runtime / period starts to be respected. However, as the second replenishment took place in the wrong instant, the next replenishment will also be held in a wrong instant of time. Rather than occurring in the nth period away from the first activation, it is taking place in the (nth period - relative deadline). Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Luca Abeni Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Juri Lelli Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Romulo Silva de Oliveira Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tommaso Cucinotta Link: http://lkml.kernel.org/r/ac50d89887c25285b47465638354b63362f8adff.1488392936.git.bristot@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/sched/deadline.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index b0598b2bad88..0a42be57054e 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -505,10 +505,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se, } } +static inline u64 dl_next_period(struct sched_dl_entity *dl_se) +{ + return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period; +} + /* * If the entity depleted all its runtime, and if we want it to sleep * while waiting for some new execution time to become available, we - * set the bandwidth enforcement timer to the replenishment instant + * set the bandwidth replenishment timer to the replenishment instant * and try to activate it. * * Notice that it is important for the caller to know if the timer @@ -530,7 +535,7 @@ static int start_dl_timer(struct task_struct *p) * that it is actually coming from rq->clock and not from * hrtimer's time base reading. */ - act = ns_to_ktime(dl_se->deadline); + act = ns_to_ktime(dl_next_period(dl_se)); now = hrtimer_cb_get_time(timer); delta = ktime_to_ns(now) - rq_clock(rq); act = ktime_add_ns(act, delta); -- GitLab From a2e29113f1abe5299c0af6ab841cbdddfd427fcf Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 2 Mar 2017 15:10:58 +0100 Subject: [PATCH 0425/1398] sched/deadline: Throttle a constrained deadline task activated after the deadline [ Upstream commit df8eac8cafce7d086be3bd5cf5a838fa37594dfb ] During the activation, CBS checks if it can reuse the current task's runtime and period. If the deadline of the task is in the past, CBS cannot use the runtime, and so it replenishes the task. This rule works fine for implicit deadline tasks (deadline == period), and the CBS was designed for implicit deadline tasks. However, a task with constrained deadline (deadine < period) might be awakened after the deadline, but before the next period. In this case, replenishing the task would allow it to run for runtime / deadline. As in this case deadline < period, CBS enables a task to run for more than the runtime / period. In a very loaded system, this can cause a domino effect, making other tasks miss their deadlines. To avoid this problem, in the activation of a constrained deadline task after the deadline but before the next period, throttle the task and set the replenishing timer to the begin of the next period, unless it is boosted. Reproducer: --------------- %< --------------- int main (int argc, char **argv) { int ret; int flags = 0; unsigned long l = 0; struct timespec ts; struct sched_attr attr; memset(&attr, 0, sizeof(attr)); attr.size = sizeof(attr); attr.sched_policy = SCHED_DEADLINE; attr.sched_runtime = 2 * 1000 * 1000; /* 2 ms */ attr.sched_deadline = 2 * 1000 * 1000; /* 2 ms */ attr.sched_period = 2 * 1000 * 1000 * 1000; /* 2 s */ ts.tv_sec = 0; ts.tv_nsec = 2000 * 1000; /* 2 ms */ ret = sched_setattr(0, &attr, flags); if (ret < 0) { perror("sched_setattr"); exit(-1); } for(;;) { /* XXX: you may need to adjust the loop */ for (l = 0; l < 150000; l++); /* * The ideia is to go to sleep right before the deadline * and then wake up before the next period to receive * a new replenishment. */ nanosleep(&ts, NULL); } exit(0); } --------------- >% --------------- On my box, this reproducer uses almost 50% of the CPU time, which is obviously wrong for a task with 2/2000 reservation. Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Cc: Juri Lelli Cc: Linus Torvalds Cc: Luca Abeni Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Romulo Silva de Oliveira Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tommaso Cucinotta Link: http://lkml.kernel.org/r/edf58354e01db46bf42df8d2dd32418833f68c89.1488392936.git.bristot@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/sched/deadline.c | 45 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 0a42be57054e..6d594e22bbc8 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -695,6 +695,37 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) timer->function = dl_task_timer; } +/* + * During the activation, CBS checks if it can reuse the current task's + * runtime and period. If the deadline of the task is in the past, CBS + * cannot use the runtime, and so it replenishes the task. This rule + * works fine for implicit deadline tasks (deadline == period), and the + * CBS was designed for implicit deadline tasks. However, a task with + * constrained deadline (deadine < period) might be awakened after the + * deadline, but before the next period. In this case, replenishing the + * task would allow it to run for runtime / deadline. As in this case + * deadline < period, CBS enables a task to run for more than the + * runtime / period. In a very loaded system, this can cause a domino + * effect, making other tasks miss their deadlines. + * + * To avoid this problem, in the activation of a constrained deadline + * task after the deadline but before the next period, throttle the + * task and set the replenishing timer to the begin of the next period, + * unless it is boosted. + */ +static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se) +{ + struct task_struct *p = dl_task_of(dl_se); + struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se)); + + if (dl_time_before(dl_se->deadline, rq_clock(rq)) && + dl_time_before(rq_clock(rq), dl_next_period(dl_se))) { + if (unlikely(dl_se->dl_boosted || !start_dl_timer(p))) + return; + dl_se->dl_throttled = 1; + } +} + static int dl_runtime_exceeded(struct sched_dl_entity *dl_se) { @@ -928,6 +959,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se) __dequeue_dl_entity(dl_se); } +static inline bool dl_is_constrained(struct sched_dl_entity *dl_se) +{ + return dl_se->dl_deadline < dl_se->dl_period; +} + static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) { struct task_struct *pi_task = rt_mutex_get_top_task(p); @@ -953,6 +989,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) return; } + /* + * Check if a constrained deadline task was activated + * after the deadline but before the next period. + * If that is the case, the task will be throttled and + * the replenishment timer will be set to the next period. + */ + if (!p->dl.dl_throttled && dl_is_constrained(&p->dl)) + dl_check_constrained_dl(&p->dl); + /* * If p is throttled, we do nothing. In fact, if it exhausted * its budget it needs a replenishment and, since it now is on -- GitLab From 28714e962a719d1898b6cff5d51352bac2135534 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 2 Mar 2017 15:10:59 +0100 Subject: [PATCH 0426/1398] sched/deadline: Use deadline instead of period when calculating overflow [ Upstream commit 2317d5f1c34913bac5971d93d69fb6c31bb74670 ] I was testing Daniel's changes with his test case, and tweaked it a little. Instead of having the runtime equal to the deadline, I increased the deadline ten fold. Daniel's test case had: attr.sched_runtime = 2 * 1000 * 1000; /* 2 ms */ attr.sched_deadline = 2 * 1000 * 1000; /* 2 ms */ attr.sched_period = 2 * 1000 * 1000 * 1000; /* 2 s */ To make it more interesting, I changed it to: attr.sched_runtime = 2 * 1000 * 1000; /* 2 ms */ attr.sched_deadline = 20 * 1000 * 1000; /* 20 ms */ attr.sched_period = 2 * 1000 * 1000 * 1000; /* 2 s */ The results were rather surprising. The behavior that Daniel's patch was fixing came back. The task started using much more than .1% of the CPU. More like 20%. Looking into this I found that it was due to the dl_entity_overflow() constantly returning true. That's because it uses the relative period against relative runtime vs the absolute deadline against absolute runtime. runtime / (deadline - t) > dl_runtime / dl_period There's even a comment mentioning this, and saying that when relative deadline equals relative period, that the equation is the same as using deadline instead of period. That comment is backwards! What we really want is: runtime / (deadline - t) > dl_runtime / dl_deadline We care about if the runtime can make its deadline, not its period. And then we can say "when the deadline equals the period, the equation is the same as using dl_period instead of dl_deadline". After correcting this, now when the task gets enqueued, it can throttle correctly, and Daniel's fix to the throttling of sleeping deadline tasks works even when the runtime and deadline are not the same. Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Bristot de Oliveira Cc: Juri Lelli Cc: Linus Torvalds Cc: Luca Abeni Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Romulo Silva de Oliveira Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tommaso Cucinotta Link: http://lkml.kernel.org/r/02135a27f1ae3fe5fd032568a5a2f370e190e8d7.1488392936.git.bristot@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/sched/deadline.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 6d594e22bbc8..df5c32a0c6ed 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -445,13 +445,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, * * This function returns true if: * - * runtime / (deadline - t) > dl_runtime / dl_period , + * runtime / (deadline - t) > dl_runtime / dl_deadline , * * IOW we can't recycle current parameters. * - * Notice that the bandwidth check is done against the period. For + * Notice that the bandwidth check is done against the deadline. For * task with deadline equal to period this is the same of using - * dl_deadline instead of dl_period in the equation above. + * dl_period instead of dl_deadline in the equation above. */ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, struct sched_dl_entity *pi_se, u64 t) @@ -476,7 +476,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, * of anything below microseconds resolution is actually fiction * (but still we want to give the user that illusion >;). */ - left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); + left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); right = ((dl_se->deadline - t) >> DL_SCALE) * (pi_se->dl_runtime >> DL_SCALE); -- GitLab From 2a84fce9b0396b62c6e3020b5fb9f5378697b6ae Mon Sep 17 00:00:00 2001 From: yong mao Date: Sat, 4 Mar 2017 15:10:03 +0800 Subject: [PATCH 0427/1398] mmc: mediatek: Fixed bug where clock frequency could be set wrong [ Upstream commit 40ceda09c8c84694c2ca6b00bcc6dc71e8e62d96 ] This patch can fix two issues: Issue 1: In previous code, div may be overflow when setting clock frequency as f_min. We can use DIV_ROUND_UP to fix this boundary related issue. Issue 2: In previous code, we can not set the correct clock frequency when div equals 0xff. Signed-off-by: Yong Mao Signed-off-by: Chaotian Jing Reviewed-by: Daniel Kurtz Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/mtk-sd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 84e9afcb5c09..6f9535e5e584 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -579,7 +579,7 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz) } } sdr_set_field(host->base + MSDC_CFG, MSDC_CFG_CKMOD | MSDC_CFG_CKDIV, - (mode << 8) | (div % 0xff)); + (mode << 8) | div); sdr_set_bits(host->base + MSDC_CFG, MSDC_CFG_CKPDN); while (!(readl(host->base + MSDC_CFG) & MSDC_CFG_CKSTB)) cpu_relax(); @@ -1562,7 +1562,7 @@ static int msdc_drv_probe(struct platform_device *pdev) host->src_clk_freq = clk_get_rate(host->src_clk); /* Set host parameters to mmc */ mmc->ops = &mt_msdc_ops; - mmc->f_min = host->src_clk_freq / (4 * 255); + mmc->f_min = DIV_ROUND_UP(host->src_clk_freq, 4 * 255); mmc->caps |= MMC_CAP_ERASE | MMC_CAP_CMD23; /* MMC core transfer sizes tunable parameters */ -- GitLab From b29c7b7c62d5aa8ba3748c6dfebeee19c279fc14 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 15 Mar 2017 21:11:46 -0400 Subject: [PATCH 0428/1398] drm/radeon: reinstate oland workaround for sclk [ Upstream commit 66822d815ae61ecb2d9dba9031517e8a8476969d ] Higher sclks seem to be unstable on some boards. bug: https://bugs.freedesktop.org/show_bug.cgi?id=100222 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 56109b57b776..8bd9e6c371d1 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3030,9 +3030,13 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, max_mclk = 80000; } } else if (rdev->family == CHIP_OLAND) { - if ((rdev->pdev->device == 0x6604) && - (rdev->pdev->subsystem_vendor == 0x1028) && - (rdev->pdev->subsystem_device == 0x066F)) { + if ((rdev->pdev->revision == 0xC7) || + (rdev->pdev->revision == 0x80) || + (rdev->pdev->revision == 0x81) || + (rdev->pdev->revision == 0x83) || + (rdev->pdev->revision == 0x87) || + (rdev->pdev->device == 0x6604) || + (rdev->pdev->device == 0x6605)) { max_sclk = 75000; } } -- GitLab From c250fae9ad4bb1d629b8451c2e07b300e0fb8bb8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:43 +0000 Subject: [PATCH 0429/1398] afs: Fix missing put_page() [ Upstream commit 29c8bbbd6e21daa0997d1c3ee886b897ee7ad652 ] In afs_writepages_region(), inside the loop where we find dirty pages to deal with, one of the if-statements is missing a put_page(). Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/write.c b/fs/afs/write.c index f865c3f05bea..26d809aee78b 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -502,6 +502,7 @@ static int afs_writepages_region(struct address_space *mapping, if (PageWriteback(page) || !PageDirty(page)) { unlock_page(page); + put_page(page); continue; } -- GitLab From ba47c159748068a3371fdc0d77c7c1dcef2b7271 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:43 +0000 Subject: [PATCH 0430/1398] afs: Populate group ID from vnode status [ Upstream commit 6186f0788b31f44affceeedc7b48eb10faea120d ] The group was hard coded to GLOBAL_ROOT_GID; use the group ID that was received from the server. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 86cc7264c21c..df9fa4ddcf3a 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -70,7 +70,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) set_nlink(inode, vnode->status.nlink); inode->i_uid = vnode->status.owner; - inode->i_gid = GLOBAL_ROOT_GID; + inode->i_gid = vnode->status.group; inode->i_size = vnode->status.size; inode->i_ctime.tv_sec = vnode->status.mtime_server; inode->i_ctime.tv_nsec = 0; -- GitLab From 900048089cc135fea39452ec32aa44415e1c8dba Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: [PATCH 0431/1398] afs: Adjust mode bits processing [ Upstream commit 627f46943ff90bcc32ddeb675d881c043c6fa2ae ] Mode bits for an afs file should not be enforced in the usual way. For files, the absence of user bits can restrict file access with respect to what is granted by the server. These bits apply regardless of the owner or the current uid; the rest of the mode bits (group, other) are ignored. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/security.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/afs/security.c b/fs/afs/security.c index 8d010422dc89..bfa9d3428383 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -340,17 +340,22 @@ int afs_permission(struct inode *inode, int mask) } else { if (!(access & AFS_ACE_LOOKUP)) goto permission_denied; + if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR)) + goto permission_denied; if (mask & (MAY_EXEC | MAY_READ)) { if (!(access & AFS_ACE_READ)) goto permission_denied; + if (!(inode->i_mode & S_IRUSR)) + goto permission_denied; } else if (mask & MAY_WRITE) { if (!(access & AFS_ACE_WRITE)) goto permission_denied; + if (!(inode->i_mode & S_IWUSR)) + goto permission_denied; } } key_put(key); - ret = generic_permission(inode, mask); _leave(" = %d", ret); return ret; -- GitLab From eaaad7646d3de73ca30653e635261827242be124 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: [PATCH 0432/1398] afs: Deal with an empty callback array [ Upstream commit bcd89270d93b7edebb5de5e5e7dca1a77a33496e ] Servers may send a callback array that is the same size as the FID array, or an empty array. If the callback count is 0, the code would attempt to read (fid_count * 12) bytes of data, which would fail and result in an unmarshalling error. This would lead to stale data for remotely modified files or directories. Store the callback array size in the internal afs_call structure and use that to determine the amount of data to read. Signed-off-by: Marc Dionne Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/cmservice.c | 11 +++++------ fs/afs/internal.h | 5 ++++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 8d2c5180e015..168f2a4d1180 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -168,7 +168,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) struct afs_callback *cb; struct afs_server *server; __be32 *bp; - u32 tmp; int ret, loop; _enter("{%u}", call->unmarshall); @@ -230,9 +229,9 @@ static int afs_deliver_cb_callback(struct afs_call *call) if (ret < 0) return ret; - tmp = ntohl(call->tmp); - _debug("CB count: %u", tmp); - if (tmp != call->count && tmp != 0) + call->count2 = ntohl(call->tmp); + _debug("CB count: %u", call->count2); + if (call->count2 != call->count && call->count2 != 0) return -EBADMSG; call->offset = 0; call->unmarshall++; @@ -240,14 +239,14 @@ static int afs_deliver_cb_callback(struct afs_call *call) case 4: _debug("extract CB array"); ret = afs_extract_data(call, call->buffer, - call->count * 3 * 4, false); + call->count2 * 3 * 4, false); if (ret < 0) return ret; _debug("unmarshall CB array"); cb = call->request; bp = call->buffer; - for (loop = call->count; loop > 0; loop--, cb++) { + for (loop = call->count2; loop > 0; loop--, cb++) { cb->version = ntohl(*bp++); cb->expiry = ntohl(*bp++); cb->type = ntohl(*bp++); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 535a38d2c1d0..6961f6175529 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -105,7 +105,10 @@ struct afs_call { unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned first_offset; /* offset into mapping[first] */ - unsigned last_to; /* amount of mapping[last] */ + union { + unsigned last_to; /* amount of mapping[last] */ + unsigned count2; /* count used in unmarshalling */ + }; unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ bool send_pages; /* T if data from mapping should be sent */ -- GitLab From 7286fad1570444b924568b13994843ab28b55dd4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:45 +0000 Subject: [PATCH 0433/1398] afs: Flush outstanding writes when an fd is closed [ Upstream commit 58fed94dfb17e89556b5705f20f90e5b2971b6a1 ] Flush outstanding writes in afs when an fd is closed. This is what NFS and CIFS do. Reported-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/file.c | 1 + fs/afs/internal.h | 1 + fs/afs/write.c | 14 ++++++++++++++ 3 files changed, 16 insertions(+) diff --git a/fs/afs/file.c b/fs/afs/file.c index 6344aee4ac4b..72372970725b 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -29,6 +29,7 @@ static int afs_readpages(struct file *filp, struct address_space *mapping, const struct file_operations afs_file_operations = { .open = afs_open, + .flush = afs_flush, .release = afs_release, .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 6961f6175529..67c63aa86b25 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -752,6 +752,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); extern int afs_writeback_all(struct afs_vnode *); +extern int afs_flush(struct file *, fl_owner_t); extern int afs_fsync(struct file *, loff_t, loff_t, int); diff --git a/fs/afs/write.c b/fs/afs/write.c index 26d809aee78b..4ab108e6a3cd 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -735,6 +735,20 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) return ret; } +/* + * Flush out all outstanding writes on a file opened for writing when it is + * closed. + */ +int afs_flush(struct file *file, fl_owner_t id) +{ + _enter(""); + + if ((file->f_mode & FMODE_WRITE) == 0) + return 0; + + return vfs_fsync(file, 0); +} + /* * notification that a previously read-only page is about to become writable * - if it returns an error, the caller will deliver a bus error signal -- GitLab From 7da1b85a75d4dd9b090a242ea162f27d2aea7518 Mon Sep 17 00:00:00 2001 From: Tina Ruchandani Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: [PATCH 0434/1398] afs: Migrate vlocation fields to 64-bit [ Upstream commit 8a79790bf0b7da216627ffb85f52cfb4adbf1e4e ] get_seconds() returns real wall-clock seconds. On 32-bit systems this value will overflow in year 2038 and beyond. This patch changes afs's vlocation record to use ktime_get_real_seconds() instead, for the fields time_of_death and update_at. Signed-off-by: Tina Ruchandani Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/callback.c | 7 ++++--- fs/afs/internal.h | 7 ++++--- fs/afs/server.c | 6 +++--- fs/afs/vlocation.c | 16 +++++++++------- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 1e9d2f84e5b5..1592dc613200 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -362,7 +362,7 @@ static void afs_callback_updater(struct work_struct *work) { struct afs_server *server; struct afs_vnode *vnode, *xvnode; - time_t now; + time64_t now; long timeout; int ret; @@ -370,7 +370,7 @@ static void afs_callback_updater(struct work_struct *work) _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); /* find the first vnode to update */ spin_lock(&server->cb_lock); @@ -424,7 +424,8 @@ static void afs_callback_updater(struct work_struct *work) /* and then reschedule */ _debug("reschedule"); - vnode->update_at = get_seconds() + afs_vnode_update_timeout; + vnode->update_at = ktime_get_real_seconds() + + afs_vnode_update_timeout; spin_lock(&server->cb_lock); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 67c63aa86b25..aaa607a969cf 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -245,7 +246,7 @@ struct afs_cache_vhash { */ struct afs_vlocation { atomic_t usage; - time_t time_of_death; /* time at which put reduced usage to 0 */ + time64_t time_of_death; /* time at which put reduced usage to 0 */ struct list_head link; /* link in cell volume location list */ struct list_head grave; /* link in master graveyard list */ struct list_head update; /* link in master update list */ @@ -256,7 +257,7 @@ struct afs_vlocation { struct afs_cache_vlocation vldb; /* volume information DB record */ struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ wait_queue_head_t waitq; /* status change waitqueue */ - time_t update_at; /* time at which record should be updated */ + time64_t update_at; /* time at which record should be updated */ spinlock_t lock; /* access lock */ afs_vlocation_state_t state; /* volume location state */ unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */ @@ -269,7 +270,7 @@ struct afs_vlocation { */ struct afs_server { atomic_t usage; - time_t time_of_death; /* time at which put reduced usage to 0 */ + time64_t time_of_death; /* time at which put reduced usage to 0 */ struct in_addr addr; /* server address */ struct afs_cell *cell; /* cell in which server resides */ struct list_head link; /* link in cell's server list */ diff --git a/fs/afs/server.c b/fs/afs/server.c index d4066ab7dd55..c001b1f2455f 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -242,7 +242,7 @@ void afs_put_server(struct afs_server *server) spin_lock(&afs_server_graveyard_lock); if (atomic_read(&server->usage) == 0) { list_move_tail(&server->grave, &afs_server_graveyard); - server->time_of_death = get_seconds(); + server->time_of_death = ktime_get_real_seconds(); queue_delayed_work(afs_wq, &afs_server_reaper, afs_server_timeout * HZ); } @@ -277,9 +277,9 @@ static void afs_reap_server(struct work_struct *work) LIST_HEAD(corpses); struct afs_server *server; unsigned long delay, expiry; - time_t now; + time64_t now; - now = get_seconds(); + now = ktime_get_real_seconds(); spin_lock(&afs_server_graveyard_lock); while (!list_empty(&afs_server_graveyard)) { diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 45a86396fd2d..92bd5553b8c9 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -340,7 +340,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) struct afs_vlocation *xvl; /* wait at least 10 minutes before updating... */ - vl->update_at = get_seconds() + afs_vlocation_update_timeout; + vl->update_at = ktime_get_real_seconds() + + afs_vlocation_update_timeout; spin_lock(&afs_vlocation_updates_lock); @@ -506,7 +507,7 @@ void afs_put_vlocation(struct afs_vlocation *vl) if (atomic_read(&vl->usage) == 0) { _debug("buried"); list_move_tail(&vl->grave, &afs_vlocation_graveyard); - vl->time_of_death = get_seconds(); + vl->time_of_death = ktime_get_real_seconds(); queue_delayed_work(afs_wq, &afs_vlocation_reap, afs_vlocation_timeout * HZ); @@ -543,11 +544,11 @@ static void afs_vlocation_reaper(struct work_struct *work) LIST_HEAD(corpses); struct afs_vlocation *vl; unsigned long delay, expiry; - time_t now; + time64_t now; _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); spin_lock(&afs_vlocation_graveyard_lock); while (!list_empty(&afs_vlocation_graveyard)) { @@ -622,13 +623,13 @@ static void afs_vlocation_updater(struct work_struct *work) { struct afs_cache_vlocation vldb; struct afs_vlocation *vl, *xvl; - time_t now; + time64_t now; long timeout; int ret; _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); /* find a record to update */ spin_lock(&afs_vlocation_updates_lock); @@ -684,7 +685,8 @@ static void afs_vlocation_updater(struct work_struct *work) /* and then reschedule */ _debug("reschedule"); - vl->update_at = get_seconds() + afs_vlocation_update_timeout; + vl->update_at = ktime_get_real_seconds() + + afs_vlocation_update_timeout; spin_lock(&afs_vlocation_updates_lock); -- GitLab From 9329ae4cb10e2563b8ec15a3d6a2a15dfe16fd3d Mon Sep 17 00:00:00 2001 From: Tina Ruchandani Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: [PATCH 0435/1398] afs: Prevent callback expiry timer overflow [ Upstream commit 56e714312e7dbd6bb83b2f78d3ec19a404c7649f ] get_seconds() returns real wall-clock seconds. On 32-bit systems this value will overflow in year 2038 and beyond. This patch changes afs_vnode record to use ktime_get_real_seconds() instead, for the fields cb_expires and cb_expires_at. Signed-off-by: Tina Ruchandani Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/fsclient.c | 2 +- fs/afs/inode.c | 7 ++++--- fs/afs/internal.h | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 31c616ab9b40..fed95c34bdc0 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -139,7 +139,7 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode) vnode->cb_version = ntohl(*bp++); vnode->cb_expiry = ntohl(*bp++); vnode->cb_type = ntohl(*bp++); - vnode->cb_expires = vnode->cb_expiry + get_seconds(); + vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds(); *_bp = bp; } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index df9fa4ddcf3a..67295fc49161 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -245,12 +245,13 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, vnode->cb_version = 0; vnode->cb_expiry = 0; vnode->cb_type = 0; - vnode->cb_expires = get_seconds(); + vnode->cb_expires = ktime_get_real_seconds(); } else { vnode->cb_version = cb->version; vnode->cb_expiry = cb->expiry; vnode->cb_type = cb->type; - vnode->cb_expires = vnode->cb_expiry + get_seconds(); + vnode->cb_expires = vnode->cb_expiry + + ktime_get_real_seconds(); } } @@ -323,7 +324,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) && !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) && !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - if (vnode->cb_expires < get_seconds() + 10) { + if (vnode->cb_expires < ktime_get_real_seconds() + 10) { _debug("callback expired"); set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); } else { diff --git a/fs/afs/internal.h b/fs/afs/internal.h index aaa607a969cf..dd98dcda6a3f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -373,8 +373,8 @@ struct afs_vnode { struct rb_node server_rb; /* link in server->fs_vnodes */ struct rb_node cb_promise; /* link in server->cb_promises */ struct work_struct cb_broken_work; /* work to be done on callback break */ - time_t cb_expires; /* time at which callback expires */ - time_t cb_expires_at; /* time used to order cb_promise */ + time64_t cb_expires; /* time at which callback expires */ + time64_t cb_expires_at; /* time used to order cb_promise */ unsigned cb_version; /* callback version */ unsigned cb_expiry; /* callback expiry time */ afs_callback_type_t cb_type; /* type of callback */ -- GitLab From 972e7b7cbf5c82e9b0b963c9443a30dd3354687b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: [PATCH 0436/1398] afs: Fix the maths in afs_fs_store_data() [ Upstream commit 146a1192783697810b63a1e41c4d59fc93387340 ] afs_fs_store_data() works out of the size of the write it's going to make, but it uses 32-bit unsigned subtraction in one place that gets automatically cast to loff_t. However, if to < offset, then the number goes negative, but as the result isn't signed, this doesn't get sign-extended to 64-bits when placed in a loff_t. Fix by casting the operands to loff_t. Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/fsclient.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index fed95c34bdc0..2ae86018fcb7 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1178,7 +1178,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, _enter(",%x,{%x:%u},,", key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); - size = to - offset; + size = (loff_t)to - (loff_t)offset; if (first != last) size += (loff_t)(last - first) << PAGE_SHIFT; pos = (loff_t)first << PAGE_SHIFT; -- GitLab From ab239061161929a3aa048cb514b0183b8742b04c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: [PATCH 0437/1398] afs: Invalid op ID should abort with RXGEN_OPCODE [ Upstream commit 1157f153f37a8586765034470e4f00a4a6c4ce6f ] When we are given an invalid operation ID, we should abort that with RXGEN_OPCODE rather than RX_INVALID_OPERATION. Also map RXGEN_OPCODE to -ENOTSUPP. Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/misc.c | 2 ++ fs/afs/rxrpc.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 91ea1aa0d8b3..100b207efc9e 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -84,6 +84,8 @@ int afs_abort_to_error(u32 abort_code) case RXKADDATALEN: return -EKEYREJECTED; case RXKADILLEGALLEVEL: return -EKEYREJECTED; + case RXGEN_OPCODE: return -ENOTSUPP; + default: return -EREMOTEIO; } } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 25f05a8d21b1..01483f84c0f4 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -440,7 +440,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code, -ret, "KNC"); goto do_abort; case -ENOTSUPP: - abort_code = RX_INVALID_OPERATION; + abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KIV"); goto do_abort; -- GitLab From a3e7a29abf0bf205292ee63bd5d1de54aaa6d18f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: [PATCH 0438/1398] afs: Better abort and net error handling [ Upstream commit 70af0e3bd65142f9e674961c975451638a7ce1d5 ] If we receive a network error, a remote abort or a protocol error whilst we're still transmitting data, make sure we return an appropriate error to the caller rather than ESHUTDOWN or ECONNABORTED. Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/rxrpc.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 01483f84c0f4..ee4c11489edf 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -321,6 +321,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; + size_t offset; + u32 abort_code; int ret; _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); @@ -368,9 +370,11 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, msg.msg_controllen = 0; msg.msg_flags = (call->send_pages ? MSG_MORE : 0); - /* have to change the state *before* sending the last packet as RxRPC - * might give us the reply before it returns from sending the - * request */ + /* We have to change the state *before* sending the last packet as + * rxrpc might give us the reply before it returns from sending the + * request. Further, if the send fails, we may already have been given + * a notification and may have collected it. + */ if (!call->send_pages) call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(afs_socket, rxcall, @@ -389,7 +393,17 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, return wait_mode->wait(call); error_do_abort: - rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD"); + call->state = AFS_CALL_COMPLETE; + if (ret != -ECONNABORTED) { + rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, + -ret, "KSD"); + } else { + abort_code = 0; + offset = 0; + rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset, + false, &abort_code); + ret = call->type->abort_to_error(abort_code); + } error_kill_call: afs_end_call(call); _leave(" = %d", ret); @@ -434,16 +448,18 @@ static void afs_deliver_to_call(struct afs_call *call) case -EINPROGRESS: case -EAGAIN: goto out; + case -ECONNABORTED: + goto call_complete; case -ENOTCONN: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KNC"); - goto do_abort; + goto save_error; case -ENOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KIV"); - goto do_abort; + goto save_error; case -ENODATA: case -EBADMSG: case -EMSGSIZE: @@ -453,7 +469,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, EBADMSG, "KUM"); - goto do_abort; + goto save_error; } } @@ -464,8 +480,9 @@ static void afs_deliver_to_call(struct afs_call *call) _leave(""); return; -do_abort: +save_error: call->error = ret; +call_complete: call->state = AFS_CALL_COMPLETE; goto done; } @@ -513,6 +530,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) _debug("call incomplete"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, RX_CALL_DEAD, -ret, abort_why); + } else if (call->error < 0) { + ret = call->error; } _debug("call complete"); -- GitLab From 833acb3e09dba37c277ed16bc46792c310066264 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: [PATCH 0439/1398] afs: Populate and use client modification time [ Upstream commit ab94f5d0dd6fd82e7eeca5e7c8096eaea0a0261f ] The inode timestamps should be set from the client time in the status received from the server, rather than the server time which is meant for internal server use. Set AFS_SET_MTIME and populate the mtime for operations that take an input status, such as file/dir creation and StoreData. If an input time is not provided the server will set the vnode times based on the current server time. In a situation where the server has some skew with the client, this could lead to the client seeing a timestamp in the future for a file that it just created or wrote. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/fsclient.c | 18 +++++++++--------- fs/afs/inode.c | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 2ae86018fcb7..88e440607ed7 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -105,7 +105,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, vnode->vfs_inode.i_mode = mode; } - vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server; + vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client; vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_version = data_version; @@ -676,8 +676,8 @@ int afs_fs_create(struct afs_server *server, memset(bp, 0, padsz); bp = (void *) bp + padsz; } - *bp++ = htonl(AFS_SET_MODE); - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ @@ -945,8 +945,8 @@ int afs_fs_symlink(struct afs_server *server, memset(bp, 0, c_padsz); bp = (void *) bp + c_padsz; } - *bp++ = htonl(AFS_SET_MODE); - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(S_IRWXUGO); /* unix mode */ @@ -1145,8 +1145,8 @@ static int afs_fs_store_data64(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - *bp++ = 0; /* mask */ - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ @@ -1222,8 +1222,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - *bp++ = 0; /* mask */ - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 67295fc49161..42582e41948f 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -72,7 +72,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_uid = vnode->status.owner; inode->i_gid = vnode->status.group; inode->i_size = vnode->status.size; - inode->i_ctime.tv_sec = vnode->status.mtime_server; + inode->i_ctime.tv_sec = vnode->status.mtime_client; inode->i_ctime.tv_nsec = 0; inode->i_atime = inode->i_mtime = inode->i_ctime; inode->i_blocks = 0; -- GitLab From 856bb4b609ee1cac2a5c74a30c40ad82e782dabf Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: [PATCH 0440/1398] afs: Fix page leak in afs_write_begin() [ Upstream commit 6d06b0d25209c80e99c1e89700f1e09694a3766b ] afs_write_begin() leaks a ref and a lock on a page if afs_fill_page() fails. Fix the leak by unlocking and releasing the page in the error path. Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/write.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 4ab108e6a3cd..f9e5994e80ab 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -148,12 +148,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping, kfree(candidate); return -ENOMEM; } - *pagep = page; - /* page won't leak in error case: it eventually gets cleaned off LRU */ if (!PageUptodate(page) && len != PAGE_SIZE) { ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page); if (ret < 0) { + unlock_page(page); + put_page(page); kfree(candidate); _leave(" = %d [prep]", ret); return ret; @@ -161,6 +161,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping, SetPageUptodate(page); } + /* page won't leak in error case: it eventually gets cleaned off LRU */ + *pagep = page; + try_again: spin_lock(&vnode->writeback_lock); -- GitLab From d43dda072544ecf7890eab6a920fb602c9aafa63 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: [PATCH 0441/1398] afs: Fix afs_kill_pages() [ Upstream commit 7286a35e893176169b09715096a4aca557e2ccd2 ] Fix afs_kill_pages() in two ways: (1) If a writeback has been partially flushed, then if we try and kill the pages it contains, some of them may no longer be undergoing writeback and end_page_writeback() will assert. Fix this by checking to see whether the page in question is actually undergoing writeback before ending that writeback. (2) The loop that scans for pages to kill doesn't increase the first page index, and so the loop may not terminate, but it will try to process the same pages over and over again. Fix this by increasing the first page index to one after the last page we processed. Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/write.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index f9e5994e80ab..3fba2b573c86 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -299,10 +299,14 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, ASSERTCMP(pv.nr, ==, count); for (loop = 0; loop < count; loop++) { - ClearPageUptodate(pv.pages[loop]); + struct page *page = pv.pages[loop]; + ClearPageUptodate(page); if (error) - SetPageError(pv.pages[loop]); - end_page_writeback(pv.pages[loop]); + SetPageError(page); + if (PageWriteback(page)) + end_page_writeback(page); + if (page->index >= first) + first = page->index + 1; } __pagevec_release(&pv); -- GitLab From a8939aac82b0802fa66ca6455ed8bc88913556cf Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:49 +0000 Subject: [PATCH 0442/1398] afs: Fix abort on signal while waiting for call completion [ Upstream commit 954cd6dc02a65065aecb7150962c0870c5b0e322 ] Fix the way in which a call that's in progress and being waited for is aborted in the case that EINTR is detected. We should be sending RX_USER_ABORT rather than RX_CALL_DEAD as the abort code. Note that since the only two ways out of the loop are if the call completes or if a signal happens, the kill-the-call clause after the loop has finished can only happen in the case of EINTR. This means that we only have one abort case to deal with, not two, and the "KWC" case can never happen and so can be deleted. Note further that simply aborting the call isn't necessarily the best thing here since at this point: the request has been entirely sent and it's likely the server will do the operation anyway - whether we abort it or not. In future, we should punt the handling of the remainder of the call off to a background thread. Reported-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/rxrpc.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ee4c11489edf..523b1d3ca2c6 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -492,7 +492,6 @@ static void afs_deliver_to_call(struct afs_call *call) */ static int afs_wait_for_call_to_complete(struct afs_call *call) { - const char *abort_why; int ret; DECLARE_WAITQUEUE(myself, current); @@ -511,13 +510,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) continue; } - abort_why = "KWC"; - ret = call->error; - if (call->state == AFS_CALL_COMPLETE) - break; - abort_why = "KWI"; - ret = -EINTR; - if (signal_pending(current)) + if (call->state == AFS_CALL_COMPLETE || + signal_pending(current)) break; schedule(); } @@ -525,15 +519,14 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); - /* kill the call */ + /* Kill off the call if it's still live. */ if (call->state < AFS_CALL_COMPLETE) { - _debug("call incomplete"); + _debug("call interrupted"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_CALL_DEAD, -ret, abort_why); - } else if (call->error < 0) { - ret = call->error; + RX_USER_ABORT, -EINTR, "KWI"); } + ret = call->error; _debug("call complete"); afs_end_call(call); _leave(" = %d", ret); -- GitLab From af0cee086b0920612de5d6ad2a8f406792b5aa9d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 27 Feb 2017 18:44:45 +0200 Subject: [PATCH 0443/1398] nvme-loop: fix a possible use-after-free when destroying the admin queue [ Upstream commit e4c5d3762e2d6d274bd1cc948c47063becfa2103 ] we need to destroy the nvmet sq and let it finish gracefully before continue to cleanup the queue. Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index d5df77d686b2..c8e612c1c72f 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -288,9 +288,9 @@ static struct blk_mq_ops nvme_loop_admin_mq_ops = { static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) { + nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); blk_cleanup_queue(ctrl->ctrl.admin_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); - nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); } static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) -- GitLab From 571e47760de55281834f28a54f75d51285e7787d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 6 Mar 2017 18:46:20 +0200 Subject: [PATCH 0444/1398] nvmet: confirm sq percpu has scheduled and switched to atomic [ Upstream commit d11ea004a458b982e19b188c386e25a9b66ec446 ] percpu_ref_kill is not enough to prevent subsequent percpu_ref_tryget_live from failing. Hence call perfcpu_ref_kill_confirm to make it safe. Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/core.c | 11 ++++++++++- drivers/nvme/target/nvmet.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index fbd6d487103f..c89d68a76f3d 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -422,6 +422,13 @@ void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, ctrl->sqs[qid] = sq; } +static void nvmet_confirm_sq(struct percpu_ref *ref) +{ + struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref); + + complete(&sq->confirm_done); +} + void nvmet_sq_destroy(struct nvmet_sq *sq) { /* @@ -430,7 +437,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) */ if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq) nvmet_async_events_free(sq->ctrl); - percpu_ref_kill(&sq->ref); + percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); + wait_for_completion(&sq->confirm_done); wait_for_completion(&sq->free_done); percpu_ref_exit(&sq->ref); @@ -458,6 +466,7 @@ int nvmet_sq_init(struct nvmet_sq *sq) return ret; } init_completion(&sq->free_done); + init_completion(&sq->confirm_done); return 0; } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 7655a351320f..26b87dc843d2 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -73,6 +73,7 @@ struct nvmet_sq { u16 qid; u16 size; struct completion free_done; + struct completion confirm_done; }; /** -- GitLab From 65bfe003dcebd1144edacedb43c2f96d813bab79 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 9 Mar 2017 13:45:52 +0200 Subject: [PATCH 0445/1398] nvmet-rdma: Fix a possible uninitialized variable dereference [ Upstream commit b25634e2a051bef4b2524b11adddfbfa6448f6cd ] When handling a new recv command, we grab a new rsp resource and check for the queue state being live. In case the queue is not in live state, we simply restore the rsp back to the free list. However in this flow we didn't set rsp->queue yet, so we cannot dereference it. Instead, make sure to initialize rsp->queue (and other rsp members) as soon as possible so we won't reference uninitialized variables. Reported-by: Yi Zhang Reported-by: Raju Rangoju Reviewed-by: Christoph Hellwig Tested-by: Raju Rangoju Signed-off-by: Sagi Grimberg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/rdma.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index ca8ddc3fb19e..53bd32550867 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -703,11 +703,6 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue, { u16 status; - cmd->queue = queue; - cmd->n_rdma = 0; - cmd->req.port = queue->port; - - ib_dma_sync_single_for_cpu(queue->dev->device, cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length, DMA_FROM_DEVICE); @@ -760,9 +755,12 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) cmd->queue = queue; rsp = nvmet_rdma_get_rsp(queue); + rsp->queue = queue; rsp->cmd = cmd; rsp->flags = 0; rsp->req.cmd = cmd->nvme_cmd; + rsp->req.port = queue->port; + rsp->n_rdma = 0; if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) { unsigned long flags; -- GitLab From fd27dbcae9371d0f15b3f0fcb69a544e41d819ba Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 13 Mar 2017 19:29:08 +0200 Subject: [PATCH 0446/1398] net/mlx4_core: Avoid delays during VF driver device shutdown [ Upstream commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4 ] Some Hypervisors detach VFs from VMs by instantly causing an FLR event to be generated for a VF. In the mlx4 case, this will cause that VF's comm channel to be disabled before the VM has an opportunity to invoke the VF device's "shutdown" method. For such Hypervisors, there is a race condition between the VF's shutdown method and its internal-error detection/reset thread. The internal-error detection/reset thread (which runs every 5 seconds) also detects a disabled comm channel. If the internal-error detection/reset flow wins the race, we still get delays (while that flow tries repeatedly to detect comm-channel recovery). The cited commit fixed the command timeout problem when the internal-error detection/reset flow loses the race. This commit avoids the unneeded delays when the internal-error detection/reset flow wins. Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") Signed-off-by: Jack Morgenstein Reported-by: Simon Xiao Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ include/linux/mlx4/device.h | 1 + 3 files changed, 23 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index e36bebcab3f2..dae9dcfa8f36 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2304,6 +2304,17 @@ static int sync_toggles(struct mlx4_dev *dev) rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { /* PCI might be offline */ + + /* If device removal has been requested, + * do not continue retrying. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) { + mlx4_warn(dev, + "communication channel is offline\n"); + return -EIO; + } + msleep(100); wr_toggle = swab32(readl(&priv->mfunc.comm-> slave_write)); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 727122de7df0..5411ca48978a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1940,6 +1940,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); if (!offline_bit) return 0; + + /* If device removal has been requested, + * do not continue retrying. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) + break; + /* There are cases as part of AER/Reset flow that PF needs * around 100 msec to load. We therefore sleep for 100 msec * to allow other tasks to make use of that CPU during this @@ -3954,6 +3962,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(priv); int active_vfs = 0; + if (mlx4_is_slave(dev)) + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; + mutex_lock(&persist->interface_state_mutex); persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; mutex_unlock(&persist->interface_state_mutex); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 80faf44b8887..dd1b009106a5 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -476,6 +476,7 @@ enum { enum { MLX4_INTERFACE_STATE_UP = 1 << 0, MLX4_INTERFACE_STATE_DELETION = 1 << 1, + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ -- GitLab From 42b6d6e824d349eef8a3d1925a2667564a31b8b8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 13 Mar 2017 16:49:10 -0700 Subject: [PATCH 0447/1398] net: mpls: Fix nexthop alive tracking on down events [ Upstream commit 61733c91c454a61be0ffc93fe46a5d5f2f048c1c ] Alive tracking of nexthops can account for a link twice if the carrier goes down followed by an admin down of the same link rendering multipath routes useless. This is similar to 79099aab38c8 for UNREGISTER events and DOWN events. Fix by tracking number of alive nexthops in mpls_ifdown similar to the logic in mpls_ifup. Checking the flags per nexthop once after all events have been processed is simpler than trying to maintian a running count through all event combinations. Also, WRITE_ONCE is used instead of ACCESS_ONCE to set rt_nhn_alive per a comment from checkpatch: WARNING: Prefer WRITE_ONCE(, ) over ACCESS_ONCE() = Fixes: c89359a42e2a4 ("mpls: support for dead routes") Signed-off-by: David Ahern Acked-by: Robert Shearman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mpls/af_mpls.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 1309e2c34764..c5a5a6959c1b 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -937,6 +937,8 @@ static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); + unsigned int nh_flags = RTNH_F_DEAD | RTNH_F_LINKDOWN; + unsigned int alive; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); @@ -946,9 +948,11 @@ static void mpls_ifdown(struct net_device *dev, int event) if (!rt) continue; + alive = 0; change_nexthops(rt) { if (rtnl_dereference(nh->nh_dev) != dev) - continue; + goto next; + switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: @@ -956,13 +960,16 @@ static void mpls_ifdown(struct net_device *dev, int event) /* fall through */ case NETDEV_CHANGE: nh->nh_flags |= RTNH_F_LINKDOWN; - if (event != NETDEV_UNREGISTER) - ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; break; } if (event == NETDEV_UNREGISTER) RCU_INIT_POINTER(nh->nh_dev, NULL); +next: + if (!(nh->nh_flags & nh_flags)) + alive++; } endfor_nexthops(rt); + + WRITE_ONCE(rt->rt_nhn_alive, alive); } } -- GitLab From 3d57ec51d204d220d0eff1e903fb58af86a889de Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:10 +0000 Subject: [PATCH 0448/1398] rxrpc: Ignore BUSY packets on old calls [ Upstream commit 4d4a6ac73e7466c2085c307fac41f74ce4568a45 ] If we receive a BUSY packet for a call we think we've just completed, the packet is handed off to the connection processor to deal with - but the connection processor doesn't expect a BUSY packet and so flags a protocol error. Fix this by simply ignoring the BUSY packet for the moment. The symptom of this may appear as a system call failing with EPROTO. This may be triggered by pressing ctrl-C under some circumstances. This comes about we abort calls due to interruption by a signal (which we shouldn't do, but that's going to be a large fix and mostly in fs/afs/). What happens is that we abort the call and may also abort follow up calls too (this needs offloading somehoe). So we see a transmission of something like the following sequence of packets: DATA for call N ABORT call N DATA for call N+1 ABORT call N+1 in very quick succession on the same channel. However, the peer may have deferred the processing of the ABORT from the call N to a background thread and thus sees the DATA message from the call N+1 coming in before it has cleared the channel. Thus it sends a BUSY packet[*]. [*] Note that some implementations (OpenAFS, for example) mark the BUSY packet with one plus the callNumber of the call prior to call N. Ordinarily, this would be call N, but there's no requirement for the calls on a channel to be numbered strictly sequentially (the number is required to increase). This is wrong and means that the callNumber in the BUSY packet should be ignored (it really ought to be N+1 since that's what it's in response to). Signed-off-by: David Howells Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/conn_event.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 3f9d8d7ec632..b099b64366f3 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -275,6 +275,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, rxrpc_conn_retransmit_call(conn, skb); return 0; + case RXRPC_PACKET_TYPE_BUSY: + /* Just ignore BUSY packets for now. */ + return 0; + case RXRPC_PACKET_TYPE_ABORT: if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &wtmp, sizeof(wtmp)) < 0) -- GitLab From 70f450fc8610464d4ac2274b403b8b15797c27af Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Sat, 4 Mar 2017 14:55:19 +0100 Subject: [PATCH 0449/1398] tty: don't panic on OOM in tty_set_ldisc() [ Upstream commit 5362544bebe85071188dd9e479b5a5040841c895 ] If tty_ldisc_open() fails in tty_set_ldisc(), it tries to go back to the old discipline or N_TTY. But that can fail as well, in such case it panics. This is not a graceful way to handle OOM. Leave ldisc==NULL if all attempts fail instead. Also use existing tty_ldisc_reinit() helper function instead of tty_ldisc_restore(). Also don't WARN/BUG in tty_ldisc_reinit() if N_TTY fails, which would have the same net effect of bringing kernel down on OOM. Instead print a single line message about what has happened. Signed-off-by: Dmitry Vyukov Cc: syzkaller@googlegroups.com Cc: linux-kernel@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Peter Hurley Cc: One Thousand Gnomes Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ldisc.c | 85 ++++++++--------------------------------- 1 file changed, 16 insertions(+), 69 deletions(-) diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 68947f6de5ad..c3956ca022e4 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -488,41 +488,6 @@ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) tty_ldisc_debug(tty, "%p: closed\n", ld); } -/** - * tty_ldisc_restore - helper for tty ldisc change - * @tty: tty to recover - * @old: previous ldisc - * - * Restore the previous line discipline or N_TTY when a line discipline - * change fails due to an open error - */ - -static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) -{ - struct tty_ldisc *new_ldisc; - int r; - - /* There is an outstanding reference here so this is safe */ - old = tty_ldisc_get(tty, old->ops->num); - WARN_ON(IS_ERR(old)); - tty->ldisc = old; - tty_set_termios_ldisc(tty, old->ops->num); - if (tty_ldisc_open(tty, old) < 0) { - tty_ldisc_put(old); - /* This driver is always present */ - new_ldisc = tty_ldisc_get(tty, N_TTY); - if (IS_ERR(new_ldisc)) - panic("n_tty: get"); - tty->ldisc = new_ldisc; - tty_set_termios_ldisc(tty, N_TTY); - r = tty_ldisc_open(tty, new_ldisc); - if (r < 0) - panic("Couldn't open N_TTY ldisc for " - "%s --- error %d.", - tty_name(tty), r); - } -} - /** * tty_set_ldisc - set line discipline * @tty: the terminal to set @@ -536,12 +501,7 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) int tty_set_ldisc(struct tty_struct *tty, int disc) { - int retval; - struct tty_ldisc *old_ldisc, *new_ldisc; - - new_ldisc = tty_ldisc_get(tty, disc); - if (IS_ERR(new_ldisc)) - return PTR_ERR(new_ldisc); + int retval, old_disc; tty_lock(tty); retval = tty_ldisc_lock(tty, 5 * HZ); @@ -554,7 +514,8 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) } /* Check the no-op case */ - if (tty->ldisc->ops->num == disc) + old_disc = tty->ldisc->ops->num; + if (old_disc == disc) goto out; if (test_bit(TTY_HUPPED, &tty->flags)) { @@ -563,34 +524,25 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) goto out; } - old_ldisc = tty->ldisc; - - /* Shutdown the old discipline. */ - tty_ldisc_close(tty, old_ldisc); - - /* Now set up the new line discipline. */ - tty->ldisc = new_ldisc; - tty_set_termios_ldisc(tty, disc); - - retval = tty_ldisc_open(tty, new_ldisc); + retval = tty_ldisc_reinit(tty, disc); if (retval < 0) { /* Back to the old one or N_TTY if we can't */ - tty_ldisc_put(new_ldisc); - tty_ldisc_restore(tty, old_ldisc); + if (tty_ldisc_reinit(tty, old_disc) < 0) { + pr_err("tty: TIOCSETD failed, reinitializing N_TTY\n"); + if (tty_ldisc_reinit(tty, N_TTY) < 0) { + /* At this point we have tty->ldisc == NULL. */ + pr_err("tty: reinitializing N_TTY failed\n"); + } + } } - if (tty->ldisc->ops->num != old_ldisc->ops->num && tty->ops->set_ldisc) { + if (tty->ldisc && tty->ldisc->ops->num != old_disc && + tty->ops->set_ldisc) { down_read(&tty->termios_rwsem); tty->ops->set_ldisc(tty); up_read(&tty->termios_rwsem); } - /* At this point we hold a reference to the new ldisc and a - reference to the old ldisc, or we hold two references to - the old ldisc (if it was restored as part of error cleanup - above). In either case, releasing a single reference from - the old ldisc is correct. */ - new_ldisc = old_ldisc; out: tty_ldisc_unlock(tty); @@ -598,7 +550,6 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) already running */ tty_buffer_restart_work(tty->port); err: - tty_ldisc_put(new_ldisc); /* drop the extra reference */ tty_unlock(tty); return retval; } @@ -659,10 +610,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) int retval; ld = tty_ldisc_get(tty, disc); - if (IS_ERR(ld)) { - BUG_ON(disc == N_TTY); + if (IS_ERR(ld)) return PTR_ERR(ld); - } if (tty->ldisc) { tty_ldisc_close(tty, tty->ldisc); @@ -674,10 +623,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) tty_set_termios_ldisc(tty, disc); retval = tty_ldisc_open(tty, tty->ldisc); if (retval) { - if (!WARN_ON(disc == N_TTY)) { - tty_ldisc_put(tty->ldisc); - tty->ldisc = NULL; - } + tty_ldisc_put(tty->ldisc); + tty->ldisc = NULL; } return retval; } -- GitLab From 2a47e7de0822ec4794c6a44d8a43c66c49f4d355 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Sat, 4 Mar 2017 13:46:12 +0100 Subject: [PATCH 0450/1398] tty: fix data race in tty_ldisc_ref_wait() [ Upstream commit a4a3e061149f09c075f108b6f1cf04d9739a6bc2 ] tty_ldisc_ref_wait() checks tty->ldisc under tty->ldisc_sem. But if ldisc==NULL it releases them sem and reloads tty->ldisc without holding the sem. This is wrong and can lead to returning non-NULL ldisc without protection. Don't reload tty->ldisc second time. Signed-off-by: Dmitry Vyukov Cc: syzkaller@googlegroups.com Cc: linux-kernel@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Peter Hurley Cc: One Thousand Gnomes Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ldisc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index c3956ca022e4..b0500a0a87b8 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -271,10 +271,13 @@ const struct file_operations tty_ldiscs_proc_fops = { struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { + struct tty_ldisc *ld; + ldsem_down_read(&tty->ldisc_sem, MAX_SCHEDULE_TIMEOUT); - if (!tty->ldisc) + ld = tty->ldisc; + if (!ld) ldsem_up_read(&tty->ldisc_sem); - return tty->ldisc; + return ld; } EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); -- GitLab From e30b840d466898a02df6986dbb633b967f0ea594 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Mar 2017 22:53:37 +0100 Subject: [PATCH 0451/1398] perf symbols: Fix symbols__fixup_end heuristic for corner cases [ Upstream commit e7ede72a6d40cb3a30c087142d79381ca8a31dab ] The current symbols__fixup_end() heuristic for the last entry in the rb tree is suboptimal as it leads to not being able to recognize the symbol in the call graph in a couple of corner cases, for example: i) If the symbol has a start address (f.e. exposed via kallsyms) that is at a page boundary, then the roundup(curr->start, 4096) for the last entry will result in curr->start == curr->end with a symbol length of zero. ii) If the symbol has a start address that is shortly before a page boundary, then also here, curr->end - curr->start will just be very few bytes, where it's unrealistic that we could perform a match against. Instead, change the heuristic to roundup(curr->start, 4096) + 4096, so that we can catch such corner cases and have a better chance to find that specific symbol. It's still just best effort as the real end of the symbol is unknown to us (and could even be at a larger offset than the current range), but better than the current situation. Alexei reported that he recently run into case i) with a JITed eBPF program (these are all page aligned) as the last symbol which wasn't properly shown in the call graph (while other eBPF program symbols in the rb tree were displayed correctly). Since this is a generic issue, lets try to improve the heuristic a bit. Reported-and-Tested-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Fixes: 2e538c4a1847 ("perf tools: Improve kernel/modules symbol lookup") Link: http://lkml.kernel.org/r/bb5c80d27743be6f12afc68405f1956a330e1bc9.1489614365.git.daniel@iogearbox.net Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f7b35e178582..f199d5b11d76 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -202,7 +202,7 @@ void symbols__fixup_end(struct rb_root *symbols) /* Last entry */ if (curr->end == curr->start) - curr->end = roundup(curr->start, 4096); + curr->end = roundup(curr->start, 4096) + 4096; } void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) -- GitLab From 88af4e34771c88a09ed722bbf4346a2ddfee6dbe Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 7 Feb 2017 13:08:23 -0600 Subject: [PATCH 0452/1398] efi/esrt: Cleanup bad memory map log messages [ Upstream commit 822f5845f710e57d7e2df1fd1ee00d6e19d334fe ] The Intel Compute Stick STCK1A8LFC and Weibu F3C platforms both log 2 error messages during boot: efi: requested map not found. esrt: ESRT header is not in the memory map. Searching the web, this seems to affect many other platforms too. Since these messages are logged as errors, they appear on-screen during the boot process even when using the "quiet" boot parameter used by distros. Demote the ESRT error to a warning so that it does not appear on-screen, and delete the error logging from efi_mem_desc_lookup; both callsites of that function log more specific messages upon failure. Out of curiosity I looked closer at the Weibu F3C. There is no entry in the UEFI-provided memory map which corresponds to the ESRT pointer, but hacking the code to map it anyway, the ESRT does appear to be valid with 2 entries. Signed-off-by: Daniel Drake Cc: Matt Fleming Acked-by: Peter Jones Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efi.c | 1 - drivers/firmware/efi/esrt.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 2f48f848865f..2f47c5b5f4cb 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -384,7 +384,6 @@ int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md) return 0; } } - pr_err_once("requested map not found.\n"); return -ENOENT; } diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index 307ec1c11276..311c9d0e8cbb 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -251,7 +251,7 @@ void __init efi_esrt_init(void) rc = efi_mem_desc_lookup(efi.esrt, &md); if (rc < 0) { - pr_err("ESRT header is not in the memory map.\n"); + pr_warn("ESRT header is not in the memory map.\n"); return; } -- GitLab From 5d460d359abefe59e1cac738d37296f3cd4bc1f8 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 8 Mar 2017 14:39:15 -0500 Subject: [PATCH 0453/1398] NFSv4.1 respect server's max size in CREATE_SESSION [ Upstream commit 033853325fe3bdc70819a8b97915bd3bca41d3af ] Currently client doesn't respect max sizes server returns in CREATE_SESSION. nfs4_session_set_rwsize() gets called and server->rsize, server->wsize are 0 so they never get set to the sizes returned by the server. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 074ac7131459..f6b0848cc831 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1004,9 +1004,9 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; - if (server->rsize > server_resp_sz) + if (!server->rsize || server->rsize > server_resp_sz) server->rsize = server_resp_sz; - if (server->wsize > server_rqst_sz) + if (!server->wsize || server->wsize > server_rqst_sz) server->wsize = server_rqst_sz; #endif /* CONFIG_NFS_V4_1 */ } -- GitLab From 8f60ef94477cb0dd3023b337640ea634886ac1d1 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Fri, 10 Mar 2017 16:45:44 -0500 Subject: [PATCH 0454/1398] btrfs: add missing memset while reading compressed inline extents [ Upstream commit e1699d2d7bf6e6cce3e1baff19f9dd4595a58664 ] This is a story about 4 distinct (and very old) btrfs bugs. Commit c8b978188c ("Btrfs: Add zlib compression support") added three data corruption bugs for inline extents (bugs #1-3). Commit 93c82d5750 ("Btrfs: zero page past end of inline file items") fixed bug #1: uncompressed inline extents followed by a hole and more extents could get non-zero data in the hole as they were read. The fix was to add a memset in btrfs_get_extent to zero out the hole. Commit 166ae5a418 ("btrfs: fix inline compressed read err corruption") fixed bug #2: compressed inline extents which contained non-zero bytes might be replaced with zero bytes in some cases. This patch removed an unhelpful memset from uncompress_inline, but the case where memset is required was missed. There is also a memset in the decompression code, but this only covers decompressed data that is shorter than the ram_bytes from the extent ref record. This memset doesn't cover the region between the end of the decompressed data and the end of the page. It has also moved around a few times over the years, so there's no single patch to refer to. This patch fixes bug #3: compressed inline extents followed by a hole and more extents could get non-zero data in the hole as they were read (i.e. bug #3 is the same as bug #1, but s/uncompressed/compressed/). The fix is the same: zero out the hole in the compressed case too, by putting a memset back in uncompress_inline, but this time with correct parameters. The last and oldest bug, bug #0, is the cause of the offending inline extent/hole/extent pattern. Bug #0 is a subtle and mostly-harmless quirk of behavior somewhere in the btrfs write code. In a few special cases, an inline extent and hole are allowed to persist where they normally would be combined with later extents in the file. A fast reproducer for bug #0 is presented below. A few offending extents are also created in the wild during large rsync transfers with the -S flag. A Linux kernel build (git checkout; make allyesconfig; make -j8) will produce a handful of offending files as well. Once an offending file is created, it can present different content to userspace each time it is read. Bug #0 is at least 4 and possibly 8 years old. I verified every vX.Y kernel back to v3.5 has this behavior. There are fossil records of this bug's effects in commits all the way back to v2.6.32. I have no reason to believe bug #0 wasn't present at the beginning of btrfs compression support in v2.6.29, but I can't easily test kernels that old to be sure. It is not clear whether bug #0 is worth fixing. A fix would likely require injecting extra reads into currently write-only paths, and most of the exceptional cases caused by bug #0 are already handled now. Whether we like them or not, bug #0's inline extents followed by holes are part of the btrfs de-facto disk format now, and we need to be able to read them without data corruption or an infoleak. So enough about bug #0, let's get back to bug #3 (this patch). An example of on-disk structure leading to data corruption found in the wild: item 61 key (606890 INODE_ITEM 0) itemoff 9662 itemsize 160 inode generation 50 transid 50 size 47424 nbytes 49141 block group 0 mode 100644 links 1 uid 0 gid 0 rdev 0 flags 0x0(none) item 62 key (606890 INODE_REF 603050) itemoff 9642 itemsize 20 inode ref index 3 namelen 10 name: DB_File.so item 63 key (606890 EXTENT_DATA 0) itemoff 8280 itemsize 1362 inline extent data size 1341 ram 4085 compress(zlib) item 64 key (606890 EXTENT_DATA 4096) itemoff 8227 itemsize 53 extent data disk byte 5367308288 nr 20480 extent data offset 0 nr 45056 ram 45056 extent compression(zlib) Different data appears in userspace during each read of the 11 bytes between 4085 and 4096. The extent in item 63 is not long enough to fill the first page of the file, so a memset is required to fill the space between item 63 (ending at 4085) and item 64 (beginning at 4096) with zero. Here is a reproducer from Liu Bo, which demonstrates another method of creating the same inline extent and hole pattern: Using 'page_poison=on' kernel command line (or enable CONFIG_PAGE_POISONING) run the following: # touch foo # chattr +c foo # xfs_io -f -c "pwrite -W 0 1000" foo # xfs_io -f -c "falloc 4 8188" foo # od -x foo # echo 3 >/proc/sys/vm/drop_caches # od -x foo This produce the following on my box: Correct output: file contains 1000 data bytes followed by zeros: 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0001740 cdcd cdcd cdcd cdcd 0000 0000 0000 0000 0001760 0000 0000 0000 0000 0000 0000 0000 0000 * 0020000 Actual output: the data after the first 1000 bytes will be different each run: 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0001740 cdcd cdcd cdcd cdcd 6c63 7400 635f 006d 0001760 5f74 6f43 7400 435f 0053 5f74 7363 7400 0002000 435f 0056 5f74 6164 7400 645f 0062 5f74 (...) Signed-off-by: Zygo Blaxell Reviewed-by: Liu Bo Reviewed-by: Chris Mason Signed-off-by: Chris Mason Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f089d7d8afe7..894d56361ea9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6812,6 +6812,20 @@ static noinline int uncompress_inline(struct btrfs_path *path, max_size = min_t(unsigned long, PAGE_SIZE, max_size); ret = btrfs_decompress(compress_type, tmp, page, extent_offset, inline_size, max_size); + + /* + * decompression code contains a memset to fill in any space between the end + * of the uncompressed data and the end of max_size in case the decompressed + * data ends up shorter than ram_bytes. That doesn't cover the hole between + * the end of an inline extent and the beginning of the next block, so we + * cover that region here. + */ + + if (max_size + pg_offset < PAGE_SIZE) { + char *map = kmap(page); + memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset); + kunmap(page); + } kfree(tmp); return ret; } -- GitLab From 0d34f4770ea11c92af2c6a15c73aaebc75ea94ed Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:13:26 -0600 Subject: [PATCH 0455/1398] target: Use system workqueue for ALUA transitions [ Upstream commit 207ee84133c00a8a2a5bdec94df4a5b37d78881c ] If tcmu-runner is processing a STPG and needs to change the kernel's ALUA state then we cannot use the same work queue for task management requests and ALUA transitions, because we could deadlock. The problem occurs when a STPG times out before tcmu-runner is able to call into target_tg_pt_gp_alua_access_state_store-> core_alua_do_port_transition -> core_alua_do_transition_tg_pt -> queue_work. In this case, the tmr is on the work queue waiting for the STPG to complete, but the STPG transition is now queued behind the waiting tmr. Note: This bug will also be fixed by this patch: http://www.spinics.net/lists/target-devel/msg14560.html which switches the tmr code to use the system workqueues. For both, I am not sure if we need a dedicated workqueue since it is not a performance path and I do not think we need WQ_MEM_RECLAIM to make forward progress to free up memory like the block layer does. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_alua.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 4c82bbe19003..60891241858e 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1118,13 +1118,11 @@ static int core_alua_do_transition_tg_pt( unsigned long transition_tmo; transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ; - queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq, - &tg_pt_gp->tg_pt_gp_transition_work, - transition_tmo); + schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, + transition_tmo); } else { tg_pt_gp->tg_pt_gp_transition_complete = &wait; - queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq, - &tg_pt_gp->tg_pt_gp_transition_work, 0); + schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, 0); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; } -- GitLab From 892e4f9bc2e218b0126bb4ae7191be366fb4b421 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 2 Mar 2017 04:59:48 -0600 Subject: [PATCH 0456/1398] target: fix ALUA transition timeout handling [ Upstream commit d7175373f2745ed4abe5b388d5aabd06304f801e ] The implicit transition time tells initiators the min time to wait before timing out a transition. We currently schedule the transition to occur in tg_pt_gp_implicit_trans_secs seconds so there is no room for delays. If core_alua_do_transition_tg_pt_work->core_alua_update_tpg_primary_metadata needs to write out info to a remote file, then the initiator can easily time out the operation. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_alua.c | 23 ++++++++--------------- include/target/target_core_base.h | 2 +- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 60891241858e..dc06f723133e 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1010,7 +1010,7 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) static void core_alua_do_transition_tg_pt_work(struct work_struct *work) { struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work, - struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work); + struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work); struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status == ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG); @@ -1073,13 +1073,12 @@ static int core_alua_do_transition_tg_pt( /* * Flush any pending transitions */ - if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs && - atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == + if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == ALUA_ACCESS_STATE_TRANSITION) { /* Just in case */ tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; tg_pt_gp->tg_pt_gp_transition_complete = &wait; - flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work); + flush_work(&tg_pt_gp->tg_pt_gp_transition_work); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; return 0; @@ -1114,15 +1113,9 @@ static int core_alua_do_transition_tg_pt( atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt); spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) { - unsigned long transition_tmo; - - transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ; - schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, - transition_tmo); - } else { + schedule_work(&tg_pt_gp->tg_pt_gp_transition_work); + if (explicit) { tg_pt_gp->tg_pt_gp_transition_complete = &wait; - schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, 0); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; } @@ -1690,8 +1683,8 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev, mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex); spin_lock_init(&tg_pt_gp->tg_pt_gp_lock); atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0); - INIT_DELAYED_WORK(&tg_pt_gp->tg_pt_gp_transition_work, - core_alua_do_transition_tg_pt_work); + INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work, + core_alua_do_transition_tg_pt_work); tg_pt_gp->tg_pt_gp_dev = dev; atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED); @@ -1799,7 +1792,7 @@ void core_alua_free_tg_pt_gp( dev->t10_alua.alua_tg_pt_gps_counter--; spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work); + flush_work(&tg_pt_gp->tg_pt_gp_transition_work); /* * Allow a struct t10_alua_tg_pt_gp_member * referenced by diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index a87e8940fe57..eb3b23b6ec54 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -297,7 +297,7 @@ struct t10_alua_tg_pt_gp { struct list_head tg_pt_gp_lun_list; struct se_lun *tg_pt_gp_alua_lun; struct se_node_acl *tg_pt_gp_alua_nacl; - struct delayed_work tg_pt_gp_transition_work; + struct work_struct tg_pt_gp_transition_work; struct completion *tg_pt_gp_transition_complete; }; -- GitLab From 25b0b3f2373d31d40151b0fbdf35c0035d332a61 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 2 Mar 2017 04:59:50 -0600 Subject: [PATCH 0457/1398] target: fix race during implicit transition work flushes [ Upstream commit 760bf578edf8122f2503a3a6a3f4b0de3b6ce0bb ] This fixes the following races: 1. core_alua_do_transition_tg_pt could have read tg_pt_gp_alua_access_state and gone into this if chunk: if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == ALUA_ACCESS_STATE_TRANSITION) { and then core_alua_do_transition_tg_pt_work could update the state. core_alua_do_transition_tg_pt would then only set tg_pt_gp_alua_pending_state and the tg_pt_gp_alua_access_state would not get updated with the second calls state. 2. core_alua_do_transition_tg_pt could be setting tg_pt_gp_transition_complete while the tg_pt_gp_transition_work is already completing. core_alua_do_transition_tg_pt then waits on the completion that will never be called. To handle these issues, we just call flush_work which will return when core_alua_do_transition_tg_pt_work has completed so there is no need to do the complete/wait. And, if core_alua_do_transition_tg_pt_work was running, instead of trying to sneak in the state change, we just schedule up another core_alua_do_transition_tg_pt_work call. Note that this does not handle a possible race where there are multiple threads call core_alua_do_transition_tg_pt at the same time. I think we need a mutex in target_tg_pt_gp_alua_access_state_store. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_alua.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index dc06f723133e..ee5b29aed54b 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1073,16 +1073,8 @@ static int core_alua_do_transition_tg_pt( /* * Flush any pending transitions */ - if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == - ALUA_ACCESS_STATE_TRANSITION) { - /* Just in case */ - tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; - tg_pt_gp->tg_pt_gp_transition_complete = &wait; + if (!explicit) flush_work(&tg_pt_gp->tg_pt_gp_transition_work); - wait_for_completion(&wait); - tg_pt_gp->tg_pt_gp_transition_complete = NULL; - return 0; - } /* * Save the old primary ALUA access state, and set the current state -- GitLab From e15628b293a7dad278d5bbf7d56a539858b0be8f Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:23 +0800 Subject: [PATCH 0458/1398] Revert "x86/acpi: Set persistent cpuid <-> nodeid mapping when booting" [ Upstream commit c962cff17dfa11f4a8227ac16de2b28aea3312e4 ] Revert: dc6db24d2476 ("x86/acpi: Set persistent cpuid <-> nodeid mapping when booting") The mapping of "cpuid <-> nodeid" is established at boot time via ACPI tables to keep associations of workqueues and other node related items consistent across cpu hotplug. But, ACPI tables are unreliable and failures with that boot time mapping have been reported on machines where the ACPI table and the physical information which is retrieved at actual hotplug is inconsistent. Revert the mapping implementation so it can be replaced with a less error prone approach. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-2-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/acpi/boot.c | 2 +- drivers/acpi/acpi_processor.c | 5 --- drivers/acpi/bus.c | 1 - drivers/acpi/processor_core.c | 73 ----------------------------------- include/linux/acpi.h | 3 -- 5 files changed, 1 insertion(+), 83 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b89bef95f63b..11cc600f4df0 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -720,7 +720,7 @@ static void __init acpi_set_irq_model_ioapic(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU #include -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int nid; diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 3de3b6b8f0f1..f43a586236ea 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -182,11 +182,6 @@ int __weak arch_register_cpu(int cpu) void __weak arch_unregister_cpu(int cpu) {} -int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) -{ - return -ENODEV; -} - static int acpi_processor_hotadd_init(struct acpi_processor *pr) { unsigned long long sta; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 56190d00fd87..0a3ca20f99af 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1197,7 +1197,6 @@ static int __init acpi_init(void) acpi_wakeup_device_init(); acpi_debugger_init(); acpi_setup_sb_notify_handler(); - acpi_set_processor_mapping(); return 0; } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 5c78ee1860b0..fd59ae871db3 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -280,79 +280,6 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) } EXPORT_SYMBOL_GPL(acpi_get_cpuid); -#ifdef CONFIG_ACPI_HOTPLUG_CPU -static bool __init -map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid) -{ - int type, id; - u32 acpi_id; - acpi_status status; - acpi_object_type acpi_type; - unsigned long long tmp; - union acpi_object object = { 0 }; - struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; - - status = acpi_get_type(handle, &acpi_type); - if (ACPI_FAILURE(status)) - return false; - - switch (acpi_type) { - case ACPI_TYPE_PROCESSOR: - status = acpi_evaluate_object(handle, NULL, NULL, &buffer); - if (ACPI_FAILURE(status)) - return false; - acpi_id = object.processor.proc_id; - - /* validate the acpi_id */ - if(acpi_processor_validate_proc_id(acpi_id)) - return false; - break; - case ACPI_TYPE_DEVICE: - status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp); - if (ACPI_FAILURE(status)) - return false; - acpi_id = tmp; - break; - default: - return false; - } - - type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0; - - *phys_id = __acpi_get_phys_id(handle, type, acpi_id, false); - id = acpi_map_cpuid(*phys_id, acpi_id); - - if (id < 0) - return false; - *cpuid = id; - return true; -} - -static acpi_status __init -set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context, - void **rv) -{ - phys_cpuid_t phys_id; - int cpu_id; - - if (!map_processor(handle, &phys_id, &cpu_id)) - return AE_ERROR; - - acpi_map_cpu2node(handle, cpu_id, phys_id); - return AE_OK; -} - -void __init acpi_set_processor_mapping(void) -{ - /* Set persistent cpu <-> node mapping for all processors. */ - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, set_processor_node_mapping, - NULL, NULL, NULL); -} -#else -void __init acpi_set_processor_mapping(void) {} -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base, u64 *phys_addr, int *ioapic_id) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 61a3d90f32b3..ca2b4c4aec42 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -276,11 +276,8 @@ bool acpi_processor_validate_proc_id(int proc_id); /* Arch dependent functions for cpu hotplug support */ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu); int acpi_unmap_cpu(int cpu); -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ -void acpi_set_processor_mapping(void); - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif -- GitLab From da73389e8aa7975a46b9e85234fcbd7885d0eabd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Szymanski?= Date: Fri, 10 Nov 2017 10:01:43 +0100 Subject: [PATCH 0459/1398] HID: cp2112: fix broken gpio_direction_input callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7da85fbf1c87d4f73621e0e7666a3387497075a9 ] When everything goes smoothly, ret is set to 0 which makes the function to return EIO error. Fixes: 8e9faa15469e ("HID: cp2112: fix gpio-callback error handling") Signed-off-by: Sébastien Szymanski Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-cp2112.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index e06c1344c913..7af77818efc3 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -188,6 +188,8 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset) HID_REQ_GET_REPORT); if (ret != CP2112_GPIO_CONFIG_LENGTH) { hid_err(hdev, "error requesting GPIO config: %d\n", ret); + if (ret >= 0) + ret = -EIO; goto exit; } @@ -197,8 +199,10 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset) ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); - if (ret < 0) { + if (ret != CP2112_GPIO_CONFIG_LENGTH) { hid_err(hdev, "error setting GPIO config: %d\n", ret); + if (ret >= 0) + ret = -EIO; goto exit; } @@ -206,7 +210,7 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset) exit: mutex_unlock(&dev->lock); - return ret < 0 ? ret : -EIO; + return ret; } static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value) -- GitLab From 0312ab0f0e437cadb9eabef72a1e26a676d2dd64 Mon Sep 17 00:00:00 2001 From: Robert Stonehouse Date: Tue, 7 Nov 2017 17:30:30 +0000 Subject: [PATCH 0460/1398] sfc: don't warn on successful change of MAC [ Upstream commit cbad52e92ad7f01f0be4ca58bde59462dc1afe3a ] Fixes: 535a61777f44e ("sfc: suppress handled MCDI failures when changing the MAC address") Signed-off-by: Bert Kenward Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/ef10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 1d85109cb8ed..3d5d5d54c103 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -4967,7 +4967,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) * MCFW do not support VFs. */ rc = efx_ef10_vport_set_mac_address(efx); - } else { + } else if (rc) { efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC, sizeof(inbuf), NULL, 0, rc); } -- GitLab From aecce5fc047a99c057931f08977ba6042bb931c4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 9 Nov 2017 18:09:33 +0100 Subject: [PATCH 0461/1398] fbdev: controlfb: Add missing modes to fix out of bounds access [ Upstream commit ac831a379d34109451b3c41a44a20ee10ecb615f ] Dan's static analysis says: drivers/video/fbdev/controlfb.c:560 control_setup() error: buffer overflow 'control_mac_modes' 20 <= 21 Indeed, control_mac_modes[] has only 20 elements, while VMODE_MAX is 22, which may lead to an out of bounds read when parsing vmode commandline options. The bug was introduced in v2.4.5.6, when 2 new modes were added to macmodes.h, but control_mac_modes[] wasn't updated: https://kernel.opensuse.org/cgit/kernel/diff/include/video/macmodes.h?h=v2.5.2&id=29f279c764808560eaceb88fef36cbc35c529aad Augment control_mac_modes[] with the two new video modes to fix this. Reported-by: Dan Carpenter Signed-off-by: Geert Uytterhoeven Cc: Dan Carpenter Cc: Benjamin Herrenschmidt Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/controlfb.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/controlfb.h b/drivers/video/fbdev/controlfb.h index 6026c60fc100..261522fabdac 100644 --- a/drivers/video/fbdev/controlfb.h +++ b/drivers/video/fbdev/controlfb.h @@ -141,5 +141,7 @@ static struct max_cmodes control_mac_modes[] = { {{ 1, 2}}, /* 1152x870, 75Hz */ {{ 0, 1}}, /* 1280x960, 75Hz */ {{ 0, 1}}, /* 1280x1024, 75Hz */ + {{ 1, 2}}, /* 1152x768, 60Hz */ + {{ 0, 1}}, /* 1600x1024, 60Hz */ }; -- GitLab From 92c3c7db8336f6678b31ee0d08ecf3bab81a80ff Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Thu, 9 Nov 2017 18:09:30 +0100 Subject: [PATCH 0462/1398] video: udlfb: Fix read EDID timeout [ Upstream commit c98769475575c8a585f5b3952f4b5f90266f699b ] While usb_control_msg function expects timeout in miliseconds, a value of HZ is used. Replace it with USB_CTRL_GET_TIMEOUT and also fix error message which looks like: udlfb: Read EDID byte 78 failed err ffffff92 as error is either negative errno or number of bytes transferred use %d format specifier. Returned EDID is in second byte, so return error when less than two bytes are received. Fixes: 18dffdf8913a ("staging: udlfb: enhance EDID and mode handling support") Signed-off-by: Ladislav Michl Cc: Bernie Thompson Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/udlfb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index e9c2f7ba3c8e..53326badfb61 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c @@ -769,11 +769,11 @@ static int dlfb_get_edid(struct dlfb_data *dev, char *edid, int len) for (i = 0; i < len; i++) { ret = usb_control_msg(dev->udev, - usb_rcvctrlpipe(dev->udev, 0), (0x02), - (0x80 | (0x02 << 5)), i << 8, 0xA1, rbuf, 2, - HZ); - if (ret < 1) { - pr_err("Read EDID byte %d failed err %x\n", i, ret); + usb_rcvctrlpipe(dev->udev, 0), 0x02, + (0x80 | (0x02 << 5)), i << 8, 0xA1, + rbuf, 2, USB_CTRL_GET_TIMEOUT); + if (ret < 2) { + pr_err("Read EDID byte %d failed: %d\n", i, ret); i--; break; } -- GitLab From 90e2591f6f3fd15ac766ffbaed119f43f01fef33 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 9 Nov 2017 18:09:28 +0100 Subject: [PATCH 0463/1398] video: fbdev: au1200fb: Release some resources if a memory allocation fails [ Upstream commit 451f130602619a17c8883dd0b71b11624faffd51 ] We should go through the error handling code instead of returning -ENOMEM directly. Signed-off-by: Christophe JAILLET Cc: Tejun Heo Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/au1200fb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c index 6c2b2ca4a909..29d1639f5c5e 100644 --- a/drivers/video/fbdev/au1200fb.c +++ b/drivers/video/fbdev/au1200fb.c @@ -1700,7 +1700,8 @@ static int au1200fb_drv_probe(struct platform_device *dev) if (!fbdev->fb_mem) { print_err("fail to allocate frambuffer (size: %dK))", fbdev->fb_len / 1024); - return -ENOMEM; + ret = -ENOMEM; + goto failed; } /* -- GitLab From cf53526f3312f972043d0c6fa745cfb9f777a0c9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 9 Nov 2017 18:09:28 +0100 Subject: [PATCH 0464/1398] video: fbdev: au1200fb: Return an error code if a memory allocation fails [ Upstream commit 8cae353e6b01ac3f18097f631cdbceb5ff28c7f3 ] 'ret' is known to be 0 at this point. In case of memory allocation error in 'framebuffer_alloc()', return -ENOMEM instead. Signed-off-by: Christophe JAILLET Cc: Tejun Heo Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/au1200fb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c index 29d1639f5c5e..44c2be15a08b 100644 --- a/drivers/video/fbdev/au1200fb.c +++ b/drivers/video/fbdev/au1200fb.c @@ -1681,8 +1681,10 @@ static int au1200fb_drv_probe(struct platform_device *dev) fbi = framebuffer_alloc(sizeof(struct au1200fb_device), &dev->dev); - if (!fbi) + if (!fbi) { + ret = -ENOMEM; goto failed; + } _au1200fb_infos[plane] = fbi; fbdev = fbi->par; -- GitLab From 9146b10f8cd64c2e467c8f98ccb19dcb15812e83 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 7 Nov 2017 13:12:17 +0100 Subject: [PATCH 0465/1398] rtc: pcf8563: fix output clock rate [ Upstream commit a3350f9c57ffad569c40f7320b89da1f3061c5bb ] The pcf8563_clkout_recalc_rate function erroneously ignores the frequency index read from the CLKO register and always returns 32768 Hz. Fixes: a39a6405d5f9 ("rtc: pcf8563: add CLKOUT to common clock framework") Signed-off-by: Philipp Zabel Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-pcf8563.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index 1227ceab61ee..a4b8b603c807 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -422,7 +422,7 @@ static unsigned long pcf8563_clkout_recalc_rate(struct clk_hw *hw, return 0; buf &= PCF8563_REG_CLKO_F_MASK; - return clkout_rates[ret]; + return clkout_rates[buf]; } static long pcf8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate, -- GitLab From 03a48dc96505f5b900e44d50e69ea3ee3686c279 Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Tue, 7 Nov 2017 16:16:19 +0530 Subject: [PATCH 0466/1398] ASoC: Intel: Skylake: Fix uuid_module memory leak in failure case [ Upstream commit f8e066521192c7debe59127d90abbe2773577e25 ] In the loop that adds the uuid_module to the uuid_list list, allocated memory is not properly freed in the error path free uuid_list whenever any of the memory allocation in the loop fails to avoid memory leak. Signed-off-by: Pankaj Bharadiya Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/skylake/skl-sst-utils.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/sound/soc/intel/skylake/skl-sst-utils.c b/sound/soc/intel/skylake/skl-sst-utils.c index ea162fbf68e5..d5adc04bb724 100644 --- a/sound/soc/intel/skylake/skl-sst-utils.c +++ b/sound/soc/intel/skylake/skl-sst-utils.c @@ -295,6 +295,7 @@ int snd_skl_parse_uuids(struct sst_dsp *ctx, const struct firmware *fw, struct uuid_module *module; struct firmware stripped_fw; unsigned int safe_file; + int ret = 0; /* Get the FW pointer to derive ADSP header */ stripped_fw.data = fw->data; @@ -343,8 +344,10 @@ int snd_skl_parse_uuids(struct sst_dsp *ctx, const struct firmware *fw, for (i = 0; i < num_entry; i++, mod_entry++) { module = kzalloc(sizeof(*module), GFP_KERNEL); - if (!module) - return -ENOMEM; + if (!module) { + ret = -ENOMEM; + goto free_uuid_list; + } uuid_bin = (uuid_le *)mod_entry->uuid.id; memcpy(&module->uuid, uuid_bin, sizeof(module->uuid)); @@ -355,8 +358,8 @@ int snd_skl_parse_uuids(struct sst_dsp *ctx, const struct firmware *fw, size = sizeof(int) * mod_entry->instance_max_count; module->instance_id = devm_kzalloc(ctx->dev, size, GFP_KERNEL); if (!module->instance_id) { - kfree(module); - return -ENOMEM; + ret = -ENOMEM; + goto free_uuid_list; } list_add_tail(&module->list, &skl->uuid_list); @@ -367,6 +370,10 @@ int snd_skl_parse_uuids(struct sst_dsp *ctx, const struct firmware *fw, } return 0; + +free_uuid_list: + skl_freeup_uuid_list(skl); + return ret; } void skl_freeup_uuid_list(struct skl_sst *ctx) -- GitLab From 5a7192bc389ea5ac194d046d6a218b7431680a31 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 8 Nov 2017 12:02:25 +0200 Subject: [PATCH 0467/1398] dmaengine: ti-dma-crossbar: Correct am335x/am43xx mux value type [ Upstream commit 288e7560e4d3e259aa28f8f58a8dfe63627a1bf6 ] The used 0x1f mask is only valid for am335x family of SoC, different family using this type of crossbar might have different number of electable events. In case of am43xx family 0x3f mask should have been used for example. Instead of trying to handle each family's mask, just use u8 type to store the mux value since the event offsets are aligned to byte offset. Fixes: 42dbdcc6bf965 ("dmaengine: ti-dma-crossbar: Add support for crossbar on AM33xx/AM43xx") Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ti-dma-crossbar.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c index 88a00d06def6..43e88d85129e 100644 --- a/drivers/dma/ti-dma-crossbar.c +++ b/drivers/dma/ti-dma-crossbar.c @@ -49,12 +49,12 @@ struct ti_am335x_xbar_data { struct ti_am335x_xbar_map { u16 dma_line; - u16 mux_val; + u8 mux_val; }; -static inline void ti_am335x_xbar_write(void __iomem *iomem, int event, u16 val) +static inline void ti_am335x_xbar_write(void __iomem *iomem, int event, u8 val) { - writeb_relaxed(val & 0x1f, iomem + event); + writeb_relaxed(val, iomem + event); } static void ti_am335x_xbar_free(struct device *dev, void *route_data) @@ -105,7 +105,7 @@ static void *ti_am335x_xbar_route_allocate(struct of_phandle_args *dma_spec, } map->dma_line = (u16)dma_spec->args[0]; - map->mux_val = (u16)dma_spec->args[2]; + map->mux_val = (u8)dma_spec->args[2]; dma_spec->args[2] = 0; dma_spec->args_count = 2; -- GitLab From f44d28e0348dbb081784ab02be57954d75efa13e Mon Sep 17 00:00:00 2001 From: Qiang Date: Thu, 28 Sep 2017 11:54:34 +0800 Subject: [PATCH 0468/1398] PCI/PME: Handle invalid data when reading Root Status [ Upstream commit 3ad3f8ce50914288731a3018b27ee44ab803e170 ] PCIe PME and native hotplug share the same interrupt number, so hotplug interrupts are also processed by PME. In some cases, e.g., a Link Down interrupt, a device may be present but unreachable, so when we try to read its Root Status register, the read fails and we get all ones data (0xffffffff). Previously, we interpreted that data as PCI_EXP_RTSTA_PME being set, i.e., "some device has asserted PME," so we scheduled pcie_pme_work_fn(). This caused an infinite loop because pcie_pme_work_fn() tried to handle PME requests until PCI_EXP_RTSTA_PME is cleared, but with the link down, PCI_EXP_RTSTA_PME can't be cleared. Check for the invalid 0xffffffff data everywhere we read the Root Status register. 1469d17dd341 ("PCI: pciehp: Handle invalid data when reading from non-existent devices") added similar checks in the hotplug driver. Signed-off-by: Qiang Zheng [bhelgaas: changelog, also check in pcie_pme_work_fn(), use "~0" to follow other similar checks] Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/pme.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c index 4b703492376a..00f61225386c 100644 --- a/drivers/pci/pcie/pme.c +++ b/drivers/pci/pcie/pme.c @@ -232,6 +232,9 @@ static void pcie_pme_work_fn(struct work_struct *work) break; pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta); + if (rtsta == (u32) ~0) + break; + if (rtsta & PCI_EXP_RTSTA_PME) { /* * Clear PME status of the port. If there are other @@ -279,7 +282,7 @@ static irqreturn_t pcie_pme_irq(int irq, void *context) spin_lock_irqsave(&data->lock, flags); pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta); - if (!(rtsta & PCI_EXP_RTSTA_PME)) { + if (rtsta == (u32) ~0 || !(rtsta & PCI_EXP_RTSTA_PME)) { spin_unlock_irqrestore(&data->lock, flags); return IRQ_NONE; } -- GitLab From 29a404be7b30237e0894057422407722d3f8bf45 Mon Sep 17 00:00:00 2001 From: Shriya Date: Fri, 13 Oct 2017 10:06:41 +0530 Subject: [PATCH 0469/1398] powerpc/powernv/cpufreq: Fix the frequency read by /proc/cpuinfo [ Upstream commit cd77b5ce208c153260ed7882d8910f2395bfaabd ] The call to /proc/cpuinfo in turn calls cpufreq_quick_get() which returns the last frequency requested by the kernel, but may not reflect the actual frequency the processor is running at. This patch makes a call to cpufreq_get() instead which returns the current frequency reported by the hardware. Fixes: fb5153d05a7d ("powerpc: powernv: Implement ppc_md.get_proc_freq()") Signed-off-by: Shriya Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index efe8b6bb168b..b33faa0015cc 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -289,7 +289,7 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu) { unsigned long ret_freq; - ret_freq = cpufreq_quick_get(cpu) * 1000ul; + ret_freq = cpufreq_get(cpu) * 1000ul; /* * If the backend cpufreq driver does not exist, -- GitLab From 9a4bf05126f42c2632729ab0da503021d74ed454 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 13 Oct 2017 21:35:43 +0300 Subject: [PATCH 0470/1398] PCI: Do not allocate more buses than available in parent [ Upstream commit a20c7f36bd3d20d245616ae223bb9d05dfb6f050 ] One can ask more buses to be reserved for hotplug bridges by passing pci=hpbussize=N in the kernel command line. If the parent bus does not have enough bus space available we incorrectly create child bus with the requested number of subordinate buses. In the example below hpbussize is set to one more than we have available buses in the root port: pci 0000:07:00.0: [8086:1578] type 01 class 0x060400 pci 0000:07:00.0: scanning [bus 00-00] behind bridge, pass 0 pci 0000:07:00.0: bridge configuration invalid ([bus 00-00]), reconfiguring pci 0000:07:00.0: scanning [bus 00-00] behind bridge, pass 1 pci_bus 0000:08: busn_res: can not insert [bus 08-ff] under [bus 07-3f] (conflicts with (null) [bus 07-3f]) pci_bus 0000:08: scanning bus ... pci_bus 0000:0a: bus scan returning with max=40 pci_bus 0000:0a: busn_res: [bus 0a-ff] end is updated to 40 pci_bus 0000:0a: [bus 0a-40] partially hidden behind bridge 0000:07 [bus 07-3f] pci_bus 0000:08: bus scan returning with max=40 pci_bus 0000:08: busn_res: [bus 08-ff] end is updated to 40 Instead of allowing this, limit the subordinate number to be less than or equal the maximum subordinate number allocated for the parent bus (if it has any). Signed-off-by: Mika Westerberg [bhelgaas: remove irrelevant dmesg messages] Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 60bada90cd75..a98be6db7e93 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -932,7 +932,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass) child = pci_add_new_bus(bus, dev, max+1); if (!child) goto out; - pci_bus_insert_busn_res(child, max+1, 0xff); + pci_bus_insert_busn_res(child, max+1, + bus->busn_res.end); } max++; buses = (buses & 0xff000000) @@ -2136,6 +2137,10 @@ unsigned int pci_scan_child_bus(struct pci_bus *bus) if (bus->self && bus->self->is_hotplug_bridge && pci_hotplug_bus_size) { if (max - bus->busn_res.start < pci_hotplug_bus_size - 1) max = bus->busn_res.start + pci_hotplug_bus_size - 1; + + /* Do not allocate more buses than we have room left */ + if (max > bus->busn_res.end) + max = bus->busn_res.end; } /* -- GitLab From b3b6d1eea0de7115353a3706e5b0c3ff56be0663 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Mon, 30 Oct 2017 12:37:55 +0100 Subject: [PATCH 0471/1398] iommu/mediatek: Fix driver name [ Upstream commit 395df08d2e1de238a9c8c33fdcd0e2160efd63a9 ] There exist two Mediatek iommu drivers for the two different generations of the device. But both drivers have the same name "mtk-iommu". This breaks the registration of the second driver: Error: Driver 'mtk-iommu' is already registered, aborting... Fix this by changing the name for first generation to "mtk-iommu-v1". Fixes: b17336c55d89 ("iommu/mediatek: add support for mtk iommu generation one HW") Signed-off-by: Matthias Brugger Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/mtk_iommu_v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index b8aeb0768483..68c6050d1efb 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -703,7 +703,7 @@ static struct platform_driver mtk_iommu_driver = { .probe = mtk_iommu_probe, .remove = mtk_iommu_remove, .driver = { - .name = "mtk-iommu", + .name = "mtk-iommu-v1", .of_match_table = mtk_iommu_of_ids, .pm = &mtk_iommu_pm_ops, } -- GitLab From a463f9c5dfd1bb62f128e6883103117af6f3db1f Mon Sep 17 00:00:00 2001 From: KUWAZAWA Takuya Date: Sun, 15 Oct 2017 20:54:10 +0900 Subject: [PATCH 0472/1398] netfilter: ipvs: Fix inappropriate output of procfs [ Upstream commit c5504f724c86ee925e7ffb80aa342cfd57959b13 ] Information about ipvs in different network namespace can be seen via procfs. How to reproduce: # ip netns add ns01 # ip netns add ns02 # ip netns exec ns01 ip a add dev lo 127.0.0.1/8 # ip netns exec ns02 ip a add dev lo 127.0.0.1/8 # ip netns exec ns01 ipvsadm -A -t 10.1.1.1:80 # ip netns exec ns02 ipvsadm -A -t 10.1.1.2:80 The ipvsadm displays information about its own network namespace only. # ip netns exec ns01 ipvsadm -Ln IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP 10.1.1.1:80 wlc # ip netns exec ns02 ipvsadm -Ln IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP 10.1.1.2:80 wlc But I can see information about other network namespace via procfs. # ip netns exec ns01 cat /proc/net/ip_vs IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP 0A010101:0050 wlc TCP 0A010102:0050 wlc # ip netns exec ns02 cat /proc/net/ip_vs IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP 0A010102:0050 wlc Signed-off-by: KUWAZAWA Takuya Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a6e44ef2ec9a..2155c2498aed 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2040,12 +2040,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) seq_puts(seq, " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); } else { + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); const struct ip_vs_service *svc = v; const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); char *sched_name = sched ? sched->name : "none"; + if (svc->ipvs != ipvs) + return 0; if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) -- GitLab From d7e7c431d62101505a6a0321986dba498252fd7f Mon Sep 17 00:00:00 2001 From: "William A. Kennington III" Date: Fri, 22 Sep 2017 16:58:00 -0700 Subject: [PATCH 0473/1398] powerpc/opal: Fix EBUSY bug in acquiring tokens [ Upstream commit 71e24d7731a2903b1ae2bba2b2971c654d9c2aa6 ] The current code checks the completion map to look for the first token that is complete. In some cases, a completion can come in but the token can still be on lease to the caller processing the completion. If this completed but unreleased token is the first token found in the bitmap by another tasks trying to acquire a token, then the __test_and_set_bit call will fail since the token will still be on lease. The acquisition will then fail with an EBUSY. This patch reorganizes the acquisition code to look at the opal_async_token_map for an unleased token. If the token has no lease it must have no outstanding completions so we should never see an EBUSY, unless we have leased out too many tokens. Since opal_async_get_token_inrerruptible is protected by a semaphore, we will practically never see EBUSY anymore. Fixes: 8d7248232208 ("powerpc/powernv: Infrastructure to support OPAL async completion") Signed-off-by: William A. Kennington III Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/opal-async.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index 83bebeec0fea..0f7b16e29347 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -39,18 +39,18 @@ int __opal_async_get_token(void) int token; spin_lock_irqsave(&opal_async_comp_lock, flags); - token = find_first_bit(opal_async_complete_map, opal_max_async_tokens); + token = find_first_zero_bit(opal_async_token_map, opal_max_async_tokens); if (token >= opal_max_async_tokens) { token = -EBUSY; goto out; } - if (__test_and_set_bit(token, opal_async_token_map)) { + if (!__test_and_clear_bit(token, opal_async_complete_map)) { token = -EBUSY; goto out; } - __clear_bit(token, opal_async_complete_map); + __set_bit(token, opal_async_token_map); out: spin_unlock_irqrestore(&opal_async_comp_lock, flags); -- GitLab From 6e5a846d5172c5225bda764797a07c828b5b2650 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 18 Oct 2017 11:16:47 +0200 Subject: [PATCH 0474/1398] powerpc/ipic: Fix status get and status clear [ Upstream commit 6b148a7ce72a7f87c81cbcde48af014abc0516a9 ] IPIC Status is provided by register IPIC_SERSR and not by IPIC_SERMR which is the mask register. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/sysdev/ipic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index f267ee0afc08..716353b247de 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -845,12 +845,12 @@ void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq) u32 ipic_get_mcp_status(void) { - return ipic_read(primary_ipic->regs, IPIC_SERMR); + return ipic_read(primary_ipic->regs, IPIC_SERSR); } void ipic_clear_mcp_status(u32 mask) { - ipic_write(primary_ipic->regs, IPIC_SERMR, mask); + ipic_write(primary_ipic->regs, IPIC_SERSR, mask); } /* Return an interrupt vector or 0 if no interrupt is pending. */ -- GitLab From abc4b4420af8d2d31fe38dddf998855f5cf9d8dc Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Sun, 29 Oct 2017 02:49:54 -0700 Subject: [PATCH 0475/1398] platform/x86: intel_punit_ipc: Fix resource ioremap warning [ Upstream commit 6cc8cbbc8868033f279b63e98b26b75eaa0006ab ] For PUNIT device, ISPDRIVER_IPC and GTDDRIVER_IPC resources are not mandatory. So when PMC IPC driver creates a PUNIT device, if these resources are not available then it creates dummy resource entries for these missing resources. But during PUNIT device probe, doing ioremap on these dummy resources generates following warning messages. intel_punit_ipc: can't request region for resource [mem 0x00000000] intel_punit_ipc: can't request region for resource [mem 0x00000000] intel_punit_ipc: can't request region for resource [mem 0x00000000] intel_punit_ipc: can't request region for resource [mem 0x00000000] This patch fixes this issue by adding extra check for resource size before performing ioremap operation. Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel_punit_ipc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel_punit_ipc.c index a47a41fc10ad..b5b890127479 100644 --- a/drivers/platform/x86/intel_punit_ipc.c +++ b/drivers/platform/x86/intel_punit_ipc.c @@ -252,28 +252,28 @@ static int intel_punit_get_bars(struct platform_device *pdev) * - GTDRIVER_IPC BASE_IFACE */ res = platform_get_resource(pdev, IORESOURCE_MEM, 2); - if (res) { + if (res && resource_size(res) > 1) { addr = devm_ioremap_resource(&pdev->dev, res); if (!IS_ERR(addr)) punit_ipcdev->base[ISPDRIVER_IPC][BASE_DATA] = addr; } res = platform_get_resource(pdev, IORESOURCE_MEM, 3); - if (res) { + if (res && resource_size(res) > 1) { addr = devm_ioremap_resource(&pdev->dev, res); if (!IS_ERR(addr)) punit_ipcdev->base[ISPDRIVER_IPC][BASE_IFACE] = addr; } res = platform_get_resource(pdev, IORESOURCE_MEM, 4); - if (res) { + if (res && resource_size(res) > 1) { addr = devm_ioremap_resource(&pdev->dev, res); if (!IS_ERR(addr)) punit_ipcdev->base[GTDRIVER_IPC][BASE_DATA] = addr; } res = platform_get_resource(pdev, IORESOURCE_MEM, 5); - if (res) { + if (res && resource_size(res) > 1) { addr = devm_ioremap_resource(&pdev->dev, res); if (!IS_ERR(addr)) punit_ipcdev->base[GTDRIVER_IPC][BASE_IFACE] = addr; -- GitLab From e086a82a926ace86fc14da0442437c2024d68001 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 31 Oct 2017 11:03:17 -0700 Subject: [PATCH 0476/1398] target/iscsi: Fix a race condition in iscsit_add_reject_from_cmd() [ Upstream commit cfe2b621bb18d86e93271febf8c6e37622da2d14 ] Avoid that cmd->se_cmd.se_tfo is read after a command has already been freed. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Mike Christie Reviewed-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 0d578297d9f9..72e926d9868f 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -841,6 +841,7 @@ static int iscsit_add_reject_from_cmd( unsigned char *buf) { struct iscsi_conn *conn; + const bool do_put = cmd->se_cmd.se_tfo != NULL; if (!cmd->conn) { pr_err("cmd->conn is NULL for ITT: 0x%08x\n", @@ -871,7 +872,7 @@ static int iscsit_add_reject_from_cmd( * Perform the kref_put now if se_cmd has already been setup by * scsit_setup_scsi_cmd() */ - if (cmd->se_cmd.se_tfo != NULL) { + if (do_put) { pr_debug("iscsi reject: calling target_put_sess_cmd >>>>>>\n"); target_put_sess_cmd(&cmd->se_cmd); } -- GitLab From a4f54ec403da9f5eafaba60d67ae8b2613a1bae5 Mon Sep 17 00:00:00 2001 From: tangwenji Date: Fri, 15 Sep 2017 16:03:13 +0800 Subject: [PATCH 0477/1398] iscsi-target: fix memory leak in lio_target_tiqn_addtpg() [ Upstream commit 12d5a43b2dffb6cd28062b4e19024f7982393288 ] tpg must free when call core_tpg_register() return fail Signed-off-by: tangwenji Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_configfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 9cbbc9cf63fb..8a4bc15bc3f5 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1144,7 +1144,7 @@ static struct se_portal_group *lio_target_tiqn_addtpg( ret = core_tpg_register(wwn, &tpg->tpg_se_tpg, SCSI_PROTOCOL_ISCSI); if (ret < 0) - return NULL; + goto free_out; ret = iscsit_tpg_add_portal_group(tiqn, tpg); if (ret != 0) @@ -1156,6 +1156,7 @@ static struct se_portal_group *lio_target_tiqn_addtpg( return &tpg->tpg_se_tpg; out: core_tpg_deregister(&tpg->tpg_se_tpg); +free_out: kfree(tpg); return NULL; } -- GitLab From 998201fdc5c9144a2dfaec8811a722c3c5e731d3 Mon Sep 17 00:00:00 2001 From: tangwenji Date: Thu, 24 Aug 2017 19:59:37 +0800 Subject: [PATCH 0478/1398] target:fix condition return in core_pr_dump_initiator_port() [ Upstream commit 24528f089d0a444070aa4f715ace537e8d6bf168 ] When is pr_reg->isid_present_at_reg is false,this function should return. This fixes a regression originally introduced by: commit d2843c173ee53cf4c12e7dfedc069a5bc76f0ac5 Author: Andy Grover Date: Thu May 16 10:40:55 2013 -0700 target: Alter core_pr_dump_initiator_port for ease of use Signed-off-by: tangwenji Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_pr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 47463c99c318..df20921c233c 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -56,8 +56,10 @@ void core_pr_dump_initiator_port( char *buf, u32 size) { - if (!pr_reg->isid_present_at_reg) + if (!pr_reg->isid_present_at_reg) { buf[0] = '\0'; + return; + } snprintf(buf, size, ",i,0x%s", pr_reg->pr_reg_isid); } -- GitLab From dcdca123814c5303f0e4d0fba67756d35bb0fd1f Mon Sep 17 00:00:00 2001 From: Jiang Yi Date: Fri, 11 Aug 2017 11:29:44 +0800 Subject: [PATCH 0479/1398] target/file: Do not return error for UNMAP if length is zero [ Upstream commit 594e25e73440863981032d76c9b1e33409ceff6e ] The function fd_execute_unmap() in target_core_file.c calles ret = file->f_op->fallocate(file, mode, pos, len); Some filesystems implement fallocate() to return error if length is zero (e.g. btrfs) but according to SCSI Block Commands spec UNMAP should return success for zero length. Signed-off-by: Jiang Yi Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 29f807b29e74..97928b42ad62 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -466,6 +466,10 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) struct inode *inode = file->f_mapping->host; int ret; + if (!nolb) { + return 0; + } + if (cmd->se_dev->dev_attrib.pi_prot_type) { ret = fd_do_prot_unmap(cmd, lba, nolb); if (ret) -- GitLab From cb0acb37010c216e4781224ad4ab4c12ca9cb5fe Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 3 Nov 2017 11:24:44 -0600 Subject: [PATCH 0480/1398] badblocks: fix wrong return value in badblocks_set if badblocks are disabled [ Upstream commit 39b4954c0a1556f8f7f1fdcf59a227117fcd8a0b ] MD's rdev_set_badblocks() expects that badblocks_set() returns 1 if badblocks are disabled, otherwise, rdev_set_badblocks() will record superblock changes and return success in that case and md will fail to report an IO error which it should. This bug has existed since badblocks were introduced in commit 9e0e252a048b ("badblocks: Add core badblock management code"). Signed-off-by: Liu Bo Acked-by: Guoqing Jiang Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/badblocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/badblocks.c b/block/badblocks.c index 6ebcef282314..2fe6c117ac96 100644 --- a/block/badblocks.c +++ b/block/badblocks.c @@ -178,7 +178,7 @@ int badblocks_set(struct badblocks *bb, sector_t s, int sectors, if (bb->shift < 0) /* badblocks are disabled */ - return 0; + return 1; if (bb->shift) { /* round the start down, and the end up */ -- GitLab From 03bfadfb0d7990bf558dabd6b26e4f06bc32c45b Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Fri, 3 Nov 2017 10:50:34 -0600 Subject: [PATCH 0481/1398] iommu/amd: Limit the IOVA page range to the specified addresses [ Upstream commit b92b4fb5c14257c0e7eae291ecc1f7b1962e1699 ] The extent of pages specified when applying a reserved region should include up to the last page of the range, but not the page following the range. Signed-off-by: Gary R Hook Fixes: 8d54d6c8b8f3 ('iommu/amd: Implement apply_dm_region call-back') Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1a0b110f12c0..0c910a863581 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3211,7 +3211,7 @@ static void amd_iommu_apply_dm_region(struct device *dev, unsigned long start, end; start = IOVA_PFN(region->start); - end = IOVA_PFN(region->start + region->length); + end = IOVA_PFN(region->start + region->length - 1); WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL); } -- GitLab From c843e9f8f97ff0d7c3a8232a314b20c1328ba697 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Wed, 1 Nov 2017 21:43:50 -0700 Subject: [PATCH 0482/1398] xfs: truncate pagecache before writeback in xfs_setattr_size() [ Upstream commit 350976ae21873b0d36584ea005076356431b8f79 ] On truncate down, if new size is not block size aligned, we zero the rest of block to avoid exposing stale data to user, and iomap_truncate_page() skips zeroing if the range is already in unwritten state or a hole. Then we writeback from on-disk i_size to the new size if this range hasn't been written to disk yet, and truncate page cache beyond new EOF and set in-core i_size. The problem is that we could write data between di_size and newsize before removing the page cache beyond newsize, as the extents may still be in unwritten state right after a buffer write. As such, the page of data that newsize lies in has not been zeroed by page cache invalidation before it is written, and xfs_do_writepage() hasn't triggered it's "zero data beyond EOF" case because we haven't updated in-core i_size yet. Then a subsequent mmap read could see non-zeros past EOF. I occasionally see this in fsx runs in fstests generic/112, a simplified fsx operation sequence is like (assuming 4k block size xfs): fallocate 0x0 0x1000 0x0 keep_size write 0x0 0x1000 0x0 truncate 0x0 0x800 0x1000 punch_hole 0x0 0x800 0x800 mapread 0x0 0x800 0x800 where fallocate allocates unwritten extent but doesn't update i_size, buffer write populates the page cache and extent is still unwritten, truncate skips zeroing page past new EOF and writes the page to disk, punch_hole invalidates the page cache, at last mapread reads the block back and sees non-zero beyond EOF. Fix it by moving truncate_setsize() to before writeback so the page cache invalidation zeros the partial page at the new EOF. This also triggers "zero data beyond EOF" in xfs_do_writepage() at writeback time, because newsize has been set and page straddles the newsize. Also fixed the wrong 'end' param of filemap_write_and_wait_range() call while we're at it, the 'end' is inclusive and should be 'newsize - 1'. Suggested-by: Dave Chinner Signed-off-by: Eryu Guan Acked-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_iops.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 5b81f7f41b80..33c389934238 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -870,22 +870,6 @@ xfs_setattr_size( if (error) return error; - /* - * We are going to log the inode size change in this transaction so - * any previous writes that are beyond the on disk EOF and the new - * EOF that have not been written out need to be written here. If we - * do not write the data out, we expose ourselves to the null files - * problem. Note that this includes any block zeroing we did above; - * otherwise those blocks may not be zeroed after a crash. - */ - if (did_zeroing || - (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) { - error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - ip->i_d.di_size, newsize); - if (error) - return error; - } - /* * We've already locked out new page faults, so now we can safely remove * pages from the page cache knowing they won't get refaulted until we @@ -902,9 +886,29 @@ xfs_setattr_size( * user visible changes). There's not much we can do about this, except * to hope that the caller sees ENOMEM and retries the truncate * operation. + * + * And we update in-core i_size and truncate page cache beyond newsize + * before writeback the [di_size, newsize] range, so we're guaranteed + * not to write stale data past the new EOF on truncate down. */ truncate_setsize(inode, newsize); + /* + * We are going to log the inode size change in this transaction so + * any previous writes that are beyond the on disk EOF and the new + * EOF that have not been written out need to be written here. If we + * do not write the data out, we expose ourselves to the null files + * problem. Note that this includes any block zeroing we did above; + * otherwise those blocks may not be zeroed after a crash. + */ + if (did_zeroing || + (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) { + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + ip->i_d.di_size, newsize - 1); + if (error) + return error; + } + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) return error; -- GitLab From 2ed46cbf23fc48ea63e3a3eaa820de5d54534cb6 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 3 Nov 2017 11:45:18 +0000 Subject: [PATCH 0483/1398] arm-ccn: perf: Prevent module unload while PMU is in use [ Upstream commit c7f5828bf77dcbd61d51f4736c1d5aa35663fbb4 ] When the PMU driver is built as a module, the perf expects the pmu->module to be valid, so that the driver is prevented from being unloaded while it is in use. Fix the CCN pmu driver to fill in this field. Fixes: a33b0daab73a0 ("bus: ARM CCN PMU driver") Cc: Pawel Moll Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bus/arm-ccn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index f0249899fc96..45d7ecc66b22 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -1280,6 +1280,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) /* Perf driver registration */ ccn->dt.pmu = (struct pmu) { + .module = THIS_MODULE, .attr_groups = arm_ccn_pmu_attr_groups, .task_ctx_nr = perf_invalid_context, .event_init = arm_ccn_pmu_event_init, -- GitLab From 18498f1c7089035faba2bc9ba64c5c96542ae0c4 Mon Sep 17 00:00:00 2001 From: Robert Baronescu Date: Tue, 10 Oct 2017 13:22:00 +0300 Subject: [PATCH 0484/1398] crypto: tcrypt - fix buffer lengths in test_aead_speed() [ Upstream commit 7aacbfcb331ceff3ac43096d563a1f93ed46e35e ] Fix the way the length of the buffers used for encryption / decryption are computed. For e.g. in case of encryption, input buffer does not contain an authentication tag. Signed-off-by: Robert Baronescu Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- crypto/tcrypt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index ae22f05d5936..e3af318af2db 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -342,7 +342,7 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, } sg_init_aead(sg, xbuf, - *b_size + (enc ? authsize : 0)); + *b_size + (enc ? 0 : authsize)); sg_init_aead(sgout, xoutbuf, *b_size + (enc ? authsize : 0)); @@ -350,7 +350,9 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, sg_set_buf(&sg[0], assoc, aad_size); sg_set_buf(&sgout[0], assoc, aad_size); - aead_request_set_crypt(req, sg, sgout, *b_size, iv); + aead_request_set_crypt(req, sg, sgout, + *b_size + (enc ? 0 : authsize), + iv); aead_request_set_ad(req, aad_size); if (secs) -- GitLab From 2850c3ec0d25a4f729bd7f2212e178c9303e697a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 3 Nov 2017 12:21:21 +0100 Subject: [PATCH 0485/1398] mm: Handle 0 flags in _calc_vm_trans() macro [ Upstream commit 592e254502041f953e84d091eae2c68cba04c10b ] _calc_vm_trans() does not handle the situation when some of the passed flags are 0 (which can happen if these VM flags do not make sense for the architecture). Improve the _calc_vm_trans() macro to return 0 in such situation. Since all passed flags are constant, this does not add any runtime overhead. Signed-off-by: Jan Kara Signed-off-by: Dan Williams Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/mman.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/mman.h b/include/linux/mman.h index 634c4c51fe3a..c540001ca861 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -63,8 +63,9 @@ static inline bool arch_validate_prot(unsigned long prot) * ("bit1" and "bit2" must be single bits) */ #define _calc_vm_trans(x, bit1, bit2) \ + ((!(bit1) || !(bit2)) ? 0 : \ ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \ - : ((x) & (bit1)) / ((bit1) / (bit2))) + : ((x) & (bit1)) / ((bit1) / (bit2)))) /* * Combine the mmap "prot" argument into "vm_flags" used internally. -- GitLab From d6b6302c36b5459f3f2fa8d5e978b3e25f7e2723 Mon Sep 17 00:00:00 2001 From: Chen Zhong Date: Thu, 5 Oct 2017 11:50:23 +0800 Subject: [PATCH 0486/1398] clk: mediatek: add the option for determining PLL source clock [ Upstream commit c955bf3998efa3355790a4d8c82874582f1bc727 ] Since the previous setup always sets the PLL using crystal 26MHz, this doesn't always happen in every MediaTek platform. So the patch added flexibility for assigning extra member for determining the PLL source clock. Signed-off-by: Chen Zhong Signed-off-by: Sean Wang Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/mediatek/clk-mtk.h | 1 + drivers/clk/mediatek/clk-pll.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h index 9f24fcfa304f..e425e50173c5 100644 --- a/drivers/clk/mediatek/clk-mtk.h +++ b/drivers/clk/mediatek/clk-mtk.h @@ -185,6 +185,7 @@ struct mtk_pll_data { uint32_t pcw_reg; int pcw_shift; const struct mtk_pll_div_table *div_table; + const char *parent_name; }; void mtk_clk_register_plls(struct device_node *node, diff --git a/drivers/clk/mediatek/clk-pll.c b/drivers/clk/mediatek/clk-pll.c index 0c2deac17ce9..1502384a3093 100644 --- a/drivers/clk/mediatek/clk-pll.c +++ b/drivers/clk/mediatek/clk-pll.c @@ -302,7 +302,10 @@ static struct clk *mtk_clk_register_pll(const struct mtk_pll_data *data, init.name = data->name; init.ops = &mtk_pll_ops; - init.parent_names = &parent_name; + if (data->parent_name) + init.parent_names = &data->parent_name; + else + init.parent_names = &parent_name; init.num_parents = 1; clk = clk_register(NULL, &pll->hw); -- GitLab From 47b63ea40ee158fbd032047935cf8b4d388ccd24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Szymanski?= Date: Tue, 1 Aug 2017 12:40:07 +0200 Subject: [PATCH 0487/1398] clk: imx6: refine hdmi_isfr's parent to make HDMI work on i.MX6 SoCs w/o VPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c68ee58d9ee7b856ac722f18f4f26579c8fbd2b4 ] On i.MX6 SoCs without VPU (in my case MCIMX6D4AVT10AC), the hdmi driver fails to probe: [ 2.540030] dwhdmi-imx 120000.hdmi: Unsupported HDMI controller (0000:00:00) [ 2.548199] imx-drm display-subsystem: failed to bind 120000.hdmi (ops dw_hdmi_imx_ops): -19 [ 2.557403] imx-drm display-subsystem: master bind failed: -19 That's because hdmi_isfr's parent, video_27m, is not correctly ungated. As explained in commit 5ccc248cc537 ("ARM: imx6q: clk: Add support for mipi_core_cfg clock as a shared clock gate"), video_27m is gated by CCM_CCGR3[CG8]. On i.MX6 SoCs with VPU, the hdmi is working thanks to the CCM_CMEOR[mod_en_ov_vpu] bit which makes the video_27m ungated whatever is in CCM_CCGR3[CG8]. The issue can be reproduced by setting CCMEOR[mod_en_ov_vpu] to 0. Make the HDMI work in every case by setting hdmi_isfr's parent to mipi_core_cfg. Signed-off-by: Sébastien Szymanski Reviewed-by: Fabio Estevam Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/imx/clk-imx6q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c index ce8ea10407e4..93a19667003d 100644 --- a/drivers/clk/imx/clk-imx6q.c +++ b/drivers/clk/imx/clk-imx6q.c @@ -487,7 +487,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) clk[IMX6QDL_CLK_GPU2D_CORE] = imx_clk_gate2("gpu2d_core", "gpu2d_core_podf", base + 0x6c, 24); clk[IMX6QDL_CLK_GPU3D_CORE] = imx_clk_gate2("gpu3d_core", "gpu3d_core_podf", base + 0x6c, 26); clk[IMX6QDL_CLK_HDMI_IAHB] = imx_clk_gate2("hdmi_iahb", "ahb", base + 0x70, 0); - clk[IMX6QDL_CLK_HDMI_ISFR] = imx_clk_gate2("hdmi_isfr", "video_27m", base + 0x70, 4); + clk[IMX6QDL_CLK_HDMI_ISFR] = imx_clk_gate2("hdmi_isfr", "mipi_core_cfg", base + 0x70, 4); clk[IMX6QDL_CLK_I2C1] = imx_clk_gate2("i2c1", "ipg_per", base + 0x70, 6); clk[IMX6QDL_CLK_I2C2] = imx_clk_gate2("i2c2", "ipg_per", base + 0x70, 8); clk[IMX6QDL_CLK_I2C3] = imx_clk_gate2("i2c3", "ipg_per", base + 0x70, 10); -- GitLab From 54809e38a6298fde41c0b72c2423f5d7c9e847fe Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 1 Sep 2017 08:47:14 +0800 Subject: [PATCH 0488/1398] clk: hi6220: mark clock cs_atb_syspll as critical [ Upstream commit d2a3671ebe6479483a12f94fcca63c058d95ad64 ] Clock cs_atb_syspll is pll used for coresight trace bus; when clock cs_atb_syspll is disabled and operates its child clock node cs_atb results in system hang. So mark clock cs_atb_syspll as critical to keep it enabled. Cc: Guodong Xu Cc: Zhangfei Gao Cc: Haojian Zhuang Signed-off-by: Leo Yan Signed-off-by: Michael Turquette Link: lkml.kernel.org/r/1504226835-2115-2-git-send-email-leo.yan@linaro.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/hisilicon/clk-hi6220.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/hisilicon/clk-hi6220.c b/drivers/clk/hisilicon/clk-hi6220.c index c0e8e1f196aa..2bfaf22e6ffc 100644 --- a/drivers/clk/hisilicon/clk-hi6220.c +++ b/drivers/clk/hisilicon/clk-hi6220.c @@ -144,7 +144,7 @@ static struct hisi_gate_clock hi6220_separated_gate_clks_sys[] __initdata = { { HI6220_BBPPLL_SEL, "bbppll_sel", "pll0_bbp_gate", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 9, 0, }, { HI6220_MEDIA_PLL_SRC, "media_pll_src", "pll_media_gate", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 10, 0, }, { HI6220_MMC2_SEL, "mmc2_sel", "mmc2_mux1", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 11, 0, }, - { HI6220_CS_ATB_SYSPLL, "cs_atb_syspll", "syspll", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 12, 0, }, + { HI6220_CS_ATB_SYSPLL, "cs_atb_syspll", "syspll", CLK_SET_RATE_PARENT|CLK_IS_CRITICAL, 0x270, 12, 0, }, }; static struct hisi_mux_clock hi6220_mux_clks_sys[] __initdata = { -- GitLab From 27f5597c98590cdd01f2c9bebbf36cb4bce28253 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 19 Sep 2017 04:48:10 +0200 Subject: [PATCH 0489/1398] clk: tegra: Fix cclk_lp divisor register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 54eff2264d3e9fd7e3987de1d7eba1d3581c631e ] According to comments in code and common sense, cclk_lp uses its own divisor, not cclk_g's. Fixes: b08e8c0ecc42 ("clk: tegra: add clock support for Tegra30") Signed-off-by: Michał Mirosław Acked-By: Peter De Schrijver Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/tegra/clk-tegra30.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c index 8e2db5ead8da..af520d81525f 100644 --- a/drivers/clk/tegra/clk-tegra30.c +++ b/drivers/clk/tegra/clk-tegra30.c @@ -963,7 +963,7 @@ static void __init tegra30_super_clk_init(void) * U71 divider of cclk_lp. */ clk = tegra_clk_register_divider("pll_p_out3_cclklp", "pll_p_out3", - clk_base + SUPER_CCLKG_DIVIDER, 0, + clk_base + SUPER_CCLKLP_DIVIDER, 0, TEGRA_DIVIDER_INT, 16, 8, 1, NULL); clk_register_clkdev(clk, "pll_p_out3_cclklp", NULL); -- GitLab From cf16dac8bd9868cf22203ac498e8e997ad7b0ca1 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 31 Oct 2017 18:25:37 +0800 Subject: [PATCH 0490/1398] ppp: Destroy the mutex when cleanup [ Upstream commit f02b2320b27c16b644691267ee3b5c110846f49e ] The mutex_destroy only makes sense when enable DEBUG_MUTEX. For the good readbility, it's better to invoke it in exit func when the init func invokes mutex_init. Signed-off-by: Gao Feng Acked-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/ppp_generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 440d5f42810f..b883af93929c 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -958,6 +958,7 @@ static __net_exit void ppp_exit_net(struct net *net) unregister_netdevice_many(&list); rtnl_unlock(); + mutex_destroy(&pn->all_ppp_mutex); idr_destroy(&pn->units_idr); } -- GitLab From 019433db872f9032cabd2a4a0fbd3df2bb14f539 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 1 Nov 2017 07:16:58 +0000 Subject: [PATCH 0491/1398] ASoC: rsnd: rsnd_ssi_run_mods() needs to care ssi_parent_mod [ Upstream commit 21781e87881f9c420871b1d1f3f29d4cd7bffb10 ] SSI parent mod might be NULL. ssi_parent_mod() needs to care about it. Otherwise, it uses negative shift. This patch fixes it. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/ssi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 6cb6db005fc4..560cf4b51a99 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -172,10 +172,15 @@ static u32 rsnd_ssi_run_mods(struct rsnd_dai_stream *io) { struct rsnd_mod *ssi_mod = rsnd_io_to_mod_ssi(io); struct rsnd_mod *ssi_parent_mod = rsnd_io_to_mod_ssip(io); + u32 mods; - return rsnd_ssi_multi_slaves_runtime(io) | - 1 << rsnd_mod_id(ssi_mod) | - 1 << rsnd_mod_id(ssi_parent_mod); + mods = rsnd_ssi_multi_slaves_runtime(io) | + 1 << rsnd_mod_id(ssi_mod); + + if (ssi_parent_mod) + mods |= 1 << rsnd_mod_id(ssi_parent_mod); + + return mods; } u32 rsnd_ssi_multi_slaves_runtime(struct rsnd_dai_stream *io) -- GitLab From d8914530f247900885850af7cccf81a2b33907b9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:58 +0200 Subject: [PATCH 0492/1398] thermal/drivers/step_wise: Fix temperature regulation misbehavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 07209fcf33542c1ff1e29df2dbdf8f29cdaacb10 ] There is a particular situation when the cooling device is cpufreq and the heat dissipation is not efficient enough where the temperature increases little by little until reaching the critical threshold and leading to a SoC reset. The behavior is reproducible on a hikey6220 with bad heat dissipation (eg. stacked with other boards). Running a simple C program doing while(1); for each CPU of the SoC makes the temperature to reach the passive regulation trip point and ends up to the maximum allowed temperature followed by a reset. This issue has been also reported by running the libhugetlbfs test suite. What is observed is a ping pong between two cpu frequencies, 1.2GHz and 900MHz while the temperature continues to grow. It appears the step wise governor calls get_target_state() the first time with the throttle set to true and the trend to 'raising'. The code selects logically the next state, so the cpu frequency decreases from 1.2GHz to 900MHz, so far so good. The temperature decreases immediately but still stays greater than the trip point, then get_target_state() is called again, this time with the throttle set to true *and* the trend to 'dropping'. From there the algorithm assumes we have to step down the state and the cpu frequency jumps back to 1.2GHz. But the temperature is still higher than the trip point, so get_target_state() is called with throttle=1 and trend='raising' again, we jump to 900MHz, then get_target_state() is called with throttle=1 and trend='dropping', we jump to 1.2GHz, etc ... but the temperature does not stabilizes and continues to increase. [ 237.922654] thermal thermal_zone0: Trip0[type=1,temp=65000]:trend=1,throttle=1 [ 237.922678] thermal thermal_zone0: Trip1[type=1,temp=75000]:trend=1,throttle=1 [ 237.922690] thermal cooling_device0: cur_state=0 [ 237.922701] thermal cooling_device0: old_target=0, target=1 [ 238.026656] thermal thermal_zone0: Trip0[type=1,temp=65000]:trend=2,throttle=1 [ 238.026680] thermal thermal_zone0: Trip1[type=1,temp=75000]:trend=2,throttle=1 [ 238.026694] thermal cooling_device0: cur_state=1 [ 238.026707] thermal cooling_device0: old_target=1, target=0 [ 238.134647] thermal thermal_zone0: Trip0[type=1,temp=65000]:trend=1,throttle=1 [ 238.134667] thermal thermal_zone0: Trip1[type=1,temp=75000]:trend=1,throttle=1 [ 238.134679] thermal cooling_device0: cur_state=0 [ 238.134690] thermal cooling_device0: old_target=0, target=1 In this situation the temperature continues to increase while the trend is oscillating between 'dropping' and 'raising'. We need to keep the current state untouched if the throttle is set, so the temperature can decrease or a higher state could be selected, thus preventing this oscillation. Keeping the next_target untouched when 'throttle' is true at 'dropping' time fixes the issue. The following traces show the governor does not change the next state if trend==2 (dropping) and throttle==1. [ 2306.127987] thermal thermal_zone0: Trip0[type=1,temp=65000]:trend=1,throttle=1 [ 2306.128009] thermal thermal_zone0: Trip1[type=1,temp=75000]:trend=1,throttle=1 [ 2306.128021] thermal cooling_device0: cur_state=0 [ 2306.128031] thermal cooling_device0: old_target=0, target=1 [ 2306.231991] thermal thermal_zone0: Trip0[type=1,temp=65000]:trend=2,throttle=1 [ 2306.232016] thermal thermal_zone0: Trip1[type=1,temp=75000]:trend=2,throttle=1 [ 2306.232030] thermal cooling_device0: cur_state=1 [ 2306.232042] thermal cooling_device0: old_target=1, target=1 [ 2306.335982] thermal thermal_zone0: Trip0[type=1,temp=65000]:trend=0,throttle=1 [ 2306.336006] thermal thermal_zone0: Trip1[type=1,temp=75000]:trend=0,throttle=1 [ 2306.336021] thermal cooling_device0: cur_state=1 [ 2306.336034] thermal cooling_device0: old_target=1, target=1 [ 2306.439984] thermal thermal_zone0: Trip0[type=1,temp=65000]:trend=2,throttle=1 [ 2306.440008] thermal thermal_zone0: Trip1[type=1,temp=75000]:trend=2,throttle=0 [ 2306.440022] thermal cooling_device0: cur_state=1 [ 2306.440034] thermal cooling_device0: old_target=1, target=0 [ ... ] After a while, if the temperature continues to increase, the next state becomes 2 which is 720MHz on the hikey. That results in the temperature stabilizing around the trip point. [ 2455.831982] thermal thermal_zone0: Trip0[type=1,temp=65000]:trend=1,throttle=1 [ 2455.832006] thermal thermal_zone0: Trip1[type=1,temp=75000]:trend=1,throttle=0 [ 2455.832019] thermal cooling_device0: cur_state=1 [ 2455.832032] thermal cooling_device0: old_target=1, target=1 [ 2455.935985] thermal thermal_zone0: Trip0[type=1,temp=65000]:trend=0,throttle=1 [ 2455.936013] thermal thermal_zone0: Trip1[type=1,temp=75000]:trend=0,throttle=0 [ 2455.936027] thermal cooling_device0: cur_state=1 [ 2455.936040] thermal cooling_device0: old_target=1, target=1 [ 2456.043984] thermal thermal_zone0: Trip0[type=1,temp=65000]:trend=0,throttle=1 [ 2456.044009] thermal thermal_zone0: Trip1[type=1,temp=75000]:trend=0,throttle=0 [ 2456.044023] thermal cooling_device0: cur_state=1 [ 2456.044036] thermal cooling_device0: old_target=1, target=1 [ 2456.148001] thermal thermal_zone0: Trip0[type=1,temp=65000]:trend=1,throttle=1 [ 2456.148028] thermal thermal_zone0: Trip1[type=1,temp=75000]:trend=1,throttle=1 [ 2456.148042] thermal cooling_device0: cur_state=1 [ 2456.148055] thermal cooling_device0: old_target=1, target=2 [ 2456.252009] thermal thermal_zone0: Trip0[type=1,temp=65000]:trend=2,throttle=1 [ 2456.252041] thermal thermal_zone0: Trip1[type=1,temp=75000]:trend=2,throttle=0 [ 2456.252058] thermal cooling_device0: cur_state=2 [ 2456.252075] thermal cooling_device0: old_target=2, target=1 IOW, this change is needed to keep the state for a cooling device if the temperature trend is oscillating while the temperature increases slightly. Without this change, the situation above leads to a catastrophic crash by a hardware reset on hikey. This issue has been reported to happen on an OMAP dra7xx also. Signed-off-by: Daniel Lezcano Cc: Keerthy Cc: John Stultz Cc: Leo Yan Tested-by: Keerthy Reviewed-by: Keerthy Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/step_wise.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index bcef2e7c4ec9..1eea63caa451 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -31,8 +31,7 @@ * If the temperature is higher than a trip point, * a. if the trend is THERMAL_TREND_RAISING, use higher cooling * state for this trip point - * b. if the trend is THERMAL_TREND_DROPPING, use lower cooling - * state for this trip point + * b. if the trend is THERMAL_TREND_DROPPING, do nothing * c. if the trend is THERMAL_TREND_RAISE_FULL, use upper limit * for this trip point * d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit @@ -94,9 +93,11 @@ static unsigned long get_target_state(struct thermal_instance *instance, if (!throttle) next_target = THERMAL_NO_TARGET; } else { - next_target = cur_state - 1; - if (next_target > instance->upper) - next_target = instance->upper; + if (!throttle) { + next_target = cur_state - 1; + if (next_target > instance->upper) + next_target = instance->upper; + } } break; case THERMAL_TREND_DROP_FULL: -- GitLab From 026ffaf65879b3ecc12ffa9c45dfa5894370bd60 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Sun, 29 Oct 2017 10:47:19 -0400 Subject: [PATCH 0493/1398] scsi: scsi_debug: write_same: fix error report [ Upstream commit e33d7c56450b0a5c7290cbf9e1581fab5174f552 ] The scsi_debug driver incorrectly suggests there is an error with the SCSI WRITE SAME command when the number_of_logical_blocks is greater than 1. It will also suggest there is an error when NDOB (no data-out buffer) is set and the number_of_logical_blocks is greater than 0. Both are valid, fix. Signed-off-by: Douglas Gilbert Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index cf04a364fd8b..2b0e61557317 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -2996,11 +2996,11 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, if (-1 == ret) { write_unlock_irqrestore(&atomic_rw, iflags); return DID_ERROR << 16; - } else if (sdebug_verbose && (ret < (num * sdebug_sector_size))) + } else if (sdebug_verbose && !ndob && (ret < sdebug_sector_size)) sdev_printk(KERN_INFO, scp->device, - "%s: %s: cdb indicated=%u, IO sent=%d bytes\n", + "%s: %s: lb size=%u, IO sent=%d bytes\n", my_name, "write same", - num * sdebug_sector_size, ret); + sdebug_sector_size, ret); /* Copy first sector to remaining blocks */ for (i = 1 ; i < num ; i++) -- GitLab From 75f66eeae657688f1e600214ae297422834d3c43 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 20 Sep 2017 08:30:04 -0500 Subject: [PATCH 0494/1398] GFS2: Take inode off order_write list when setting jdata flag [ Upstream commit cc555b09d8c3817aeebda43a14ab67049a5653f7 ] This patch fixes a deadlock caused when the jdata flag is set for inodes that are already on the ordered write list. Since it is on the ordered write list, log_flush calls gfs2_ordered_write which calls filemap_fdatawrite. But since the inode had the jdata flag set, that calls gfs2_jdata_writepages, which tries to start a new transaction. A new transaction cannot be started because it tries to acquire the log_flush rwsem which is already locked by the log flush operation. The bottom line is: We cannot switch an inode from ordered to jdata until we eliminate any ordered data pages (via log flush) or any log_flush operation afterward will create the circular dependency above. So we need to flush the log before setting the diskflags to switch the file mode, then we need to remove the inode from the ordered writes list. Before this patch, the log flush was done for jdata->ordered, but that's wrong. If we're going from jdata to ordered, we don't need to call gfs2_log_flush because the call to filemap_fdatawrite will do it for us: filemap_fdatawrite() -> __filemap_fdatawrite_range() __filemap_fdatawrite_range() -> do_writepages() do_writepages() -> gfs2_jdata_writepages() gfs2_jdata_writepages() -> gfs2_log_flush() This patch modifies function do_gfs2_set_flags so that if a file has its jdata flag set, and it's already on the ordered write list, the log will be flushed and it will be removed from the list before setting the flag. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher Acked-by: Abhijith Das Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index e23ff70b3435..39c382f16272 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -256,7 +256,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) goto out; } if ((flags ^ new_flags) & GFS2_DIF_JDATA) { - if (flags & GFS2_DIF_JDATA) + if (new_flags & GFS2_DIF_JDATA) gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); error = filemap_fdatawrite(inode->i_mapping); if (error) @@ -264,6 +264,8 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) error = filemap_fdatawait(inode->i_mapping); if (error) goto out; + if (new_flags & GFS2_DIF_JDATA) + gfs2_ordered_del_inode(ip); } error = gfs2_trans_begin(sdp, RES_DINODE, 0); if (error) -- GitLab From c2a0531f59c363fd5b50be46b97248091b13484b Mon Sep 17 00:00:00 2001 From: Liang Chen Date: Mon, 30 Oct 2017 14:46:35 -0700 Subject: [PATCH 0495/1398] bcache: explicitly destroy mutex while exiting [ Upstream commit 330a4db89d39a6b43f36da16824eaa7a7509d34d ] mutex_destroy does nothing most of time, but it's better to call it to make the code future proof and it also has some meaning for like mutex debug. As Coly pointed out in a previous review, bcache_exit() may not be able to handle all the references properly if userspace registers cache and backing devices right before bch_debug_init runs and bch_debug_init failes later. So not exposing userspace interface until everything is ready to avoid that issue. Signed-off-by: Liang Chen Reviewed-by: Michael Lyle Reviewed-by: Coly Li Reviewed-by: Eric Wheeler Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f4557f558b24..28ce342348a9 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2091,6 +2091,7 @@ static void bcache_exit(void) if (bcache_major) unregister_blkdev(bcache_major, "bcache"); unregister_reboot_notifier(&reboot); + mutex_destroy(&bch_register_lock); } static int __init bcache_init(void) @@ -2109,14 +2110,15 @@ static int __init bcache_init(void) bcache_major = register_blkdev(0, "bcache"); if (bcache_major < 0) { unregister_reboot_notifier(&reboot); + mutex_destroy(&bch_register_lock); return bcache_major; } if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) || !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || - sysfs_create_files(bcache_kobj, files) || bch_request_init() || - bch_debug_init(bcache_kobj)) + bch_debug_init(bcache_kobj) || + sysfs_create_files(bcache_kobj, files)) goto err; return 0; -- GitLab From 9102ed6a5f6a53434f69264ac8457994fde14a88 Mon Sep 17 00:00:00 2001 From: "tang.junhui" Date: Mon, 30 Oct 2017 14:46:34 -0700 Subject: [PATCH 0496/1398] bcache: fix wrong cache_misses statistics [ Upstream commit c157313791a999646901b3e3c6888514ebc36d62 ] Currently, Cache missed IOs are identified by s->cache_miss, but actually, there are many situations that missed IOs are not assigned a value for s->cache_miss in cached_dev_cache_miss(), for example, a bypassed IO (s->iop.bypass = 1), or the cache_bio allocate failed. In these situations, it will go to out_put or out_submit, and s->cache_miss is null, which leads bch_mark_cache_accounting() to treat this IO as a hit IO. [ML: applied by 3-way merge] Signed-off-by: tang.junhui Reviewed-by: Michael Lyle Reviewed-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/request.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index ab8a1b36af21..edb8d1a1a69f 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -468,6 +468,7 @@ struct search { unsigned recoverable:1; unsigned write:1; unsigned read_dirty_data:1; + unsigned cache_missed:1; unsigned long start_time; @@ -653,6 +654,7 @@ static inline struct search *search_alloc(struct bio *bio, s->orig_bio = bio; s->cache_miss = NULL; + s->cache_missed = 0; s->d = d; s->recoverable = 1; s->write = op_is_write(bio_op(bio)); @@ -771,7 +773,7 @@ static void cached_dev_read_done_bh(struct closure *cl) struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); bch_mark_cache_accounting(s->iop.c, s->d, - !s->cache_miss, s->iop.bypass); + !s->cache_missed, s->iop.bypass); trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass); if (s->iop.error) @@ -790,6 +792,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); struct bio *miss, *cache_bio; + s->cache_missed = 1; + if (s->cache_miss || s->iop.bypass) { miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split); ret = miss == bio ? MAP_DONE : MAP_CONTINUE; -- GitLab From 52aaa748a948a1bd50fa2ef0d2b9ffad11b0e672 Mon Sep 17 00:00:00 2001 From: Patel Jay P Date: Mon, 23 Oct 2017 06:05:53 -0700 Subject: [PATCH 0497/1398] Ib/hfi1: Return actual operational VLs in port info query [ Upstream commit 00f9203119dd2774564407c7a67b17d81916298b ] __subn_get_opa_portinfo stores value returned by hfi1_get_ib_cfg() as operational vls. hfi1_get_ib_cfg() returns vls_operational field in hfi1_pportdata. The problem with this is that the value is always equal to vls_supported field in hfi1_pportdata. The logic to calculate operational_vls is to set value passed by FM (in __subn_set_opa_portinfo routine). If no value is passed then default value is stored in operational_vls. Field actual_vls_operational is calculated on the basis of buffer control table. Hence, modifying hfi1_get_ib_cfg() to return actual_operational_vls when used with HFI1_IB_CFG_OP_VLS parameter Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Patel Jay P Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 24d0820873cf..4682909b021b 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9769,7 +9769,7 @@ int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which) goto unimplemented; case HFI1_IB_CFG_OP_VLS: - val = ppd->vls_operational; + val = ppd->actual_vls_operational; break; case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */ val = VL_ARB_HIGH_PRIO_TABLE_SIZE; -- GitLab From b7ada2c0ea291e7bf22b51745afd9f1ccfc91292 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 27 Oct 2017 09:33:41 -0700 Subject: [PATCH 0498/1398] arm64: prevent regressions in compressed kernel image size when upgrading to binutils 2.27 [ Upstream commit fd9dde6abcb9bfe6c6bee48834e157999f113971 ] Upon upgrading to binutils 2.27, we found that our lz4 and gzip compressed kernel images were significantly larger, resulting is 10ms boot time regressions. As noted by Rahul: "aarch64 binaries uses RELA relocations, where each relocation entry includes an addend value. This is similar to x86_64. On x86_64, the addend values are also stored at the relocation offset for relative relocations. This is an optimization: in the case where code does not need to be relocated, the loader can simply skip processing relative relocations. In binutils-2.25, both bfd and gold linkers did this for x86_64, but only the gold linker did this for aarch64. The kernel build here is using the bfd linker, which stored zeroes at the relocation offsets for relative relocations. Since a set of zeroes compresses better than a set of non-zero addend values, this behavior was resulting in much better lz4 compression. The bfd linker in binutils-2.27 is now storing the actual addend values at the relocation offsets. The behavior is now consistent with what it does for x86_64 and what gold linker does for both architectures. The change happened in this upstream commit: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=1f56df9d0d5ad89806c24e71f296576d82344613 Since a bunch of zeroes got replaced by non-zero addend values, we see the side effect of lz4 compressed image being a bit bigger. To get the old behavior from the bfd linker, "--no-apply-dynamic-relocs" flag can be used: $ LDFLAGS="--no-apply-dynamic-relocs" make With this flag, the compressed image size is back to what it was with binutils-2.25. If the kernel is using ASLR, there aren't additional runtime costs to --no-apply-dynamic-relocs, as the relocations will need to be applied again anyway after the kernel is relocated to a random address. If the kernel is not using ASLR, then presumably the current default behavior of the linker is better. Since the static linker performed the dynamic relocs, and the kernel is not moved to a different address at load time, it can skip applying the relocations all over again." Some measurements: $ ld -v GNU ld (binutils-2.25-f3d35cf6) 2.25.51.20141117 ^ $ ls -l vmlinux -rwxr-x--- 1 ndesaulniers eng 300652760 Oct 26 11:57 vmlinux $ ls -l Image.lz4-dtb -rw-r----- 1 ndesaulniers eng 16932627 Oct 26 11:57 Image.lz4-dtb $ ld -v GNU ld (binutils-2.27-53dd00a1) 2.27.0.20170315 ^ pre patch: $ ls -l vmlinux -rwxr-x--- 1 ndesaulniers eng 300376208 Oct 26 11:43 vmlinux $ ls -l Image.lz4-dtb -rw-r----- 1 ndesaulniers eng 18159474 Oct 26 11:43 Image.lz4-dtb post patch: $ ls -l vmlinux -rwxr-x--- 1 ndesaulniers eng 300376208 Oct 26 12:06 vmlinux $ ls -l Image.lz4-dtb -rw-r----- 1 ndesaulniers eng 16932466 Oct 26 12:06 Image.lz4-dtb By Siqi's measurement w/ gzip: binutils 2.27 with this patch (with --no-apply-dynamic-relocs): Image 41535488 Image.gz 13404067 binutils 2.27 without this patch (without --no-apply-dynamic-relocs): Image 41535488 Image.gz 14125516 Any compression scheme should be able to get better results from the longer runs of zeros, not just GZIP and LZ4. 10ms boot time savings isn't anything to get excited about, but users of arm64+compression+bfd-2.27 should not have to pay a penalty for no runtime improvement. Reported-by: Gopinath Elanchezhian Reported-by: Sindhuri Pentyala Reported-by: Wei Wang Suggested-by: Ard Biesheuvel Suggested-by: Rahul Chaudhry Suggested-by: Siqi Lin Suggested-by: Stephen Hines Signed-off-by: Nick Desaulniers Reviewed-by: Ard Biesheuvel [will: added comment to Makefile] Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 3635b8662724..92110c2c6c59 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -14,8 +14,12 @@ LDFLAGS_vmlinux :=-p --no-undefined -X CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) GZFLAGS :=-9 -ifneq ($(CONFIG_RELOCATABLE),) -LDFLAGS_vmlinux += -pie -shared -Bsymbolic +ifeq ($(CONFIG_RELOCATABLE), y) +# Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour +# for relative relocs, since this leads to better Image compression +# with the relocation offsets always being zero. +LDFLAGS_vmlinux += -pie -shared -Bsymbolic \ + $(call ld-option, --no-apply-dynamic-relocs) endif ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) -- GitLab From 7fab68e1f7308b2b7e3ebdd2483eb7a9572ccec5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 10 Sep 2017 13:19:38 +0200 Subject: [PATCH 0499/1398] btrfs: tests: Fix a memory leak in error handling path in 'run_test()' [ Upstream commit 9ca2e97fa3c3216200afe35a3b111ec51cc796d2 ] If 'btrfs_alloc_path()' fails, we must free the resources already allocated, as done in the other error handling paths in this function. Signed-off-by: Christophe JAILLET Reviewed-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tests/free-space-tree-tests.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 6e144048a72e..a724d9a79bd2 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -501,7 +501,8 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize, path = btrfs_alloc_path(); if (!path) { test_msg("Couldn't allocate path\n"); - return -ENOMEM; + ret = -ENOMEM; + goto out; } ret = add_block_group_free_space(&trans, root->fs_info, cache); -- GitLab From e2fce5a5578d175cd61822473228ab454782bec7 Mon Sep 17 00:00:00 2001 From: Osama Khan Date: Sat, 21 Oct 2017 10:42:21 +0000 Subject: [PATCH 0500/1398] platform/x86: hp_accel: Add quirk for HP ProBook 440 G4 [ Upstream commit 163ca80013aafb6dc9cb295de3db7aeab9ab43f8 ] Added support for HP ProBook 440 G4 laptops by including the accelerometer orientation quirk for that device. Testing was performed based on the axis orientation guidelines here: https://www.kernel.org/doc/Documentation/misc-devices/lis3lv02d which states "If the left side is elevated, X increases (becomes positive)". When tested, on lifting the left edge, x values became increasingly negative thus indicating an inverted x-axis on the installed lis3lv02d chip. This was compensated by adding an entry for this device in hp_accel.c specifying the quirk as x_inverted. The patch was tested on a ProBook 440 G4 device and x-axis as well as y and z-axis values are now generated as per spec. Signed-off-by: Osama Khan Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/hp_accel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 09356684c32f..abd9d83f6009 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -240,6 +240,7 @@ static const struct dmi_system_id lis3lv02d_dmi_ids[] = { AXIS_DMI_MATCH("HDX18", "HP HDX 18", x_inverted), AXIS_DMI_MATCH("HPB432x", "HP ProBook 432", xy_rotated_left), AXIS_DMI_MATCH("HPB440G3", "HP ProBook 440 G3", x_inverted_usd), + AXIS_DMI_MATCH("HPB440G4", "HP ProBook 440 G4", x_inverted), AXIS_DMI_MATCH("HPB442x", "HP ProBook 442", xy_rotated_left), AXIS_DMI_MATCH("HPB452x", "HP ProBook 452", y_inverted), AXIS_DMI_MATCH("HPB522x", "HP ProBook 522", xy_swap), -- GitLab From 6a559523ee6626d191be1da9031845c368e4a6a9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Oct 2017 13:20:01 +0200 Subject: [PATCH 0501/1398] nvme: use kref_get_unless_zero in nvme_find_get_ns [ Upstream commit 2dd4122854f697afc777582d18548dded03ce5dd ] For kref_get_unless_zero to protect against lookup vs free races we need to use it in all places where we aren't guaranteed to already hold a reference. There is no such guarantee in nvme_find_get_ns, so switch to kref_get_unless_zero in this function. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fbeca065f18c..719ee5fb2626 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1619,7 +1619,8 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) mutex_lock(&ctrl->namespaces_mutex); list_for_each_entry(ns, &ctrl->namespaces, list) { if (ns->ns_id == nsid) { - kref_get(&ns->kref); + if (!kref_get_unless_zero(&ns->kref)) + continue; ret = ns; break; } -- GitLab From fc4177eacfa6e95a243402790b362736879f1cfd Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 25 Oct 2017 15:57:55 +0200 Subject: [PATCH 0502/1398] l2tp: cleanup l2tp_tunnel_delete calls [ Upstream commit 4dc12ffeaeac939097a3f55c881d3dc3523dff0c ] l2tp_tunnel_delete does not return anything since commit 62b982eeb458 ("l2tp: fix race condition in l2tp_tunnel_delete"). But call sites of l2tp_tunnel_delete still do casts to void to avoid unused return value warnings. Kill these now useless casts. Signed-off-by: Jiri Slaby Cc: Sabrina Dubroca Cc: Guillaume Nault Cc: David S. Miller Cc: netdev@vger.kernel.org Acked-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_core.c | 2 +- net/l2tp/l2tp_netlink.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index b06acd0f400d..cfc4dd8997e5 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1944,7 +1944,7 @@ static __net_exit void l2tp_exit_net(struct net *net) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - (void)l2tp_tunnel_delete(tunnel); + l2tp_tunnel_delete(tunnel); } rcu_read_unlock_bh(); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 1ccd310d01a5..ee03bc866d1b 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -287,7 +287,7 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel, L2TP_CMD_TUNNEL_DELETE); - (void) l2tp_tunnel_delete(tunnel); + l2tp_tunnel_delete(tunnel); out: return ret; -- GitLab From c82209949bba864177300a681e0c736f982c87ab Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 26 Oct 2017 09:31:16 -0700 Subject: [PATCH 0503/1398] xfs: fix log block underflow during recovery cycle verification [ Upstream commit 9f2a4505800607e537e9dd9dea4f55c4b0c30c7a ] It is possible for mkfs to format very small filesystems with too small of an internal log with respect to the various minimum size and block count requirements. If this occurs when the log happens to be smaller than the scan window used for cycle verification and the scan wraps the end of the log, the start_blk calculation in xlog_find_head() underflows and leads to an attempt to scan an invalid range of log blocks. This results in log recovery failure and a failed mount. Since there may be filesystems out in the wild with this kind of geometry, we cannot simply refuse to mount. Instead, cap the scan window for cycle verification to the size of the physical log. This ensures that the cycle verification proceeds as expected when the scan wraps the end of the log. Reported-by: Zorro Lang Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log_recover.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 05909269f973..1e26f4504eed 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -753,7 +753,7 @@ xlog_find_head( * in the in-core log. The following number can be made tighter if * we actually look at the block size of the filesystem. */ - num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); + num_scan_bblks = min_t(int, log_bbnum, XLOG_TOTAL_REC_SHIFT(log)); if (head_blk >= num_scan_bblks) { /* * We are guaranteed that the entire check can be performed -- GitLab From 769bca9339f0775aa2985d4cae9be0e0d72092af Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 17 Oct 2017 14:16:19 -0700 Subject: [PATCH 0504/1398] xfs: fix incorrect extent state in xfs_bmap_add_extent_unwritten_real [ Upstream commit 5e422f5e4fd71d18bc6b851eeb3864477b3d842e ] There was one spot in xfs_bmap_add_extent_unwritten_real that didn't use the passed in new extent state but always converted to normal, leading to wrong behavior when converting from normal to unwritten. Only found by code inspection, it seems like this code path to move partial extent from written to unwritten while merging it with the next extent is rarely exercised. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 7eb99701054f..8ad65d43b65d 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -2713,7 +2713,7 @@ xfs_bmap_add_extent_unwritten_real( &i))) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); - cur->bc_rec.b.br_state = XFS_EXT_NORM; + cur->bc_rec.b.br_state = new->br_state; if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); -- GitLab From 8f84f861f99c44f03f5ed57b40e3bfe951cbae3a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 25 Oct 2017 07:41:11 +0300 Subject: [PATCH 0505/1398] RDMA/cxgb4: Declare stag as __be32 [ Upstream commit 35fb2a88ed4b77356fa679a8525c869a3594e287 ] The scqe.stag is actually __b32, fix it. drivers/infiniband/hw/cxgb4/cq.c:754:52: warning: cast to restricted __be32 Cc: Steve Wise Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/cxgb4/t4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 862381aa83c8..b55adf53c758 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -171,7 +171,7 @@ struct t4_cqe { __be32 msn; } rcqe; struct { - u32 stag; + __be32 stag; u16 nada2; u16 cidx; } scqe; -- GitLab From 237e053346f1b049ca9f571040c51c61f0740766 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 11 Oct 2017 15:35:56 -0600 Subject: [PATCH 0506/1398] PCI: Detach driver before procfs & sysfs teardown on device remove [ Upstream commit 16b6c8bb687cc3bec914de09061fcb8411951fda ] When removing a device, for example a VF being removed due to SR-IOV teardown, a "soft" hot-unplug via 'echo 1 > remove' in sysfs, or an actual hot-unplug, we first remove the procfs and sysfs attributes for the device before attempting to release the device from any driver bound to it. Unbinding the driver from the device can take time. The device might need to write out data or it might be actively in use. If it's in use by userspace through a vfio driver, the unbind might block until the user releases the device. This leads to a potentially non-trivial amount of time where the device exists, but we've torn down the interfaces that userspace uses to examine devices, for instance lspci might generate this sort of error: pcilib: Cannot open /sys/bus/pci/devices/0000:01:0a.3/config lspci: Unable to read the standard configuration space header of device 0000:01:0a.3 We don't seem to have any dependence on this teardown ordering in the kernel, so let's unbind the driver first, which is also more symmetric with the instantiation of the device in pci_bus_add_device(). Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/remove.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index f9357e09e9b3..b6b9b5b74e30 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -19,9 +19,9 @@ static void pci_stop_dev(struct pci_dev *dev) pci_pme_active(dev, false); if (dev->is_added) { + device_release_driver(&dev->dev); pci_proc_detach_device(dev); pci_remove_sysfs_dev_files(dev); - device_release_driver(&dev->dev); dev->is_added = 0; } -- GitLab From 1723d6668df5107c6026cbbf7afd2c615d74e006 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Fri, 20 Oct 2017 16:51:14 -0500 Subject: [PATCH 0507/1398] scsi: hpsa: cleanup sas_phy structures in sysfs when unloading [ Upstream commit 55ca38b4255bb336c2d35990bdb2b368e19b435a ] I am resubmitting this patch on behalf of Martin Wilck with his permission. The original patch can be found here: https://www.spinics.net/lists/linux-scsi/msg102083.html This patch did not help until Hannes's commit 9441284fbc39 ("scsi-fixup-kernel-warning-during-rmmod") was applied to the kernel. -------------------------------------- Original patch description from Martin: -------------------------------------- When the hpsa module is unloaded using rmmod, dangling symlinks remain under /sys/class/sas_phy. Fix this by calling sas_phy_delete() rather than sas_phy_free (which, according to comments, should not be called for PHYs that have been set up successfully, anyway). Tested-by: Don Brace Reviewed-by: Don Brace Signed-off-by: Martin Wilck Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hpsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 11a9ffcf9113..bbe651fb10f9 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -9632,9 +9632,9 @@ static void hpsa_free_sas_phy(struct hpsa_sas_phy *hpsa_sas_phy) struct sas_phy *phy = hpsa_sas_phy->phy; sas_port_delete_phy(hpsa_sas_phy->parent_port->port, phy); - sas_phy_free(phy); if (hpsa_sas_phy->added_to_port) list_del(&hpsa_sas_phy->phy_list_entry); + sas_phy_delete(phy); kfree(hpsa_sas_phy); } -- GitLab From ab9d2571149322120efcf3d6ce9c24003c946ca1 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Fri, 20 Oct 2017 16:51:08 -0500 Subject: [PATCH 0508/1398] scsi: hpsa: destroy sas transport properties before scsi_host [ Upstream commit dfb2e6f46b3074eb85203d8f0888b71ec1c2e37a ] This patch cleans up a lot of warnings when unloading the driver. A current example of the stack trace starts with: [ 142.570715] sysfs group 'power' not found for kobject 'port-5:0' There can be hundreds of these messages during a driver unload. I am resubmitting this patch on behalf of Martin Wilck with his permission. His original patch can be found here: https://www.spinics.net/lists/linux-scsi/msg102085.html This patch did not help until Hannes's commit 9441284fbc39 ("scsi-fixup-kernel-warning-during-rmmod") was applied to the kernel. --------------------------- Original patch description: --------------------------- Unloading the hpsa driver causes warnings [ 1063.793652] WARNING: CPU: 1 PID: 4850 at ../fs/sysfs/group.c:237 device_del+0x54/0x240() [ 1063.793659] sysfs group ffffffff81cf21a0 not found for kobject 'port-2:0' with two different stacks: 1) [ 1063.793774] [] device_del+0x54/0x240 [ 1063.793780] [] transport_remove_classdev+0x4a/0x60 [ 1063.793784] [] attribute_container_device_trigger+0xa6/0xb0 [ 1063.793802] [] sas_port_delete+0x126/0x160 [scsi_transport_sas] [ 1063.793819] [] hpsa_free_sas_port+0x3c/0x70 [hpsa] 2) [ 1063.797103] [] device_del+0x54/0x240 [ 1063.797118] [] sas_port_delete+0x12e/0x160 [scsi_transport_sas] [ 1063.797134] [] hpsa_free_sas_port+0x3c/0x70 [hpsa] This is caused by the fact that host device hostX is deleted before the SAS transport devices hostX/port-a:b. This patch fixes this by reverting the order of device deletions. Tested-by: Don Brace Reviewed-by: Don Brace Signed-off-by: Martin Wilck Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hpsa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index bbe651fb10f9..99623701fc3d 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -9105,6 +9105,8 @@ static void hpsa_remove_one(struct pci_dev *pdev) destroy_workqueue(h->rescan_ctlr_wq); destroy_workqueue(h->resubmit_wq); + hpsa_delete_sas_host(h); + /* * Call before disabling interrupts. * scsi_remove_host can trigger I/O operations especially @@ -9139,8 +9141,6 @@ static void hpsa_remove_one(struct pci_dev *pdev) h->lockup_detected = NULL; /* init_one 2 */ /* (void) pci_disable_pcie_error_reporting(pdev); */ /* init_one 1 */ - hpsa_delete_sas_host(h); - kfree(h); /* init_one 1 */ } -- GitLab From 7745382fe86ce5354dc459948c086d3b5778fb9d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 9 Oct 2017 21:52:44 +1100 Subject: [PATCH 0509/1398] powerpc/perf/hv-24x7: Fix incorrect comparison in memord [ Upstream commit 05c14c03138532a3cb2aa29c2960445c8753343b ] In the hv-24x7 code there is a function memord() which tries to implement a sort function return -1, 0, 1. However one of the conditions is incorrect, such that it can never be true, because we will have already returned. I don't believe there is a bug in practice though, because the comparisons are an optimisation prior to calling memcmp(). Fix it by swapping the second comparision, so it can be true. Reported-by: David Binderman Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/hv-24x7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 7b2ca16b1eb4..991c6a517ddc 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -516,7 +516,7 @@ static int memord(const void *d1, size_t s1, const void *d2, size_t s2) { if (s1 < s2) return 1; - if (s2 > s1) + if (s1 > s2) return -1; return memcmp(d1, d2, s1); -- GitLab From 421910e924b63f2ccd4d433c77c5cd88063546ba Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Sat, 21 Oct 2017 10:17:47 +0200 Subject: [PATCH 0510/1398] soc: mediatek: pwrap: fix compiler errors [ Upstream commit fb2c1934f30577756e55e24e8870b45c78da3bc2 ] When compiling using sparse, we got the following error: drivers/soc/mediatek/mtk-pmic-wrap.c:686:25: error: dubious one-bit signed bitfield Changing the data type to unsigned fixes this. Signed-off-by: Matthias Brugger Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/soc/mediatek/mtk-pmic-wrap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/mediatek/mtk-pmic-wrap.c b/drivers/soc/mediatek/mtk-pmic-wrap.c index a5f10936fb9c..e929f5142862 100644 --- a/drivers/soc/mediatek/mtk-pmic-wrap.c +++ b/drivers/soc/mediatek/mtk-pmic-wrap.c @@ -522,7 +522,7 @@ struct pmic_wrapper_type { u32 int_en_all; u32 spi_w; u32 wdt_src; - int has_bridge:1; + unsigned int has_bridge:1; int (*init_reg_clock)(struct pmic_wrapper *wrp); int (*init_soc_specific)(struct pmic_wrapper *wrp); }; -- GitLab From 92ad6c13e17ebea6175b978dc0f5b485e7e68cf1 Mon Sep 17 00:00:00 2001 From: nixiaoming Date: Fri, 15 Sep 2017 17:45:56 +0800 Subject: [PATCH 0511/1398] tty fix oops when rmmod 8250 [ Upstream commit c79dde629d2027ca80329c62854a7635e623d527 ] After rmmod 8250.ko tty_kref_put starts kwork (release_one_tty) to release proc interface oops when accessing driver->driver_name in proc_tty_unregister_driver Use jprobe, found driver->driver_name point to 8250.ko static static struct uart_driver serial8250_reg .driver_name= serial, Use name in proc_dir_entry instead of driver->driver_name to fix oops test on linux 4.1.12: BUG: unable to handle kernel paging request at ffffffffa01979de IP: [] strchr+0x0/0x30 PGD 1a0d067 PUD 1a0e063 PMD 851c1f067 PTE 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: ... ... [last unloaded: 8250] CPU: 7 PID: 116 Comm: kworker/7:1 Tainted: G O 4.1.12 #1 Hardware name: Insyde RiverForest/Type2 - Board Product Name1, BIOS NE5KV904 12/21/2015 Workqueue: events release_one_tty task: ffff88085b684960 ti: ffff880852884000 task.ti: ffff880852884000 RIP: 0010:[] [] strchr+0x0/0x30 RSP: 0018:ffff880852887c90 EFLAGS: 00010282 RAX: ffffffff81a5eca0 RBX: ffffffffa01979de RCX: 0000000000000004 RDX: ffff880852887d10 RSI: 000000000000002f RDI: ffffffffa01979de RBP: ffff880852887cd8 R08: 0000000000000000 R09: ffff88085f5d94d0 R10: 0000000000000195 R11: 0000000000000000 R12: ffffffffa01979de R13: ffff880852887d00 R14: ffffffffa01979de R15: ffff88085f02e840 FS: 0000000000000000(0000) GS:ffff88085f5c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa01979de CR3: 0000000001a0c000 CR4: 00000000001406e0 Stack: ffffffff812349b1 ffff880852887cb8 ffff880852887d10 ffff88085f5cd6c2 ffff880852800a80 ffffffffa01979de ffff880852800a84 0000000000000010 ffff88085bb28bd8 ffff880852887d38 ffffffff812354f0 ffff880852887d08 Call Trace: [] ? __xlate_proc_name+0x71/0xd0 [] remove_proc_entry+0x40/0x180 [] ? _raw_spin_lock_irqsave+0x41/0x60 [] ? destruct_tty_driver+0x60/0xe0 [] proc_tty_unregister_driver+0x28/0x40 [] destruct_tty_driver+0x88/0xe0 [] tty_driver_kref_put+0x1d/0x20 [] release_one_tty+0x5a/0xd0 [] process_one_work+0x139/0x420 [] worker_thread+0x121/0x450 [] ? process_scheduled_works+0x40/0x40 [] kthread+0xec/0x110 [] ? tg_rt_schedulable+0x210/0x220 [] ? kthread_freezable_should_stop+0x80/0x80 [] ret_from_fork+0x42/0x70 [] ? kthread_freezable_should_stop+0x80/0x80 Signed-off-by: nixiaoming Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/proc/proc_tty.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 15f327bed8c6..7340c36978a3 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -14,6 +14,7 @@ #include #include #include +#include "internal.h" /* * The /proc/tty directory inodes... @@ -164,7 +165,7 @@ void proc_tty_unregister_driver(struct tty_driver *driver) if (!ent) return; - remove_proc_entry(driver->driver_name, proc_tty_driver); + remove_proc_entry(ent->name, proc_tty_driver); driver->proc_entry = NULL; } -- GitLab From afeeff4d6156b4965af9cf30ba05b05f04b9c7b0 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 5 Dec 2017 08:45:30 -0600 Subject: [PATCH 0512/1398] usb: musb: da8xx: fix babble condition handling commit bd3486ded7a0c313a6575343e6c2b21d14476645 upstream. When babble condition happens, the musb controller might automatically turns off VBUS. On DA8xx platform, the controller generates drvvbus interrupt for turning off VBUS along with the babble interrupt. In this case, we should handle the babble interrupt first and recover from the babble condition. This change ignores the drvvbus interrupt if babble interrupt is also generated at the same time, so the babble recovery routine works properly. Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/da8xx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index bacee0fd4dd3..ea5bad49394b 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -302,7 +302,15 @@ static irqreturn_t da8xx_musb_interrupt(int irq, void *hci) musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE; portstate(musb->port1_status |= USB_PORT_STAT_POWER); del_timer(&otg_workaround); - } else { + } else if (!(musb->int_usb & MUSB_INTR_BABBLE)){ + /* + * When babble condition happens, drvvbus interrupt + * is also generated. Ignore this drvvbus interrupt + * and let babble interrupt handler recovers the + * controller; otherwise, the host-mode flag is lost + * due to the MUSB_DEV_MODE() call below and babble + * recovery logic will not called. + */ musb->is_active = 0; MUSB_DEV_MODE(musb); otg->default_a = 0; -- GitLab From 4fdb10391bcafe698f06dd105b0d829744eb1926 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 11 Oct 2017 11:57:15 +0200 Subject: [PATCH 0513/1398] pinctrl: adi2: Fix Kconfig build problem [ Upstream commit 1c363531dd814dc4fe10865722bf6b0f72ce4673 ] The build robot is complaining on Blackfin: drivers/pinctrl/pinctrl-adi2.c: In function 'port_setup': >> drivers/pinctrl/pinctrl-adi2.c:221:21: error: dereferencing pointer to incomplete type 'struct gpio_port_t' writew(readw(®s->port_fer) & ~BIT(offset), ^~ drivers/pinctrl/pinctrl-adi2.c: In function 'adi_gpio_ack_irq': >> drivers/pinctrl/pinctrl-adi2.c:266:18: error: dereferencing pointer to incomplete type 'struct bfin_pint_regs' if (readl(®s->invert_set) & pintbit) ^~ It seems the driver need to include and to compile. The Blackfin architecture was re-defining the Kconfig PINCTRL symbol which is not OK, so replaced this with PINCTRL_BLACKFIN_ADI2 which selects PINCTRL and PINCTRL_ADI2 just like most arches do. Further, the old GPIO driver symbol GPIO_ADI was possible to select at the same time as selecting PINCTRL. This was not working because the arch-local header contains an explicit #ifndef PINCTRL clause making compilation break if you combine them. The same is true for DEBUG_MMRS. Make sure the ADI2 pinctrl driver is not selected at the same time as the old GPIO implementation. (This should be converted to use gpiolib or pincontrol and move to drivers/...) Also make sure the old GPIO_ADI driver or DEBUG_MMRS is not selected at the same time as the new PINCTRL implementation, and only make PINCTRL_ADI2 selectable for the Blackfin families that actually have it. This way it is still possible to add e.g. I2C-based pin control expanders on the Blackfin. Cc: Steven Miao Cc: Huanhuan Feng Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/blackfin/Kconfig | 7 +++++-- arch/blackfin/Kconfig.debug | 1 + drivers/pinctrl/Kconfig | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index 3c1bd640042a..88c4b77ec8d2 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -319,11 +319,14 @@ config BF53x config GPIO_ADI def_bool y + depends on !PINCTRL depends on (BF51x || BF52x || BF53x || BF538 || BF539 || BF561) -config PINCTRL +config PINCTRL_BLACKFIN_ADI2 def_bool y - depends on BF54x || BF60x + depends on (BF54x || BF60x) + select PINCTRL + select PINCTRL_ADI2 config MEM_MT48LC64M4A2FB_7E bool diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug index f3337ee03621..a93cf06a4d6f 100644 --- a/arch/blackfin/Kconfig.debug +++ b/arch/blackfin/Kconfig.debug @@ -17,6 +17,7 @@ config DEBUG_VERBOSE config DEBUG_MMRS tristate "Generate Blackfin MMR tree" + depends on !PINCTRL select DEBUG_FS help Create a tree of Blackfin MMRs via the debugfs tree. If diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index 671610c989b6..b0c0fa0444dd 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -26,7 +26,8 @@ config DEBUG_PINCTRL config PINCTRL_ADI2 bool "ADI pin controller driver" - depends on BLACKFIN + depends on (BF54x || BF60x) + depends on !GPIO_ADI select PINMUX select IRQ_DOMAIN help -- GitLab From f39486bd37ee3edd8658aff1226337c47e4ba32e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 17 Oct 2017 16:18:36 +1100 Subject: [PATCH 0514/1398] raid5: Set R5_Expanded on parity devices as well as data. [ Upstream commit 235b6003fb28f0dd8e7ed8fbdb088bb548291766 ] When reshaping a fully degraded raid5/raid6 to a larger nubmer of devices, the new device(s) are not in-sync and so that can make the newly grown stripe appear to be "failed". To avoid this, we set the R5_Expanded flag to say "Even though this device is not fully in-sync, this block is safe so don't treat the device as failed for this stripe". This flag is set for data devices, not not for parity devices. Consequently, if you have a RAID6 with two devices that are partly recovered and a spare, and start a reshape to include the spare, then when the reshape gets past the point where the recovery was up to, it will think the stripes are failed and will get into an infinite loop, failing to make progress. So when contructing parity on an EXPAND_READY stripe, set R5_Expanded. Reported-by: Curt Signed-off-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7aea0221530c..475a7a1bcfe0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1689,8 +1689,11 @@ static void ops_complete_reconstruct(void *stripe_head_ref) struct r5dev *dev = &sh->dev[i]; if (dev->written || i == pd_idx || i == qd_idx) { - if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) + if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) { set_bit(R5_UPTODATE, &dev->flags); + if (test_bit(STRIPE_EXPAND_READY, &sh->state)) + set_bit(R5_Expanded, &dev->flags); + } if (fua) set_bit(R5_WantFUA, &dev->flags); if (sync) -- GitLab From c744ecec01ae6a92541c381d3548f8753aa50ba2 Mon Sep 17 00:00:00 2001 From: Kurt Garloff Date: Tue, 17 Oct 2017 09:10:45 +0200 Subject: [PATCH 0515/1398] scsi: scsi_devinfo: Add REPORTLUN2 to EMC SYMMETRIX blacklist entry [ Upstream commit 909cf3e16a5274fe2127cf3cea5c8dba77b2c412 ] All EMC SYMMETRIX support REPORT_LUNS, even if configured to report SCSI-2 for whatever reason. Signed-off-by: Kurt Garloff Signed-off-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_devinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 246456925335..26e6b05d05fc 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -160,7 +160,7 @@ static struct { {"DGC", "RAID", NULL, BLIST_SPARSELUN}, /* Dell PV 650F, storage on LUN 0 */ {"DGC", "DISK", NULL, BLIST_SPARSELUN}, /* Dell PV 650F, no storage on LUN 0 */ {"EMC", "Invista", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, - {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_FORCELUN}, + {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_REPORTLUN2}, {"EMULEX", "MD21/S2 ESDI", NULL, BLIST_SINGLELUN}, {"easyRAID", "16P", NULL, BLIST_NOREPORTLUN}, {"easyRAID", "X6P", NULL, BLIST_NOREPORTLUN}, -- GitLab From 04d5a2d5d2d079ce5966d7e67324c9c4f648c951 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Oct 2017 08:45:16 +0300 Subject: [PATCH 0516/1398] IB/core: Fix calculation of maximum RoCE MTU [ Upstream commit 99260132fde7bddc6e0132ce53da94d1c9ccabcb ] The original code only took into consideration the largest header possible after the IB_BTH_BYTES. This was incorrect, as the largest possible header size is the largest possible combination of headers we might run into. The new code accounts for all possible headers in the largest possible combination and subtracts that from the MTU to make sure that all packets will fit on the wire. Link: https://www.spinics.net/lists/linux-rdma/msg54558.html Fixes: 3c86aa70bf67 ("RDMA/cm: Add RDMA CM support for IBoE devices") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Reported-by: Roland Dreier Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/rdma/ib_addr.h | 7 ++++--- include/rdma/ib_pack.h | 19 +++++++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 1beab5532035..818a38f99221 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -243,10 +243,11 @@ static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_g static inline enum ib_mtu iboe_get_mtu(int mtu) { /* - * reduce IB headers from effective IBoE MTU. 28 stands for - * atomic header which is the biggest possible header after BTH + * Reduce IB headers from effective IBoE MTU. */ - mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28; + mtu = mtu - (IB_GRH_BYTES + IB_UDP_BYTES + IB_BTH_BYTES + + IB_EXT_XRC_BYTES + IB_EXT_ATOMICETH_BYTES + + IB_ICRC_BYTES); if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) return IB_MTU_4096; diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index b13419ce99ff..e02b78a38eba 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -37,14 +37,17 @@ #include enum { - IB_LRH_BYTES = 8, - IB_ETH_BYTES = 14, - IB_VLAN_BYTES = 4, - IB_GRH_BYTES = 40, - IB_IP4_BYTES = 20, - IB_UDP_BYTES = 8, - IB_BTH_BYTES = 12, - IB_DETH_BYTES = 8 + IB_LRH_BYTES = 8, + IB_ETH_BYTES = 14, + IB_VLAN_BYTES = 4, + IB_GRH_BYTES = 40, + IB_IP4_BYTES = 20, + IB_UDP_BYTES = 8, + IB_BTH_BYTES = 12, + IB_DETH_BYTES = 8, + IB_EXT_ATOMICETH_BYTES = 28, + IB_EXT_XRC_BYTES = 4, + IB_ICRC_BYTES = 4 }; struct ib_field { -- GitLab From 28e006e14ff9b2fe55078c02bb92212b09664517 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Mon, 9 Oct 2017 16:45:55 +0800 Subject: [PATCH 0517/1398] vt6655: Fix a possible sleep-in-atomic bug in vt6655_suspend [ Upstream commit 42c8eb3f6e15367981b274cb79ee4657e2c6949d ] The driver may sleep under a spinlock, and the function call path is: vt6655_suspend (acquire the spinlock) pci_set_power_state __pci_start_power_transition (drivers/pci/pci.c) msleep --> may sleep To fix it, pci_set_power_state is called without having a spinlock. This bug is found by my static analysis tool and my code review. Signed-off-by: Jia-Ju Bai Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6655/device_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index f109eeac358d..ab96629b7889 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1698,10 +1698,11 @@ static int vt6655_suspend(struct pci_dev *pcid, pm_message_t state) MACbShutdown(priv); pci_disable_device(pcid); - pci_set_power_state(pcid, pci_choose_state(pcid, state)); spin_unlock_irqrestore(&priv->lock, flags); + pci_set_power_state(pcid, pci_choose_state(pcid, state)); + return 0; } -- GitLab From 6641d3e307f5b42237a99f2aa961051a71c50393 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sun, 8 Oct 2017 19:54:45 +0800 Subject: [PATCH 0518/1398] rtl8188eu: Fix a possible sleep-in-atomic bug in rtw_createbss_cmd [ Upstream commit 2bf9806d4228f7a6195f8e03eda0479d2a93b411 ] The driver may sleep under a spinlock, and the function call path is: rtw_surveydone_event_callback(acquire the spinlock) rtw_createbss_cmd kzalloc(GFP_KERNEL) --> may sleep To fix it, GFP_KERNEL is replaced with GFP_ATOMIC. This bug is found by my static analysis tool and my code review. Signed-off-by: Jia-Ju Bai Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/core/rtw_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/core/rtw_cmd.c b/drivers/staging/rtl8188eu/core/rtw_cmd.c index f1f4788dbd86..318d5b002272 100644 --- a/drivers/staging/rtl8188eu/core/rtw_cmd.c +++ b/drivers/staging/rtl8188eu/core/rtw_cmd.c @@ -342,7 +342,7 @@ u8 rtw_createbss_cmd(struct adapter *padapter) else RT_TRACE(_module_rtl871x_cmd_c_, _drv_info_, (" createbss for SSid:%s\n", pmlmepriv->assoc_ssid.Ssid)); - pcmd = kzalloc(sizeof(struct cmd_obj), GFP_KERNEL); + pcmd = kzalloc(sizeof(struct cmd_obj), GFP_ATOMIC); if (!pcmd) { res = _FAIL; goto exit; -- GitLab From 8315bcf841ae26d266dbaecd8a47956632478799 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sun, 8 Oct 2017 19:54:07 +0800 Subject: [PATCH 0519/1398] rtl8188eu: Fix a possible sleep-in-atomic bug in rtw_disassoc_cmd [ Upstream commit 08880f8e08cbd814e870e9d3ab9530abc1bce226 ] The driver may sleep under a spinlock, and the function call path is: rtw_set_802_11_bssid(acquire the spinlock) rtw_disassoc_cmd kzalloc(GFP_KERNEL) --> may sleep To fix it, GFP_KERNEL is replaced with GFP_ATOMIC. This bug is found by my static analysis tool and my code review. Signed-off-by: Jia-Ju Bai Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/core/rtw_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/core/rtw_cmd.c b/drivers/staging/rtl8188eu/core/rtw_cmd.c index 318d5b002272..6051a7ba0797 100644 --- a/drivers/staging/rtl8188eu/core/rtw_cmd.c +++ b/drivers/staging/rtl8188eu/core/rtw_cmd.c @@ -522,7 +522,7 @@ u8 rtw_disassoc_cmd(struct adapter *padapter, u32 deauth_timeout_ms, bool enqueu if (enqueue) { /* need enqueue, prepare cmd_obj and enqueue */ - cmdobj = kzalloc(sizeof(*cmdobj), GFP_KERNEL); + cmdobj = kzalloc(sizeof(*cmdobj), GFP_ATOMIC); if (!cmdobj) { res = _FAIL; kfree(param); -- GitLab From 1cafdac8919136bd2ee6e0f607c49aacb9bb854b Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 12 Oct 2017 14:57:06 +0800 Subject: [PATCH 0520/1398] scsi: sd: change manage_start_stop to bool in sysfs interface [ Upstream commit 623401ee33e42cee64d333877892be8db02951eb ] /sys/class/scsi_disk/0:2:0:0/manage_start_stop can be changed to 0 unexpectly by writing an invalid string. Signed-off-by: weiping zhang Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 09fa1fd0c4ce..66e1c2ea79d7 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -234,11 +234,15 @@ manage_start_stop_store(struct device *dev, struct device_attribute *attr, { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; + bool v; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - sdp->manage_start_stop = simple_strtoul(buf, NULL, 10); + if (kstrtobool(buf, &v)) + return -EINVAL; + + sdp->manage_start_stop = v; return count; } -- GitLab From 64da4e8d00f16bc7db25f6d819c4beac2656a90f Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 12 Oct 2017 14:56:44 +0800 Subject: [PATCH 0521/1398] scsi: sd: change allow_restart to bool in sysfs interface [ Upstream commit 658e9a6dc1126f21fa417cd213e1cdbff8be0ba2 ] /sys/class/scsi_disk/0:2:0:0/allow_restart can be changed to 0 unexpectedly by writing an invalid string such as the following: echo asdf > /sys/class/scsi_disk/0:2:0:0/allow_restart Signed-off-by: weiping zhang Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 66e1c2ea79d7..ace56c5e61e1 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -260,6 +260,7 @@ static ssize_t allow_restart_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + bool v; struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; @@ -269,7 +270,10 @@ allow_restart_store(struct device *dev, struct device_attribute *attr, if (sdp->type != TYPE_DISK) return -EINVAL; - sdp->allow_restart = simple_strtoul(buf, NULL, 10); + if (kstrtobool(buf, &v)) + return -EINVAL; + + sdp->allow_restart = v; return count; } -- GitLab From 91e0cf85caea7f977361f662941100821370d74e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Oct 2017 10:50:37 +0300 Subject: [PATCH 0522/1398] scsi: bfa: integer overflow in debugfs [ Upstream commit 3e351275655d3c84dc28abf170def9786db5176d ] We could allocate less memory than intended because we do: bfad->regdata = kzalloc(len << 2, GFP_KERNEL); The shift can overflow leading to a crash. This is debugfs code so the impact is very small. I fixed the network version of this in March with commit 13e2d5187f6b ("bna: integer overflow bug in debugfs"). Fixes: ab2a9ba189e8 ("[SCSI] bfa: add debugfs support") Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/bfa/bfad_debugfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/bfa/bfad_debugfs.c b/drivers/scsi/bfa/bfad_debugfs.c index 8dcd8c70c7ee..05f523971348 100644 --- a/drivers/scsi/bfa/bfad_debugfs.c +++ b/drivers/scsi/bfa/bfad_debugfs.c @@ -255,7 +255,8 @@ bfad_debugfs_write_regrd(struct file *file, const char __user *buf, struct bfad_s *bfad = port->bfad; struct bfa_s *bfa = &bfad->bfa; struct bfa_ioc_s *ioc = &bfa->ioc; - int addr, len, rc, i; + int addr, rc, i; + u32 len; u32 *regbuf; void __iomem *rb, *reg_addr; unsigned long flags; @@ -266,7 +267,7 @@ bfad_debugfs_write_regrd(struct file *file, const char __user *buf, return PTR_ERR(kern_buf); rc = sscanf(kern_buf, "%x:%x", &addr, &len); - if (rc < 2) { + if (rc < 2 || len > (UINT_MAX >> 2)) { printk(KERN_INFO "bfad[%d]: %s failed to read user buf\n", bfad->inst_no, __func__); -- GitLab From b64ab3ca9d31162cbdbd8acc2d5b68cf37302527 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 16 Oct 2017 11:38:11 +0200 Subject: [PATCH 0523/1398] udf: Avoid overflow when session starts at large offset [ Upstream commit abdc0eb06964fe1d2fea6dd1391b734d0590365d ] When session starts beyond offset 2^31 the arithmetics in udf_check_vsd() would overflow. Make sure the computation is done in large enough type. Reported-by: Cezary Sliwa Signed-off-by: Jan Kara Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/udf/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 4942549e7dc8..4b1f6d5372c3 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -710,7 +710,7 @@ static loff_t udf_check_vsd(struct super_block *sb) else sectorsize = sb->s_blocksize; - sector += (sbi->s_session << sb->s_blocksize_bits); + sector += (((loff_t)sbi->s_session) << sb->s_blocksize_bits); udf_debug("Starting at sector %u (%ld byte sectors)\n", (unsigned int)(sector >> sb->s_blocksize_bits), -- GitLab From 31eb4108e107aefc9cd5d662b3624533327848fd Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 13 Oct 2017 13:40:24 -0700 Subject: [PATCH 0524/1398] macvlan: Only deliver one copy of the frame to the macvlan interface [ Upstream commit dd6b9c2c332b40f142740d1b11fb77c653ff98ea ] This patch intoduces a slight adjustment for macvlan to address the fact that in source mode I was seeing two copies of any packet addressed to the macvlan interface being delivered where there should have been only one. The issue appears to be that one copy was delivered based on the source MAC address and then the second copy was being delivered based on the destination MAC address. To fix it I am just treating a unicast address match as though it is not a match since source based macvlan isn't supposed to be matching based on the destination MAC anyway. Fixes: 79cf79abce71 ("macvlan: add source mode") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index dc8ccac0a01d..6d55049cd3dc 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -452,7 +452,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) struct macvlan_dev, list); else vlan = macvlan_hash_lookup(port, eth->h_dest); - if (vlan == NULL) + if (!vlan || vlan->mode == MACVLAN_MODE_SOURCE) return RX_HANDLER_PASS; dev = vlan->dev; -- GitLab From 0c70b35bf1583fa5fc9134b87d32cf0bc27d5023 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:45 -0700 Subject: [PATCH 0525/1398] RDMA/cma: Avoid triggering undefined behavior [ Upstream commit c0b64f58e8d49570aa9ee55d880f92c20ff0166b ] According to the C standard the behavior of computations with integer operands is as follows: * A computation involving unsigned operands can never overflow, because a result that cannot be represented by the resulting unsigned integer type is reduced modulo the number that is one greater than the largest value that can be represented by the resulting type. * The behavior for signed integer underflow and overflow is undefined. Hence only use unsigned integers when checking for integer overflow. This patch is what I came up with after having analyzed the following smatch warnings: drivers/infiniband/core/cma.c:3448: cma_resolve_ib_udp() warn: signed overflow undefined. 'offset + conn_param->private_data_len < conn_param->private_data_len' drivers/infiniband/core/cma.c:3505: cma_connect_ib() warn: signed overflow undefined. 'offset + conn_param->private_data_len < conn_param->private_data_len' Signed-off-by: Bart Van Assche Acked-by: Sean Hefty Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 809a02800102..a09d6eed3b88 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1482,7 +1482,7 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, return id_priv; } -static inline int cma_user_data_offset(struct rdma_id_private *id_priv) +static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv) { return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr); } @@ -1877,7 +1877,8 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_id_private *listen_id, *conn_id = NULL; struct rdma_cm_event event; struct net_device *net_dev; - int offset, ret; + u8 offset; + int ret; listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); if (IS_ERR(listen_id)) @@ -3309,7 +3310,8 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, struct ib_cm_sidr_req_param req; struct ib_cm_id *id; void *private_data; - int offset, ret; + u8 offset; + int ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); @@ -3366,7 +3368,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, struct rdma_route *route; void *private_data; struct ib_cm_id *id; - int offset, ret; + u8 offset; + int ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); -- GitLab From 2eb165b9fbb722416252c054ac2ef2c3eb777460 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Tue, 10 Oct 2017 10:36:41 +0300 Subject: [PATCH 0526/1398] IB/ipoib: Grab rtnl lock on heavy flush when calling ndo_open/stop [ Upstream commit b4b678b06f6eef18bff44a338c01870234db0bc9 ] When ndo_open and ndo_stop are called RTNL lock should be held. In this specific case ipoib_ib_dev_open calls the offloaded ndo_open which re-sets the number of TX queue assuming RTNL lock is held. Since RTNL lock is not held, RTNL assert will fail. Signed-off-by: Alex Vesker Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 830fecb6934c..335bd2c9e16e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1177,10 +1177,15 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_ib_dev_down(dev); if (level == IPOIB_FLUSH_HEAVY) { + rtnl_lock(); if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) ipoib_ib_dev_stop(dev); - if (ipoib_ib_dev_open(dev) != 0) + + result = ipoib_ib_dev_open(dev); + rtnl_unlock(); + if (result) return; + if (netif_queue_stopped(dev)) netif_start_queue(dev); } -- GitLab From 8f23eb16afd8f230f15ee020159469275adabd15 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 12 Oct 2017 16:12:37 +0200 Subject: [PATCH 0527/1398] icmp: don't fail on fragment reassembly time exceeded [ Upstream commit 258bbb1b0e594ad5f5652cb526b3c63e6a7fad3d ] The ICMP implementation currently replies to an ICMP time exceeded message (type 11) with an ICMP host unreachable message (type 3, code 1). However, time exceeded messages can either represent "time to live exceeded in transit" (code 0) or "fragment reassembly time exceeded" (code 1). Unconditionally replying to "fragment reassembly time exceeded" with host unreachable messages might cause unjustified connection resets which are now easily triggered as UFO has been removed, because, in turn, sending large buffers triggers IP fragmentation. The issue can be easily reproduced by running a lot of UDP streams which is likely to trigger IP fragmentation: # start netserver in the test namespace ip netns add test ip netns exec test netserver # create a VETH pair ip link add name veth0 type veth peer name veth0 netns test ip link set veth0 up ip -n test link set veth0 up for i in $(seq 20 29); do # assign addresses to both ends ip addr add dev veth0 192.168.$i.1/24 ip -n test addr add dev veth0 192.168.$i.2/24 # start the traffic netperf -L 192.168.$i.1 -H 192.168.$i.2 -t UDP_STREAM -l 0 & done # wait send_data: data send error: No route to host (errno 113) netperf: send_omni: send_data failed: No route to host We need to differentiate instead: if fragment reassembly time exceeded is reported, we need to silently drop the packet, if time to live exceeded is reported, maintain the current behaviour. In both cases increment the related error count "icmpInTimeExcds". While at it, fix a typo in a comment, and convert the if statement into a switch to mate it more readable. Signed-off-by: Matteo Croce Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/icmp.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 48734ee6293f..31f17f0bbd1c 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -766,7 +766,7 @@ static bool icmp_tag_validation(int proto) } /* - * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and + * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, ICMP_QUENCH, and * ICMP_PARAMETERPROB. */ @@ -794,7 +794,8 @@ static bool icmp_unreach(struct sk_buff *skb) if (iph->ihl < 5) /* Mangled header, drop. */ goto out_err; - if (icmph->type == ICMP_DEST_UNREACH) { + switch (icmph->type) { + case ICMP_DEST_UNREACH: switch (icmph->code & 15) { case ICMP_NET_UNREACH: case ICMP_HOST_UNREACH: @@ -830,8 +831,16 @@ static bool icmp_unreach(struct sk_buff *skb) } if (icmph->code > NR_ICMP_UNREACH) goto out; - } else if (icmph->type == ICMP_PARAMETERPROB) + break; + case ICMP_PARAMETERPROB: info = ntohl(icmph->un.gateway) >> 24; + break; + case ICMP_TIME_EXCEEDED: + __ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS); + if (icmph->code == ICMP_EXC_FRAGTIME) + goto out; + break; + } /* * Throw it at our lower layers -- GitLab From ed70a2212526bb26b5850f538b5a53793fdd4abf Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Wed, 27 Sep 2017 09:13:34 +0800 Subject: [PATCH 0528/1398] ath9k: fix tx99 potential info leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ee0a47186e2fa9aa1c56cadcea470ca0ba8c8692 ] When the user sets count to zero the string buffer would remain completely uninitialized which causes the kernel to parse its own stack data, potentially leading to an info leak. In addition to that, the string might be not terminated properly when the user data does not contain a 0-terminator. Signed-off-by: Miaoqing Pan Reviewed-by: Christoph Böhmwalder Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/tx99.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/tx99.c b/drivers/net/wireless/ath/ath9k/tx99.c index 1fa7f844b5da..8e9480cc33e1 100644 --- a/drivers/net/wireless/ath/ath9k/tx99.c +++ b/drivers/net/wireless/ath/ath9k/tx99.c @@ -179,6 +179,9 @@ static ssize_t write_file_tx99(struct file *file, const char __user *user_buf, ssize_t len; int r; + if (count < 1) + return -EINVAL; + if (sc->cur_chan->nvifs > 1) return -EOPNOTSUPP; @@ -186,6 +189,8 @@ static ssize_t write_file_tx99(struct file *file, const char __user *user_buf, if (copy_from_user(buf, user_buf, len)) return -EFAULT; + buf[len] = '\0'; + if (strtobool(buf, &start)) return -EINVAL; -- GitLab From b632d710149f1a638ffa6e999cfbd9ee9fbdada6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 20 Dec 2017 10:07:34 +0100 Subject: [PATCH 0529/1398] Linux 4.9.71 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7ad3271a1a1d..5f2736bb4877 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 70 +SUBLEVEL = 71 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 2506378791654badf0ade120478ee70439c410e3 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 20 Dec 2017 16:21:00 +0100 Subject: [PATCH 0530/1398] ANDROID: binder: Remove obsolete proc waitqueue. It was no longer being used. Change-Id: I7fc42b76f688a459ad990f59fbd7006b96bb91a6 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 1ef2f68bfcb2..6b4a99162198 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -512,8 +512,6 @@ struct binder_priority { * (protected by @inner_lock) * @todo: list of work for this process * (protected by @inner_lock) - * @wait: wait queue head to wait for proc work - * (invariant after initialized) * @stats: per-process binder statistics * (atomics, no lock needed) * @delivered_death: list of delivered death notification @@ -554,7 +552,6 @@ struct binder_proc { bool is_dead; struct list_head todo; - wait_queue_head_t wait; struct binder_stats stats; struct list_head delivered_death; int max_threads; -- GitLab From 76fcdc8cbbce8b62d8f799eb87a22ff4ac6ec16d Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Thu, 23 Nov 2017 09:08:57 +0530 Subject: [PATCH 0531/1398] cxl: Check if vphb exists before iterating over AFU devices commit 12841f87b7a8ceb3d54f171660f72a86941bfcb3 upstream. During an eeh a kernel-oops is reported if no vPHB is allocated to the AFU. This happens as during AFU init, an error in creation of vPHB is a non-fatal error. Hence afu->phb should always be checked for NULL before iterating over it for the virtual AFU pci devices. This patch fixes the kenel-oops by adding a NULL pointer check for afu->phb before it is dereferenced. Fixes: 9e8df8a21963 ("cxl: EEH support") Signed-off-by: Vaibhav Jain Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/pci.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index eef202d4399b..a5422f483ad5 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1758,6 +1758,9 @@ static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, /* There should only be one entry, but go through the list * anyway */ + if (afu->phb == NULL) + return result; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { if (!afu_dev->driver) continue; @@ -1801,6 +1804,11 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, /* Only participate in EEH if we are on a virtual PHB */ if (afu->phb == NULL) return PCI_ERS_RESULT_NONE; + + /* + * Tell the AFU drivers; but we don't care what they + * say, we're going away. + */ cxl_vphb_error_detected(afu, state); } return PCI_ERS_RESULT_DISCONNECT; @@ -1941,6 +1949,9 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) if (cxl_afu_select_best_mode(afu)) goto err; + if (afu->phb == NULL) + continue; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { /* Reset the device context. * TODO: make this less disruptive @@ -2003,6 +2014,9 @@ static void cxl_pci_resume(struct pci_dev *pdev) for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; + if (afu->phb == NULL) + continue; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { if (afu_dev->driver && afu_dev->driver->err_handler && afu_dev->driver->err_handler->resume) -- GitLab From bb95f1caee6177da78c8fa9a54bcd3695c903b97 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Mon, 4 Dec 2017 14:13:05 +0000 Subject: [PATCH 0532/1398] arm64: Initialise high_memory global variable earlier commit f24e5834a2c3f6c5f814a417f858226f0a010ade upstream. The high_memory global variable is used by cma_declare_contiguous(.) before it is defined. We don't notice this as we compute __pa(high_memory - 1), and it looks like we're processing a VA from the direct linear map. This problem becomes apparent when we flip the kernel virtual address space and the linear map is moved to the bottom of the kernel VA space. This patch moves the initialisation of high_memory before it used. Fixes: f7426b983a6a ("mm: cma: adjust address limit to avoid hitting low/high memory boundary") Signed-off-by: Steve Capper Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 380ebe705093..9b8b477c363d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -296,6 +296,7 @@ void __init arm64_memblock_init(void) arm64_dma_phys_limit = max_zone_dma_phys(); else arm64_dma_phys_limit = PHYS_MASK + 1; + high_memory = __va(memblock_end_of_DRAM() - 1) + 1; dma_contiguous_reserve(arm64_dma_phys_limit); memblock_allow_resize(); @@ -322,7 +323,6 @@ void __init bootmem_init(void) sparse_init(); zone_sizes_init(min, max); - high_memory = __va((max << PAGE_SHIFT) - 1) + 1; memblock_dump_all(); } -- GitLab From 52c3323e41417ace0094ff1a70f548d94ffb6a49 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Thu, 9 Mar 2017 13:29:13 +0100 Subject: [PATCH 0533/1398] ALSA: hda - add support for docking station for HP 820 G2 [ Upstream commit 04d5466a976b096364a39a63ac264c1b3a5f8fa1 ] This tested patch adds missing initialization for Line-In/Out PINs for the docking station for HP 820 G2. Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d7fa7373cb94..ba40596b9d92 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4854,6 +4854,7 @@ enum { ALC286_FIXUP_HP_GPIO_LED, ALC280_FIXUP_HP_GPIO2_MIC_HOTKEY, ALC280_FIXUP_HP_DOCK_PINS, + ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, ALC280_FIXUP_HP_9480M, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -5394,6 +5395,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC280_FIXUP_HP_GPIO4 }, + [ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x21011020 }, /* line-out */ + { 0x18, 0x2181103f }, /* line-in */ + { }, + }, + .chained = true, + .chain_id = ALC269_FIXUP_HP_GPIO_MIC1_LED + }, [ALC280_FIXUP_HP_9480M] = { .type = HDA_FIXUP_FUNC, .v.func = alc280_fixup_hp_9480m, @@ -5646,7 +5657,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2256, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x2257, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x2259, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), - SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), + SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x2260, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2263, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2264, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), @@ -5812,6 +5823,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, .name = "headset-mode-no-hp-mic"}, {.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"}, {.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"}, + {.id = ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, .name = "hp-dock-gpio-mic1-led"}, {.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"}, {.id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "dell-headset-dock"}, {.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"}, -- GitLab From 2d9a34c064ad3f4296b5a00e29a75a4280e797c0 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Thu, 9 Mar 2017 13:30:09 +0100 Subject: [PATCH 0534/1398] ALSA: hda - add support for docking station for HP 840 G3 [ Upstream commit cc3a47a248d7791ef0d2c81a35c46769e55e4c6c ] This tested patch adds missing initialization for Line-In/Out PINs for the docking station for HP 840 G3. Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index f2e4e99ce651..2c3065c1f3fb 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -261,6 +261,7 @@ enum { CXT_FIXUP_HP_530, CXT_FIXUP_CAP_MIX_AMP_5047, CXT_FIXUP_MUTE_LED_EAPD, + CXT_FIXUP_HP_DOCK, CXT_FIXUP_HP_SPECTRE, CXT_FIXUP_HP_GATE_MIC, }; @@ -778,6 +779,14 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_mute_led_eapd, }, + [CXT_FIXUP_HP_DOCK] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x16, 0x21011020 }, /* line-out */ + { 0x18, 0x2181103f }, /* line-in */ + { } + } + }, [CXT_FIXUP_HP_SPECTRE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -839,6 +848,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC), SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC), + SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), @@ -872,6 +882,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_PINCFG_LEMOTE_A1205, .name = "lemote-a1205" }, { .id = CXT_FIXUP_OLPC_XO, .name = "olpc-xo" }, { .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" }, + { .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" }, {} }; -- GitLab From 2a7eee3d72b043d68af4098665eee89893386445 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 24 Jan 2017 14:06:48 +0100 Subject: [PATCH 0535/1398] kvm: fix usage of uninit spinlock in avic_vm_destroy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3863dff0c3dd72984395c93b12383b393c5c3989 ] If avic is not enabled, avic_vm_init() does nothing and returns early. However, avic_vm_destroy() still tries to destroy what hasn't been created. The only bad consequence of this now is that avic_vm_destroy() uses svm_vm_data_hash_lock that hasn't been initialized (and is not meant to be used at all if avic is not enabled). Return early from avic_vm_destroy() if avic is not enabled. It has nothing to destroy. Signed-off-by: Dmitry Vyukov Cc: Joerg Roedel Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: David Hildenbrand Cc: kvm@vger.kernel.org Cc: syzkaller@googlegroups.com Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 23f1a6bd7a0d..8148d8ca7930 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1382,6 +1382,9 @@ static void avic_vm_destroy(struct kvm *kvm) unsigned long flags; struct kvm_arch *vm_data = &kvm->arch; + if (!avic) + return; + avic_free_vm_id(vm_data->avic_vm_id); if (vm_data->avic_logical_id_table_page) -- GitLab From e93ea3a50cc674f28c1f0634baf0ab4afd07ec1b Mon Sep 17 00:00:00 2001 From: Oscar Campos Date: Fri, 10 Feb 2017 18:23:00 +0000 Subject: [PATCH 0536/1398] HID: corsair: support for K65-K70 Rapidfire and Scimitar Pro RGB [ Upstream commit deaba636997557fce46ca7bcb509bff5ea1b0558 ] Add quirks for several corsair gaming devices to avoid long delays on report initialization Supported devices: - Corsair K65RGB Rapidfire Gaming Keyboard - Corsair K70RGB Rapidfire Gaming Keyboard - Corsair Scimitar Pro RGB Gaming Mouse Signed-off-by: Oscar Campos Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 433d5f675c03..b365cb1df26e 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -277,6 +277,9 @@ #define USB_DEVICE_ID_CORSAIR_K70RGB 0x1b13 #define USB_DEVICE_ID_CORSAIR_STRAFE 0x1b15 #define USB_DEVICE_ID_CORSAIR_K65RGB 0x1b17 +#define USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE 0x1b38 +#define USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE 0x1b39 +#define USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB 0x1b3e #define USB_VENDOR_ID_CREATIVELABS 0x041e #define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51 0x322c diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 2b1620797959..1916f80a692d 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -80,6 +80,9 @@ static const struct hid_blacklist { { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, -- GitLab From 6e2a6941fa4ba76ffdd02006ff2d156455b4ef61 Mon Sep 17 00:00:00 2001 From: Oscar Campos Date: Mon, 6 Mar 2017 21:02:39 +0000 Subject: [PATCH 0537/1398] HID: corsair: Add driver Scimitar Pro RGB gaming mouse 1b1c:1b3e support to hid-corsair [ Upstream commit 01adc47e885f1127b29d76d0dfb21d8262f9d6b4 ] This mouse sold by Corsair as Scimitar PRO RGB defines two consecutive Logical Minimum items in its Application (Consumer.0001) report making it non parseable. This patch fixes the report descriptor overriding byte 77 in rdesc from 0x16 (Logical Minimum with 16 bits value) to 0x26 (Logical Maximum with 16 bits value). Signed-off-by: Oscar Campos Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/Kconfig | 1 + drivers/hid/hid-core.c | 1 + drivers/hid/hid-corsair.c | 47 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index db607d51ee2b..8eed456a67be 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -190,6 +190,7 @@ config HID_CORSAIR Supported devices: - Vengeance K90 + - Scimitar PRO RGB config HID_PRODIKEYS tristate "Prodikeys PC-MIDI Keyboard support" diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index bdde8859e191..6615b4d2f392 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1872,6 +1872,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) }, { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) }, diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c index c0303f61c26a..9ba5d98a1180 100644 --- a/drivers/hid/hid-corsair.c +++ b/drivers/hid/hid-corsair.c @@ -3,8 +3,10 @@ * * Supported devices: * - Vengeance K90 Keyboard + * - Scimitar PRO RGB Gaming Mouse * * Copyright (c) 2015 Clement Vuchener + * Copyright (c) 2017 Oscar Campos */ /* @@ -670,10 +672,51 @@ static int corsair_input_mapping(struct hid_device *dev, return 0; } +/* + * The report descriptor of Corsair Scimitar RGB Pro gaming mouse is + * non parseable as they define two consecutive Logical Minimum for + * the Usage Page (Consumer) in rdescs bytes 75 and 77 being 77 0x16 + * that should be obviousy 0x26 for Logical Magimum of 16 bits. This + * prevents poper parsing of the report descriptor due Logical + * Minimum being larger than Logical Maximum. + * + * This driver fixes the report descriptor for: + * - USB ID b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse + */ + +static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + struct usb_interface *intf = to_usb_interface(hdev->dev.parent); + + if (intf->cur_altsetting->desc.bInterfaceNumber == 1) { + /* + * Corsair Scimitar RGB Pro report descriptor is broken and + * defines two different Logical Minimum for the Consumer + * Application. The byte 77 should be a 0x26 defining a 16 + * bits integer for the Logical Maximum but it is a 0x16 + * instead (Logical Minimum) + */ + switch (hdev->product) { + case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB: + if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16 + && rdesc[78] == 0xff && rdesc[79] == 0x0f) { + hid_info(hdev, "Fixing up report descriptor\n"); + rdesc[77] = 0x26; + } + break; + } + + } + return rdesc; +} + static const struct hid_device_id corsair_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90), .driver_data = CORSAIR_USE_K90_MACRO | CORSAIR_USE_K90_BACKLIGHT }, + { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, + USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) }, {} }; @@ -686,10 +729,14 @@ static struct hid_driver corsair_driver = { .event = corsair_event, .remove = corsair_remove, .input_mapping = corsair_input_mapping, + .report_fixup = corsair_mouse_report_fixup, }; module_hid_driver(corsair_driver); MODULE_LICENSE("GPL"); +/* Original K90 driver author */ MODULE_AUTHOR("Clement Vuchener"); +/* Scimitar PRO RGB driver author */ +MODULE_AUTHOR("Oscar Campos"); MODULE_DESCRIPTION("HID driver for Corsair devices"); -- GitLab From d0ee8d5b86b81d5e1cd282efb9b6e3cde221cc0d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 14 Feb 2017 00:05:59 +0900 Subject: [PATCH 0538/1398] arm: kprobes: Fix the return address of multiple kretprobes [ Upstream commit 06553175f585b52509c7df37d6f4a50aacb7b211 ] This is arm port of commit 737480a0d525 ("kprobes/x86: Fix the return address of multiple kretprobes"). Fix the return address of subsequent kretprobes when multiple kretprobes are set on the same function. For example: # cd /sys/kernel/debug/tracing # echo "r:event1 sys_symlink" > kprobe_events # echo "r:event2 sys_symlink" >> kprobe_events # echo 1 > events/kprobes/enable # ln -s /tmp/foo /tmp/bar (without this patch) # cat trace | grep -v ^# ln-82 [000] dn.2 68.446525: event1: (kretprobe_trampoline+0x0/0x18 <- SyS_symlink) ln-82 [000] dn.2 68.447831: event2: (ret_fast_syscall+0x0/0x1c <- SyS_symlink) (with this patch) # cat trace | grep -v ^# ln-81 [000] dn.1 39.463469: event1: (ret_fast_syscall+0x0/0x1c <- SyS_symlink) ln-81 [000] dn.1 39.464701: event2: (ret_fast_syscall+0x0/0x1c <- SyS_symlink) Signed-off-by: Masami Hiramatsu Cc: KUMANO Syuhei Signed-off-by: Jon Medhurst Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/probes/kprobes/core.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index a4ec240ee7ba..3eb018fa1a1f 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -433,6 +433,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -455,14 +456,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) /* another task is sharing our hash bucket */ continue; + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __this_cpu_write(current_kprobe, &ri->rp->kp); get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); __this_cpu_write(current_kprobe, NULL); } - orig_ret_address = (unsigned long)ri->ret_addr; recycle_rp_inst(ri, &empty_rp); if (orig_ret_address != trampoline_address) @@ -474,7 +495,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) break; } - kretprobe_assert(ri, orig_ret_address, trampoline_address); kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { -- GitLab From c9bbd2727d1e02584ee98efe075e3483c287adb0 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Thu, 2 Mar 2017 13:04:09 +0000 Subject: [PATCH 0539/1398] arm: kprobes: Align stack to 8-bytes in test code [ Upstream commit 974310d047f3c7788a51d10c8d255eebdb1fa857 ] kprobes test cases need to have a stack that is aligned to an 8-byte boundary because they call other functions (and the ARM ABI mandates that alignment) and because test cases include 64-bit accesses to the stack. Unfortunately, GCC doesn't ensure this alignment for inline assembler and for the code in question seems to always misalign it by pushing just the LR register onto the stack. We therefore need to explicitly perform stack alignment at the start of each test case. Without this fix, some test cases will generate alignment faults on systems where alignment is enforced. Even if the kernel is configured to handle these faults in software, triggering them is ugly. It also exposes limitations in the fault handling code which doesn't cope with writes to the stack. E.g. when handling this instruction strd r6, [sp, #-64]! the fault handling code will write to a stack location below the SP value at the point the fault occurred, which coincides with where the exception handler has pushed the saved register context. This results in corruption of those registers. Signed-off-by: Jon Medhurst Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/probes/kprobes/test-core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index 9775de22e2ff..a48354de1aa1 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -976,7 +976,10 @@ static void coverage_end(void) void __naked __kprobes_test_case_start(void) { __asm__ __volatile__ ( - "stmdb sp!, {r4-r11} \n\t" + "mov r2, sp \n\t" + "bic r3, r2, #7 \n\t" + "mov sp, r3 \n\t" + "stmdb sp!, {r2-r11} \n\t" "sub sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" "bic r0, lr, #1 @ r0 = inline data \n\t" "mov r1, sp \n\t" @@ -996,7 +999,8 @@ void __naked __kprobes_test_case_end_32(void) "movne pc, r0 \n\t" "mov r0, r4 \n\t" "add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" - "ldmia sp!, {r4-r11} \n\t" + "ldmia sp!, {r2-r11} \n\t" + "mov sp, r2 \n\t" "mov pc, r0 \n\t" ); } @@ -1012,7 +1016,8 @@ void __naked __kprobes_test_case_end_16(void) "bxne r0 \n\t" "mov r0, r4 \n\t" "add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" - "ldmia sp!, {r4-r11} \n\t" + "ldmia sp!, {r2-r11} \n\t" + "mov sp, r2 \n\t" "bx r0 \n\t" ); } -- GitLab From 8f21b63c9dcf5a9242cecaa6289b8ce19026cb66 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 13 Mar 2017 13:27:51 +0200 Subject: [PATCH 0540/1398] nvme-loop: handle cpu unplug when re-establishing the controller [ Upstream commit 945dd5bacc8978439af276976b5dcbbd42333dbc ] If a cpu unplug event has occured, we need to take the minimum of the provided nr_io_queues and the number of online cpus, otherwise we won't be able to connect them as blk-mq mapping won't dispatch to those queues. Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/loop.c | 88 ++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 38 deletions(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index c8e612c1c72f..e56ca3fb107e 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -223,8 +223,6 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx) static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, struct nvme_loop_iod *iod, unsigned int queue_idx) { - BUG_ON(queue_idx >= ctrl->queue_count); - iod->req.cmd = &iod->cmd; iod->req.rsp = &iod->rsp; iod->queue = &ctrl->queues[queue_idx]; @@ -314,6 +312,43 @@ static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) kfree(ctrl); } +static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) +{ + int i; + + for (i = 1; i < ctrl->queue_count; i++) + nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); +} + +static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + unsigned int nr_io_queues; + int ret, i; + + nr_io_queues = min(opts->nr_io_queues, num_online_cpus()); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); + if (ret || !nr_io_queues) + return ret; + + dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues); + + for (i = 1; i <= nr_io_queues; i++) { + ctrl->queues[i].ctrl = ctrl; + ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); + if (ret) + goto out_destroy_queues; + + ctrl->queue_count++; + } + + return 0; + +out_destroy_queues: + nvme_loop_destroy_io_queues(ctrl); + return ret; +} + static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) { int error; @@ -385,17 +420,13 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) { - int i; - nvme_stop_keep_alive(&ctrl->ctrl); if (ctrl->queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); - - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + nvme_loop_destroy_io_queues(ctrl); } if (ctrl->ctrl.state == NVME_CTRL_LIVE) @@ -467,19 +498,14 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) if (ret) goto out_disable; - for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) { - ctrl->queues[i].ctrl = ctrl; - ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); - if (ret) - goto out_free_queues; - - ctrl->queue_count++; - } + ret = nvme_loop_init_io_queues(ctrl); + if (ret) + goto out_destroy_admin; - for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) { + for (i = 1; i < ctrl->queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) - goto out_free_queues; + goto out_destroy_io; } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -492,9 +518,9 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) return; -out_free_queues: - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); +out_destroy_io: + nvme_loop_destroy_io_queues(ctrl); +out_destroy_admin: nvme_loop_destroy_admin_queue(ctrl); out_disable: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); @@ -533,25 +559,12 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) { - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; int ret, i; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); - if (ret || !opts->nr_io_queues) + ret = nvme_loop_init_io_queues(ctrl); + if (ret) return ret; - dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", - opts->nr_io_queues); - - for (i = 1; i <= opts->nr_io_queues; i++) { - ctrl->queues[i].ctrl = ctrl; - ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); - if (ret) - goto out_destroy_queues; - - ctrl->queue_count++; - } - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); ctrl->tag_set.ops = &nvme_loop_mq_ops; ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; @@ -575,7 +588,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) goto out_free_tagset; } - for (i = 1; i <= opts->nr_io_queues; i++) { + for (i = 1; i < ctrl->queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) goto out_cleanup_connect_q; @@ -588,8 +601,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) out_free_tagset: blk_mq_free_tag_set(&ctrl->tag_set); out_destroy_queues: - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + nvme_loop_destroy_io_queues(ctrl); return ret; } -- GitLab From 9712b2b73d7c61f811f08c9a59c7b2b61f20a3fe Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Sun, 19 Mar 2017 00:51:59 +0530 Subject: [PATCH 0541/1398] cpuidle: Validate cpu_dev in cpuidle_add_sysfs() [ Upstream commit ad0a45fd9c14feebd000b6e84189d0edff265170 ] If a given cpu is not in cpu_present and cpu hotplug is disabled, arch can skip setting up the cpu_dev. Arch cpuidle driver should pass correct cpu mask for registration, but failing to do so by the driver causes error to propagate and crash like this: [ 30.076045] Unable to handle kernel paging request for data at address 0x00000048 [ 30.076100] Faulting instruction address: 0xc0000000007b2f30 cpu 0x4d: Vector: 300 (Data Access) at [c000003feb18b670] pc: c0000000007b2f30: kobject_get+0x20/0x70 lr: c0000000007b3c94: kobject_add_internal+0x54/0x3f0 sp: c000003feb18b8f0 msr: 9000000000009033 dar: 48 dsisr: 40000000 current = 0xc000003fd2ed8300 paca = 0xc00000000fbab500 softe: 0 irq_happened: 0x01 pid = 1, comm = swapper/0 Linux version 4.11.0-rc2-svaidy+ (sv@sagarika) (gcc version 6.2.0 20161005 (Ubuntu 6.2.0-5ubuntu12) ) #10 SMP Sun Mar 19 00:08:09 IST 2017 enter ? for help [c000003feb18b960] c0000000007b3c94 kobject_add_internal+0x54/0x3f0 [c000003feb18b9f0] c0000000007b43a4 kobject_init_and_add+0x64/0xa0 [c000003feb18ba70] c000000000e284f4 cpuidle_add_sysfs+0xb4/0x130 [c000003feb18baf0] c000000000e26038 cpuidle_register_device+0x118/0x1c0 [c000003feb18bb30] c000000000e26c48 cpuidle_register+0x78/0x120 [c000003feb18bbc0] c00000000168fd9c powernv_processor_idle_init+0x110/0x1c4 [c000003feb18bc40] c00000000000cff8 do_one_initcall+0x68/0x1d0 [c000003feb18bd00] c0000000016242f4 kernel_init_freeable+0x280/0x360 [c000003feb18bdc0] c00000000000d864 kernel_init+0x24/0x160 [c000003feb18be30] c00000000000b4e8 ret_from_kernel_thread+0x5c/0x74 Validating cpu_dev fixes the crash and reports correct error message like: [ 30.163506] Failed to register cpuidle device for cpu136 [ 30.173329] Registration of powernv driver failed. Signed-off-by: Vaidyanathan Srinivasan [ rjw: Comment massage ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/sysfs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 832a2c3f01ff..9e98a5fbbc1d 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -613,6 +613,18 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); int error; + /* + * Return if cpu_device is not setup for this CPU. + * + * This could happen if the arch did not set up cpu_device + * since this CPU is not in cpu_present mask and the + * driver did not send a correct CPU mask during registration. + * Without this check we would end up passing bogus + * value for &cpu_dev->kobj in kobject_init_and_add() + */ + if (!cpu_dev) + return -ENODEV; + kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); if (!kdev) return -ENOMEM; -- GitLab From ddfc9f75993e1bb48719045ba20831b70d92a72c Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 14 Mar 2017 14:15:20 +0800 Subject: [PATCH 0542/1398] r8152: fix the list rx_done may be used without initialization [ Upstream commit 98d068ab52b4b11d403995ed14154660797e7136 ] The list rx_done would be initialized when the linking on occurs. Therefore, if a napi is scheduled without any linking on before, the following kernel panic would happen. BUG: unable to handle kernel NULL pointer dereference at 000000000000008 IP: [] r8152_poll+0xe1e/0x1210 [r8152] PGD 0 Oops: 0002 [#1] SMP Signed-off-by: Hayes Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/r8152.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index afb953a258cd..823219cf95ef 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1362,6 +1362,7 @@ static int alloc_all_mem(struct r8152 *tp) spin_lock_init(&tp->rx_lock); spin_lock_init(&tp->tx_lock); INIT_LIST_HEAD(&tp->tx_free); + INIT_LIST_HEAD(&tp->rx_done); skb_queue_head_init(&tp->tx_queue); skb_queue_head_init(&tp->rx_queue); -- GitLab From 7ff28d3307b6238765c6ec05bcfa016ea7f7f1ec Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Mar 2017 18:25:57 +0800 Subject: [PATCH 0543/1398] crypto: deadlock between crypto_alg_sem/rtnl_mutex/genl_mutex [ Upstream commit 8a0f5ccfb33b0b8b51de65b7b3bf342ba10b4fb6 ] On Tue, Mar 14, 2017 at 10:44:10AM +0100, Dmitry Vyukov wrote: > > Yes, please. > Disregarding some reports is not a good way long term. Please try this patch. ---8<--- Subject: netlink: Annotate nlk cb_mutex by protocol Currently all occurences of nlk->cb_mutex are annotated by lockdep as a single class. This causes a false lcokdep cycle involving genl and crypto_user. This patch fixes it by dividing cb_mutex into individual classes based on the netlink protocol. As genl and crypto_user do not use the same netlink protocol this breaks the false dependency loop. Reported-by: Dmitry Vyukov Signed-off-by: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 41 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c9fac08a53b1..1ff497bd9c20 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -96,6 +96,44 @@ EXPORT_SYMBOL_GPL(nl_table); static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); +static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS]; + +static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = { + "nlk_cb_mutex-ROUTE", + "nlk_cb_mutex-1", + "nlk_cb_mutex-USERSOCK", + "nlk_cb_mutex-FIREWALL", + "nlk_cb_mutex-SOCK_DIAG", + "nlk_cb_mutex-NFLOG", + "nlk_cb_mutex-XFRM", + "nlk_cb_mutex-SELINUX", + "nlk_cb_mutex-ISCSI", + "nlk_cb_mutex-AUDIT", + "nlk_cb_mutex-FIB_LOOKUP", + "nlk_cb_mutex-CONNECTOR", + "nlk_cb_mutex-NETFILTER", + "nlk_cb_mutex-IP6_FW", + "nlk_cb_mutex-DNRTMSG", + "nlk_cb_mutex-KOBJECT_UEVENT", + "nlk_cb_mutex-GENERIC", + "nlk_cb_mutex-17", + "nlk_cb_mutex-SCSITRANSPORT", + "nlk_cb_mutex-ECRYPTFS", + "nlk_cb_mutex-RDMA", + "nlk_cb_mutex-CRYPTO", + "nlk_cb_mutex-SMC", + "nlk_cb_mutex-23", + "nlk_cb_mutex-24", + "nlk_cb_mutex-25", + "nlk_cb_mutex-26", + "nlk_cb_mutex-27", + "nlk_cb_mutex-28", + "nlk_cb_mutex-29", + "nlk_cb_mutex-30", + "nlk_cb_mutex-31", + "nlk_cb_mutex-MAX_LINKS" +}; + static int netlink_dump(struct sock *sk); static void netlink_skb_destructor(struct sk_buff *skb); @@ -585,6 +623,9 @@ static int __netlink_create(struct net *net, struct socket *sock, } else { nlk->cb_mutex = &nlk->cb_def_mutex; mutex_init(nlk->cb_mutex); + lockdep_set_class_and_name(nlk->cb_mutex, + nlk_cb_mutex_keys + protocol, + nlk_cb_mutex_key_strings[protocol]); } init_waitqueue_head(&nlk->wait); -- GitLab From 6f1848e778d9a9f9dd89abee53d2a688277d1784 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:14 +0800 Subject: [PATCH 0544/1398] vsock: track pkt owner vsock [ Upstream commit 36d277bac8080202684e67162ebb157f16631581 ] So that we can cancel a queued pkt later if necessary. Signed-off-by: Peng Tao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/virtio_vsock.h | 3 +++ net/vmw_vsock/virtio_transport_common.c | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 9638bfeb0d1f..584f9a647ad4 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -48,6 +48,8 @@ struct virtio_vsock_pkt { struct virtio_vsock_hdr hdr; struct work_struct work; struct list_head list; + /* socket refcnt not held, only use for cancellation */ + struct vsock_sock *vsk; void *buf; u32 len; u32 off; @@ -56,6 +58,7 @@ struct virtio_vsock_pkt { struct virtio_vsock_pkt_info { u32 remote_cid, remote_port; + struct vsock_sock *vsk; struct msghdr *msg; u32 pkt_len; u16 type; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 62c056ea403b..9c07c76c504d 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -57,6 +57,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, pkt->len = len; pkt->hdr.len = cpu_to_le32(len); pkt->reply = info->reply; + pkt->vsk = info->vsk; if (info->msg && len > 0) { pkt->buf = kmalloc(len, GFP_KERNEL); @@ -180,6 +181,7 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, .type = type, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -519,6 +521,7 @@ int virtio_transport_connect(struct vsock_sock *vsk) struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_REQUEST, .type = VIRTIO_VSOCK_TYPE_STREAM, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -534,6 +537,7 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | (mode & SEND_SHUTDOWN ? VIRTIO_VSOCK_SHUTDOWN_SEND : 0), + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -560,6 +564,7 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk, .type = VIRTIO_VSOCK_TYPE_STREAM, .msg = msg, .pkt_len = len, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -581,6 +586,7 @@ static int virtio_transport_reset(struct vsock_sock *vsk, .op = VIRTIO_VSOCK_OP_RST, .type = VIRTIO_VSOCK_TYPE_STREAM, .reply = !!pkt, + .vsk = vsk, }; /* Send RST only if the original pkt is not a RST pkt */ @@ -826,6 +832,7 @@ virtio_transport_send_response(struct vsock_sock *vsk, .remote_cid = le64_to_cpu(pkt->hdr.src_cid), .remote_port = le32_to_cpu(pkt->hdr.src_port), .reply = true, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); -- GitLab From 482b3f92aea249b031ba0bc24df73f3c48f1f5c2 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:15 +0800 Subject: [PATCH 0545/1398] vhost-vsock: add pkt cancel capability [ Upstream commit 16320f363ae128d9b9c70e60f00f2a572f57c23d ] To allow canceling all packets of a connection. Reviewed-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: Peng Tao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vsock.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/net/af_vsock.h | 3 +++ 2 files changed, 44 insertions(+) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index e3fad302b4fb..0ec970ca64ce 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -218,6 +218,46 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) return len; } +static int +vhost_transport_cancel_pkt(struct vsock_sock *vsk) +{ + struct vhost_vsock *vsock; + struct virtio_vsock_pkt *pkt, *n; + int cnt = 0; + LIST_HEAD(freeme); + + /* Find the vhost_vsock according to guest context id */ + vsock = vhost_vsock_get(vsk->remote_addr.svm_cid); + if (!vsock) + return -ENODEV; + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { + if (pkt->vsk != vsk) + continue; + list_move(&pkt->list, &freeme); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + list_for_each_entry_safe(pkt, n, &freeme, list) { + if (pkt->reply) + cnt++; + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + + if (cnt) { + struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; + int new_cnt; + + new_cnt = atomic_sub_return(cnt, &vsock->queued_replies); + if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num) + vhost_poll_queue(&tx_vq->poll); + } + + return 0; +} + static struct virtio_vsock_pkt * vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, unsigned int out, unsigned int in) @@ -669,6 +709,7 @@ static struct virtio_transport vhost_transport = { .release = virtio_transport_release, .connect = virtio_transport_connect, .shutdown = virtio_transport_shutdown, + .cancel_pkt = vhost_transport_cancel_pkt, .dgram_enqueue = virtio_transport_dgram_enqueue, .dgram_dequeue = virtio_transport_dgram_dequeue, diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f2758964ce6f..f32ed9ac181a 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -100,6 +100,9 @@ struct vsock_transport { void (*destruct)(struct vsock_sock *); void (*release)(struct vsock_sock *); + /* Cancel all pending packets sent on vsock. */ + int (*cancel_pkt)(struct vsock_sock *vsk); + /* Connections. */ int (*connect)(struct vsock_sock *); -- GitLab From 98d20e5902667f9b44a75116041a630823f81e46 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:17 +0800 Subject: [PATCH 0546/1398] vsock: cancel packets when failing to connect [ Upstream commit 380feae0def7e6a115124a3219c3ec9b654dca32 ] Otherwise we'll leave the packets queued until releasing vsock device. E.g., if guest is slow to start up, resulting ETIMEDOUT on connect, guest will get the connect requests from failed host sockets. Reviewed-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: Peng Tao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/af_vsock.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2f633eec6b7a..ee12e176256c 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1101,10 +1101,19 @@ static const struct proto_ops vsock_dgram_ops = { .sendpage = sock_no_sendpage, }; +static int vsock_transport_cancel_pkt(struct vsock_sock *vsk) +{ + if (!transport->cancel_pkt) + return -EOPNOTSUPP; + + return transport->cancel_pkt(vsk); +} + static void vsock_connect_timeout(struct work_struct *work) { struct sock *sk; struct vsock_sock *vsk; + int cancel = 0; vsk = container_of(work, struct vsock_sock, dwork.work); sk = sk_vsock(vsk); @@ -1115,8 +1124,11 @@ static void vsock_connect_timeout(struct work_struct *work) sk->sk_state = SS_UNCONNECTED; sk->sk_err = ETIMEDOUT; sk->sk_error_report(sk); + cancel = 1; } release_sock(sk); + if (cancel) + vsock_transport_cancel_pkt(vsk); sock_put(sk); } @@ -1223,11 +1235,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, err = sock_intr_errno(timeout); sk->sk_state = SS_UNCONNECTED; sock->state = SS_UNCONNECTED; + vsock_transport_cancel_pkt(vsk); goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; sk->sk_state = SS_UNCONNECTED; sock->state = SS_UNCONNECTED; + vsock_transport_cancel_pkt(vsk); goto out_wait; } -- GitLab From b3f662ccd3677915f79861bd19854250836aea4b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2017 08:05:28 -0700 Subject: [PATCH 0547/1398] sch_dsmark: fix invalid skb_cow() usage [ Upstream commit aea92fb2e09e29653b023d4254ac9fbf94221538 ] skb_cow(skb, sizeof(ip header)) is not very helpful in this context. First we need to use pskb_may_pull() to make sure the ip header is in skb linear part, then use skb_try_make_writable() to address clones issues. Fixes: 4c30719f4f55 ("[PKT_SCHED] dsmark: handle cloned and non-linear skb's") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_dsmark.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 1308bbf460f7..b56d57984439 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -200,9 +200,13 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); if (p->set_tc_index) { + int wlen = skb_network_offset(skb); + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): - if (skb_cow_head(skb, sizeof(struct iphdr))) + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) goto drop; skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) @@ -210,7 +214,9 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; case htons(ETH_P_IPV6): - if (skb_cow_head(skb, sizeof(struct ipv6hdr))) + wlen += sizeof(struct ipv6hdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) goto drop; skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) -- GitLab From b4cf187a1bc533839e75278dca74dfbb31d32d82 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 17 Mar 2017 23:52:35 +0300 Subject: [PATCH 0548/1398] bna: integer overflow bug in debugfs [ Upstream commit 13e2d5187f6b965ba3556caedb914baf81b98ed2 ] We could allocate less memory than intended because we do: bnad->regdata = kzalloc(len << 2, GFP_KERNEL); The shift can overflow leading to a crash. This is debugfs code so the impact is very small. Fixes: 7afc5dbde091 ("bna: Add debugfs interface.") Signed-off-by: Dan Carpenter Acked-by: Rasesh Mody Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/brocade/bna/bnad_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c index 05c1c1dd7751..cebfe3bd086e 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c +++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c @@ -325,7 +325,7 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf, return PTR_ERR(kern_buf); rc = sscanf(kern_buf, "%x:%x", &addr, &len); - if (rc < 2) { + if (rc < 2 || len > UINT_MAX >> 2) { netdev_warn(bnad->netdev, "failed to read user buffer\n"); kfree(kern_buf); return -EINVAL; -- GitLab From 9ed8f0fabae57cd0dd993e815fbe15135f2070fd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 18 Mar 2017 20:03:59 +0800 Subject: [PATCH 0549/1398] sctp: out_qlen should be updated when pruning unsent queue [ Upstream commit 23bb09cfbe04076ef647da3889a5a5ab6cbe6f15 ] This patch is to fix the issue that sctp_prsctp_prune_sent forgot to update q->out_qlen when removing a chunk from unsent queue. Fixes: 8dbdf1f5b09c ("sctp: implement prsctp PRIO policy") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sctp/outqueue.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 582585393d35..0994ce491e7c 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -382,17 +382,18 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, } static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, - struct sctp_sndrcvinfo *sinfo, - struct list_head *queue, int msg_len) + struct sctp_sndrcvinfo *sinfo, int msg_len) { + struct sctp_outq *q = &asoc->outqueue; struct sctp_chunk *chk, *temp; - list_for_each_entry_safe(chk, temp, queue, list) { + list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; list_del_init(&chk->list); + q->out_qlen -= chk->skb->len; asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; @@ -431,9 +432,7 @@ void sctp_prsctp_prune(struct sctp_association *asoc, return; } - sctp_prsctp_prune_unsent(asoc, sinfo, - &asoc->outqueue.out_chunk_list, - msg_len); + sctp_prsctp_prune_unsent(asoc, sinfo, msg_len); } /* Mark all the eligible packets on a transport for retransmission. */ -- GitLab From 4ee082a72731bc279a69b60d2e085a960686962d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 19 Mar 2017 09:19:57 -0700 Subject: [PATCH 0550/1398] net: qmi_wwan: Add USB IDs for MDM6600 modem on Motorola Droid 4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4071898bf0f4d79ff353db327af2a15123272548 ] This gets qmicli working with the MDM6600 modem. Cc: Bjørn Mork Reviewed-by: Sebastian Reichel Tested-by: Sebastian Reichel Signed-off-by: Tony Lindgren Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 62725655d8e4..105fbfb47e3a 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -582,6 +582,10 @@ static const struct usb_device_id products[] = { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 0x01, 0x69), .driver_info = (unsigned long)&qmi_wwan_info, }, + { /* Motorola Mapphone devices with MDM6600 */ + USB_VENDOR_AND_INTERFACE_INFO(0x22b8, USB_CLASS_VENDOR_SPEC, 0xfb, 0xff), + .driver_info = (unsigned long)&qmi_wwan_info, + }, /* 2. Combined interface devices matching on class+protocol */ { /* Huawei E367 and possibly others in "Windows mode" */ -- GitLab From 2101ccbc2a91490fd160946291f09417fbc17b18 Mon Sep 17 00:00:00 2001 From: Alex Hemme Date: Tue, 7 Mar 2017 14:38:29 -0500 Subject: [PATCH 0551/1398] hwmon: (max31790) Set correct PWM value [ Upstream commit dd7406dd334a98ada3ff5371847a3eeb4ba16313 ] Traced fans not spinning to incorrect PWM value being written. The passed in value was written instead of the calulated value. Fixes: 54187ff9d766 ("hwmon: (max31790) Convert to use new hwmon registration API") Signed-off-by: Alex Hemme Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/max31790.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c index c1b9275978f9..281491cca510 100644 --- a/drivers/hwmon/max31790.c +++ b/drivers/hwmon/max31790.c @@ -311,7 +311,7 @@ static int max31790_write_pwm(struct device *dev, u32 attr, int channel, data->pwm[channel] = val << 8; err = i2c_smbus_write_word_swapped(client, MAX31790_REG_PWMOUT(channel), - val); + data->pwm[channel]); break; case hwmon_pwm_enable: fan_config = data->fan_config[channel]; -- GitLab From 2b943bed33e78216eaf191173d0f23e622cd27ab Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 8 Mar 2017 16:05:44 +0200 Subject: [PATCH 0552/1398] usb: gadget: f_uvc: Sanity check wMaxPacketSize for SuperSpeed [ Upstream commit 16bb05d98c904a4f6c5ce7e2d992299f794acbf2 ] As per USB3.0 Specification "Table 9-20. Standard Endpoint Descriptor", for interrupt and isochronous endpoints, wMaxPacketSize must be set to 1024 if the endpoint defines bMaxBurst to be greater than zero. Reviewed-by: Laurent Pinchart Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uvc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index c7689d05356c..f8a1881609a2 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -594,6 +594,14 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) opts->streaming_maxpacket = clamp(opts->streaming_maxpacket, 1U, 3072U); opts->streaming_maxburst = min(opts->streaming_maxburst, 15U); + /* For SS, wMaxPacketSize has to be 1024 if bMaxBurst is not 0 */ + if (opts->streaming_maxburst && + (opts->streaming_maxpacket % 1024) != 0) { + opts->streaming_maxpacket = roundup(opts->streaming_maxpacket, 1024); + INFO(cdev, "overriding streaming_maxpacket to %d\n", + opts->streaming_maxpacket); + } + /* Fill in the FS/HS/SS Video Streaming specific descriptors from the * module parameters. * -- GitLab From 02197d86c56db3819832b1fb97aa86501d767c99 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 10 Mar 2017 15:39:32 -0600 Subject: [PATCH 0553/1398] usb: gadget: udc: remove pointer dereference after free [ Upstream commit 1f459262b0e1649a1e5ad12fa4c66eb76c2220ce ] Remove pointer dereference after free. Addresses-Coverity-ID: 1091173 Acked-by: Michal Nazarewicz Signed-off-by: Gustavo A. R. Silva Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/pch_udc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c index a97da645c1b9..8a365aad66fe 100644 --- a/drivers/usb/gadget/udc/pch_udc.c +++ b/drivers/usb/gadget/udc/pch_udc.c @@ -1523,7 +1523,6 @@ static void pch_udc_free_dma_chain(struct pch_udc_dev *dev, td = phys_to_virt(addr); addr2 = (dma_addr_t)td->next; pci_pool_free(dev->data_requests, td, addr); - td->next = 0x00; addr = addr2; } req->chain_len = 1; -- GitLab From ec38fb443a091ac99d0d8bc3867db690c8f07fe4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Mar 2017 13:32:37 +0100 Subject: [PATCH 0554/1398] netfilter: nfnl_cthelper: fix runtime expectation policy updates [ Upstream commit 2c422257550f123049552b39f7af6e3428a60f43 ] We only allow runtime updates of expectation policies for timeout and maximum number of expectations, otherwise reject the update. Signed-off-by: Pablo Neira Ayuso Acked-by: Liping Zhang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink_cthelper.c | 86 +++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index b1fcfa08f0b4..86325959fef0 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -255,6 +255,89 @@ nfnl_cthelper_create(const struct nlattr * const tb[], return ret; } +static int +nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, + struct nf_conntrack_expect_policy *new_policy, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, + nfnl_cthelper_expect_pol); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_NAME] || + !tb[NFCTH_POLICY_EXPECT_MAX] || + !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) + return -EINVAL; + + if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name)) + return -EBUSY; + + new_policy->max_expected = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + new_policy->timeout = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); + + return 0; +} + +static int nfnl_cthelper_update_policy_all(struct nlattr *tb[], + struct nf_conntrack_helper *helper) +{ + struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1]; + struct nf_conntrack_expect_policy *policy; + int i, err; + + /* Check first that all policy attributes are well-formed, so we don't + * leave things in inconsistent state on errors. + */ + for (i = 0; i < helper->expect_class_max + 1; i++) { + + if (!tb[NFCTH_POLICY_SET + i]) + return -EINVAL; + + err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i], + &new_policy[i], + tb[NFCTH_POLICY_SET + i]); + if (err < 0) + return err; + } + /* Now we can safely update them. */ + for (i = 0; i < helper->expect_class_max + 1; i++) { + policy = (struct nf_conntrack_expect_policy *) + &helper->expect_policy[i]; + policy->max_expected = new_policy->max_expected; + policy->timeout = new_policy->timeout; + } + + return 0; +} + +static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1]; + unsigned int class_max; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_SET_NUM]) + return -EINVAL; + + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (helper->expect_class_max + 1 != class_max) + return -EBUSY; + + return nfnl_cthelper_update_policy_all(tb, helper); +} + static int nfnl_cthelper_update(const struct nlattr * const tb[], struct nf_conntrack_helper *helper) @@ -265,8 +348,7 @@ nfnl_cthelper_update(const struct nlattr * const tb[], return -EBUSY; if (tb[NFCTH_POLICY]) { - ret = nfnl_cthelper_parse_expect_policy(helper, - tb[NFCTH_POLICY]); + ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) return ret; } -- GitLab From 0f0ac218057f9a08c28083c11dd315ff21686d22 Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Tue, 21 Mar 2017 15:07:10 +0800 Subject: [PATCH 0555/1398] netfilter: nfnl_cthelper: Fix memory leak [ Upstream commit f83bf8da1135ca635aac8f062cad3f001fcf3a26 ] We have memory leaks of nf_conntrack_helper & expect_policy. Signed-off-by: Jeffy Chen Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink_cthelper.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 86325959fef0..ad65eb548157 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -216,7 +216,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[], ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) - goto err; + goto err1; strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); @@ -247,10 +247,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[], ret = nf_conntrack_helper_register(helper); if (ret < 0) - goto err; + goto err2; return 0; -err: +err2: + kfree(helper->expect_policy); +err1: kfree(helper); return ret; } @@ -696,6 +698,8 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, found = true; nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(cur); } } /* Make sure we return success if we flush and there is no helpers */ @@ -759,6 +763,8 @@ static void __exit nfnl_cthelper_exit(void) continue; nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(cur); } } } -- GitLab From 7171aa2680b2ff7c946c89ec36aa82a41f6fce55 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 20 Mar 2017 10:17:57 +0100 Subject: [PATCH 0556/1398] iommu/exynos: Workaround FLPD cache flush issues for SYSMMU v5 [ Upstream commit cd37a296a9f890586665bb8974a8b17ee2f17d6d ] For some unknown reasons, in some cases, FLPD cache invalidation doesn't work properly with SYSMMU v5 controllers found in Exynos5433 SoCs. This can be observed by a firmware crash during initialization phase of MFC video decoder available in the mentioned SoCs when IOMMU support is enabled. To workaround this issue perform a full TLB/FLPD invalidation in case of replacing any first level page descriptors in case of SYSMMU v5. Fixes: 740a01eee9ada ("iommu/exynos: Add support for v5 SYSMMU") CC: stable@vger.kernel.org # v4.10+ Signed-off-by: Marek Szyprowski Tested-by: Andrzej Hajda Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/exynos-iommu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index c7820b3ea80e..beef59eb94fa 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -543,7 +543,10 @@ static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data, if (is_sysmmu_active(data) && data->version >= MAKE_MMU_VER(3, 3)) { clk_enable(data->clk_master); if (sysmmu_block(data)) { - __sysmmu_tlb_invalidate_entry(data, iova, 1); + if (data->version >= MAKE_MMU_VER(5, 0)) + __sysmmu_tlb_invalidate(data); + else + __sysmmu_tlb_invalidate_entry(data, iova, 1); sysmmu_unblock(data); } clk_disable(data->clk_master); -- GitLab From bfb38fbd868d5780f7a57d9ec46979041d387172 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Mon, 20 Mar 2017 16:13:45 +0800 Subject: [PATCH 0557/1398] r8152: fix the rx early size of RTL8153 [ Upstream commit b20cb60e2b865638459e6ec82ad3536d3734e555 ] revert commit a59e6d815226 ("r8152: correct the rx early size") and fix the rx early size as (rx buffer size - rx packet size - rx desc size - alignment) / 4 Signed-off-by: Hayes Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/r8152.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 823219cf95ef..5bc0377cdcdd 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "8" +#define NET_VERSION "9" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -501,6 +501,8 @@ enum rtl_register_content { #define RTL8153_RMS RTL8153_MAX_PACKET #define RTL8152_TX_TIMEOUT (5 * HZ) #define RTL8152_NAPI_WEIGHT 64 +#define rx_reserved_size(x) ((x) + VLAN_ETH_HLEN + CRC_SIZE + \ + sizeof(struct rx_desc) + RX_ALIGN) /* rtl8152 flags */ enum rtl8152_flags { @@ -2253,8 +2255,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp) static void r8153_set_rx_early_size(struct r8152 *tp) { - u32 mtu = tp->netdev->mtu; - u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8; + u32 ocp_data = (agg_buf_sz - rx_reserved_size(tp->netdev->mtu)) / 4; ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data); } -- GitLab From e6e8067ec34ad422125a54f88a0884dbb296009f Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 21 Mar 2017 10:47:49 +0100 Subject: [PATCH 0558/1398] tipc: fix nametbl deadlock at tipc_nametbl_unsubscribe [ Upstream commit 557d054c01da0337ca81de9e9d9206d57245b57e ] Until now, tipc_nametbl_unsubscribe() is called at subscriptions reference count cleanup. Usually the subscriptions cleanup is called at subscription timeout or at subscription cancel or at subscriber delete. We have ignored the possibility of this being called from other locations, which causes deadlock as we try to grab the tn->nametbl_lock while holding it already. CPU1: CPU2: ---------- ---------------- tipc_nametbl_publish spin_lock_bh(&tn->nametbl_lock) tipc_nametbl_insert_publ tipc_nameseq_insert_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <> CPU1: CPU2: ---------- ---------------- tipc_nametbl_stop spin_lock_bh(&tn->nametbl_lock) tipc_purge_publications tipc_nameseq_remove_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <> In this commit, we advance the calling of tipc_nametbl_unsubscribe() from the refcount cleanup to the intended callers. Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete") Reported-by: John Thompson Acked-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/tipc/subscr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 9d94e65d0894..271cd66e4b3b 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -141,6 +141,11 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(&subscriber->lock); + tipc_nametbl_unsubscribe(sub); + spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, @@ -173,7 +178,6 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_subscriber *subscriber = sub->subscriber; spin_lock_bh(&subscriber->lock); - tipc_nametbl_unsubscribe(sub); list_del(&sub->subscrp_list); atomic_dec(&tn->subscription_count); spin_unlock_bh(&subscriber->lock); @@ -205,6 +209,7 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) continue; + tipc_nametbl_unsubscribe(sub); tipc_subscrp_get(sub); spin_unlock_bh(&subscriber->lock); tipc_subscrp_delete(sub); -- GitLab From 7656871eff6ae5957394767a08ee0ce581023ed7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Mar 2017 08:57:15 -0700 Subject: [PATCH 0559/1398] inet: frag: release spinlock before calling icmp_send() [ Upstream commit ec4fbd64751de18729eaa816ec69e4b504b5a7a2 ] Dmitry reported a lockdep splat [1] (false positive) that we can fix by releasing the spinlock before calling icmp_send() from ip_expire() This is a false positive because sending an ICMP message can not possibly re-enter the IP frag engine. [1] [ INFO: possible circular locking dependency detected ] 4.10.0+ #29 Not tainted ------------------------------------------------------- modprobe/12392 is trying to acquire lock: (_xmit_ETHER#2){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] (_xmit_ETHER#2){+.-...}, at: [] __netif_tx_lock include/linux/netdevice.h:3486 [inline] (_xmit_ETHER#2){+.-...}, at: [] sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180 but task is already holding lock: (&(&q->lock)->rlock){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] (&(&q->lock)->rlock){+.-...}, at: [] ip_expire+0x51/0x6c0 net/ipv4/ip_fragment.c:201 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&(&q->lock)->rlock){+.-...}: validate_chain kernel/locking/lockdep.c:2267 [inline] __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340 lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] ip_defrag+0x3a2/0x4130 net/ipv4/ip_fragment.c:669 ip_check_defrag+0x4e3/0x8b0 net/ipv4/ip_fragment.c:713 packet_rcv_fanout+0x282/0x800 net/packet/af_packet.c:1459 deliver_skb net/core/dev.c:1834 [inline] dev_queue_xmit_nit+0x294/0xa90 net/core/dev.c:1890 xmit_one net/core/dev.c:2903 [inline] dev_hard_start_xmit+0x16b/0xab0 net/core/dev.c:2923 sch_direct_xmit+0x31f/0x6d0 net/sched/sch_generic.c:182 __dev_xmit_skb net/core/dev.c:3092 [inline] __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358 dev_queue_xmit+0x17/0x20 net/core/dev.c:3423 neigh_resolve_output+0x6b9/0xb10 net/core/neighbour.c:1308 neigh_output include/net/neighbour.h:478 [inline] ip_finish_output2+0x8b8/0x15a0 net/ipv4/ip_output.c:228 ip_do_fragment+0x1d93/0x2720 net/ipv4/ip_output.c:672 ip_fragment.constprop.54+0x145/0x200 net/ipv4/ip_output.c:545 ip_finish_output+0x82d/0xe10 net/ipv4/ip_output.c:314 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404 dst_output include/net/dst.h:486 [inline] ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124 ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492 ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512 raw_sendmsg+0x26de/0x3a00 net/ipv4/raw.c:655 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 ___sys_sendmsg+0x4a3/0x9f0 net/socket.c:1985 __sys_sendmmsg+0x25c/0x750 net/socket.c:2075 SYSC_sendmmsg net/socket.c:2106 [inline] SyS_sendmmsg+0x35/0x60 net/socket.c:2101 do_syscall_64+0x2e8/0x930 arch/x86/entry/common.c:281 return_from_SYSCALL_64+0x0/0x7a -> #0 (_xmit_ETHER#2){+.-...}: check_prev_add kernel/locking/lockdep.c:1830 [inline] check_prevs_add+0xa8f/0x19f0 kernel/locking/lockdep.c:1940 validate_chain kernel/locking/lockdep.c:2267 [inline] __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340 lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] __netif_tx_lock include/linux/netdevice.h:3486 [inline] sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180 __dev_xmit_skb net/core/dev.c:3092 [inline] __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358 dev_queue_xmit+0x17/0x20 net/core/dev.c:3423 neigh_hh_output include/net/neighbour.h:468 [inline] neigh_output include/net/neighbour.h:476 [inline] ip_finish_output2+0xf6c/0x15a0 net/ipv4/ip_output.c:228 ip_finish_output+0xa29/0xe10 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404 dst_output include/net/dst.h:486 [inline] ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124 ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492 ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512 icmp_push_reply+0x372/0x4d0 net/ipv4/icmp.c:394 icmp_send+0x156c/0x1c80 net/ipv4/icmp.c:754 ip_expire+0x40e/0x6c0 net/ipv4/ip_fragment.c:239 call_timer_fn+0x241/0x820 kernel/time/timer.c:1268 expire_timers kernel/time/timer.c:1307 [inline] __run_timers+0x960/0xcf0 kernel/time/timer.c:1601 run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614 __do_softirq+0x31f/0xbe7 kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:657 [inline] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962 apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:707 __read_once_size include/linux/compiler.h:254 [inline] atomic_read arch/x86/include/asm/atomic.h:26 [inline] rcu_dynticks_curr_cpu_in_eqs kernel/rcu/tree.c:350 [inline] __rcu_is_watching kernel/rcu/tree.c:1133 [inline] rcu_is_watching+0x83/0x110 kernel/rcu/tree.c:1147 rcu_read_lock_held+0x87/0xc0 kernel/rcu/update.c:293 radix_tree_deref_slot include/linux/radix-tree.h:238 [inline] filemap_map_pages+0x6d4/0x1570 mm/filemap.c:2335 do_fault_around mm/memory.c:3231 [inline] do_read_fault mm/memory.c:3265 [inline] do_fault+0xbd5/0x2080 mm/memory.c:3370 handle_pte_fault mm/memory.c:3600 [inline] __handle_mm_fault+0x1062/0x2cb0 mm/memory.c:3714 handle_mm_fault+0x1e2/0x480 mm/memory.c:3751 __do_page_fault+0x4f6/0xb60 arch/x86/mm/fault.c:1397 do_page_fault+0x54/0x70 arch/x86/mm/fault.c:1460 page_fault+0x28/0x30 arch/x86/entry/entry_64.S:1011 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(&q->lock)->rlock); lock(_xmit_ETHER#2); lock(&(&q->lock)->rlock); lock(_xmit_ETHER#2); *** DEADLOCK *** 10 locks held by modprobe/12392: #0: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x2b8/0xb60 arch/x86/mm/fault.c:1336 #1: (rcu_read_lock){......}, at: [] filemap_map_pages+0x1e6/0x1570 mm/filemap.c:2324 #2: (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] #2: (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [] pte_alloc_one_map mm/memory.c:2944 [inline] #2: (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [] alloc_set_pte+0x13b8/0x1b90 mm/memory.c:3072 #3: (((&q->timer))){+.-...}, at: [] lockdep_copy_map include/linux/lockdep.h:175 [inline] #3: (((&q->timer))){+.-...}, at: [] call_timer_fn+0x1c2/0x820 kernel/time/timer.c:1258 #4: (&(&q->lock)->rlock){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] #4: (&(&q->lock)->rlock){+.-...}, at: [] ip_expire+0x51/0x6c0 net/ipv4/ip_fragment.c:201 #5: (rcu_read_lock){......}, at: [] ip_expire+0x1b3/0x6c0 net/ipv4/ip_fragment.c:216 #6: (slock-AF_INET){+.-...}, at: [] spin_trylock include/linux/spinlock.h:309 [inline] #6: (slock-AF_INET){+.-...}, at: [] icmp_xmit_lock net/ipv4/icmp.c:219 [inline] #6: (slock-AF_INET){+.-...}, at: [] icmp_send+0x803/0x1c80 net/ipv4/icmp.c:681 #7: (rcu_read_lock_bh){......}, at: [] ip_finish_output2+0x2c1/0x15a0 net/ipv4/ip_output.c:198 #8: (rcu_read_lock_bh){......}, at: [] __dev_queue_xmit+0x23e/0x1e60 net/core/dev.c:3324 #9: (dev->qdisc_running_key ?: &qdisc_running_key){+.....}, at: [] dev_queue_xmit+0x17/0x20 net/core/dev.c:3423 stack backtrace: CPU: 0 PID: 12392 Comm: modprobe Not tainted 4.10.0+ #29 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:52 print_circular_bug+0x307/0x3b0 kernel/locking/lockdep.c:1204 check_prev_add kernel/locking/lockdep.c:1830 [inline] check_prevs_add+0xa8f/0x19f0 kernel/locking/lockdep.c:1940 validate_chain kernel/locking/lockdep.c:2267 [inline] __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340 lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] __netif_tx_lock include/linux/netdevice.h:3486 [inline] sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180 __dev_xmit_skb net/core/dev.c:3092 [inline] __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358 dev_queue_xmit+0x17/0x20 net/core/dev.c:3423 neigh_hh_output include/net/neighbour.h:468 [inline] neigh_output include/net/neighbour.h:476 [inline] ip_finish_output2+0xf6c/0x15a0 net/ipv4/ip_output.c:228 ip_finish_output+0xa29/0xe10 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404 dst_output include/net/dst.h:486 [inline] ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124 ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492 ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512 icmp_push_reply+0x372/0x4d0 net/ipv4/icmp.c:394 icmp_send+0x156c/0x1c80 net/ipv4/icmp.c:754 ip_expire+0x40e/0x6c0 net/ipv4/ip_fragment.c:239 call_timer_fn+0x241/0x820 kernel/time/timer.c:1268 expire_timers kernel/time/timer.c:1307 [inline] __run_timers+0x960/0xcf0 kernel/time/timer.c:1601 run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614 __do_softirq+0x31f/0xbe7 kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:657 [inline] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962 apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:707 RIP: 0010:__read_once_size include/linux/compiler.h:254 [inline] RIP: 0010:atomic_read arch/x86/include/asm/atomic.h:26 [inline] RIP: 0010:rcu_dynticks_curr_cpu_in_eqs kernel/rcu/tree.c:350 [inline] RIP: 0010:__rcu_is_watching kernel/rcu/tree.c:1133 [inline] RIP: 0010:rcu_is_watching+0x83/0x110 kernel/rcu/tree.c:1147 RSP: 0000:ffff8801c391f120 EFLAGS: 00000a03 ORIG_RAX: ffffffffffffff10 RAX: dffffc0000000000 RBX: ffff8801c391f148 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 000055edd4374000 RDI: ffff8801dbe1ae0c RBP: ffff8801c391f1a0 R08: 0000000000000002 R09: 0000000000000000 R10: dffffc0000000000 R11: 0000000000000002 R12: 1ffff10038723e25 R13: ffff8801dbe1ae00 R14: ffff8801c391f680 R15: dffffc0000000000 rcu_read_lock_held+0x87/0xc0 kernel/rcu/update.c:293 radix_tree_deref_slot include/linux/radix-tree.h:238 [inline] filemap_map_pages+0x6d4/0x1570 mm/filemap.c:2335 do_fault_around mm/memory.c:3231 [inline] do_read_fault mm/memory.c:3265 [inline] do_fault+0xbd5/0x2080 mm/memory.c:3370 handle_pte_fault mm/memory.c:3600 [inline] __handle_mm_fault+0x1062/0x2cb0 mm/memory.c:3714 handle_mm_fault+0x1e2/0x480 mm/memory.c:3751 __do_page_fault+0x4f6/0xb60 arch/x86/mm/fault.c:1397 do_page_fault+0x54/0x70 arch/x86/mm/fault.c:1460 page_fault+0x28/0x30 arch/x86/entry/entry_64.S:1011 RIP: 0033:0x7f83172f2786 RSP: 002b:00007fffe859ae80 EFLAGS: 00010293 RAX: 000055edd4373040 RBX: 00007f83175111c8 RCX: 000055edd4373238 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00007f8317510970 RBP: 00007fffe859afd0 R08: 0000000000000009 R09: 0000000000000000 R10: 0000000000000064 R11: 0000000000000000 R12: 000055edd4373040 R13: 0000000000000000 R14: 00007fffe859afe8 R15: 0000000000000000 Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 453db950dc9f..4bf3b8af0257 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -198,6 +198,7 @@ static void ip_expire(unsigned long arg) qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); + rcu_read_lock(); spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) @@ -207,7 +208,7 @@ static void ip_expire(unsigned long arg) __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); if (!inet_frag_evicting(&qp->q)) { - struct sk_buff *head = qp->q.fragments; + struct sk_buff *clone, *head = qp->q.fragments; const struct iphdr *iph; int err; @@ -216,32 +217,40 @@ static void ip_expire(unsigned long arg) if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) goto out; - rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) - goto out_rcu_unlock; + goto out; + /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); if (err) - goto out_rcu_unlock; + goto out; /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ if (frag_expire_skip_icmp(qp->user) && (skb_rtable(head)->rt_type != RTN_LOCAL)) - goto out_rcu_unlock; + goto out; + + clone = skb_clone(head, GFP_ATOMIC); /* Send an ICMP "Fragment Reassembly Timeout" message. */ - icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); -out_rcu_unlock: - rcu_read_unlock(); + if (clone) { + spin_unlock(&qp->q.lock); + icmp_send(clone, ICMP_TIME_EXCEEDED, + ICMP_EXC_FRAGTIME, 0); + consume_skb(clone); + goto out_rcu_unlock; + } } out: spin_unlock(&qp->q.lock); +out_rcu_unlock: + rcu_read_unlock(); ipq_put(qp); } -- GitLab From 0f4aa1f0f576b4a4706eb0f419a23922c45deb31 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Thu, 16 Mar 2017 18:26:02 +0100 Subject: [PATCH 0560/1398] pinctrl: st: add irq_request/release_resources callbacks [ Upstream commit e855fa9a65c40788b5069abb0d094537daa22e05 ] When using GPIO as IRQ source, the GPIO must be configured in INPUT. Callbacks dedicated for this was missing in pinctrl-st driver. This fix the following kernel error when trying to lock a gpio as IRQ: [ 7.521095] gpio gpiochip7: (PIO11): gpiochip_lock_as_irq: tried to flag a GPIO set as output for IRQ [ 7.526018] gpio gpiochip7: (PIO11): unable to lock HW IRQ 6 for IRQ [ 7.529405] genirq: Failed to request resources for 0-0053 (irq 81) on irqchip GPIO Signed-off-by: Patrice Chotard Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-st.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index b7bb37167969..50c45bdf93be 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -1285,6 +1285,22 @@ static void st_gpio_irq_unmask(struct irq_data *d) writel(BIT(d->hwirq), bank->base + REG_PIO_SET_PMASK); } +static int st_gpio_irq_request_resources(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + + st_gpio_direction_input(gc, d->hwirq); + + return gpiochip_lock_as_irq(gc, d->hwirq); +} + +static void st_gpio_irq_release_resources(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + + gpiochip_unlock_as_irq(gc, d->hwirq); +} + static int st_gpio_irq_set_type(struct irq_data *d, unsigned type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); @@ -1438,12 +1454,14 @@ static struct gpio_chip st_gpio_template = { }; static struct irq_chip st_gpio_irqchip = { - .name = "GPIO", - .irq_disable = st_gpio_irq_mask, - .irq_mask = st_gpio_irq_mask, - .irq_unmask = st_gpio_irq_unmask, - .irq_set_type = st_gpio_irq_set_type, - .flags = IRQCHIP_SKIP_SET_WAKE, + .name = "GPIO", + .irq_request_resources = st_gpio_irq_request_resources, + .irq_release_resources = st_gpio_irq_release_resources, + .irq_disable = st_gpio_irq_mask, + .irq_mask = st_gpio_irq_mask, + .irq_unmask = st_gpio_irq_unmask, + .irq_set_type = st_gpio_irq_set_type, + .flags = IRQCHIP_SKIP_SET_WAKE, }; static int st_gpiolib_register_bank(struct st_pinctrl *info, -- GitLab From 3bd2017b6a205092de4d65d2fc67b8cfca6b676f Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Thu, 23 Mar 2017 08:47:18 -0400 Subject: [PATCH 0561/1398] scsi: lpfc: Fix PT2PT PRLI reject [ Upstream commit a71e3cdcfce4880a4578915e110e3eaed1659765 ] lpfc cannot establish connection with targets that send PRLI in P2P configurations. If lpfc rejects a PRLI that is sent from a target the target will not resend and will reject the PRLI send from the initiator. [mkp: applied by hand] Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_els.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 9c9563312a3d..fc7addaf24da 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -7782,7 +7782,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, did, vport->port_state, ndlp->nlp_flag); phba->fc_stat.elsRcvPRLI++; - if (vport->port_state < LPFC_DISC_AUTH) { + if ((vport->port_state < LPFC_DISC_AUTH) && + (vport->fc_flag & FC_FABRIC)) { rjt_err = LSRJT_UNABLE_TPC; rjt_exp = LSEXP_NOTHING_MORE; break; -- GitLab From 8386ff5203e04f1b720582eec7eaab63712403f4 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 16 Mar 2017 13:53:59 -0700 Subject: [PATCH 0562/1398] kvm: vmx: Flush TLB when the APIC-access address changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fb6c8198431311027c3434d4e94ab8bc040f7aea ] Quoting from the Intel SDM, volume 3, section 28.3.3.4: Guidelines for Use of the INVEPT Instruction: If EPT was in use on a logical processor at one time with EPTP X, it is recommended that software use the INVEPT instruction with the "single-context" INVEPT type and with EPTP X in the INVEPT descriptor before a VM entry on the same logical processor that enables EPT with EPTP X and either (a) the "virtualize APIC accesses" VM-execution control was changed from 0 to 1; or (b) the value of the APIC-access address was changed. In the nested case, the burden falls on L1, unless L0 enables EPT in vmcs02 when L1 doesn't enable EPT in vmcs12. Signed-off-by: Jim Mattson Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a929ca03b7ed..cc55661f9b4e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3816,6 +3816,12 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); } +static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu) +{ + if (enable_ept) + vmx_flush_tlb(vcpu); +} + static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; @@ -8494,6 +8500,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } else { sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmx_flush_tlb_ept_only(vcpu); } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); @@ -8519,8 +8526,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) */ if (!is_guest_mode(vcpu) || !nested_cpu_has2(get_vmcs12(&vmx->vcpu), - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { vmcs_write64(APIC_ACCESS_ADDR, hpa); + vmx_flush_tlb_ept_only(vcpu); + } } static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) @@ -10093,6 +10102,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (nested_cpu_has_ept(vmcs12)) { kvm_mmu_unload(vcpu); nested_ept_init_mmu_context(vcpu); + } else if (nested_cpu_has2(vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + vmx_flush_tlb_ept_only(vcpu); } if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) @@ -10833,6 +10845,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx->nested.change_vmcs01_virtual_x2apic_mode = false; vmx_set_virtual_x2apic_mode(vcpu, vcpu->arch.apic_base & X2APIC_ENABLE); + } else if (!nested_cpu_has_ept(vmcs12) && + nested_cpu_has2(vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + vmx_flush_tlb_ept_only(vcpu); } /* This is needed for same reason as it was needed in prepare_vmcs02 */ -- GitLab From e0249c023448333d2a9f2e385d036237b48c67fe Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Mar 2017 21:18:55 -0700 Subject: [PATCH 0563/1398] KVM: x86: correct async page present tracepoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 24dccf83a121b8a4ad5c2ad383a8184ef6c266ee ] After async pf setup successfully, there is a broadcast wakeup w/ special token 0xffffffff which tells vCPU that it should wake up all processes waiting for APFs though there is no real process waiting at the moment. The async page present tracepoint print prematurely and fails to catch the special token setup. This patch fixes it by moving the async page present tracepoint after the special token setup. Before patch: qemu-system-x86-8499 [006] ...1 5973.473292: kvm_async_pf_ready: token 0x0 gva 0x0 After patch: qemu-system-x86-8499 [006] ...1 5973.473292: kvm_async_pf_ready: token 0xffffffff gva 0x0 Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 26b580ad268f..f4d893713d54 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8443,11 +8443,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, { struct x86_exception fault; - trace_kvm_async_pf_ready(work->arch.token, work->gva); if (work->wakeup_all) work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); + trace_kvm_async_pf_ready(work->arch.token, work->gva); if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) && !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { -- GitLab From 2df19698db73552428a47c47166a16b9c16d19ba Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 23 Mar 2017 05:30:08 -0700 Subject: [PATCH 0564/1398] KVM: VMX: Fix enable VPID conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 08d839c4b134b8328ec42f2157a9ca4b93227c03 ] This can be reproduced by running L2 on L1, and disable VPID on L0 if w/o commit "KVM: nVMX: Fix nested VPID vmx exec control", the L2 crash as below: KVM: entry failed, hardware error 0x7 EAX=00000000 EBX=00000000 ECX=00000000 EDX=000306c3 ESI=00000000 EDI=00000000 EBP=00000000 ESP=00000000 EIP=0000fff0 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 00000000 0000ffff 00009300 CS =f000 ffff0000 0000ffff 00009b00 SS =0000 00000000 0000ffff 00009300 DS =0000 00000000 0000ffff 00009300 FS =0000 00000000 0000ffff 00009300 GS =0000 00000000 0000ffff 00009300 LDT=0000 00000000 0000ffff 00008200 TR =0000 00000000 0000ffff 00008b00 GDT= 00000000 0000ffff IDT= 00000000 0000ffff CR0=60000010 CR2=00000000 CR3=00000000 CR4=00000000 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000ffff0ff0 DR7=0000000000000400 EFER=0000000000000000 Reference SDM 30.3 INVVPID: Protected Mode Exceptions - #UD - If not in VMX operation. - If the logical processor does not support VPIDs (IA32_VMX_PROCBASED_CTLS2[37]=0). - If the logical processor supports VPIDs (IA32_VMX_PROCBASED_CTLS2[37]=1) but does not support the INVVPID instruction (IA32_VMX_EPT_VPID_CAP[32]=0). So we should check both VPID enable bit in vmx exec control and INVVPID support bit in vmx capability MSRs to enable VPID. This patch adds the guarantee to not enable VPID if either INVVPID or single-context/all-context invalidation is not exposed in vmx capability MSRs. Reviewed-by: David Hildenbrand Reviewed-by: Jim Mattson Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cc55661f9b4e..263e56059fd5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1199,6 +1199,11 @@ static inline bool cpu_has_vmx_invvpid_global(void) return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; } +static inline bool cpu_has_vmx_invvpid(void) +{ + return vmx_capability.vpid & VMX_VPID_INVVPID_BIT; +} + static inline bool cpu_has_vmx_ept(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -6434,8 +6439,10 @@ static __init int hardware_setup(void) if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); - if (!cpu_has_vmx_vpid()) + if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || + !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) enable_vpid = 0; + if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; if (enable_shadow_vmcs) -- GitLab From 5700ffc4accb9df36999fdf4cef5778160f34236 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 21 Mar 2017 21:03:01 -0500 Subject: [PATCH 0565/1398] ARM: dts: ti: fix PCI bus dtc warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7d79f6098d82f8c09914d7799bc96891ad9c3baf ] dtc recently added PCI bus checks. Fix these warnings. Signed-off-by: Rob Herring Cc: "Benoît Cousson" Cc: Tony Lindgren Cc: linux-omap@vger.kernel.org Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dra7.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 064d84f87e45..ce54a70b7695 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -282,6 +282,7 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x20013000 0x13000 0 0xffed000>; + bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; linux,pci-domain = <0>; @@ -318,6 +319,7 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x30013000 0x13000 0 0xffed000>; + bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; linux,pci-domain = <1>; -- GitLab From afa055f2a1d0ffcb51cc2b38eb610752c60fdb48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 23 Mar 2017 16:03:11 +0100 Subject: [PATCH 0566/1398] hwmon: (asus_atk0110) fix uninitialized data access [ Upstream commit a2125d02443e9a4e68bcfd9f8004fa23239e8329 ] The latest gcc-7 snapshot adds a warning to point out that when atk_read_value_old or atk_read_value_new fails, we copy uninitialized data into sensor->cached_value: drivers/hwmon/asus_atk0110.c: In function 'atk_input_show': drivers/hwmon/asus_atk0110.c:651:26: error: 'value' may be used uninitialized in this function [-Werror=maybe-uninitialized] Adding an error check avoids this. All versions of the driver are affected. Fixes: 2c03d07ad54d ("hwmon: Add Asus ATK0110 support") Signed-off-by: Arnd Bergmann Reviewed-by: Luca Tettamanti Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/asus_atk0110.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c index cccef87963e0..975c43d446f8 100644 --- a/drivers/hwmon/asus_atk0110.c +++ b/drivers/hwmon/asus_atk0110.c @@ -646,6 +646,9 @@ static int atk_read_value(struct atk_sensor_data *sensor, u64 *value) else err = atk_read_value_new(sensor, value); + if (err) + return err; + sensor->is_valid = true; sensor->last_updated = jiffies; sensor->cached_value = *value; -- GitLab From 347848e0bb9956e070f8cfdee63ef5b8bb09af52 Mon Sep 17 00:00:00 2001 From: Peter Stein Date: Fri, 17 Feb 2017 00:00:50 -0800 Subject: [PATCH 0567/1398] HID: xinmo: fix for out of range for THT 2P arcade controller. [ Upstream commit 9257821c5a1dc57ef3a37f7cbcebaf548395c964 ] There is a new clone of the XIN MO arcade controller which has same issue with out of range like the original. This fix will solve the issue where 2 directions on the joystick are not recognized by the new THT 2P arcade controller with device ID 0x75e1. In details the new device ID is added the hid-id list and the hid-xinmo source code. Signed-off-by: Peter Stein Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 1 + drivers/hid/hid-xinmo.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 6615b4d2f392..e32862ca5223 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2107,6 +2107,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET) }, { HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) }, { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) }, { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) }, { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) }, { HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index b365cb1df26e..244b97c1b74e 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1080,6 +1080,7 @@ #define USB_VENDOR_ID_XIN_MO 0x16c0 #define USB_DEVICE_ID_XIN_MO_DUAL_ARCADE 0x05e1 +#define USB_DEVICE_ID_THT_2P_ARCADE 0x75e1 #define USB_VENDOR_ID_XIROKU 0x1477 #define USB_DEVICE_ID_XIROKU_SPX 0x1006 diff --git a/drivers/hid/hid-xinmo.c b/drivers/hid/hid-xinmo.c index 7df5227a7e61..9ad7731d2e10 100644 --- a/drivers/hid/hid-xinmo.c +++ b/drivers/hid/hid-xinmo.c @@ -46,6 +46,7 @@ static int xinmo_event(struct hid_device *hdev, struct hid_field *field, static const struct hid_device_id xinmo_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) }, { } }; -- GitLab From b89e229112c0277d73f41196c2b90397d4fdadd2 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Thu, 23 Mar 2017 19:39:54 +0100 Subject: [PATCH 0568/1398] ASoC: STI: Fix reader substream pointer set [ Upstream commit 3c9d3f1bc2defd418b5933bbc928096c9c686d3b ] reader->substream is used in IRQ handler for error case but is never set. Set value to pcm substream on DAI startup and clean it on dai shutdown. Signed-off-by: Arnaud Pouliquen Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/sti/uniperif_reader.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index 0e1c3ee56675..9735b4caaed3 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -364,6 +364,8 @@ static int uni_reader_startup(struct snd_pcm_substream *substream, struct uniperif *reader = priv->dai_data.uni; int ret; + reader->substream = substream; + if (!UNIPERIF_TYPE_IS_TDM(reader)) return 0; @@ -393,6 +395,7 @@ static void uni_reader_shutdown(struct snd_pcm_substream *substream, /* Stop the reader */ uni_reader_stop(reader); } + reader->substream = NULL; } static const struct snd_soc_dai_ops uni_reader_dai_ops = { -- GitLab From 60d59823046a7cfbfb6fe13ed9d320aafe1338a2 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 23 Mar 2017 19:14:19 +0800 Subject: [PATCH 0569/1398] r8152: prevent the driver from transmitting packets with carrier off [ Upstream commit 2f25abe6bac573928a990ccbdac75873add8127e ] The linking status may be changed when autosuspend. And, after autoresume, the driver may try to transmit packets when the device is carrier off, because the interrupt transfer doesn't update the linking status, yet. And, if the device is in ALDPS mode, the device would stop working. The another similar case is 1. unplug the cable. 2. interrupt transfer queue a work_queue for linking change. 3. device enters the ALDPS mode. 4. a tx occurs before the work_queue is called. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/r8152.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 5bc0377cdcdd..b2d7c7e32250 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1294,6 +1294,7 @@ static void intr_callback(struct urb *urb) } } else { if (netif_carrier_ok(tp->netdev)) { + netif_stop_queue(tp->netdev); set_bit(RTL8152_LINK_CHG, &tp->flags); schedule_delayed_work(&tp->schedule, 0); } @@ -3167,6 +3168,9 @@ static void set_carrier(struct r8152 *tp) napi_enable(&tp->napi); netif_wake_queue(netdev); netif_info(tp, link, netdev, "carrier on\n"); + } else if (netif_queue_stopped(netdev) && + skb_queue_len(&tp->tx_queue) < tp->tx_qlen) { + netif_wake_queue(netdev); } } else { if (netif_carrier_ok(netdev)) { @@ -3700,8 +3704,18 @@ static int rtl8152_resume(struct usb_interface *intf) tp->rtl_ops.autosuspend_en(tp, false); napi_disable(&tp->napi); set_bit(WORK_ENABLE, &tp->flags); - if (netif_carrier_ok(tp->netdev)) - rtl_start_rx(tp); + + if (netif_carrier_ok(tp->netdev)) { + if (rtl8152_get_speed(tp) & LINK_STATUS) { + rtl_start_rx(tp); + } else { + netif_carrier_off(tp->netdev); + tp->rtl_ops.disable(tp); + netif_info(tp, link, tp->netdev, + "linking down\n"); + } + } + napi_enable(&tp->napi); clear_bit(SELECTIVE_SUSPEND, &tp->flags); smp_mb__after_atomic(); -- GitLab From 118b0404d68d4019ff7906175b3b85fb50f06e84 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 23 Mar 2017 14:55:08 +0100 Subject: [PATCH 0570/1398] s390/qeth: size calculation outbound buffers [ Upstream commit 7d969d2e8890f546c8cec634b3aa5f57d4eef883 ] Depending on the device type, hard_start_xmit() builds different output buffer formats. For instance with HiperSockets, on both L2 and L3 we strip the ETH header from the skb - L3 doesn't need it, and L2 carries it in the buffer's header element. For this, we pass data_offset = ETH_HLEN all the way down to __qeth_fill_buffer(), where skb->data is then adjusted accordingly. But the initial size calculation still considers the *full* skb length (including the ETH header). So qeth_get_elements_no() can erroneously reject a skb as too big, even though it would actually fit into an output buffer once the ETH header has been trimmed off later. Fix this by passing an additional offset to qeth_get_elements_no(), that indicates where in the skb the on-wire data actually begins. Since the current code uses data_offset=-1 for some special handling on OSA, we need to clamp data_offset to 0... On HiperSockets this helps when sending ~MTU-size skbs with weird page alignment. No change for OSA or AF_IUCV. Signed-off-by: Julian Wiedmann Signed-off-by: Ursula Braun Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core.h | 3 ++- drivers/s390/net/qeth_core_main.c | 5 +++-- drivers/s390/net/qeth_l2_main.c | 5 +++-- drivers/s390/net/qeth_l3_main.c | 5 +++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index e2bd2ad01b15..e72234efb648 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -969,7 +969,8 @@ int qeth_bridgeport_query_ports(struct qeth_card *card, int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role); int qeth_bridgeport_an_set(struct qeth_card *card, int enable); int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int); -int qeth_get_elements_no(struct qeth_card *, struct sk_buff *, int); +int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb, + int extra_elems, int data_offset); int qeth_get_elements_for_frags(struct sk_buff *); int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *, struct sk_buff *, struct qeth_hdr *, int, int, int); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index b5fa6bb56b29..838ed6213118 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3842,6 +3842,7 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags); * @card: qeth card structure, to check max. elems. * @skb: SKB address * @extra_elems: extra elems needed, to check against max. + * @data_offset: range starts at skb->data + data_offset * * Returns the number of pages, and thus QDIO buffer elements, needed to cover * skb data, including linear part and fragments. Checks if the result plus @@ -3849,10 +3850,10 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags); * Note: extra_elems is not included in the returned result. */ int qeth_get_elements_no(struct qeth_card *card, - struct sk_buff *skb, int extra_elems) + struct sk_buff *skb, int extra_elems, int data_offset) { int elements = qeth_get_elements_for_range( - (addr_t)skb->data, + (addr_t)skb->data + data_offset, (addr_t)skb->data + skb_headlen(skb)) + qeth_get_elements_for_frags(skb); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index ac33f6c999b1..5082dfeacb95 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -865,7 +865,7 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * chaining we can not send long frag lists */ if ((card->info.type != QETH_CARD_TYPE_IQD) && - !qeth_get_elements_no(card, new_skb, 0)) { + !qeth_get_elements_no(card, new_skb, 0, 0)) { int lin_rc = skb_linearize(new_skb); if (card->options.performance_stats) { @@ -910,7 +910,8 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) } } - elements = qeth_get_elements_no(card, new_skb, elements_needed); + elements = qeth_get_elements_no(card, new_skb, elements_needed, + (data_offset > 0) ? data_offset : 0); if (!elements) { if (data_offset >= 0) kmem_cache_free(qeth_core_header_cache, hdr); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 5735fc3be6c7..d793be6f68e3 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2870,7 +2870,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) */ if ((card->info.type != QETH_CARD_TYPE_IQD) && ((use_tso && !qeth_l3_get_elements_no_tso(card, new_skb, 1)) || - (!use_tso && !qeth_get_elements_no(card, new_skb, 0)))) { + (!use_tso && !qeth_get_elements_no(card, new_skb, 0, 0)))) { int lin_rc = skb_linearize(new_skb); if (card->options.performance_stats) { @@ -2912,7 +2912,8 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) elements = use_tso ? qeth_l3_get_elements_no_tso(card, new_skb, hdr_elements) : - qeth_get_elements_no(card, new_skb, hdr_elements); + qeth_get_elements_no(card, new_skb, hdr_elements, + (data_offset > 0) ? data_offset : 0); if (!elements) { if (data_offset >= 0) kmem_cache_free(qeth_core_header_cache, hdr); -- GitLab From 51533c4bf1442afd9b54747cd3d47d698940d624 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 23 Mar 2017 14:55:09 +0100 Subject: [PATCH 0571/1398] s390/qeth: no ETH header for outbound AF_IUCV [ Upstream commit acd9776b5c45ef02d1a210969a6fcc058afb76e3 ] With AF_IUCV traffic, the skb passed to hard_start_xmit() has a 14 byte slot at skb->data, intended for an ETH header. qeth_l3_fill_af_iucv_hdr() fills this ETH header... and then immediately moves it to the skb's headroom, where it disappears and is never seen again. But it's still possible for us to return NETDEV_TX_BUSY after the skb has been modified. Since we didn't get a private copy of the skb, the next time the skb is delivered to hard_start_xmit() it no longer has the expected layout (we moved the ETH header to the headroom, so skb->data now starts at the IUCV_TRANS header). So when qeth_l3_fill_af_iucv_hdr() does another round of rebuilding, the resulting qeth header ends up all wrong. On transmission, the buffer is then rejected by the HiperSockets device with SBALF15 = x'04'. When this error is passed back to af_iucv as TX_NOTIFY_UNREACHABLE, it tears down the offending socket. As the ETH header for AF_IUCV serves no purpose, just align the code to what we do for IP traffic on L3 HiperSockets: keep the ETH header at skb->data, and pass down data_offset = ETH_HLEN to qeth_fill_buffer(). When mapping the payload into the SBAL elements, the ETH header is then stripped off. This avoids the skb manipulations in qeth_l3_fill_af_iucv_hdr(), and any buffer re-entering hard_start_xmit() after NETDEV_TX_BUSY is now processed properly. Signed-off-by: Julian Wiedmann Signed-off-by: Ursula Braun Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_l3_main.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index d793be6f68e3..f91e70c369ed 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2612,17 +2612,13 @@ static void qeth_l3_fill_af_iucv_hdr(struct qeth_card *card, char daddr[16]; struct af_iucv_trans_hdr *iucv_hdr; - skb_pull(skb, 14); - card->dev->header_ops->create(skb, card->dev, 0, - card->dev->dev_addr, card->dev->dev_addr, - card->dev->addr_len); - skb_pull(skb, 14); - iucv_hdr = (struct af_iucv_trans_hdr *)skb->data; memset(hdr, 0, sizeof(struct qeth_hdr)); hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3; hdr->hdr.l3.ext_flags = 0; - hdr->hdr.l3.length = skb->len; + hdr->hdr.l3.length = skb->len - ETH_HLEN; hdr->hdr.l3.flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST; + + iucv_hdr = (struct af_iucv_trans_hdr *) (skb->data + ETH_HLEN); memset(daddr, 0, sizeof(daddr)); daddr[0] = 0xfe; daddr[1] = 0x80; @@ -2826,10 +2822,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) if ((card->info.type == QETH_CARD_TYPE_IQD) && !skb_is_nonlinear(skb)) { new_skb = skb; - if (new_skb->protocol == ETH_P_AF_IUCV) - data_offset = 0; - else - data_offset = ETH_HLEN; + data_offset = ETH_HLEN; hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC); if (!hdr) goto tx_drop; -- GitLab From 102a8a1634496d53fcf0b5fae92f7729a1d17f76 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 23 Mar 2017 17:07:26 +0100 Subject: [PATCH 0572/1398] bna: avoid writing uninitialized data into hw registers [ Upstream commit a5af83925363eb85d467933e3d6ec5a87001eb7c ] The latest gcc-7 snapshot warns about bfa_ioc_send_enable/bfa_ioc_send_disable writing undefined values into the hardware registers: drivers/net/ethernet/brocade/bna/bfa_ioc.c: In function 'bfa_iocpf_sm_disabling_entry': arch/arm/include/asm/io.h:109:22: error: '*((void *)&disable_req+4)' is used uninitialized in this function [-Werror=uninitialized] arch/arm/include/asm/io.h:109:22: error: '*((void *)&disable_req+8)' is used uninitialized in this function [-Werror=uninitialized] The two functions look like they should do the same thing, but only one of them initializes the time stamp and clscode field. The fact that we only get a warning for one of the two functions seems to be arbitrary, based on the inlining decisions in the compiler. To address this, I'm making both functions do the same thing: - set the clscode from the ioc structure in both - set the time stamp from ktime_get_real_seconds (which also avoids the signed-integer overflow in 2038 and extends the well-defined behavior until 2106). - zero-fill the reserved field Fixes: 8b230ed8ec96 ("bna: Brocade 10Gb Ethernet device driver") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/brocade/bna/bfa_ioc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 9e59663a6ead..0f6811860ad5 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -1930,13 +1930,13 @@ static void bfa_ioc_send_enable(struct bfa_ioc *ioc) { struct bfi_ioc_ctrl_req enable_req; - struct timeval tv; bfi_h2i_set(enable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_ENABLE_REQ, bfa_ioc_portid(ioc)); enable_req.clscode = htons(ioc->clscode); - do_gettimeofday(&tv); - enable_req.tv_sec = ntohl(tv.tv_sec); + enable_req.rsvd = htons(0); + /* overflow in 2106 */ + enable_req.tv_sec = ntohl(ktime_get_real_seconds()); bfa_ioc_mbox_send(ioc, &enable_req, sizeof(struct bfi_ioc_ctrl_req)); } @@ -1947,6 +1947,10 @@ bfa_ioc_send_disable(struct bfa_ioc *ioc) bfi_h2i_set(disable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_DISABLE_REQ, bfa_ioc_portid(ioc)); + disable_req.clscode = htons(ioc->clscode); + disable_req.rsvd = htons(0); + /* overflow in 2106 */ + disable_req.tv_sec = ntohl(ktime_get_real_seconds()); bfa_ioc_mbox_send(ioc, &disable_req, sizeof(struct bfi_ioc_ctrl_req)); } -- GitLab From 26452a5033dc4fd3792bed60ba85fddfe9ef7a5b Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 17 Mar 2017 18:30:07 -0500 Subject: [PATCH 0573/1398] i40iw: Receive netdev events post INET_NOTIFIER state [ Upstream commit 871a8623d3b40221ad1103aff715dfee0aa4dacf ] Netdev notification events are de-registered only when all client iwdev instances are removed. If a single client is closed and re-opened, netdev events could arrive even before the Control Queue-Pair (CQP) is created, causing a NULL pointer dereference crash in i40iw_get_cqp_request. Fix this by allowing netdev event notification only after we have reached the INET_NOTIFIER state with respect to device initialization. Reported-by: Stefan Assmann Signed-off-by: Shiraz Saleem Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/i40iw/i40iw_utils.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 6fd043b1d714..7db2001775cb 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -159,6 +159,9 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, return NOTIFY_DONE; iwdev = &hdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; + netdev = iwdev->ldev->netdev; upper_dev = netdev_master_upper_dev_get(netdev); if (netdev != event_netdev) @@ -231,6 +234,9 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, return NOTIFY_DONE; iwdev = &hdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; + netdev = iwdev->ldev->netdev; if (netdev != event_netdev) return NOTIFY_DONE; @@ -280,6 +286,8 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void * if (!iwhdl) return NOTIFY_DONE; iwdev = &iwhdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; p = (__be32 *)neigh->primary_key; i40iw_copy_ip_ntohl(local_ipaddr, p); if (neigh->nud_state & NUD_VALID) { -- GitLab From cd083d5bcafdc17e6192ddaed96f6744746c984b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 8 Mar 2017 22:00:52 +0200 Subject: [PATCH 0574/1398] IB/core: Protect against self-requeue of a cq work item [ Upstream commit 86f46aba8d1ac3ed0904542158a9b9cb9c7a143c ] We need to make sure that the cq work item does not run when we are destroying the cq. Unlike flush_work, cancel_work_sync protects against self-requeue of the work item (which we can do in ib_cq_poll_work). Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index a754fc727de5..ff12b8d176ce 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -196,7 +196,7 @@ void ib_free_cq(struct ib_cq *cq) irq_poll_disable(&cq->iop); break; case IB_POLL_WORKQUEUE: - flush_work(&cq->work); + cancel_work_sync(&cq->work); break; default: WARN_ON_ONCE(1); -- GitLab From f4fcc56632cfd6d204ca6d64ad0912e626016b65 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 10 Mar 2017 11:34:20 -0700 Subject: [PATCH 0575/1398] infiniband: Fix alignment of mmap cookies to support VIPT caching [ Upstream commit cb8864559631754ac93d5734b165ccd0cad4728c ] When vmalloc_user is used to create memory that is supposed to be mmap'd to user space, it is necessary for the mmap cookie (eg the offset) to be aligned to SHMLBA. This creates a situation where all virtual mappings of the same physical page share the same virtual cache index and guarantees VIPT coherence. Otherwise the cache is non-coherent and the kernel will not see writes by userspace when reading the shared page (or vice-versa). Reported-by: Josh Beavers Signed-off-by: Jason Gunthorpe Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rdmavt/mmap.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_mmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index e202b8142759..6b712eecbd37 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -170,9 +170,9 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, spin_lock_irq(&rdi->mmap_offset_lock); if (rdi->mmap_offset == 0) - rdi->mmap_offset = PAGE_SIZE; + rdi->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA); ip->offset = rdi->mmap_offset; - rdi->mmap_offset += size; + rdi->mmap_offset += ALIGN(size, SHMLBA); spin_unlock_irq(&rdi->mmap_offset_lock); INIT_LIST_HEAD(&ip->pending_mmaps); diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index c572a4c09359..bd812e00988e 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -156,10 +156,10 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, spin_lock_bh(&rxe->mmap_offset_lock); if (rxe->mmap_offset == 0) - rxe->mmap_offset = PAGE_SIZE; + rxe->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA); ip->info.offset = rxe->mmap_offset; - rxe->mmap_offset += size; + rxe->mmap_offset += ALIGN(size, SHMLBA); spin_unlock_bh(&rxe->mmap_offset_lock); -- GitLab From 521a7e3dad6b216eb0058db5c64a1e706e514fc0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 24 Mar 2017 14:08:28 -0400 Subject: [PATCH 0576/1398] nbd: set queue timeout properly [ Upstream commit f8586855031a1d6b243f013c3082631346fddfad ] We can't just set the timeout on the tagset, we have to set it on the queue as it would have been setup already at this point. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 98b767d3171e..7d506cb73e54 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -654,7 +654,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, return nbd_size_set(nbd, bdev, nbd->blksize, arg); case NBD_SET_TIMEOUT: - nbd->tag_set.timeout = arg * HZ; + if (arg) { + nbd->tag_set.timeout = arg * HZ; + blk_queue_rq_timeout(nbd->disk->queue, arg * HZ); + } return 0; case NBD_SET_FLAGS: -- GitLab From 7f077afe94d90bae3262da5e39ba75e40f9c4607 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 24 Mar 2017 09:38:03 -0700 Subject: [PATCH 0577/1398] net: Do not allow negative values for busy_read and busy_poll sysctl interfaces [ Upstream commit 95f255211396958c718aef8c45e3923b5211ea7b ] This change basically codifies what I think was already the limitations on the busy_poll and busy_read sysctl interfaces. We weren't checking the lower bounds and as such could input negative values. The behavior when that was used was dependent on the architecture. In order to prevent any issues with that I am just disabling support for values less than 0 since this way we don't have to worry about any odd behaviors. By limiting the sysctl values this way it also makes it consistent with how we handle the SO_BUSY_POLL socket option since the value appears to be reported as a signed integer value and negative values are rejected. Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/core/sysctl_net_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 0df2aa652530..a7f05f0130e8 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -369,14 +369,16 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_net_busy_poll, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "busy_read", .data = &sysctl_net_busy_read, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, #endif #ifdef CONFIG_NET_SCHED -- GitLab From 2f0e39f2e3d77eb086995a99c8361d7f13020120 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Mar 2017 08:21:52 +0300 Subject: [PATCH 0578/1398] IB/rxe: double free on error [ Upstream commit ded260235308f340b979258a4c736e06ba12c747 ] "goto err;" has it's own kfree_skb() call so it's a double free. We only need to free on the "goto exit;" path. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_req.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 9d084780ac91..5b0ca35c06ab 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -726,11 +726,11 @@ int rxe_requester(void *arg) ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); if (ret) { qp->need_req_skb = 1; - kfree_skb(skb); rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { + kfree_skb(skb); rxe_run_task(&qp->req.task, 1); goto exit; } -- GitLab From 2eb783a705b054067d0c652f4afc73db32a1d6b9 Mon Sep 17 00:00:00 2001 From: David Marchand Date: Fri, 24 Feb 2017 15:38:26 +0100 Subject: [PATCH 0579/1398] IB/rxe: increment msn only when completing a request [ Upstream commit 9fcd67d1772c43d2f23e8fca56acc7219e991676 ] According to C9-147, MSN should only be incremented when the last packet of a multi packet request has been received. "Logically, the requester associates a sequential Send Sequence Number (SSN) with each WQE posted to the send queue. The SSN bears a one- to-one relationship to the MSN returned by the responder in each re- sponse packet. Therefore, when the requester receives a response, it in- terprets the MSN as representing the SSN of the most recent request completed by the responder to determine which send WQE(s) can be completed." Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: David Marchand Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_resp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 7705820cdac6..8c0ddd7165ae 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -799,18 +799,17 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) /* Unreachable */ WARN_ON(1); - /* We successfully processed this new request. */ - qp->resp.msn++; - /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; - if (pkt->mask & RXE_COMP_MASK) + if (pkt->mask & RXE_COMP_MASK) { + /* We successfully processed this new request. */ + qp->resp.msn++; return RESPST_COMPLETE; - else if (qp_type(qp) == IB_QPT_RC) + } else if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; -- GitLab From 661f5348696a39ace18fcc028513ef7527a036e3 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 24 Mar 2017 15:01:42 -0700 Subject: [PATCH 0580/1398] i40e: Do not enable NAPI on q_vectors that have no rings [ Upstream commit 13a8cd191a2b470cfd435b3b57dbd21aa65ff78c ] When testing the epoll w/ busy poll code I found that I could get into a state where the i40e driver had q_vectors w/ active NAPI that had no rings. This was resulting in a divide by zero error. To correct it I am updating the driver code so that we only support NAPI on q_vectors that have 1 or more rings allocated to them. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/i40e/i40e_main.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2caafebb0295..becffd15c092 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4217,8 +4217,12 @@ static void i40e_napi_enable_all(struct i40e_vsi *vsi) if (!vsi->netdev) return; - for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_enable(&vsi->q_vectors[q_idx]->napi); + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_enable(&q_vector->napi); + } } /** @@ -4232,8 +4236,12 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi) if (!vsi->netdev) return; - for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_disable(&vsi->q_vectors[q_idx]->napi); + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_disable(&q_vector->napi); + } } /** -- GitLab From 37f41dac70ca7e7be3fc50769df0ad566c44e389 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 27 Feb 2017 20:16:33 +0200 Subject: [PATCH 0581/1398] RDMA/iser: Fix possible mr leak on device removal event [ Upstream commit ea174c9573b0e0c8bc1a7a90fe9360ccb7aa9cbb ] When the rdma device is removed, we must cleanup all the rdma resources within the DEVICE_REMOVAL event handler to let the device teardown gracefully. When this happens with live I/O, some memory regions are occupied. Thus, track them too and dereg all the mr's. We are safe with mr access by iscsi_iser_cleanup_task. Reported-by: Raju Rangoju Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Max Gurtovoy Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 ++ drivers/infiniband/ulp/iser/iser_verbs.c | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 0be6a7c5ddb5..cb48e22afff7 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -430,6 +430,7 @@ struct iser_fr_desc { struct list_head list; struct iser_reg_resources rsc; struct iser_pi_context *pi_ctx; + struct list_head all_list; }; /** @@ -443,6 +444,7 @@ struct iser_fr_pool { struct list_head list; spinlock_t lock; int size; + struct list_head all_list; }; /** diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index a4b791dfaa1d..bc6f5bb6c524 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -362,6 +362,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, int i, ret; INIT_LIST_HEAD(&fr_pool->list); + INIT_LIST_HEAD(&fr_pool->all_list); spin_lock_init(&fr_pool->lock); fr_pool->size = 0; for (i = 0; i < cmds_max; i++) { @@ -373,6 +374,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, } list_add_tail(&desc->list, &fr_pool->list); + list_add_tail(&desc->all_list, &fr_pool->all_list); fr_pool->size++; } @@ -392,13 +394,13 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) struct iser_fr_desc *desc, *tmp; int i = 0; - if (list_empty(&fr_pool->list)) + if (list_empty(&fr_pool->all_list)) return; iser_info("freeing conn %p fr pool\n", ib_conn); - list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) { - list_del(&desc->list); + list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) { + list_del(&desc->all_list); iser_free_reg_res(&desc->rsc); if (desc->pi_ctx) iser_free_pi_ctx(desc->pi_ctx); -- GitLab From 9e6398184a4d6f6bbca891592e86fac8525fc211 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 25 Mar 2017 01:48:08 +0300 Subject: [PATCH 0582/1398] irda: vlsi_ir: fix check for DMA mapping errors [ Upstream commit 6ac3b77a6ffff7513ff86b684aa256ea01c0e5b5 ] vlsi_alloc_ring() checks for DMA mapping errors by comparing returned address with zero, while pci_dma_mapping_error() should be used. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/irda/vlsi_ir.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c index a0849f49bbec..c0192f97ecc8 100644 --- a/drivers/net/irda/vlsi_ir.c +++ b/drivers/net/irda/vlsi_ir.c @@ -418,8 +418,9 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr memset(rd, 0, sizeof(*rd)); rd->hw = hwmap + i; rd->buf = kmalloc(len, GFP_KERNEL|GFP_DMA); - if (rd->buf == NULL || - !(busaddr = pci_map_single(pdev, rd->buf, len, dir))) { + if (rd->buf) + busaddr = pci_map_single(pdev, rd->buf, len, dir); + if (rd->buf == NULL || pci_dma_mapping_error(pdev, busaddr)) { if (rd->buf) { net_err_ratelimited("%s: failed to create PCI-MAP for %p\n", __func__, rd->buf); @@ -430,8 +431,7 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr rd = r->rd + j; busaddr = rd_get_addr(rd); rd_set_addr_status(rd, 0, 0); - if (busaddr) - pci_unmap_single(pdev, busaddr, len, dir); + pci_unmap_single(pdev, busaddr, len, dir); kfree(rd->buf); rd->buf = NULL; } -- GitLab From 01060acf6aabea3ce3e362b0c29eeb6ab5fdfa63 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 25 Mar 2017 12:09:15 +0800 Subject: [PATCH 0583/1398] netfilter: nfnl_cthelper: fix a race when walk the nf_ct_helper_hash table [ Upstream commit 83d90219a5df8d950855ce73229a97b63605c317 ] The nf_ct_helper_hash table is protected by nf_ct_helper_mutex, while nfct_helper operation is protected by nfnl_lock(NFNL_SUBSYS_CTHELPER). So it's possible that one CPU is walking the nf_ct_helper_hash for cthelper add/get/del, another cpu is doing nf_conntrack_helpers_unregister at the same time. This is dangrous, and may cause use after free error. Note, delete operation will flush all cthelpers added via nfnetlink, so using rcu to do protect is not easy. Now introduce a dummy list to record all the cthelpers added via nfnetlink, then we can walk the dummy list instead of walking the nf_ct_helper_hash. Also, keep nfnl_cthelper_dump_table unchanged, it may be invoked without nfnl_lock(NFNL_SUBSYS_CTHELPER) held. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink_cthelper.c | 177 +++++++++++++---------------- 1 file changed, 81 insertions(+), 96 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index ad65eb548157..28d065394c09 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -32,6 +32,13 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers"); +struct nfnl_cthelper { + struct list_head list; + struct nf_conntrack_helper helper; +}; + +static LIST_HEAD(nfnl_cthelper_list); + static int nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -205,14 +212,16 @@ nfnl_cthelper_create(const struct nlattr * const tb[], struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; + struct nfnl_cthelper *nfcth; int ret; if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) return -EINVAL; - helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL); - if (helper == NULL) + nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL); + if (nfcth == NULL) return -ENOMEM; + helper = &nfcth->helper; ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) @@ -249,11 +258,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[], if (ret < 0) goto err2; + list_add_tail(&nfcth->list, &nfnl_cthelper_list); return 0; err2: kfree(helper->expect_policy); err1: - kfree(helper); + kfree(nfcth); return ret; } @@ -379,7 +389,8 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; struct nf_conntrack_tuple tuple; - int ret = 0, i; + struct nfnl_cthelper *nlcth; + int ret = 0; if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) return -EINVAL; @@ -390,31 +401,22 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, if (ret < 0) return ret; - rcu_read_lock(); - for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) - continue; + if ((tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - if ((tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; - if (nlh->nlmsg_flags & NLM_F_EXCL) { - ret = -EEXIST; - goto err; - } - helper = cur; - break; - } + helper = cur; + break; } - rcu_read_unlock(); if (helper == NULL) ret = nfnl_cthelper_create(tb, &tuple); @@ -422,9 +424,6 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, ret = nfnl_cthelper_update(tb, helper); return ret; -err: - rcu_read_unlock(); - return ret; } static int @@ -588,11 +587,12 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { - int ret = -ENOENT, i; + int ret = -ENOENT; struct nf_conntrack_helper *cur; struct sk_buff *skb2; char *helper_name = NULL; struct nf_conntrack_tuple tuple; + struct nfnl_cthelper *nlcth; bool tuple_set = false; if (nlh->nlmsg_flags & NLM_F_DUMP) { @@ -613,45 +613,39 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; - - skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) { - ret = -ENOMEM; - break; - } + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + ret = -ENOMEM; + break; + } - ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, - NFNL_MSG_TYPE(nlh->nlmsg_type), - NFNL_MSG_CTHELPER_NEW, cur); - if (ret <= 0) { - kfree_skb(skb2); - break; - } + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_CTHELPER_NEW, cur); + if (ret <= 0) { + kfree_skb(skb2); + break; + } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, - MSG_DONTWAIT); - if (ret > 0) - ret = 0; + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; - /* this avoids a loop in nfnetlink. */ - return ret == -EAGAIN ? -ENOBUFS : ret; - } + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; } return ret; } @@ -662,10 +656,10 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, { char *helper_name = NULL; struct nf_conntrack_helper *cur; - struct hlist_node *tmp; struct nf_conntrack_tuple tuple; bool tuple_set = false, found = false; - int i, j = 0, ret; + struct nfnl_cthelper *nlcth, *n; + int j = 0, ret; if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); @@ -678,30 +672,27 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], - hnode) { - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + j++; - j++; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - found = true; - nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); - kfree(cur); - } + found = true; + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + + list_del(&nlcth->list); + kfree(nlcth); } + /* Make sure we return success if we flush and there is no helpers */ return (found || j == 0) ? 0 : -ENOENT; } @@ -750,22 +741,16 @@ static int __init nfnl_cthelper_init(void) static void __exit nfnl_cthelper_exit(void) { struct nf_conntrack_helper *cur; - struct hlist_node *tmp; - int i; + struct nfnl_cthelper *nlcth, *n; nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); - for (i=0; iflags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); - kfree(cur); - } + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(nlcth); } } -- GitLab From b5ed572a1b7d2d95d7bf1ddcac07c1c8961e405c Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Sat, 25 Mar 2017 18:24:36 +0800 Subject: [PATCH 0584/1398] netfilter: nf_nat_snmp: Fix panic when snmp_trap_helper fails to register [ Upstream commit 75c689dca98851d65ef5a27e5ce26b625b68751c ] In the commit 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper"), the snmp_helper is replaced by nf_nat_snmp_hook. So the snmp_helper is never registered. But it still tries to unregister the snmp_helper, it could cause the panic. Now remove the useless snmp_helper and the unregister call in the error handler. Fixes: 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper") Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/nf_nat_snmp_basic.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 5a8f7c360887..53e49f5011d3 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1260,16 +1260,6 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = { .timeout = 180, }; -static struct nf_conntrack_helper snmp_helper __read_mostly = { - .me = THIS_MODULE, - .help = help, - .expect_policy = &snmp_exp_policy, - .name = "snmp", - .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), - .tuple.dst.protonum = IPPROTO_UDP, -}; - static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { .me = THIS_MODULE, .help = help, @@ -1288,17 +1278,10 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { static int __init nf_nat_snmp_basic_init(void) { - int ret = 0; - BUG_ON(nf_nat_snmp_hook != NULL); RCU_INIT_POINTER(nf_nat_snmp_hook, help); - ret = nf_conntrack_helper_register(&snmp_trap_helper); - if (ret < 0) { - nf_conntrack_helper_unregister(&snmp_helper); - return ret; - } - return ret; + return nf_conntrack_helper_register(&snmp_trap_helper); } static void __exit nf_nat_snmp_basic_fini(void) -- GitLab From e0d13153057e3c9782e5f0ebb87ddd502ca58d94 Mon Sep 17 00:00:00 2001 From: "Reizer, Eyal" Date: Sun, 26 Mar 2017 08:53:10 +0000 Subject: [PATCH 0585/1398] ARM: dts: am335x-evmsk: adjust mmc2 param to allow suspend [ Upstream commit 9bcf53f34a2c1cebc45cc12e273dcd5f51fbc099 ] mmc2 used for wl12xx was missing the keep-power-in suspend parameter. As a result the board couldn't reach suspend state. Signed-off-by: Eyal Reizer Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/am335x-evmsk.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index 975c36e332a2..8e6b3938bef9 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -668,6 +668,7 @@ ti,non-removable; bus-width = <4>; cap-power-off-card; + keep-power-in-suspend; pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; -- GitLab From e9a1ba292fffc07d177e340944bb506d57105bce Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 Mar 2017 19:33:09 +0200 Subject: [PATCH 0586/1398] cpufreq: Fix creation of symbolic links to policy directories [ Upstream commit 2f0ba790df51721794c11abc7a076d407392f648 ] The cpufreq core only tries to create symbolic links from CPU directories in sysfs to policy directories in cpufreq_add_dev(), either when a given CPU is registered or when the cpufreq driver is registered, whichever happens first. That is not sufficient, however, because cpufreq_add_dev() may be called for an offline CPU whose policy object has not been created yet and, quite obviously, the symbolic cannot be added in that case. Fix that by making cpufreq_online() attempt to add symbolic links to policy objects for the CPUs in the related_cpus mask of every new policy object created by it. The cpufreq_driver_lock locking around the for_each_cpu() loop in cpufreq_online() is dropped, because it is not necessary and the code is somewhat simpler without it. Moreover, failures to create a symbolic link will not be regarded as hard errors any more and the CPUs without those links will not be taken offline automatically, but that should not be problematic in practice. Reported-and-tested-by: Prashanth Prakash Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 530f255a898b..35e34c0e0429 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -918,11 +918,19 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -static int add_cpu_dev_symlink(struct cpufreq_policy *policy, - struct device *dev) +static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu) { + struct device *dev = get_cpu_device(cpu); + + if (!dev) + return; + + if (cpumask_test_and_set_cpu(cpu, policy->real_cpus)) + return; + dev_dbg(dev, "%s: Adding symlink\n", __func__); - return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); + if (sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq")) + dev_err(dev, "cpufreq symlink creation failed\n"); } static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, @@ -1184,10 +1192,10 @@ static int cpufreq_online(unsigned int cpu) policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; - write_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu(j, policy->related_cpus) + for_each_cpu(j, policy->related_cpus) { per_cpu(cpufreq_cpu_data, j) = policy; - write_unlock_irqrestore(&cpufreq_driver_lock, flags); + add_cpu_dev_symlink(policy, j); + } } else { policy->min = policy->user_policy.min; policy->max = policy->user_policy.max; @@ -1284,13 +1292,15 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_driver->exit) cpufreq_driver->exit(policy); + + for_each_cpu(j, policy->real_cpus) + remove_cpu_dev_symlink(policy, get_cpu_device(j)); + out_free_policy: cpufreq_policy_free(policy, !new_policy); return ret; } -static int cpufreq_offline(unsigned int cpu); - /** * cpufreq_add_dev - the cpufreq interface for a CPU device. * @dev: CPU device. @@ -1312,16 +1322,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* Create sysfs link on CPU registration */ policy = per_cpu(cpufreq_cpu_data, cpu); - if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus)) - return 0; + if (policy) + add_cpu_dev_symlink(policy, cpu); - ret = add_cpu_dev_symlink(policy, dev); - if (ret) { - cpumask_clear_cpu(cpu, policy->real_cpus); - cpufreq_offline(cpu); - } - - return ret; + return 0; } static int cpufreq_offline(unsigned int cpu) -- GitLab From 29c4f517ff5bb2f81750295418c8397755817d4c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 27 Mar 2017 18:00:14 +0100 Subject: [PATCH 0587/1398] net: ipconfig: fix ic_close_devs() use-after-free [ Upstream commit ffefb6f4d6ad699a2b5484241bc46745a53235d0 ] Our chosen ic_dev may be anywhere in our list of ic_devs, and we may free it before attempting to close others. When we compare d->dev and ic_dev->dev, we're potentially dereferencing memory returned to the allocator. This causes KASAN to scream for each subsequent ic_dev we check. As there's a 1-1 mapping between ic_devs and netdevs, we can instead compare d and ic_dev directly, which implicitly handles the !ic_dev case, and avoids the use-after-free. The ic_dev pointer may be stale, but we will not dereference it. Original splat: [ 6.487446] ================================================================== [ 6.494693] BUG: KASAN: use-after-free in ic_close_devs+0xc4/0x154 at addr ffff800367efa708 [ 6.503013] Read of size 8 by task swapper/0/1 [ 6.507452] CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc3-00002-gda42158 #8 [ 6.514993] Hardware name: AppliedMicro Mustang/Mustang, BIOS 3.05.05-beta_rc Jan 27 2016 [ 6.523138] Call trace: [ 6.525590] [] dump_backtrace+0x0/0x570 [ 6.530976] [] show_stack+0x20/0x30 [ 6.536017] [] dump_stack+0x120/0x188 [ 6.541231] [] kasan_object_err+0x24/0xa0 [ 6.546790] [] kasan_report_error+0x244/0x738 [ 6.552695] [] __asan_report_load8_noabort+0x54/0x80 [ 6.559204] [] ic_close_devs+0xc4/0x154 [ 6.564590] [] ip_auto_config+0x2ed4/0x2f1c [ 6.570321] [] do_one_initcall+0xcc/0x370 [ 6.575882] [] kernel_init_freeable+0x5f8/0x6c4 [ 6.581959] [] kernel_init+0x18/0x190 [ 6.587171] [] ret_from_fork+0x10/0x40 [ 6.592468] Object at ffff800367efa700, in cache kmalloc-128 size: 128 [ 6.598969] Allocated: [ 6.601324] PID = 1 [ 6.603427] save_stack_trace_tsk+0x0/0x418 [ 6.607603] save_stack_trace+0x20/0x30 [ 6.611430] kasan_kmalloc+0xd8/0x188 [ 6.615087] ip_auto_config+0x8c4/0x2f1c [ 6.619002] do_one_initcall+0xcc/0x370 [ 6.622832] kernel_init_freeable+0x5f8/0x6c4 [ 6.627178] kernel_init+0x18/0x190 [ 6.630660] ret_from_fork+0x10/0x40 [ 6.634223] Freed: [ 6.636233] PID = 1 [ 6.638334] save_stack_trace_tsk+0x0/0x418 [ 6.642510] save_stack_trace+0x20/0x30 [ 6.646337] kasan_slab_free+0x88/0x178 [ 6.650167] kfree+0xb8/0x478 [ 6.653131] ic_close_devs+0x130/0x154 [ 6.656875] ip_auto_config+0x2ed4/0x2f1c [ 6.660875] do_one_initcall+0xcc/0x370 [ 6.664705] kernel_init_freeable+0x5f8/0x6c4 [ 6.669051] kernel_init+0x18/0x190 [ 6.672534] ret_from_fork+0x10/0x40 [ 6.676098] Memory state around the buggy address: [ 6.680880] ffff800367efa600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 6.688078] ffff800367efa680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 6.695276] >ffff800367efa700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 6.702469] ^ [ 6.705952] ffff800367efa780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 6.713149] ffff800367efa800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 6.720343] ================================================================== [ 6.727536] Disabling lock debugging due to kernel taint Signed-off-by: Mark Rutland Cc: Alexey Kuznetsov Cc: David S. Miller Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Patrick McHardy Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 071a785c65eb..b23464d9c538 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -306,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) { + if (d != ic_dev && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } -- GitLab From 808ed3bd9d42823e00ab2d3dc4c9899437fa3c8a Mon Sep 17 00:00:00 2001 From: "Herongguang (Stephen)" Date: Mon, 27 Mar 2017 15:21:17 +0800 Subject: [PATCH 0588/1398] KVM: pci-assign: do not map smm memory slot pages in vt-d page tables [ Upstream commit 0292e169b2d9c8377a168778f0b16eadb1f578fd ] or VM memory are not put thus leaked in kvm_iommu_unmap_memslots() when destroy VM. This is consistent with current vfio implementation. Signed-off-by: herongguang Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4569fdcab701..68c078e47811 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1060,7 +1060,7 @@ int __kvm_set_memory_region(struct kvm *kvm, * changes) is disallowed above, so any other attribute changes getting * here can be skipped. */ - if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { + if (as_id == 0 && (change == KVM_MR_CREATE || change == KVM_MR_MOVE)) { r = kvm_iommu_map_pages(kvm, &new); return r; } -- GitLab From c6f9090929dca3fd1208e636f60860f4a7e421a2 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Tue, 28 Mar 2017 18:46:58 +0200 Subject: [PATCH 0589/1398] virtio-balloon: use actual number of stats for stats queue buffers [ Upstream commit 9646b26e85896ef0256e66649f7937f774dc18a6 ] The virtio balloon driver contained a not-so-obvious invariant that update_balloon_stats has to update exactly VIRTIO_BALLOON_S_NR counters in order to send valid stats to the host. This commit fixes it by having update_balloon_stats return the actual number of counters, and its callers use it when pushing buffers to the stats virtqueue. Note that it is still out of spec to change the number of counters at run-time. "Driver MUST supply the same subset of statistics in all buffers submitted to the statsq." Suggested-by: Arnd Bergmann Signed-off-by: Ladi Prosek Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_balloon.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 2c2e6792f7e0..07b07b12ef90 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -241,11 +241,11 @@ static inline void update_stat(struct virtio_balloon *vb, int idx, #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT) -static void update_balloon_stats(struct virtio_balloon *vb) +static unsigned int update_balloon_stats(struct virtio_balloon *vb) { unsigned long events[NR_VM_EVENT_ITEMS]; struct sysinfo i; - int idx = 0; + unsigned int idx = 0; long available; all_vm_events(events); @@ -265,6 +265,8 @@ static void update_balloon_stats(struct virtio_balloon *vb) pages_to_bytes(i.totalram)); update_stat(vb, idx++, VIRTIO_BALLOON_S_AVAIL, pages_to_bytes(available)); + + return idx; } /* @@ -290,14 +292,14 @@ static void stats_handle_request(struct virtio_balloon *vb) { struct virtqueue *vq; struct scatterlist sg; - unsigned int len; + unsigned int len, num_stats; - update_balloon_stats(vb); + num_stats = update_balloon_stats(vb); vq = vb->stats_vq; if (!virtqueue_get_buf(vq, &len)) return; - sg_init_one(&sg, vb->stats, sizeof(vb->stats)); + sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); virtqueue_kick(vq); } @@ -421,15 +423,16 @@ static int init_vqs(struct virtio_balloon *vb) vb->deflate_vq = vqs[1]; if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { struct scatterlist sg; + unsigned int num_stats; vb->stats_vq = vqs[2]; /* * Prime this virtqueue with one buffer so the hypervisor can * use it to signal us later (it can't be broken yet!). */ - update_balloon_stats(vb); + num_stats = update_balloon_stats(vb); - sg_init_one(&sg, vb->stats, sizeof vb->stats); + sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) < 0) BUG(); -- GitLab From bb011a45138765020c1322b4d1b7b8b640da9c38 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Mar 2017 18:46:59 +0200 Subject: [PATCH 0590/1398] virtio_balloon: prevent uninitialized variable use [ Upstream commit f0bb2d50dfcc519f06f901aac88502be6ff1df2c ] The latest gcc-7.0.1 snapshot reports a new warning: virtio/virtio_balloon.c: In function 'update_balloon_stats': virtio/virtio_balloon.c:258:26: error: 'events[2]' is used uninitialized in this function [-Werror=uninitialized] virtio/virtio_balloon.c:260:26: error: 'events[3]' is used uninitialized in this function [-Werror=uninitialized] virtio/virtio_balloon.c:261:56: error: 'events[18]' is used uninitialized in this function [-Werror=uninitialized] virtio/virtio_balloon.c:262:56: error: 'events[17]' is used uninitialized in this function [-Werror=uninitialized] This seems absolutely right, so we should add an extra check to prevent copying uninitialized stack data into the statistics. >From all I can tell, this has been broken since the statistics code was originally added in 2.6.34. Fixes: 9564e138b1f6 ("virtio: Add memory statistics reporting to the balloon driver (V4)") Signed-off-by: Arnd Bergmann Signed-off-by: Ladi Prosek Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 07b07b12ef90..a7c08cc4c1b7 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -253,12 +253,14 @@ static unsigned int update_balloon_stats(struct virtio_balloon *vb) available = si_mem_available(); +#ifdef CONFIG_VM_EVENT_COUNTERS update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, pages_to_bytes(events[PSWPIN])); update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, pages_to_bytes(events[PSWPOUT])); update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); +#endif update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, pages_to_bytes(i.freeram)); update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, -- GitLab From ce19146a0de06d512c6565e7374f01335d87bf65 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Mar 2017 12:11:07 +0200 Subject: [PATCH 0591/1398] isdn: kcapi: avoid uninitialized data [ Upstream commit af109a2cf6a9a6271fa420ae2d64d72d86c92b7d ] gcc-7 points out that the AVMB1_ADDCARD ioctl results in an unintialized value ending up in the cardnr parameter: drivers/isdn/capi/kcapi.c: In function 'old_capi_manufacturer': drivers/isdn/capi/kcapi.c:1042:24: error: 'cdef.cardnr' may be used uninitialized in this function [-Werror=maybe-uninitialized] cparams.cardnr = cdef.cardnr; This has been broken since before the start of the git history, so either the value is not used for anything important, or the ioctl command doesn't get called in practice. Setting the cardnr to zero avoids the warning and makes sure we have consistent behavior. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/capi/kcapi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 823f6985b260..dd7e38ac29bd 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -1032,6 +1032,7 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data) sizeof(avmb1_carddef)))) return -EFAULT; cdef.cardtype = AVM_CARDTYPE_B1; + cdef.cardnr = 0; } else { if ((retval = copy_from_user(&cdef, data, sizeof(avmb1_extcarddef)))) -- GitLab From 55b6a5d080aaf42aad34a4866c27a185b91f7a26 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Tue, 28 Mar 2017 12:12:38 +0200 Subject: [PATCH 0592/1398] net: moxa: fix TX overrun memory leak [ Upstream commit c2b341a620018d4eaeb0e85c16274ac4e5f153d4 ] moxart_mac_start_xmit() doesn't care where tx_tail is, tx_head can catch and pass tx_tail, which is bad because moxart_tx_finished() isn't guaranteed to catch up on freeing resources from tx_tail. Add a check in moxart_mac_start_xmit() stopping the queue at the end of the circular buffer. Also add a check in moxart_tx_finished() waking the queue if the buffer has TX_WAKE_THRESHOLD or more free descriptors. While we're at it, move spin_lock_irq() to happen before our descriptor pointer is assigned in moxart_mac_start_xmit(). Addresses https://bugzilla.kernel.org/show_bug.cgi?id=99451 Signed-off-by: Jonas Jensen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/moxa/moxart_ether.c | 20 ++++++++++++++++++-- drivers/net/ethernet/moxa/moxart_ether.h | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 4367dd6879a2..0622fd03941b 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "moxart_ether.h" @@ -278,6 +279,13 @@ static int moxart_rx_poll(struct napi_struct *napi, int budget) return rx; } +static int moxart_tx_queue_space(struct net_device *ndev) +{ + struct moxart_mac_priv_t *priv = netdev_priv(ndev); + + return CIRC_SPACE(priv->tx_head, priv->tx_tail, TX_DESC_NUM); +} + static void moxart_tx_finished(struct net_device *ndev) { struct moxart_mac_priv_t *priv = netdev_priv(ndev); @@ -297,6 +305,9 @@ static void moxart_tx_finished(struct net_device *ndev) tx_tail = TX_NEXT(tx_tail); } priv->tx_tail = tx_tail; + if (netif_queue_stopped(ndev) && + moxart_tx_queue_space(ndev) >= TX_WAKE_THRESHOLD) + netif_wake_queue(ndev); } static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id) @@ -324,13 +335,18 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct moxart_mac_priv_t *priv = netdev_priv(ndev); void *desc; unsigned int len; - unsigned int tx_head = priv->tx_head; + unsigned int tx_head; u32 txdes1; int ret = NETDEV_TX_BUSY; + spin_lock_irq(&priv->txlock); + + tx_head = priv->tx_head; desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head); - spin_lock_irq(&priv->txlock); + if (moxart_tx_queue_space(ndev) == 1) + netif_stop_queue(ndev); + if (moxart_desc_read(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) { net_dbg_ratelimited("no TX space for packet\n"); priv->stats.tx_dropped++; diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h index 93a9563ac7c6..afc32ec998c0 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.h +++ b/drivers/net/ethernet/moxa/moxart_ether.h @@ -59,6 +59,7 @@ #define TX_NEXT(N) (((N) + 1) & (TX_DESC_NUM_MASK)) #define TX_BUF_SIZE 1600 #define TX_BUF_SIZE_MAX (TX_DESC1_BUF_SIZE_MASK+1) +#define TX_WAKE_THRESHOLD 16 #define RX_DESC_NUM 64 #define RX_DESC_NUM_MASK (RX_DESC_NUM-1) -- GitLab From 54420c1ac4211cba7ac57e14a0971d4ad3105729 Mon Sep 17 00:00:00 2001 From: Adam Wallis Date: Tue, 28 Mar 2017 15:55:28 +0300 Subject: [PATCH 0593/1398] xhci: plat: Register shutdown for xhci_plat [ Upstream commit b07c12517f2aed0add8ce18146bb426b14099392 ] Shutdown should be called for xhci_plat devices especially for situations where kexec might be used by stopping DMA transactions. Signed-off-by: Adam Wallis Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index ca8b0b1ae37d..dec100811946 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -335,6 +335,7 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match); static struct platform_driver usb_xhci_driver = { .probe = xhci_plat_probe, .remove = xhci_plat_remove, + .shutdown = usb_hcd_platform_shutdown, .driver = { .name = "xhci-hcd", .pm = DEV_PM_OPS, -- GitLab From 0708a476810d1a37422cb84ed3367be68045478f Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 28 Mar 2017 22:59:25 +0800 Subject: [PATCH 0594/1398] netfilter: nfnetlink_queue: fix secctx memory leak [ Upstream commit 77c1c03c5b8ef28e55bb0aff29b1e006037ca645 ] We must call security_release_secctx to free the memory returned by security_secid_to_secctx, otherwise memory may be leaked forever. Fixes: ef493bd930ae ("netfilter: nfnetlink_queue: add security context information") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink_queue.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index af832c526048..5efb40291ac3 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -443,7 +443,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { skb_tx_error(entskb); - return NULL; + goto nlmsg_failure; } nlh = nlmsg_put(skb, 0, 0, @@ -452,7 +452,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (!nlh) { skb_tx_error(entskb); kfree_skb(skb); - return NULL; + goto nlmsg_failure; } nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = entry->state.pf; @@ -598,12 +598,17 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } nlh->nlmsg_len = skb->len; + if (seclen) + security_release_secctx(secdata, seclen); return skb; nla_put_failure: skb_tx_error(entskb); kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); +nlmsg_failure: + if (seclen) + security_release_secctx(secdata, seclen); return NULL; } -- GitLab From 9c1433b5dd2b4f7bef192bc07d976f11616d7a29 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 17 Mar 2017 23:51:20 +0300 Subject: [PATCH 0595/1398] Btrfs: fix an integer overflow check [ Upstream commit 457ae7268b29c33dee1c0feb143a15f6029d177b ] This isn't super serious because you need CAP_ADMIN to run this code. I added this integer overflow check last year but apparently I am rubbish at writing integer overflow checks... There are two issues. First, access_ok() works on unsigned long type and not u64 so on 32 bit systems the access_ok() could be checking a truncated size. The other issue is that we should be using a stricter limit so we don't overflow the kzalloc() setting ctx->clone_roots later in the function after the access_ok(): alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1); sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN); Fixes: f5ecec3ce21f ("btrfs: send: silence an integer overflow warning") Signed-off-by: Dan Carpenter Reviewed-by: David Sterba [ added comment ] Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 77f9efc1f7aa..9a47b5598df7 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6196,8 +6196,13 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) goto out; } + /* + * Check that we don't overflow at later allocations, we request + * clone_sources_count + 1 items, and compare to unsigned long inside + * access_ok. + */ if (arg->clone_sources_count > - ULLONG_MAX / sizeof(*arg->clone_sources)) { + ULONG_MAX / sizeof(struct clone_root) - 1) { ret = -EINVAL; goto out; } -- GitLab From 5460e4672b81980a2f05ab3e9bb64d9303e11215 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 29 Mar 2017 17:12:47 +0100 Subject: [PATCH 0596/1398] ARM: dma-mapping: disallow dma_get_sgtable() for non-kernel managed memory [ Upstream commit 916a008b4b8ecc02fbd035cfb133773dba1ff3d7 ] dma_get_sgtable() tries to create a scatterlist table containing valid struct page pointers for the coherent memory allocation passed in to it. However, memory can be declared via dma_declare_coherent_memory(), or via other reservation schemes which means that coherent memory is not guaranteed to be backed by struct pages. In such cases, the resulting scatterlist table contains pointers to invalid pages, which causes kernel oops later. This patch adds detection of such memory, and refuses to create a scatterlist table for such memory. Reported-by: Shuah Khan Signed-off-by: Russell King Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/dma-mapping.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ab7710002ba6..00e9e79b6cb8 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -930,13 +930,31 @@ static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_add __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); } +/* + * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems + * that the intention is to allow exporting memory allocated via the + * coherent DMA APIs through the dma_buf API, which only accepts a + * scattertable. This presents a couple of problems: + * 1. Not all memory allocated via the coherent DMA APIs is backed by + * a struct page + * 2. Passing coherent DMA memory into the streaming APIs is not allowed + * as we will try to flush the memory through a different alias to that + * actually being used (and the flushes are redundant.) + */ int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t handle, size_t size, unsigned long attrs) { - struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); + unsigned long pfn = dma_to_pfn(dev, handle); + struct page *page; int ret; + /* If the PFN is not valid, we do not have a struct page */ + if (!pfn_valid(pfn)) + return -ENXIO; + + page = pfn_to_page(pfn); + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); if (unlikely(ret)) return ret; -- GitLab From 9e1771368a9835ba21999bebf58496428d7d5443 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Thu, 23 Mar 2017 20:52:46 +0530 Subject: [PATCH 0597/1398] cpuidle: powernv: Pass correct drv->cpumask for registration [ Upstream commit 293d264f13cbde328d5477f49e3103edbc1dc191 ] drv->cpumask defaults to cpu_possible_mask in __cpuidle_driver_init(). On PowerNV platform cpu_present could be less than cpu_possible in cases where firmware detects the cpu, but it is not available to the OS. When CONFIG_HOTPLUG_CPU=n, such cpus are not hotplugable at runtime and hence we skip creating cpu_device. This breaks cpuidle on powernv where register_cpu() is not called for cpus in cpu_possible_mask that cannot be hot-added at runtime. Trying cpuidle_register_device() on cpu without cpu_device will cause crash like this: cpu 0xf: Vector: 380 (Data SLB Access) at [c000000ff1503490] pc: c00000000022c8bc: string+0x34/0x60 lr: c00000000022ed78: vsnprintf+0x284/0x42c sp: c000000ff1503710 msr: 9000000000009033 dar: 6000000060000000 current = 0xc000000ff1480000 paca = 0xc00000000fe82d00 softe: 0 irq_happened: 0x01 pid = 1, comm = swapper/8 Linux version 4.11.0-rc2 (sv@sagarika) (gcc version 4.9.4 (Buildroot 2017.02-00004-gc28573e) ) #15 SMP Fri Mar 17 19:32:02 IST 2017 enter ? for help [link register ] c00000000022ed78 vsnprintf+0x284/0x42c [c000000ff1503710] c00000000022ebb8 vsnprintf+0xc4/0x42c (unreliable) [c000000ff1503800] c00000000022ef40 vscnprintf+0x20/0x44 [c000000ff1503830] c0000000000ab61c vprintk_emit+0x94/0x2cc [c000000ff15038a0] c0000000000acc9c vprintk_func+0x60/0x74 [c000000ff15038c0] c000000000619694 printk+0x38/0x4c [c000000ff15038e0] c000000000224950 kobject_get+0x40/0x60 [c000000ff1503950] c00000000022507c kobject_add_internal+0x60/0x2c4 [c000000ff15039e0] c000000000225350 kobject_init_and_add+0x70/0x78 [c000000ff1503a60] c00000000053c288 cpuidle_add_sysfs+0x9c/0xe0 [c000000ff1503ae0] c00000000053aeac cpuidle_register_device+0xd4/0x12c [c000000ff1503b30] c00000000053b108 cpuidle_register+0x98/0xcc [c000000ff1503bc0] c00000000085eaf0 powernv_processor_idle_init+0x140/0x1e0 [c000000ff1503c60] c00000000000cd60 do_one_initcall+0xc0/0x15c [c000000ff1503d20] c000000000833e84 kernel_init_freeable+0x1a0/0x25c [c000000ff1503dc0] c00000000000d478 kernel_init+0x24/0x12c [c000000ff1503e30] c00000000000b564 ret_from_kernel_thread+0x5c/0x78 This patch fixes the bug by passing correct cpumask from powernv-cpuidle driver. Signed-off-by: Vaidyanathan Srinivasan Reviewed-by: Gautham R. Shenoy Acked-by: Michael Ellerman [ rjw: Comment massage ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/cpuidle-powernv.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 7fe442ca38f4..854a56781100 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -164,6 +164,24 @@ static int powernv_cpuidle_driver_init(void) drv->state_count += 1; } + /* + * On the PowerNV platform cpu_present may be less than cpu_possible in + * cases when firmware detects the CPU, but it is not available to the + * OS. If CONFIG_HOTPLUG_CPU=n, then such CPUs are not hotplugable at + * run time and hence cpu_devices are not created for those CPUs by the + * generic topology_init(). + * + * drv->cpumask defaults to cpu_possible_mask in + * __cpuidle_driver_init(). This breaks cpuidle on PowerNV where + * cpu_devices are not created for CPUs in cpu_possible_mask that + * cannot be hot-added later at run time. + * + * Trying cpuidle_register_device() on a CPU without a cpu_device is + * incorrect, so pass a correct CPU mask to the generic cpuidle driver. + */ + + drv->cpumask = (struct cpumask *)cpu_present_mask; + return 0; } -- GitLab From d14718c9f434b27af41f5ecabc382d89ddc861cd Mon Sep 17 00:00:00 2001 From: Sankar Patchineelam Date: Tue, 28 Mar 2017 19:47:29 -0400 Subject: [PATCH 0598/1398] bnxt_en: Fix NULL pointer dereference in reopen failure path [ Upstream commit 2247925f0942dc4e7c09b1cde45ca18461d94c5f ] Net device reset can fail when the h/w or f/w is in a bad state. Subsequent netdevice open fails in bnxt_hwrm_stat_ctx_alloc(). The cleanup invokes bnxt_hwrm_resource_free() which inturn calls bnxt_disable_int(). In this routine, the code segment if (ring->fw_ring_id != INVALID_HW_RING_ID) BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons); results in NULL pointer dereference as cpr->cp_doorbell is not yet initialized, and fw_ring_id is zero. The fix is to initialize cpr fw_ring_id to INVALID_HW_RING_ID before bnxt_init_chip() is invoked. Signed-off-by: Sankar Patchineelam Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0b894d76aa41..bbb3641eddcb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2381,6 +2381,18 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr) return 0; } +static void bnxt_init_cp_rings(struct bnxt *bp) +{ + int i; + + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring; + struct bnxt_ring_struct *ring = &cpr->cp_ring_struct; + + ring->fw_ring_id = INVALID_HW_RING_ID; + } +} + static int bnxt_init_rx_rings(struct bnxt *bp) { int i, rc = 0; @@ -4700,6 +4712,7 @@ static int bnxt_shutdown_nic(struct bnxt *bp, bool irq_re_init) static int bnxt_init_nic(struct bnxt *bp, bool irq_re_init) { + bnxt_init_cp_rings(bp); bnxt_init_rx_rings(bp); bnxt_init_tx_rings(bp); bnxt_init_ring_grps(bp, irq_re_init); -- GitLab From 291c7e488f64acb314d34f7081f6aca31b06894b Mon Sep 17 00:00:00 2001 From: Derek Basehore Date: Tue, 29 Aug 2017 13:34:34 -0700 Subject: [PATCH 0599/1398] backlight: pwm_bl: Fix overflow condition [ Upstream commit 5d0c49acebc9488e37db95f1d4a55644e545ffe7 ] This fixes an overflow condition that can happen with high max brightness and period values in compute_duty_cycle. This fixes it by using a 64 bit variable for computing the duty cycle. Signed-off-by: Derek Basehore Acked-by: Thierry Reding Reviewed-by: Brian Norris Signed-off-by: Lee Jones Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/backlight/pwm_bl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index 12614006211e..d95ae092f154 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -79,14 +79,17 @@ static void pwm_backlight_power_off(struct pwm_bl_data *pb) static int compute_duty_cycle(struct pwm_bl_data *pb, int brightness) { unsigned int lth = pb->lth_brightness; - int duty_cycle; + u64 duty_cycle; if (pb->levels) duty_cycle = pb->levels[brightness]; else duty_cycle = brightness; - return (duty_cycle * (pb->period - lth) / pb->scale) + lth; + duty_cycle *= pb->period - lth; + do_div(duty_cycle, pb->scale); + + return duty_cycle + lth; } static int pwm_backlight_update_status(struct backlight_device *bl) -- GitLab From 1525e330d6464fd28c4b26b8490cf910d2ccff99 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Wed, 4 Oct 2017 01:00:08 +0200 Subject: [PATCH 0600/1398] crypto: crypto4xx - increase context and scatter ring buffer elements [ Upstream commit 778f81d6cdb7d25360f082ac0384d5103f04eca5 ] If crypto4xx is used in conjunction with dm-crypt, the available ring buffer elements are not enough to handle the load properly. On an aes-cbc-essiv:sha256 encrypted swap partition the read performance is abyssal: (tested with hdparm -t) /dev/mapper/swap_crypt: Timing buffered disk reads: 14 MB in 3.68 seconds = 3.81 MB/sec The patch increases both PPC4XX_NUM_SD and PPC4XX_NUM_PD to 256. This improves the performance considerably: /dev/mapper/swap_crypt: Timing buffered disk reads: 104 MB in 3.03 seconds = 34.31 MB/sec Furthermore, PPC4XX_LAST_SD, PPC4XX_LAST_GD and PPC4XX_LAST_PD can be easily calculated from their respective PPC4XX_NUM_* constant. Signed-off-by: Christian Lamparter Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/amcc/crypto4xx_core.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h index ecfdcfe3698d..4f41d6da5acc 100644 --- a/drivers/crypto/amcc/crypto4xx_core.h +++ b/drivers/crypto/amcc/crypto4xx_core.h @@ -34,12 +34,12 @@ #define PPC405EX_CE_RESET 0x00000008 #define CRYPTO4XX_CRYPTO_PRIORITY 300 -#define PPC4XX_LAST_PD 63 -#define PPC4XX_NUM_PD 64 -#define PPC4XX_LAST_GD 1023 +#define PPC4XX_NUM_PD 256 +#define PPC4XX_LAST_PD (PPC4XX_NUM_PD - 1) #define PPC4XX_NUM_GD 1024 -#define PPC4XX_LAST_SD 63 -#define PPC4XX_NUM_SD 64 +#define PPC4XX_LAST_GD (PPC4XX_NUM_GD - 1) +#define PPC4XX_NUM_SD 256 +#define PPC4XX_LAST_SD (PPC4XX_NUM_SD - 1) #define PPC4XX_SD_BUFFER_SIZE 2048 #define PD_ENTRY_INUSE 1 -- GitLab From bdb33bb5e2cea11bb94907e6ca9666c067fc9fd3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 29 Sep 2017 11:22:15 +0100 Subject: [PATCH 0601/1398] rtc: pl031: make interrupt optional [ Upstream commit 5b64a2965dfdfca8039e93303c64e2b15c19ff0c ] On some platforms, the interrupt for the PL031 is optional. Avoid trying to claim the interrupt if it's not specified. Reviewed-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-pl031.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index e1687e19c59f..a30f24cb6c83 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -308,7 +308,8 @@ static int pl031_remove(struct amba_device *adev) dev_pm_clear_wake_irq(&adev->dev); device_init_wakeup(&adev->dev, false); - free_irq(adev->irq[0], ldata); + if (adev->irq[0]) + free_irq(adev->irq[0], ldata); rtc_device_unregister(ldata->rtc); iounmap(ldata->base); kfree(ldata); @@ -381,12 +382,13 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id) goto out_no_rtc; } - if (request_irq(adev->irq[0], pl031_interrupt, - vendor->irqflags, "rtc-pl031", ldata)) { - ret = -EIO; - goto out_no_irq; + if (adev->irq[0]) { + ret = request_irq(adev->irq[0], pl031_interrupt, + vendor->irqflags, "rtc-pl031", ldata); + if (ret) + goto out_no_irq; + dev_pm_set_wake_irq(&adev->dev, adev->irq[0]); } - dev_pm_set_wake_irq(&adev->dev, adev->irq[0]); return 0; out_no_irq: -- GitLab From 206e1621ba721bf4bb4b71938a867ba5b8305bcb Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 5 Oct 2017 18:07:24 -0700 Subject: [PATCH 0602/1398] kvm, mm: account kvm related kmem slabs to kmemcg [ Upstream commit 46bea48ac241fe0b413805952dda74dd0c09ba8b ] The kvm slabs can consume a significant amount of system memory and indeed in our production environment we have observed that a lot of machines are spending significant amount of memory that can not be left as system memory overhead. Also the allocations from these slabs can be triggered directly by user space applications which has access to kvm and thus a buggy application can leak such memory. So, these caches should be accounted to kmemcg. Signed-off-by: Shakeel Butt Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 4 ++-- virt/kvm/kvm_main.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d29c745f10ad..0a324e120942 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5052,13 +5052,13 @@ int kvm_mmu_module_init(void) { pte_list_desc_cache = kmem_cache_create("pte_list_desc", sizeof(struct pte_list_desc), - 0, 0, NULL); + 0, SLAB_ACCOUNT, NULL); if (!pte_list_desc_cache) goto nomem; mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", sizeof(struct kvm_mmu_page), - 0, 0, NULL); + 0, SLAB_ACCOUNT, NULL); if (!mmu_page_header_cache) goto nomem; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 68c078e47811..1b20768e781d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3904,7 +3904,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, if (!vcpu_align) vcpu_align = __alignof__(struct kvm_vcpu); kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, - 0, NULL); + SLAB_ACCOUNT, NULL); if (!kvm_vcpu_cache) { r = -ENOMEM; goto out_free_3; -- GitLab From d3469e6166686f35892f7bea52e6a8938e942c9c Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Tue, 10 Oct 2017 12:42:56 -0500 Subject: [PATCH 0603/1398] net: phy: at803x: Change error to EINVAL for invalid MAC [ Upstream commit fc7556877d1748ac00958822a0a3bba1d4bd9e0d ] Change the return error code to EINVAL if the MAC address is not valid in the set_wol function. Signed-off-by: Dan Murphy Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/at803x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index a52b560e428b..3603eec7217f 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -166,7 +166,7 @@ static int at803x_set_wol(struct phy_device *phydev, mac = (const u8 *) ndev->dev_addr; if (!is_valid_ether_addr(mac)) - return -EFAULT; + return -EINVAL; for (i = 0; i < 3; i++) { phy_write(phydev, AT803X_MMD_ACCESS_CONTROL, -- GitLab From 241833a3a90da1b82d7d93e223da8d60a5ebfffd Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 8 Sep 2017 10:10:31 +0200 Subject: [PATCH 0604/1398] PCI: Avoid bus reset if bridge itself is broken [ Upstream commit 357027786f3523d26f42391aa4c075b8495e5d28 ] When checking to see if a PCI bus can safely be reset, we previously checked to see if any of the children had their PCI_DEV_FLAGS_NO_BUS_RESET flag set. Children marked with that flag are known not to behave well after a bus reset. Some PCIe root port bridges also do not behave well after a bus reset, sometimes causing the devices behind the bridge to become unusable. Add a check for PCI_DEV_FLAGS_NO_BUS_RESET being set in the bridge device to allow these bridges to be flagged, and prevent their secondary buses from being reset. Signed-off-by: David Daney [jglauber@cavium.com: fixed typo] Signed-off-by: Jan Glauber Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Williamson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e7d4048e81f2..a87c8e1aef68 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4214,6 +4214,10 @@ static bool pci_bus_resetable(struct pci_bus *bus) { struct pci_dev *dev; + + if (bus->self && (bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET)) + return false; + list_for_each_entry(dev, &bus->devices, bus_list) { if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET || (dev->subordinate && !pci_bus_resetable(dev->subordinate))) -- GitLab From fd1d9dccc01d2a72d9e2fabb9903615b7956e55a Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 11 Oct 2017 19:33:07 +0530 Subject: [PATCH 0605/1398] scsi: cxgb4i: fix Tx skb leak [ Upstream commit 9b3a081fb62158b50bcc90522ca2423017544367 ] In case of connection reset Tx skb queue can have some skbs which are not transmitted so purge Tx skb queue in release_offload_resources() to avoid skb leak. Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index 0039bebaa9e2..358ec32927ba 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -1347,6 +1347,7 @@ static void release_offload_resources(struct cxgbi_sock *csk) csk, csk->state, csk->flags, csk->tid); cxgbi_sock_free_cpl_skbs(csk); + cxgbi_sock_purge_write_queue(csk); if (csk->wr_cred != csk->wr_max_cred) { cxgbi_sock_purge_wr_queue(csk); cxgbi_sock_reset_wr_list(csk); -- GitLab From b40eeea31afd1c07f37734fa19ef1c45f5e98011 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 10 Oct 2017 18:41:18 +0530 Subject: [PATCH 0606/1398] scsi: mpt3sas: Fix IO error occurs on pulling out a drive from RAID1 volume created on two SATA drive [ Upstream commit 2ce9a3645299ba1752873d333d73f67620f4550b ] Whenever an I/O for a RAID volume fails with IOCStatus MPI2_IOCSTATUS_SCSI_IOC_TERMINATED and SCSIStatus equal to (MPI2_SCSI_STATE_TERMINATED | MPI2_SCSI_STATE_NO_SCSI_STATUS) then return the I/O to SCSI midlayer with "DID_RESET" (i.e. retry the IO infinite times) set in the host byte. Previously, the driver was completing the I/O with "DID_SOFT_ERROR" which causes the I/O to be quickly retried. However, firmware needed more time and hence I/Os were failing. Signed-off-by: Sreekanth Reddy Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 289374cbcb47..468acab04d3d 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -4770,6 +4770,11 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) } else if (log_info == VIRTUAL_IO_FAILED_RETRY) { scmd->result = DID_RESET << 16; break; + } else if ((scmd->device->channel == RAID_CHANNEL) && + (scsi_state == (MPI2_SCSI_STATE_TERMINATED | + MPI2_SCSI_STATE_NO_SCSI_STATUS))) { + scmd->result = DID_RESET << 16; + break; } scmd->result = DID_SOFT_ERROR << 16; break; -- GitLab From 164a941c031bc32e990abaa042367047d6d97975 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Wed, 4 Oct 2017 10:57:52 -0500 Subject: [PATCH 0607/1398] PCI: Create SR-IOV virtfn/physfn links before attaching driver [ Upstream commit 27d6162944b9b34c32cd5841acd21786637ee743 ] When creating virtual functions, create the "virtfn%u" and "physfn" links in sysfs *before* attaching the driver instead of after. When we attach the driver to the new virtual network interface first, there is a race when the driver attaches to the new sends out an "add" udev event, and the network interface naming software (biosdevname or systemd, for example) tries to look at these links. Signed-off-by: Stuart Hayes Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/iov.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 47227820406d..1d32fe2d97aa 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -164,7 +164,6 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) pci_device_add(virtfn, virtfn->bus); mutex_unlock(&iov->dev->sriov->lock); - pci_bus_add_device(virtfn); sprintf(buf, "virtfn%u", id); rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf); if (rc) @@ -175,6 +174,8 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE); + pci_bus_add_device(virtfn); + return 0; failed2: -- GitLab From c236525bae023e43121ce1f1672aa100629e1c72 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 29 Sep 2017 14:39:49 -0300 Subject: [PATCH 0608/1398] PM / OPP: Move error message to debug level [ Upstream commit 035ed07208dc501d023873447113f3f178592156 ] On some i.MX6 platforms which do not have speed grading check, opp table will not be created in platform code, so cpufreq driver prints the following error message: cpu cpu0: dev_pm_opp_get_opp_count: OPP table not found (-19) However, this is not really an error in this case because the imx6q-cpufreq driver first calls dev_pm_opp_get_opp_count() and if it fails, it means that platform code does not provide OPP and then dev_pm_opp_of_add_table() will be called. In order to avoid such confusing error message, move it to debug level. It is up to the caller of dev_pm_opp_get_opp_count() to check its return value and decide if it will print an error or not. Signed-off-by: Fabio Estevam Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/opp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 6441dfda489f..a7c5b79371a7 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -331,7 +331,7 @@ int dev_pm_opp_get_opp_count(struct device *dev) opp_table = _find_opp_table(dev); if (IS_ERR(opp_table)) { count = PTR_ERR(opp_table); - dev_err(dev, "%s: OPP table not found (%d)\n", + dev_dbg(dev, "%s: OPP table not found (%d)\n", __func__, count); goto out_unlock; } -- GitLab From 700053c8733e3bbc48dc78643d9bdb21f406e570 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 27 Aug 2017 08:39:51 +0200 Subject: [PATCH 0609/1398] igb: check memory allocation failure [ Upstream commit 18eb86362a52f0af933cc0fd5e37027317eb2d1c ] Check memory allocation failures and return -ENOMEM in such cases, as already done for other memory allocations in this function. This avoids NULL pointers dereference. Signed-off-by: Christophe JAILLET Tested-by: Aaron Brown Acked-by: PJ Waskiewicz Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/igb/igb_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 16839600fb78..ca54f7684668 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3102,6 +3102,8 @@ static int igb_sw_init(struct igb_adapter *adapter) /* Setup and initialize a copy of the hw vlan table array */ adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), GFP_ATOMIC); + if (!adapter->shadow_vfta) + return -ENOMEM; /* This call may decrease the number of queues */ if (igb_init_interrupt_scheme(adapter, true)) { -- GitLab From 2141182852b804c5f7a0e199c4456c91db639189 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 11 Sep 2017 14:21:31 -0700 Subject: [PATCH 0610/1398] ixgbe: fix use of uninitialized padding [ Upstream commit dcfd6b839c998bc9838e2a47f44f37afbdf3099c ] This patch is resolving Coverity hits where padding in a structure could be used uninitialized. - Initialize fwd_cmd.pad/2 before ixgbe_calculate_checksum() - Initialize buffer.pad2/3 before ixgbe_hic_unlocked() Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 77d3039283f6..ad3362293cbd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -3696,10 +3696,10 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, fw_cmd.ver_build = build; fw_cmd.ver_sub = sub; fw_cmd.hdr.checksum = 0; - fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, - (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); fw_cmd.pad = 0; fw_cmd.pad2 = 0; + fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, + (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { ret_val = ixgbe_host_interface_command(hw, &fw_cmd, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 60f0bf779073..77a60aa5dc7e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -617,6 +617,8 @@ static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, /* convert offset from words to bytes */ buffer.address = cpu_to_be32((offset + current_word) * 2); buffer.length = cpu_to_be16(words_to_read * 2); + buffer.pad2 = 0; + buffer.pad3 = 0; status = ixgbe_host_interface_command(hw, &buffer, sizeof(buffer), -- GitLab From 1d4b32bee9c79f1f4f434d20b9a75a4a69fa159b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Sep 2017 15:37:45 +0100 Subject: [PATCH 0611/1398] IB/rxe: check for allocation failure on elem [ Upstream commit 4831ca9e4a8e48cb27e0a792f73250390827a228 ] The allocation for elem may fail (especially because we're using GFP_ATOMIC) so best to check for a null return. This fixes a potential null pointer dereference when assigning elem->pool. Detected by CoverityScan CID#1357507 ("Dereference null return value") Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_pool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index ee26a1b1b4ed..1c4e5b2e6835 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -412,6 +412,8 @@ void *rxe_alloc(struct rxe_pool *pool) elem = kmem_cache_zalloc(pool_cache(pool), (pool->flags & RXE_POOL_ATOMIC) ? GFP_ATOMIC : GFP_KERNEL); + if (!elem) + return NULL; elem->pool = pool; kref_init(&elem->ref_cnt); -- GitLab From fbb2d72a54c6402192df22bf0c35e41f750ba188 Mon Sep 17 00:00:00 2001 From: Gabriele Paoloni Date: Thu, 28 Sep 2017 15:33:05 +0100 Subject: [PATCH 0612/1398] PCI/AER: Report non-fatal errors only to the affected endpoint [ Upstream commit 86acc790717fb60fb51ea3095084e331d8711c74 ] Previously, if an non-fatal error was reported by an endpoint, we called report_error_detected() for the endpoint, every sibling on the bus, and their descendents. If any of them did not implement the .error_detected() method, do_recovery() failed, leaving all these devices unrecovered. For example, the system described in the bugzilla below has two devices: 0000:74:02.0 [19e5:a230] SAS controller, driver has .error_detected() 0000:74:03.0 [19e5:a235] SATA controller, driver lacks .error_detected() When a device such as 74:02.0 reported a non-fatal error, do_recovery() failed because 74:03.0 lacked an .error_detected() method. But per PCIe r3.1, sec 6.2.2.2.2, such an error does not compromise the Link and does not affect 74:03.0: Non-fatal errors are uncorrectable errors which cause a particular transaction to be unreliable but the Link is otherwise fully functional. Isolating Non-fatal from Fatal errors provides Requester/Receiver logic in a device or system management software the opportunity to recover from the error without resetting the components on the Link and disturbing other transactions in progress. Devices not associated with the transaction in error are not impacted by the error. Report non-fatal errors only to the endpoint that reported them. We really want to check for AER_NONFATAL here, but the current code structure doesn't allow that. Looking for pci_channel_io_normal is the best we can do now. Link: https://bugzilla.kernel.org/show_bug.cgi?id=197055 Fixes: 6c2b374d7485 ("PCI-Express AER implemetation: AER core and aerdriver") Signed-off-by: Gabriele Paoloni Signed-off-by: Dongdong Liu [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/aer/aerdrv_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index b1303b32053f..057465adf0b6 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -390,7 +390,14 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, * If the error is reported by an end point, we think this * error is related to the upstream link of the end point. */ - pci_walk_bus(dev->bus, cb, &result_data); + if (state == pci_channel_io_normal) + /* + * the error is non fatal so the bus is ok, just invoke + * the callback for the function that logged the error. + */ + cb(dev, &result_data); + else + pci_walk_bus(dev->bus, cb, &result_data); } return result_data.result; -- GitLab From 6af9b18a2e485dae8f837e0194bfc6937dff69e1 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 22 Sep 2017 14:58:17 -0500 Subject: [PATCH 0613/1398] tracing: Exclude 'generic fields' from histograms [ Upstream commit a15f7fc20389a8827d5859907568b201234d4b79 ] There are a small number of 'generic fields' (comm/COMM/cpu/CPU) that are found by trace_find_event_field() but are only meant for filtering. Specifically, they unlike normal fields, they have a size of 0 and thus wreak havoc when used as a histogram key. Exclude these (return -EINVAL) when used as histogram keys. Link: http://lkml.kernel.org/r/956154cbc3e8a4f0633d619b886c97f0f0edf7b4.1506105045.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_hist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f3a960ed75a1..0664044ade06 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -449,7 +449,7 @@ static int create_val_field(struct hist_trigger_data *hist_data, } field = trace_find_event_field(file->event_call, field_name); - if (!field) { + if (!field || !field->size) { ret = -EINVAL; goto out; } @@ -547,7 +547,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, } field = trace_find_event_field(file->event_call, field_name); - if (!field) { + if (!field || !field->size) { ret = -EINVAL; goto out; } -- GitLab From bd0feaac155f589237495d058bab6f5fca5a0c5f Mon Sep 17 00:00:00 2001 From: Ed Blake Date: Mon, 2 Oct 2017 11:00:33 +0100 Subject: [PATCH 0614/1398] ASoC: img-parallel-out: Add pm_runtime_get/put to set_fmt callback [ Upstream commit c70458890ff15d858bd347fa9f563818bcd6e457 ] Add pm_runtime_get_sync and pm_runtime_put calls to set_fmt callback function. This fixes a bus error during boot when CONFIG_SUSPEND is defined when this function gets called while the device is runtime disabled and device registers are accessed while the clock is disabled. Signed-off-by: Ed Blake Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/img/img-parallel-out.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/img/img-parallel-out.c b/sound/soc/img/img-parallel-out.c index c1610a054d65..3cf522d66755 100644 --- a/sound/soc/img/img-parallel-out.c +++ b/sound/soc/img/img-parallel-out.c @@ -166,9 +166,11 @@ static int img_prl_out_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } + pm_runtime_get_sync(prl->dev); reg = img_prl_out_readl(prl, IMG_PRL_OUT_CTL); reg = (reg & ~IMG_PRL_OUT_CTL_EDGE_MASK) | control_set; img_prl_out_writel(prl, reg, IMG_PRL_OUT_CTL); + pm_runtime_put(prl->dev); return 0; } -- GitLab From fc9d6386a9a36c63d3a91f0573a4f7714725c282 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 11 Aug 2017 11:14:58 -0700 Subject: [PATCH 0615/1398] fm10k: fix mis-ordered parameters in declaration for .ndo_set_vf_bw [ Upstream commit 3e256ac5b1ec307e5dd5a4c99fbdbc651446c738 ] We've had support for setting both a minimum and maximum bandwidth via .ndo_set_vf_bw since commit 883a9ccbae56 ("fm10k: Add support for SR-IOV to driver", 2014-09-20). Likely because we do not support minimum rates, the declaration mis-ordered the "unused" parameter, which causes warnings when analyzed with cppcheck. Fix this warning by properly declaring the min_rate and max_rate variables in the declaration and definition (rather than using "unused"). Also rename "rate" to max_rate so as to clarify that we only support setting the maximum rate. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/fm10k/fm10k.h | 4 ++-- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 4d19e46f7c55..3693ae104c2a 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -508,8 +508,8 @@ s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid); int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac); int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, u8 qos, __be16 vlan_proto); -int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate, - int unused); +int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, + int __always_unused min_rate, int max_rate); int fm10k_ndo_get_vf_config(struct net_device *netdev, int vf_idx, struct ifla_vf_info *ivi); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 5f4dac0d36ef..f919199944a0 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -482,7 +482,7 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, } int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, - int __always_unused unused, int rate) + int __always_unused min_rate, int max_rate) { struct fm10k_intfc *interface = netdev_priv(netdev); struct fm10k_iov_data *iov_data = interface->iov_data; @@ -493,14 +493,15 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, return -EINVAL; /* rate limit cannot be less than 10Mbs or greater than link speed */ - if (rate && ((rate < FM10K_VF_TC_MIN) || rate > FM10K_VF_TC_MAX)) + if (max_rate && + (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX)) return -EINVAL; /* store values */ - iov_data->vf_info[vf_idx].rate = rate; + iov_data->vf_info[vf_idx].rate = max_rate; /* update hardware configuration */ - hw->iov.ops.configure_tc(hw, vf_idx, rate); + hw->iov.ops.configure_tc(hw, vf_idx, max_rate); return 0; } -- GitLab From b438d2f7e23c4697f8ba954d451bf1e65af3ac85 Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Fri, 29 Sep 2017 17:34:42 -0700 Subject: [PATCH 0616/1398] scsi: lpfc: Fix secure firmware updates [ Upstream commit 184fc2b9a8bcbda9c14d0a1e7fbecfc028c7702e ] Firmware update fails with: status x17 add_status x56 on the final write If multiple DMA buffers are used for the download, some firmware revs have difficulty with signatures and crcs split across the dma buffer boundaries. Resolve by making all writes be a single 4k page in length. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_hw4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 55faa94637a9..2a436dff1589 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -3232,7 +3232,7 @@ struct lpfc_mbx_get_port_name { #define MB_CEQ_STATUS_QUEUE_FLUSHING 0x4 #define MB_CQE_STATUS_DMA_FAILED 0x5 -#define LPFC_MBX_WR_CONFIG_MAX_BDE 8 +#define LPFC_MBX_WR_CONFIG_MAX_BDE 1 struct lpfc_mbx_wr_object { struct mbox_header header; union { -- GitLab From de5a4c816d312b137db36fe2e74b8fd486ca04df Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Fri, 29 Sep 2017 17:34:32 -0700 Subject: [PATCH 0617/1398] scsi: lpfc: PLOGI failures during NPIV testing [ Upstream commit e8bcf0ae4c0346fdc78ebefe0eefcaa6a6622d38 ] Local Reject/Invalid RPI errors seen during discovery. Temporary RPI cleanup was occurring regardless of SLI rev. It's only necessary on SLI-4. Adjust the test for whether cleanup is necessary. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_hbadisc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index ed223937798a..7d2ad633b6bc 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4784,7 +4784,8 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) lpfc_cancel_retry_delay_tmo(vport, ndlp); if ((ndlp->nlp_flag & NLP_DEFER_RM) && !(ndlp->nlp_flag & NLP_REG_LOGIN_SEND) && - !(ndlp->nlp_flag & NLP_RPI_REGISTERED)) { + !(ndlp->nlp_flag & NLP_RPI_REGISTERED) && + phba->sli_rev != LPFC_SLI_REV4) { /* For this case we need to cleanup the default rpi * allocated by the firmware. */ -- GitLab From 76d83bfc1158a0995d327ee39926688c8a08d9c9 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 2 Oct 2017 12:39:09 -0600 Subject: [PATCH 0618/1398] vfio/pci: Virtualize Maximum Payload Size [ Upstream commit 523184972b282cd9ca17a76f6ca4742394856818 ] With virtual PCI-Express chipsets, we now see userspace/guest drivers trying to match the physical MPS setting to a virtual downstream port. Of course a lone physical device surrounded by virtual interconnects cannot make a correct decision for a proper MPS setting. Instead, let's virtualize the MPS control register so that writes through to hardware are disallowed. Userspace drivers like QEMU assume they can write anything to the device and we'll filter out anything dangerous. Since mismatched MPS can lead to AER and other faults, let's add it to the kernel side rather than relying on userspace virtualization to handle it. Signed-off-by: Alex Williamson Reviewed-by: Eric Auger Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/vfio_pci_config.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 65d4a3015542..9f1ec4392209 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -851,11 +851,13 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm) /* * Allow writes to device control fields, except devctl_phantom, - * which could confuse IOMMU, and the ARI bit in devctl2, which + * which could confuse IOMMU, MPS, which can break communication + * with other physical devices, and the ARI bit in devctl2, which * is set at probe time. FLR gets virtualized via our writefn. */ p_setw(perm, PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM); + PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD, + ~PCI_EXP_DEVCTL_PHANTOM); p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); return 0; } -- GitLab From 52d0a601aec443088ced2530f3538a6276e8d255 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 2 Oct 2017 07:17:50 -0700 Subject: [PATCH 0619/1398] fm10k: ensure we process SM mbx when processing VF mbx [ Upstream commit 17a91809942ca32c70026d2d5ba3348a2c4fdf8f ] When we process VF mailboxes, the driver is likely going to also queue up messages to the switch manager. This process merely queues up the FIFO, but doesn't actually begin the transmission process. Because we hold the mailbox lock during this VF processing, the PF<->SM mailbox is not getting processed at this time. Ensure that we actually process the PF<->SM mailbox in between each PF<->VF mailbox. This should ensure prompt transmission of the messages queued up after each VF message is received and handled. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index f919199944a0..e72fd52bacfe 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -126,6 +126,9 @@ s32 fm10k_iov_mbx(struct fm10k_intfc *interface) struct fm10k_mbx_info *mbx = &vf_info->mbx; u16 glort = vf_info->glort; + /* process the SM mailbox first to drain outgoing messages */ + hw->mbx.ops.process(hw, &hw->mbx); + /* verify port mapping is valid, if not reset port */ if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) hw->iov.ops.reset_lport(hw, vf_info); -- GitLab From 4bf42a2ec12a78a5638b64be54a7a35230575032 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Mon, 25 Sep 2017 22:01:36 +0100 Subject: [PATCH 0620/1398] net: ipv6: send NS for DAD when link operationally up [ Upstream commit 1f372c7bfb23286d2bf4ce0423ab488e86b74bb2 ] The NS for DAD are sent on admin up as long as a valid qdisc is found. A race condition exists by which these packets will not egress the interface if the operational state of the lower device is not yet up. The solution is to delay DAD until the link is operationally up according to RFC2863. Rather than only doing this, follow the existing code checks by deferring IPv6 device initialization altogether. The fix allows DAD on devices like tunnels that are controlled by userspace control plane. The fix has no impact on regular deployments, but means that there is no IPv6 connectivity until the port has been opened in the case of port-based network access control, which should be desirable. Signed-off-by: Mike Manning Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a4fb90c4819f..1594d9fc9c92 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -286,10 +286,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .keep_addr_on_down = 0, }; -/* Check if a valid qdisc is available */ -static inline bool addrconf_qdisc_ok(const struct net_device *dev) +/* Check if link is ready: is it up and is a valid qdisc available */ +static inline bool addrconf_link_ready(const struct net_device *dev) { - return !qdisc_tx_is_noop(dev); + return netif_oper_up(dev) && !qdisc_tx_is_noop(dev); } static void addrconf_del_rs_timer(struct inet6_dev *idev) @@ -434,7 +434,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->token = in6addr_any; - if (netif_running(dev) && addrconf_qdisc_ok(dev)) + if (netif_running(dev) && addrconf_link_ready(dev)) ndev->if_flags |= IF_READY; ipv6_mc_init_dev(ndev); @@ -3368,7 +3368,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, /* restore routes for permanent addresses */ addrconf_permanent_addr(dev); - if (!addrconf_qdisc_ok(dev)) { + if (!addrconf_link_ready(dev)) { /* device is not ready yet. */ pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", dev->name); @@ -3383,7 +3383,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } } else if (event == NETDEV_CHANGE) { - if (!addrconf_qdisc_ok(dev)) { + if (!addrconf_link_ready(dev)) { /* device is still not ready. */ break; } -- GitLab From bb0618ac2302316d924aff2e653ef8e4f1dd753b Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 23 Sep 2017 13:25:30 +0530 Subject: [PATCH 0621/1398] staging: greybus: light: Release memory obtained by kasprintf [ Upstream commit 04820da21050b35eed68aa046115d810163ead0c ] Free memory region, if gb_lights_channel_config is not successful. Signed-off-by: Arvind Yadav Reviewed-by: Rui Miguel Silva Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/light.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c index 8dffd8a7e762..9f01427f35f9 100644 --- a/drivers/staging/greybus/light.c +++ b/drivers/staging/greybus/light.c @@ -924,6 +924,8 @@ static void __gb_lights_led_unregister(struct gb_channel *channel) return; led_classdev_unregister(cdev); + kfree(cdev->name); + cdev->name = NULL; channel->led = NULL; } -- GitLab From 5859027994f91d13fc3c4e99a8420ecbf8193729 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 29 Sep 2017 16:22:54 +0800 Subject: [PATCH 0622/1398] clk: sunxi-ng: sun6i: Rename HDMI DDC clock to avoid name collision [ Upstream commit 7f3ed79188f2f094d0ee366fa858857fb7f511ba ] The HDMI DDC clock found in the CCU is the parent of the actual DDC clock within the HDMI controller. That clock is also named "hdmi-ddc". Rename the one in the CCU to "ddc". This makes more sense than renaming the one in the HDMI controller to something else. Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/sunxi-ng/ccu-sun6i-a31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c index df97e25aec76..9fe0939c1273 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c @@ -608,7 +608,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_clk, "hdmi", lcd_ch1_parents, 0x150, 0, 4, 24, 2, BIT(31), CLK_SET_RATE_PARENT); -static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(30), 0); +static SUNXI_CCU_GATE(hdmi_ddc_clk, "ddc", "osc24M", 0x150, BIT(30), 0); static SUNXI_CCU_GATE(ps_clk, "ps", "lcd1-ch1", 0x140, BIT(31), 0); -- GitLab From acc96729e1d8d088e8e4cecdbef05f0c0b5e367b Mon Sep 17 00:00:00 2001 From: Hoang Tran Date: Wed, 27 Sep 2017 18:30:58 +0200 Subject: [PATCH 0623/1398] tcp: fix under-evaluated ssthresh in TCP Vegas [ Upstream commit cf5d74b85ef40c202c76d90959db4d850f301b95 ] With the commit 76174004a0f19785 (tcp: do not slow start when cwnd equals ssthresh), the comparison to the reduced cwnd in tcp_vegas_ssthresh() would under-evaluate the ssthresh. Signed-off-by: Hoang Tran Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_vegas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 4c4bac1b5eab..3ecb61ee42fb 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) { - return min(tp->snd_ssthresh, tp->snd_cwnd-1); + return min(tp->snd_ssthresh, tp->snd_cwnd); } static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) -- GitLab From 15319d2a49ca1df37d83c868e2c301fcbc87c059 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 28 Sep 2017 13:53:27 +0200 Subject: [PATCH 0624/1398] rtc: set the alarm to the next expiring timer [ Upstream commit 74717b28cb32e1ad3c1042cafd76b264c8c0f68d ] If there is any non expired timer in the queue, the RTC alarm is never set. This is an issue when adding a timer that expires before the next non expired timer. Ensure the RTC alarm is set in that case. Fixes: 2b2f5ff00f63 ("rtc: interface: ignore expired timers when enqueuing new timers") Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 84a52db9b05f..6ebd42aad291 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -772,7 +772,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) } timerqueue_add(&rtc->timerqueue, &timer->node); - if (!next) { + if (!next || ktime_before(timer->node.expires, next->expires)) { struct rtc_wkalrm alarm; int err; alarm.time = rtc_ktime_to_tm(timer->node.expires); -- GitLab From b86c7b8c5dfb2a68bce79e1ad3fe50e06022d1f6 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 1 Sep 2017 14:29:56 +1000 Subject: [PATCH 0625/1398] cpuidle: fix broadcast control when broadcast can not be entered [ Upstream commit f187851b9b4a76952b1158b86434563dd2031103 ] When failing to enter broadcast timer mode for an idle state that requires it, a new state is selected that does not require broadcast, but the broadcast variable remains set. This causes tick_broadcast_exit to be called despite not having entered broadcast mode. This causes the WARN_ON_ONCE(!irqs_disabled()) to trigger in some cases. It does not appear to cause problems for code today, but seems to violate the interface so should be fixed. Signed-off-by: Nicholas Piggin Reviewed-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/cpuidle.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index c73207abb5a4..35237c8d5206 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -189,6 +189,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, return -EBUSY; } target_state = &drv->states[index]; + broadcast = false; } /* Take note of the planned idle state. */ -- GitLab From 82bf76afa8affad676ba6442bd4656f842312988 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 6 Jun 2017 15:04:46 +0530 Subject: [PATCH 0626/1398] thermal: hisilicon: Handle return value of clk_prepare_enable commit 919054fdfc8adf58c5512fe9872eb53ea0f5525d upstream. clk_prepare_enable() can fail here and we must check its return value. Signed-off-by: Arvind Yadav Signed-off-by: Eduardo Valentin Signed-off-by: Kevin Wangtao Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index f6429666a1cf..9c3ce341eb97 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -397,8 +397,11 @@ static int hisi_thermal_suspend(struct device *dev) static int hisi_thermal_resume(struct device *dev) { struct hisi_thermal_data *data = dev_get_drvdata(dev); + int ret; - clk_prepare_enable(data->clk); + ret = clk_prepare_enable(data->clk); + if (ret) + return ret; data->irq_enabled = true; hisi_thermal_enable_bind_irq_sensor(data); -- GitLab From b679b8d7bad06765dde62c1bc2f378d5461928f3 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:43 +0200 Subject: [PATCH 0627/1398] thermal/drivers/hisi: Fix missing interrupt enablement commit c176b10b025acee4dc8f2ab1cd64eb73b5ccef53 upstream. The interrupt for the temperature threshold is not enabled at the end of the probe function, enable it after the setup is complete. On the other side, the irq_enabled is not correctly set as we are checking if the interrupt is masked where 'yes' means irq_enabled=false. irq_get_irqchip_state(data->irq, IRQCHIP_STATE_MASKED, &data->irq_enabled); As we are always enabling the interrupt, it is pointless to check if the interrupt is masked or not, just set irq_enabled to 'true'. Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Tested-by: Leo Yan Signed-off-by: Eduardo Valentin Signed-off-by: Kevin Wangtao Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 9c3ce341eb97..f3b50b0385e2 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -345,8 +345,7 @@ static int hisi_thermal_probe(struct platform_device *pdev) } hisi_thermal_enable_bind_irq_sensor(data); - irq_get_irqchip_state(data->irq, IRQCHIP_STATE_MASKED, - &data->irq_enabled); + data->irq_enabled = true; for (i = 0; i < HISI_MAX_SENSORS; ++i) { ret = hisi_thermal_register_sensor(pdev, data, @@ -358,6 +357,8 @@ static int hisi_thermal_probe(struct platform_device *pdev) hisi_thermal_toggle_sensor(&data->sensors[i], true); } + enable_irq(data->irq); + return 0; } -- GitLab From 2dac559df962c064d13a14610e81535c55fcf33d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:45 +0200 Subject: [PATCH 0628/1398] thermal/drivers/hisi: Fix kernel panic on alarm interrupt commit 2cb4de785c40d4a2132cfc13e63828f5a28c3351 upstream. The threaded interrupt for the alarm interrupt is requested before the temperature controller is setup. This one can fire an interrupt immediately leading to a kernel panic as the sensor data is not initialized. In order to prevent that, move the threaded irq after the Tsensor is setup. Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Tested-by: Leo Yan Signed-off-by: Eduardo Valentin Signed-off-by: Kevin Wangtao Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index f3b50b0385e2..c90ceb891a37 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -317,15 +317,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) if (data->irq < 0) return data->irq; - ret = devm_request_threaded_irq(&pdev->dev, data->irq, - hisi_thermal_alarm_irq, - hisi_thermal_alarm_irq_thread, - 0, "hisi_thermal", data); - if (ret < 0) { - dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); - return ret; - } - platform_set_drvdata(pdev, data); data->clk = devm_clk_get(&pdev->dev, "thermal_clk"); @@ -357,6 +348,15 @@ static int hisi_thermal_probe(struct platform_device *pdev) hisi_thermal_toggle_sensor(&data->sensors[i], true); } + ret = devm_request_threaded_irq(&pdev->dev, data->irq, + hisi_thermal_alarm_irq, + hisi_thermal_alarm_irq_thread, + 0, "hisi_thermal", data); + if (ret < 0) { + dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); + return ret; + } + enable_irq(data->irq); return 0; -- GitLab From 1b2c46a6be45a36c64230cc23859e2113ab1cc49 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:46 +0200 Subject: [PATCH 0629/1398] thermal/drivers/hisi: Simplify the temperature/step computation commit 48880b979cdc9ef5a70af020f42b8ba1e51dbd34 upstream. The step and the base temperature are fixed values, we can simplify the computation by converting the base temperature to milli celsius and use a pre-computed step value. That saves us a lot of mult + div for nothing at runtime. Take also the opportunity to change the function names to be consistent with the rest of the code. Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Tested-by: Leo Yan Signed-off-by: Eduardo Valentin Signed-off-by: Kevin Wangtao Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 41 +++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index c90ceb891a37..7a0ba76c3a46 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -35,8 +35,9 @@ #define TEMP0_RST_MSK (0x1C) #define TEMP0_VALUE (0x28) -#define HISI_TEMP_BASE (-60) +#define HISI_TEMP_BASE (-60000) #define HISI_TEMP_RESET (100000) +#define HISI_TEMP_STEP (784) #define HISI_MAX_SENSORS 4 @@ -61,19 +62,32 @@ struct hisi_thermal_data { void __iomem *regs; }; -/* in millicelsius */ -static inline int _step_to_temp(int step) +/* + * The temperature computation on the tsensor is as follow: + * Unit: millidegree Celsius + * Step: 255/200 (0.7843) + * Temperature base: -60°C + * + * The register is programmed in temperature steps, every step is 784 + * millidegree and begins at -60 000 m°C + * + * The temperature from the steps: + * + * Temp = TempBase + (steps x 784) + * + * and the steps from the temperature: + * + * steps = (Temp - TempBase) / 784 + * + */ +static inline int hisi_thermal_step_to_temp(int step) { - /* - * Every step equals (1 * 200) / 255 celsius, and finally - * need convert to millicelsius. - */ - return (HISI_TEMP_BASE * 1000 + (step * 200000 / 255)); + return HISI_TEMP_BASE + (step * HISI_TEMP_STEP); } -static inline long _temp_to_step(long temp) +static inline long hisi_thermal_temp_to_step(long temp) { - return ((temp - HISI_TEMP_BASE * 1000) * 255) / 200000; + return (temp - HISI_TEMP_BASE) / HISI_TEMP_STEP; } static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, @@ -99,7 +113,7 @@ static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, usleep_range(3000, 5000); val = readl(data->regs + TEMP0_VALUE); - val = _step_to_temp(val); + val = hisi_thermal_step_to_temp(val); mutex_unlock(&data->thermal_lock); @@ -126,10 +140,11 @@ static void hisi_thermal_enable_bind_irq_sensor writel((sensor->id << 12), data->regs + TEMP0_CFG); /* enable for interrupt */ - writel(_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00, + writel(hisi_thermal_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00, data->regs + TEMP0_TH); - writel(_temp_to_step(HISI_TEMP_RESET), data->regs + TEMP0_RST_TH); + writel(hisi_thermal_temp_to_step(HISI_TEMP_RESET), + data->regs + TEMP0_RST_TH); /* enable module */ writel(0x1, data->regs + TEMP0_RST_MSK); -- GitLab From 3cff90788e289eb9b24aa4d6c9de78a689402b0d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:47 +0200 Subject: [PATCH 0630/1398] thermal/drivers/hisi: Fix multiple alarm interrupts firing commit db2b0332608c8e648ea1e44727d36ad37cdb56cb upstream. The DT specifies a threshold of 65000, we setup the register with a value in the temperature resolution for the controller, 64656. When we reach 64656, the interrupt fires, the interrupt is disabled. Then the irq thread runs and calls thermal_zone_device_update() which will call in turn hisi_thermal_get_temp(). The function will look if the temperature decreased, assuming it was more than 65000, but that is not the case because the current temperature is 64656 (because of the rounding when setting the threshold). This condition being true, we re-enable the interrupt which fires immediately after exiting the irq thread. That happens again and again until the temperature goes to more than 65000. Potentially, there is here an interrupt storm if the temperature stabilizes at this temperature. A very unlikely case but possible. In any case, it does not make sense to handle dozens of alarm interrupt for nothing. Fix this by rounding the threshold value to the controller resolution so the check against the threshold is consistent with the one set in the controller. Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Tested-by: Leo Yan Signed-off-by: Eduardo Valentin Signed-off-by: Kevin Wangtao Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 7a0ba76c3a46..c5285ed34fdd 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -90,6 +90,12 @@ static inline long hisi_thermal_temp_to_step(long temp) return (temp - HISI_TEMP_BASE) / HISI_TEMP_STEP; } +static inline long hisi_thermal_round_temp(int temp) +{ + return hisi_thermal_step_to_temp( + hisi_thermal_temp_to_step(temp)); +} + static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, struct hisi_thermal_sensor *sensor) { @@ -245,7 +251,7 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) sensor = &data->sensors[data->irq_bind_sensor]; dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", - sensor->thres_temp / 1000); + sensor->thres_temp); mutex_unlock(&data->thermal_lock); for (i = 0; i < HISI_MAX_SENSORS; i++) { @@ -284,7 +290,7 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev, for (i = 0; i < of_thermal_get_ntrips(sensor->tzd); i++) { if (trip[i].type == THERMAL_TRIP_PASSIVE) { - sensor->thres_temp = trip[i].temperature; + sensor->thres_temp = hisi_thermal_round_temp(trip[i].temperature); break; } } -- GitLab From 00ecb4b1a5991a67e58c44b2572c87534f0939cc Mon Sep 17 00:00:00 2001 From: Aleksandar Markovic Date: Thu, 2 Nov 2017 12:13:58 +0100 Subject: [PATCH 0631/1398] MIPS: math-emu: Fix final emulation phase for certain instructions commit 409fcace9963c1e8d2cb0f7ac62e8b34d47ef979 upstream. Fix final phase of . emulation. Provide proper generation of SIGFPE signal and updating debugfs FP exception stats in cases of any exception flags set in preceding phases of emulation. CLASS. instruction may generate "Unimplemented Operation" FP exception. . instructions may generate "Inexact", "Unimplemented Operation", "Invalid Operation", "Overflow", and "Underflow" FP exceptions. . instructions can generate "Unimplemented Operation" and "Invalid Operation" FP exceptions. The proper final processing of the cases when any FP exception flag is set is achieved by replacing "break" statement with "goto copcsr" statement. With such solution, this patch brings the final phase of emulation of the above instructions consistent with the one corresponding to the previously implemented emulation of other related FPU instructions (ADD, SUB, etc.). Fixes: 38db37ba069f ("MIPS: math-emu: Add support for the MIPS R6 CLASS FPU instruction") Fixes: e24c3bec3e8e ("MIPS: math-emu: Add support for the MIPS R6 MADDF FPU instruction") Fixes: 83d43305a1df ("MIPS: math-emu: Add support for the MIPS R6 MSUBF FPU instruction") Fixes: a79f5f9ba508 ("MIPS: math-emu: Add support for the MIPS R6 MAX{, A} FPU instruction") Fixes: 4e9561b20e2f ("MIPS: math-emu: Add support for the MIPS R6 MIN{, A} FPU instruction") Signed-off-by: Aleksandar Markovic Cc: Ralf Baechle Cc: Douglas Leung Cc: Goran Ferenc Cc: "Maciej W. Rozycki" Cc: Miodrag Dinic Cc: Paul Burton Cc: Petar Jovanovic Cc: Raghu Gandham Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17581/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/math-emu/cp1emu.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 9ade60ca08e0..7f2519cfb5d2 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -1781,7 +1781,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(fs, MIPSInst_FS(ir)); SPFROMREG(fd, MIPSInst_FD(ir)); rv.s = ieee754sp_maddf(fd, fs, ft); - break; + goto copcsr; } case fmsubf_op: { @@ -1794,7 +1794,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(fs, MIPSInst_FS(ir)); SPFROMREG(fd, MIPSInst_FD(ir)); rv.s = ieee754sp_msubf(fd, fs, ft); - break; + goto copcsr; } case frint_op: { @@ -1818,7 +1818,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(fs, MIPSInst_FS(ir)); rv.w = ieee754sp_2008class(fs); rfmt = w_fmt; - break; + goto copcsr; } case fmin_op: { @@ -1830,7 +1830,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmin(fs, ft); - break; + goto copcsr; } case fmina_op: { @@ -1842,7 +1842,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmina(fs, ft); - break; + goto copcsr; } case fmax_op: { @@ -1854,7 +1854,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmax(fs, ft); - break; + goto copcsr; } case fmaxa_op: { @@ -1866,7 +1866,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmaxa(fs, ft); - break; + goto copcsr; } case fabs_op: @@ -2110,7 +2110,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(fs, MIPSInst_FS(ir)); DPFROMREG(fd, MIPSInst_FD(ir)); rv.d = ieee754dp_maddf(fd, fs, ft); - break; + goto copcsr; } case fmsubf_op: { @@ -2123,7 +2123,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(fs, MIPSInst_FS(ir)); DPFROMREG(fd, MIPSInst_FD(ir)); rv.d = ieee754dp_msubf(fd, fs, ft); - break; + goto copcsr; } case frint_op: { @@ -2147,7 +2147,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(fs, MIPSInst_FS(ir)); rv.w = ieee754dp_2008class(fs); rfmt = w_fmt; - break; + goto copcsr; } case fmin_op: { @@ -2159,7 +2159,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmin(fs, ft); - break; + goto copcsr; } case fmina_op: { @@ -2171,7 +2171,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmina(fs, ft); - break; + goto copcsr; } case fmax_op: { @@ -2183,7 +2183,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmax(fs, ft); - break; + goto copcsr; } case fmaxa_op: { @@ -2195,7 +2195,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmaxa(fs, ft); - break; + goto copcsr; } case fabs_op: -- GitLab From dbeb719e24c333a5e4067de1e9a0833ba63e2c75 Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Mon, 4 Dec 2017 10:26:17 +1000 Subject: [PATCH 0632/1398] platform/x86: asus-wireless: send an EV_SYN/SYN_REPORT between state changes commit bff5bf9db1c9453ffd0a78abed3e2d040c092fd9 upstream. Sending the switch state change twice within the same frame is invalid evdev protocol and only works if the client handles keys immediately as well. Processing events immediately is incorrect, it forces a fake order of events that does not exist on the device. Recent versions of libinput changed to only process the device state and SYN_REPORT time, so now the key event is lost. https://bugs.freedesktop.org/show_bug.cgi?id=104041 Signed-off-by: Peter Hutterer Signed-off-by: Darren Hart (VMware) Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/asus-wireless.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/asus-wireless.c b/drivers/platform/x86/asus-wireless.c index 9f31bc1a47d0..18716025b1db 100644 --- a/drivers/platform/x86/asus-wireless.c +++ b/drivers/platform/x86/asus-wireless.c @@ -97,6 +97,7 @@ static void asus_wireless_notify(struct acpi_device *adev, u32 event) return; } input_report_key(data->idev, KEY_RFKILL, 1); + input_sync(data->idev); input_report_key(data->idev, KEY_RFKILL, 0); input_sync(data->idev); } -- GitLab From 3b6c84bc64490529c33bd5f0f9e2c6256031f398 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 22 Dec 2017 08:43:23 +0100 Subject: [PATCH 0633/1398] Revert "Bluetooth: btusb: driver to enable the usb-wakeup feature" This reverts commit 7336f5481f6cf913a2d29d98c6e11f4bbe19d3b2 which is commit a0085f2510e8976614ad8f766b209448b385492f upstream. It causes problems with working systems, as noted by a number of the ChromeOS developers. Cc: Sukumar Ghorai Cc: Amit K Bag Cc: Oliver Neukum Cc: Marcel Holtmann Cc: Matthias Kaehlcke Reported-by: Guenter Roeck Reported-by: Brian Norris Acked-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 3ae950c82922..693028659ccc 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1059,10 +1059,6 @@ static int btusb_open(struct hci_dev *hdev) } data->intf->needs_remote_wakeup = 1; - /* device specific wakeup source enabled and required for USB - * remote wakeup while host is suspended - */ - device_wakeup_enable(&data->udev->dev); if (test_and_set_bit(BTUSB_INTR_RUNNING, &data->flags)) goto done; @@ -1126,7 +1122,6 @@ static int btusb_close(struct hci_dev *hdev) goto failed; data->intf->needs_remote_wakeup = 0; - device_wakeup_disable(&data->udev->dev); usb_autopm_put_interface(data->intf); failed: -- GitLab From 565f012f5abb2d2bba8e03efcd1dba7577245011 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Dec 2017 16:29:02 +0100 Subject: [PATCH 0634/1398] bpf: adjust insn_aux_data when patching insns From: Alexei Starovoitov [ Upstream commit 8041902dae5299c1f194ba42d14383f734631009 ] convert_ctx_accesses() replaces single bpf instruction with a set of instructions. Adjust corresponding insn_aux_data while patching. It's needed to make sure subsequent 'for(all insn)' loops have matching insn and insn_aux_data. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 44 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 372454aa7f37..6faa5c16d821 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3210,6 +3210,41 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) insn->src_reg = 0; } +/* single env->prog->insni[off] instruction was replaced with the range + * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying + * [0, off) and [off, end) to new locations, so the patched range stays zero + */ +static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, + u32 off, u32 cnt) +{ + struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; + + if (cnt == 1) + return 0; + new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len); + if (!new_data) + return -ENOMEM; + memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); + memcpy(new_data + off + cnt - 1, old_data + off, + sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); + env->insn_aux_data = new_data; + vfree(old_data); + return 0; +} + +static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, + const struct bpf_insn *patch, u32 len) +{ + struct bpf_prog *new_prog; + + new_prog = bpf_patch_insn_single(env->prog, off, patch, len); + if (!new_prog) + return NULL; + if (adjust_insn_aux_data(env, new_prog->len, off, len)) + return NULL; + return new_prog; +} + /* convert load instructions that access fields of 'struct __sk_buff' * into sequence of instructions that access fields of 'struct sk_buff' */ @@ -3229,10 +3264,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) verbose("bpf verifier is misconfigured\n"); return -EINVAL; } else if (cnt) { - new_prog = bpf_patch_insn_single(env->prog, 0, - insn_buf, cnt); + new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); if (!new_prog) return -ENOMEM; + env->prog = new_prog; delta += cnt - 1; } @@ -3253,7 +3288,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) else continue; - if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX) + if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg, @@ -3263,8 +3298,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return -EINVAL; } - new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf, - cnt); + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); if (!new_prog) return -ENOMEM; -- GitLab From 7b5b73ea87a06236fa124bdebed1390d362d3439 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Dec 2017 16:29:03 +0100 Subject: [PATCH 0635/1398] bpf: fix branch pruning logic From: Alexei Starovoitov [ Upstream commit c131187db2d3fa2f8bf32fdf4e9a4ef805168467 ] when the verifier detects that register contains a runtime constant and it's compared with another constant it will prune exploration of the branch that is guaranteed not to be taken at runtime. This is all correct, but malicious program may be constructed in such a way that it always has a constant comparison and the other branch is never taken under any conditions. In this case such path through the program will not be explored by the verifier. It won't be taken at run-time either, but since all instructions are JITed the malicious program may cause JITs to complain about using reserved fields, etc. To fix the issue we have to track the instructions explored by the verifier and sanitize instructions that are dead at run time with NOPs. We cannot reject such dead code, since llvm generates it for valid C code, since it doesn't do as much data flow analysis as the verifier does. Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3101141661a1..4c4e9358c146 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -68,6 +68,7 @@ struct bpf_verifier_state_list { struct bpf_insn_aux_data { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + bool seen; /* this insn was processed by the verifier */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6faa5c16d821..2f19c94452dd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2862,6 +2862,7 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; + env->insn_aux_data[insn_idx].seen = true; if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); if (err) @@ -3059,6 +3060,7 @@ static int do_check(struct bpf_verifier_env *env) return err; insn_idx++; + env->insn_aux_data[insn_idx].seen = true; } else { verbose("invalid BPF_LD mode\n"); return -EINVAL; @@ -3218,6 +3220,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, u32 off, u32 cnt) { struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; + int i; if (cnt == 1) return 0; @@ -3227,6 +3230,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); memcpy(new_data + off + cnt - 1, old_data + off, sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); + for (i = off; i < off + cnt - 1; i++) + new_data[i].seen = true; env->insn_aux_data = new_data; vfree(old_data); return 0; @@ -3245,6 +3250,25 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of return new_prog; } +/* The verifier does more data flow analysis than llvm and will not explore + * branches that are dead at run time. Malicious programs can have dead code + * too. Therefore replace all dead at-run-time code with nops. + */ +static void sanitize_dead_code(struct bpf_verifier_env *env) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0); + struct bpf_insn *insn = env->prog->insnsi; + const int insn_cnt = env->prog->len; + int i; + + for (i = 0; i < insn_cnt; i++) { + if (aux_data[i].seen) + continue; + memcpy(insn + i, &nop, sizeof(nop)); + } +} + /* convert load instructions that access fields of 'struct __sk_buff' * into sequence of instructions that access fields of 'struct sk_buff' */ @@ -3406,6 +3430,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) while (pop_stack(env, NULL) >= 0); free_states(env); + if (ret == 0) + sanitize_dead_code(env); + if (ret == 0) /* program is valid, convert *(u32*)(ctx + off) accesses */ ret = convert_ctx_accesses(env); -- GitLab From d75d3ee237cee9068022117e059b64bbab617f3d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Dec 2017 16:29:04 +0100 Subject: [PATCH 0636/1398] bpf: reject out-of-bounds stack pointer calculation From: Jann Horn Reject programs that compute wildly out-of-bounds stack pointers. Otherwise, pointers can be computed with an offset that doesn't fit into an `int`, causing security issues in the stack memory access check (as well as signed integer overflow during offset addition). This is a fix specifically for the v4.9 stable tree because the mainline code looks very different at this point. Fixes: 7bca0a9702edf ("bpf: enhance verifier to understand stack pointer arithmetic") Signed-off-by: Jann Horn Acked-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2f19c94452dd..b03af36b8e87 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1861,10 +1861,28 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) ((BPF_SRC(insn->code) == BPF_X && regs[insn->src_reg].type == CONST_IMM) || BPF_SRC(insn->code) == BPF_K)) { - if (BPF_SRC(insn->code) == BPF_X) + if (BPF_SRC(insn->code) == BPF_X) { + /* check in case the register contains a big + * 64-bit value + */ + if (regs[insn->src_reg].imm < -MAX_BPF_STACK || + regs[insn->src_reg].imm > MAX_BPF_STACK) { + verbose("R%d value too big in R%d pointer arithmetic\n", + insn->src_reg, insn->dst_reg); + return -EACCES; + } dst_reg->imm += regs[insn->src_reg].imm; - else + } else { + /* safe against overflow: addition of 32-bit + * numbers in 64-bit representation + */ dst_reg->imm += insn->imm; + } + if (dst_reg->imm > 0 || dst_reg->imm < -MAX_BPF_STACK) { + verbose("R%d out-of-bounds pointer arithmetic\n", + insn->dst_reg); + return -EACCES; + } return 0; } else if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && -- GitLab From 3695b3b18519099224efbc5875569d2cb6da256d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Dec 2017 16:29:05 +0100 Subject: [PATCH 0637/1398] bpf: fix incorrect sign extension in check_alu_op() From: Jann Horn [ Upstream commit 95a762e2c8c942780948091f8f2a4f32fce1ac6f ] Distinguish between BPF_ALU64|BPF_MOV|BPF_K (load 32-bit immediate, sign-extended to 64-bit) and BPF_ALU|BPF_MOV|BPF_K (load 32-bit immediate, zero-padded to 64-bit); only perform sign extension in the first case. Starting with v4.14, this is exploitable by unprivileged users as long as the unprivileged_bpf_disabled sysctl isn't set. Debian assigned CVE-2017-16995 for this issue. v3: - add CVE number (Ben Hutchings) Fixes: 484611357c19 ("bpf: allow access into map value arrays") Signed-off-by: Jann Horn Acked-by: Edward Cree Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b03af36b8e87..8b1ebe4c6aba 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1790,10 +1790,17 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) /* case: R = imm * remember the value we stored into this reg */ + u64 imm; + + if (BPF_CLASS(insn->code) == BPF_ALU64) + imm = insn->imm; + else + imm = (u32)insn->imm; + regs[insn->dst_reg].type = CONST_IMM; - regs[insn->dst_reg].imm = insn->imm; - regs[insn->dst_reg].max_value = insn->imm; - regs[insn->dst_reg].min_value = insn->imm; + regs[insn->dst_reg].imm = imm; + regs[insn->dst_reg].max_value = imm; + regs[insn->dst_reg].min_value = imm; } } else if (opcode > BPF_END) { -- GitLab From 6430e166aee863d293a9a3c083af7a64fec57d13 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 1 Apr 2017 13:47:44 -0700 Subject: [PATCH 0638/1398] sparc32: Export vac_cache_size to fix build error commit 9d262d95114cf2e2ac5e0ff358347fa2e214eda5 upstream. sparc32:allmodconfig fails to build with the following error. ERROR: "vac_cache_size" [drivers/infiniband/sw/rxe/rdma_rxe.ko] undefined! Fixes: cb8864559631 ("infiniband: Fix alignment of mmap cookies ...") Cc: Jason Gunthorpe Cc: Doug Ledford Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/mm/srmmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index c7f2a5295b3a..83a73cf5116a 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -54,6 +54,7 @@ enum mbus_module srmmu_modtype; static unsigned int hwbug_bitmask; int vac_cache_size; +EXPORT_SYMBOL(vac_cache_size); int vac_line_size; extern struct resource sparc_iomap; -- GitLab From 2df397931072ff4e66f9e42ae1f1630c03513ce7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 25 Dec 2017 14:23:47 +0100 Subject: [PATCH 0639/1398] Linux 4.9.72 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5f2736bb4877..78dde51d9d74 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 71 +SUBLEVEL = 72 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 54c74d38819df62c67f420bb654996637124ae30 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 Dec 2017 13:31:16 +0100 Subject: [PATCH 0640/1398] ACPI: APEI / ERST: Fix missing error handling in erst_reader() commit bb82e0b4a7e96494f0c1004ce50cec3d7b5fb3d1 upstream. The commit f6f828513290 ("pstore: pass allocated memory region back to caller") changed the check of the return value from erst_read() in erst_reader() in the following way: if (len == -ENOENT) goto skip; - else if (len < 0) { - rc = -1; + else if (len < sizeof(*rcd)) { + rc = -EIO; goto out; This introduced another bug: since the comparison with sizeof() is cast to unsigned, a negative len value doesn't hit any longer. As a result, when an error is returned from erst_read(), the code falls through, and it may eventually lead to some weird thing like memory corruption. This patch adds the negative error value check more explicitly for addressing the issue. Fixes: f6f828513290 (pstore: pass allocated memory region back to caller) Tested-by: Jerry Tang Signed-off-by: Takashi Iwai Acked-by: Kees Cook Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/apei/erst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index ec4f507b524f..4558cc73abf3 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -1020,7 +1020,7 @@ static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count, /* The record may be cleared by others, try read next record */ if (len == -ENOENT) goto skip; - else if (len < sizeof(*rcd)) { + else if (len < 0 || len < sizeof(*rcd)) { rc = -EIO; goto out; } -- GitLab From d31a207aaf070edc94cffe6130f06e070711f3ce Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 30 Nov 2017 19:42:52 -0800 Subject: [PATCH 0641/1398] acpi, nfit: fix health event notification commit adf6895754e2503d994a765535fd1813f8834674 upstream. Integration testing with a BIOS that generates injected health event notifications fails to communicate those events to userspace. The nfit driver neglects to link the ACPI DIMM device with the necessary driver data so acpi_nvdimm_notify() fails this lookup: nfit_mem = dev_get_drvdata(dev); if (nfit_mem && nfit_mem->flags_attr) sysfs_notify_dirent(nfit_mem->flags_attr); Add the necessary linkage when installing the notification handler and clean it up when the nfit driver instance is torn down. Cc: Toshi Kani Cc: Vishal Verma Fixes: ba9c8dd3c222 ("acpi, nfit: add dimm device notification support") Reported-by: Daniel Osawa Tested-by: Daniel Osawa Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index f3bc901ac930..fe03d00de22b 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1390,6 +1390,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, dev_name(&adev_dimm->dev)); return -ENXIO; } + /* + * Record nfit_mem for the notification path to track back to + * the nfit sysfs attributes for this dimm device object. + */ + dev_set_drvdata(&adev_dimm->dev, nfit_mem); /* * Until standardization materializes we need to consider 4 @@ -1446,9 +1451,11 @@ static void shutdown_dimm_notify(void *data) sysfs_put(nfit_mem->flags_attr); nfit_mem->flags_attr = NULL; } - if (adev_dimm) + if (adev_dimm) { acpi_remove_notify_handler(adev_dimm->handle, ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify); + dev_set_drvdata(&adev_dimm->dev, NULL); + } } mutex_unlock(&acpi_desc->init_mutex); } -- GitLab From e81cff1cedefea0fcaf6fed805469eaf9a87192b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 30 Nov 2017 13:39:27 +0100 Subject: [PATCH 0642/1398] crypto: mcryptd - protect the per-CPU queue with a lock commit 9abffc6f2efe46c3564c04312e52e07622d40e51 upstream. mcryptd_enqueue_request() grabs the per-CPU queue struct and protects access to it with disabled preemption. Then it schedules a worker on the same CPU. The worker in mcryptd_queue_worker() guards access to the same per-CPU variable with disabled preemption. If we take CPU-hotplug into account then it is possible that between queue_work_on() and the actual invocation of the worker the CPU goes down and the worker will be scheduled on _another_ CPU. And here the preempt_disable() protection does not work anymore. The easiest thing is to add a spin_lock() to guard access to the list. Another detail: mcryptd_queue_worker() is not processing more than MCRYPTD_BATCH invocation in a row. If there are still items left, then it will invoke queue_work() to proceed with more later. *I* would suggest to simply drop that check because it does not use a system workqueue and the workqueue is already marked as "CPU_INTENSIVE". And if preemption is required then the scheduler should do it. However if queue_work() is used then the work item is marked as CPU unbound. That means it will try to run on the local CPU but it may run on another CPU as well. Especially with CONFIG_DEBUG_WQ_FORCE_RR_CPU=y. Again, the preempt_disable() won't work here but lock which was introduced will help. In order to keep work-item on the local CPU (and avoid RR) I changed it to queue_work_on(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/mcryptd.c | 23 ++++++++++------------- include/crypto/mcryptd.h | 1 + 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index c207458d6299..a14100e74754 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -80,6 +80,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue, pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); + spin_lock_init(&cpu_queue->q_lock); } return 0; } @@ -103,15 +104,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue, int cpu, err; struct mcryptd_cpu_queue *cpu_queue; - cpu = get_cpu(); - cpu_queue = this_cpu_ptr(queue->cpu_queue); - rctx->tag.cpu = cpu; + cpu_queue = raw_cpu_ptr(queue->cpu_queue); + spin_lock(&cpu_queue->q_lock); + cpu = smp_processor_id(); + rctx->tag.cpu = smp_processor_id(); err = crypto_enqueue_request(&cpu_queue->queue, request); pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", cpu, cpu_queue, request); + spin_unlock(&cpu_queue->q_lock); queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); - put_cpu(); return err; } @@ -160,16 +162,11 @@ static void mcryptd_queue_worker(struct work_struct *work) cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); i = 0; while (i < MCRYPTD_BATCH || single_task_running()) { - /* - * preempt_disable/enable is used to prevent - * being preempted by mcryptd_enqueue_request() - */ - local_bh_disable(); - preempt_disable(); + + spin_lock_bh(&cpu_queue->q_lock); backlog = crypto_get_backlog(&cpu_queue->queue); req = crypto_dequeue_request(&cpu_queue->queue); - preempt_enable(); - local_bh_enable(); + spin_unlock_bh(&cpu_queue->q_lock); if (!req) { mcryptd_opportunistic_flush(); @@ -184,7 +181,7 @@ static void mcryptd_queue_worker(struct work_struct *work) ++i; } if (cpu_queue->queue.qlen) - queue_work(kcrypto_wq, &cpu_queue->work); + queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work); } void mcryptd_flusher(struct work_struct *__work) diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h index 4a53c0d38cd2..e045722a69aa 100644 --- a/include/crypto/mcryptd.h +++ b/include/crypto/mcryptd.h @@ -26,6 +26,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast( struct mcryptd_cpu_queue { struct crypto_queue queue; + spinlock_t q_lock; struct work_struct work; }; -- GitLab From 2db85cb211d0e5ce01922f1afec159769b408be7 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 14 Nov 2017 14:43:27 +0000 Subject: [PATCH 0643/1398] mfd: cros ec: spi: Don't send first message too soon commit 15d8374874ded0bec37ef27f8301a6d54032c0e5 upstream. On the Tegra124 Nyan-Big chromebook the very first SPI message sent to the EC is failing. The Tegra SPI driver configures the SPI chip-selects to be active-high by default (and always has for many years). The EC SPI requires an active-low chip-select and so the Tegra chip-select is reconfigured to be active-low when the EC SPI driver calls spi_setup(). The problem is that if the first SPI message to the EC is sent too soon after reconfiguring the SPI chip-select, it fails. The EC SPI driver prevents back-to-back SPI messages being sent too soon by keeping track of the time the last transfer was sent via the variable 'last_transfer_ns'. To prevent the very first transfer being sent too soon, initialise the 'last_transfer_ns' variable after calling spi_setup() and before sending the first SPI message. Signed-off-by: Jon Hunter Reviewed-by: Brian Norris Reviewed-by: Douglas Anderson Acked-by: Benson Leung Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/cros_ec_spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index a518832ed5f5..59dbdaa24c28 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -664,6 +664,7 @@ static int cros_ec_spi_probe(struct spi_device *spi) sizeof(struct ec_response_get_protocol_info); ec_dev->dout_size = sizeof(struct ec_host_request); + ec_spi->last_transfer_ns = ktime_get_ns(); err = cros_ec_register(ec_dev); if (err) { -- GitLab From f4c0796fdc8b09093037f6665efe350e6f81bb41 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 11 Nov 2017 16:38:43 +0100 Subject: [PATCH 0644/1398] mfd: twl4030-audio: Fix sibling-node lookup commit 0a423772de2f3d7b00899987884f62f63ae00dcb upstream. A helper purported to look up a child node based on its name was using the wrong of-helper and ended up prematurely freeing the parent of-node while leaking any matching node. To make things worse, any matching node would not even necessarily be a child node as the whole device tree was searched depth-first starting at the parent. Fixes: 019a7e6b7b31 ("mfd: twl4030-audio: Add DT support") Signed-off-by: Johan Hovold Acked-by: Peter Ujfalusi Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/twl4030-audio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c index 0a1606480023..cc832d309599 100644 --- a/drivers/mfd/twl4030-audio.c +++ b/drivers/mfd/twl4030-audio.c @@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void) EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk); static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata, - struct device_node *node) + struct device_node *parent) { + struct device_node *node; + if (pdata && pdata->codec) return true; - if (of_find_node_by_name(node, "codec")) + node = of_get_child_by_name(parent, "codec"); + if (node) { + of_node_put(node); return true; + } return false; } -- GitLab From becf7d87cda9c1afbe187d30590cd2c749bda3b6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 11 Nov 2017 16:38:44 +0100 Subject: [PATCH 0645/1398] mfd: twl6040: Fix child-node lookup commit 85e9b13cbb130a3209f21bd7933933399c389ffe upstream. Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent node was prematurely freed, while the child node was leaked. Note that the CONFIG_OF compile guard can be removed as of_get_child_by_name() provides a !CONFIG_OF implementation which always fails. Fixes: 37e13cecaa14 ("mfd: Add support for Device Tree to twl6040") Fixes: ca2cad6ae38e ("mfd: Fix twl6040 build failure") Signed-off-by: Johan Hovold Acked-by: Peter Ujfalusi Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/twl6040.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c index d66502d36ba0..dd19f17a1b63 100644 --- a/drivers/mfd/twl6040.c +++ b/drivers/mfd/twl6040.c @@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = { }; -static bool twl6040_has_vibra(struct device_node *node) +static bool twl6040_has_vibra(struct device_node *parent) { -#ifdef CONFIG_OF - if (of_find_node_by_name(node, "vibra")) + struct device_node *node; + + node = of_get_child_by_name(parent, "vibra"); + if (node) { + of_node_put(node); return true; -#endif + } + return false; } -- GitLab From cec92448c58eeb462eaec9677f3b41e61d589178 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 Dec 2017 16:44:12 +0100 Subject: [PATCH 0646/1398] ALSA: rawmidi: Avoid racy info ioctl via ctl device commit c1cfd9025cc394fd137a01159d74335c5ac978ce upstream. The rawmidi also allows to obtaining the information via ioctl of ctl API. It means that user can issue an ioctl to the rawmidi device even when it's being removed as long as the control device is present. Although the code has some protection via the global register_mutex, its range is limited to the search of the corresponding rawmidi object, and the mutex is already unlocked at accessing the rawmidi object. This may lead to a use-after-free. For avoiding it, this patch widens the application of register_mutex to the whole snd_rawmidi_info_select() function. We have another mutex per rawmidi object, but this operation isn't very hot path, so it shouldn't matter from the performance POV. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/rawmidi.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index b450a27588c8..16f8124b1150 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream, return 0; } -int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) +static int __snd_rawmidi_info_select(struct snd_card *card, + struct snd_rawmidi_info *info) { struct snd_rawmidi *rmidi; struct snd_rawmidi_str *pstr; struct snd_rawmidi_substream *substream; - mutex_lock(®ister_mutex); rmidi = snd_rawmidi_search(card, info->device); - mutex_unlock(®ister_mutex); if (!rmidi) return -ENXIO; if (info->stream < 0 || info->stream > 1) @@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info } return -ENXIO; } + +int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) +{ + int ret; + + mutex_lock(®ister_mutex); + ret = __snd_rawmidi_info_select(card, info); + mutex_unlock(®ister_mutex); + return ret; +} EXPORT_SYMBOL(snd_rawmidi_info_select); static int snd_rawmidi_info_select_user(struct snd_card *card, -- GitLab From beab14a3eeb8e52b154db12c1fe802a27e10aa00 Mon Sep 17 00:00:00 2001 From: Jussi Laako Date: Thu, 7 Dec 2017 12:58:33 +0200 Subject: [PATCH 0647/1398] ALSA: usb-audio: Add native DSD support for Esoteric D-05X commit 866f7ed7d67936dcdbcddc111c8af878c918fe7c upstream. Adds VID:PID of Esoteric D-05X to the TEAC device id's. Renames the is_teac_50X_dac() function to is_teac_dsd_dac() to cover broader device family from the same corporation sharing the same USB audio implementation. Signed-off-by: Jussi Laako Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 7613b9e07b5c..1cd7f8b0bf77 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1170,10 +1170,11 @@ static bool is_marantz_denon_dac(unsigned int id) /* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch * between PCM/DOP and native DSD mode */ -static bool is_teac_50X_dac(unsigned int id) +static bool is_teac_dsd_dac(unsigned int id) { switch (id) { case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */ + case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */ return true; } return false; @@ -1206,7 +1207,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs, break; } mdelay(20); - } else if (is_teac_50X_dac(subs->stream->chip->usb_id)) { + } else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) { /* Vendor mode switch cmd is required. */ switch (fmt->altsetting) { case 3: /* DSD mode (DSD_U32) requested */ @@ -1376,7 +1377,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, } /* TEAC devices with USB DAC functionality */ - if (is_teac_50X_dac(chip->usb_id)) { + if (is_teac_dsd_dac(chip->usb_id)) { if (fp->altsetting == 3) return SNDRV_PCM_FMTBIT_DSD_U32_BE; } -- GitLab From 3176065495e17f1b7da86a104bf476b8ec0dcd38 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 18 Dec 2017 23:36:57 +0100 Subject: [PATCH 0648/1398] ALSA: usb-audio: Fix the missing ctl name suffix at parsing SU commit 5a15f289ee87eaf33f13f08a4909ec99d837ec5f upstream. The commit 89b89d121ffc ("ALSA: usb-audio: Add check return value for usb_string()") added the check of the return value from snd_usb_copy_string_desc(), which is correct per se, but it introduced a regression. In the original code, either the "Clock Source", "Playback Source" or "Capture Source" suffix is added after the terminal string, while the commit changed it to add the suffix only when get_term_name() is failing. It ended up with an incorrect ctl name like "PCM" instead of "PCM Capture Source". Also, even the original code has a similar bug: when the ctl name is generated from snd_usb_copy_string_desc() for the given iSelector, it also doesn't put the suffix. This patch addresses these issues: the suffix is added always when no static mapping is found. Also the patch tries to put more comments and cleans up the if/else block for better readability in order to avoid the same pitfall again. Fixes: 89b89d121ffc ("ALSA: usb-audio: Add check return value for usb_string()") Reported-and-tested-by: Mauro Santos Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 24c897f0b571..08015c139116 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2167,20 +2167,25 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, kctl->private_value = (unsigned long)namelist; kctl->private_free = usb_mixer_selector_elem_free; - nameid = uac_selector_unit_iSelector(desc); + /* check the static mapping table at first */ len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name)); - if (len) - ; - else if (nameid) - len = snd_usb_copy_string_desc(state, nameid, kctl->id.name, - sizeof(kctl->id.name)); - else - len = get_term_name(state, &state->oterm, - kctl->id.name, sizeof(kctl->id.name), 0); - if (!len) { - strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name)); + /* no mapping ? */ + /* if iSelector is given, use it */ + nameid = uac_selector_unit_iSelector(desc); + if (nameid) + len = snd_usb_copy_string_desc(state, nameid, + kctl->id.name, + sizeof(kctl->id.name)); + /* ... or pick up the terminal name at next */ + if (!len) + len = get_term_name(state, &state->oterm, + kctl->id.name, sizeof(kctl->id.name), 0); + /* ... or use the fixed string "USB" as the last resort */ + if (!len) + strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name)); + /* and add the proper suffix */ if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR) append_ctl_name(kctl, " Clock Source"); else if ((state->oterm.type & 0xff00) == 0x0100) -- GitLab From 10b4a621f36791bcdca40ee4c513e721de9e3b32 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 15 Dec 2017 03:07:18 +0100 Subject: [PATCH 0649/1398] PCI / PM: Force devices to D0 in pci_pm_thaw_noirq() commit 5839ee7389e893a31e4e3c9cf17b50d14103c902 upstream. It is incorrect to call pci_restore_state() for devices in low-power states (D1-D3), as that involves the restoration of MSI setup which requires MMIO to be operational and that is only the case in D0. However, pci_pm_thaw_noirq() may do that if the driver's "freeze" callbacks put the device into a low-power state, so fix it by making it force devices into D0 via pci_set_power_state() instead of trying to "update" their power state which is pointless. Fixes: e60514bd4485 (PCI/PM: Restore the status of PCI devices across hibernation) Reported-by: Thomas Gleixner Reported-by: Maarten Lankhorst Tested-by: Thomas Gleixner Tested-by: Maarten Lankhorst Signed-off-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-driver.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 8a68e2b554e1..802997e2ddcc 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -953,7 +953,12 @@ static int pci_pm_thaw_noirq(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - pci_update_current_state(pci_dev, PCI_D0); + /* + * pci_restore_state() requires the device to be in D0 (because of MSI + * restoration among other things), so force it into D0 in case the + * driver's "freeze" callbacks put it into a low-power state directly. + */ + pci_set_power_state(pci_dev, PCI_D0); pci_restore_state(pci_dev); if (drv && drv->pm && drv->pm->thaw_noirq) -- GitLab From 373386ec3f70088a87cd0147d97e47cd1b59b51a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 12 Dec 2017 21:52:26 +0100 Subject: [PATCH 0650/1398] parisc: Hide Diva-built-in serial aux and graphics card commit bcf3f1752a622f1372d3252d0fea8855d89812e7 upstream. Diva GSP card has built-in serial AUX port and ATI graphic card which simply don't work and which both don't have external connectors. User Guides even mention that those devices shouldn't be used. So, prevent that Linux drivers try to enable those devices. Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/parisc/lba_pci.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index bc286cbbbc9b..1cced1d039d7 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c @@ -1656,3 +1656,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask) iounmap(base_addr); } + +/* + * The design of the Diva management card in rp34x0 machines (rp3410, rp3440) + * seems rushed, so that many built-in components simply don't work. + * The following quirks disable the serial AUX port and the built-in ATI RV100 + * Radeon 7000 graphics card which both don't have any external connectors and + * thus are useless, and even worse, e.g. the AUX port occupies ttyS0 and as + * such makes those machines the only PARISC machines on which we can't use + * ttyS0 as boot console. + */ +static void quirk_diva_ati_card(struct pci_dev *dev) +{ + if (dev->subsystem_vendor != PCI_VENDOR_ID_HP || + dev->subsystem_device != 0x1292) + return; + + dev_info(&dev->dev, "Hiding Diva built-in ATI card"); + dev->device = 0; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY, + quirk_diva_ati_card); + +static void quirk_diva_aux_disable(struct pci_dev *dev) +{ + if (dev->subsystem_vendor != PCI_VENDOR_ID_HP || + dev->subsystem_device != 0x1291) + return; + + dev_info(&dev->dev, "Hiding Diva built-in AUX serial device"); + dev->device = 0; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX, + quirk_diva_aux_disable); -- GitLab From cb8b2fd1909eac2162ea73abf5c91053a05cf183 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Tue, 21 Nov 2017 10:09:02 +0100 Subject: [PATCH 0651/1398] spi: xilinx: Detect stall with Unknown commands commit 5a1314fa697fc65cefaba64cd4699bfc3e6882a6 upstream. When the core is configured in C_SPI_MODE > 0, it integrates a lookup table that automatically configures the core in dual or quad mode based on the command (first byte on the tx fifo). Unfortunately, that list mode_?_memoy_*.mif does not contain all the supported commands by the flash. Since 4.14 spi-nor automatically tries to probe the flash using SFDP (command 0x5a), and that command is not part of the list_mode table. Whit the right combination of C_SPI_MODE and C_SPI_MEMORY this leads into a stall that can only be recovered with a soft rest. This patch detects this kind of stall and returns -EIO to the caller on those commands. spi-nor can handle this error properly: m25p80 spi0.0: Detected stall. Check C_SPI_MODE and C_SPI_MEMORY. 0x21 0x2404 m25p80 spi0.0: SPI transfer failed: -5 spi_master spi0: failed to transfer one message from queue m25p80 spi0.0: s25sl064p (8192 Kbytes) Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-xilinx.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index bc7100b93dfc..e0b9fe1d0e37 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) while (remaining_words) { int n_words, tx_words, rx_words; u32 sr; + int stalled; n_words = min(remaining_words, xspi->buffer_size); @@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) /* Read out all the data from the Rx FIFO */ rx_words = n_words; + stalled = 10; while (rx_words) { + if (rx_words == n_words && !(stalled--) && + !(sr & XSPI_SR_TX_EMPTY_MASK) && + (sr & XSPI_SR_RX_EMPTY_MASK)) { + dev_err(&spi->dev, + "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n"); + xspi_init_hw(xspi); + return -EIO; + } + if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) { xilinx_spi_rx(xspi); rx_words--; -- GitLab From 418dfce4fa630d9f9355451e583285045d612d9b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 4 Dec 2017 12:11:02 +0300 Subject: [PATCH 0652/1398] pinctrl: cherryview: Mask all interrupts on Intel_Strago based systems commit d2b3c353595a855794f8b9df5b5bdbe8deb0c413 upstream. Guenter Roeck reported an interrupt storm on a prototype system which is based on Cyan Chromebook. The root cause turned out to be a incorrectly configured pin that triggers spurious interrupts. This will be fixed in coreboot but currently we need to prevent the interrupt storm from happening by masking all interrupts (but not GPEs) on those systems. Link: https://bugzilla.kernel.org/show_bug.cgi?id=197953 Fixes: bcb48cca23ec ("pinctrl: cherryview: Do not mask all interrupts in probe") Reported-and-tested-by: Guenter Roeck Reported-by: Dmitry Torokhov Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-cherryview.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 0d34d8a4ab14..e8c08eb97530 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1594,6 +1594,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) clear_bit(i, chip->irq_valid_mask); } + /* + * The same set of machines in chv_no_valid_mask[] have incorrectly + * configured GPIOs that generate spurious interrupts so we use + * this same list to apply another quirk for them. + * + * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953. + */ + if (!need_valid_mask) { + /* + * Mask all interrupts the community is able to generate + * but leave the ones that can only generate GPEs unmasked. + */ + chv_writel(GENMASK(31, pctrl->community->nirqs), + pctrl->regs + CHV_INTMASK); + } + /* Clear all interrupts */ chv_writel(0xffff, pctrl->regs + CHV_INTSTAT); -- GitLab From e5c73b3b60e1b8d645749e0bdc93104ae6fa01f5 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 7 Dec 2017 00:30:08 -0800 Subject: [PATCH 0653/1398] KVM: X86: Fix load RFLAGS w/o the fixed bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d73235d17ba63b53dc0e1051dbc10a1f1be91b71 upstream. *** Guest State *** CR0: actual=0x0000000000000030, shadow=0x0000000060000010, gh_mask=fffffffffffffff7 CR4: actual=0x0000000000002050, shadow=0x0000000000000000, gh_mask=ffffffffffffe871 CR3 = 0x00000000fffbc000 RSP = 0x0000000000000000 RIP = 0x0000000000000000 RFLAGS=0x00000000 DR7 = 0x0000000000000400 ^^^^^^^^^^ The failed vmentry is triggered by the following testcase when ept=Y: #include #include #include #include #include #include #include long r[5]; int main() { r[2] = open("/dev/kvm", O_RDONLY); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7); struct kvm_regs regs = { .rflags = 0, }; ioctl(r[4], KVM_SET_REGS, ®s); ioctl(r[4], KVM_RUN, 0); } X86 RFLAGS bit 1 is fixed set, userspace can simply clearing bit 1 of RFLAGS with KVM_SET_REGS ioctl which results in vmentry fails. This patch fixes it by oring X86_EFLAGS_FIXED during ioctl. Suggested-by: Jim Mattson Reviewed-by: David Hildenbrand Reviewed-by: Quan Xu Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Jim Mattson Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f4d893713d54..7e28e6c877d9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7132,7 +7132,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) #endif kvm_rip_write(vcpu, regs->rip); - kvm_set_rflags(vcpu, regs->rflags); + kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED); vcpu->arch.exception.pending = false; -- GitLab From 18276e9bcd49d5d4bcbdbf41901a9dd996fdb1a7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 21 Dec 2017 00:49:14 +0100 Subject: [PATCH 0654/1398] kvm: x86: fix RSM when PCID is non-zero commit fae1a3e775cca8c3a9e0eb34443b310871a15a92 upstream. rsm_load_state_64() and rsm_enter_protected_mode() load CR3, then CR4 & ~PCIDE, then CR0, then CR4. However, setting CR4.PCIDE fails if CR3[11:0] != 0. It's probably easier in the long run to replace rsm_enter_protected_mode() with an emulator callback that sets all the special registers (like KVM_SET_SREGS would do). For now, set the PCID field of CR3 only after CR4.PCIDE is 1. Reported-by: Laszlo Ersek Tested-by: Laszlo Ersek Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 72b737b8c9d6..c8f8dd8ca0a1 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2395,9 +2395,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) } static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, - u64 cr0, u64 cr4) + u64 cr0, u64 cr3, u64 cr4) { int bad; + u64 pcid; + + /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */ + pcid = 0; + if (cr4 & X86_CR4_PCIDE) { + pcid = cr3 & 0xfff; + cr3 &= ~0xfff; + } + + bad = ctxt->ops->set_cr(ctxt, 3, cr3); + if (bad) + return X86EMUL_UNHANDLEABLE; /* * First enable PAE, long mode needs it before CR0.PG = 1 is set. @@ -2416,6 +2428,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, bad = ctxt->ops->set_cr(ctxt, 4, cr4); if (bad) return X86EMUL_UNHANDLEABLE; + if (pcid) { + bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid); + if (bad) + return X86EMUL_UNHANDLEABLE; + } + } return X86EMUL_CONTINUE; @@ -2426,11 +2444,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) struct desc_struct desc; struct desc_ptr dt; u16 selector; - u32 val, cr0, cr4; + u32 val, cr0, cr3, cr4; int i; cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); - ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8)); + cr3 = GET_SMSTATE(u32, smbase, 0x7ff8); ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); @@ -2472,14 +2490,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); - return rsm_enter_protected_mode(ctxt, cr0, cr4); + return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); } static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) { struct desc_struct desc; struct desc_ptr dt; - u64 val, cr0, cr4; + u64 val, cr0, cr3, cr4; u32 base3; u16 selector; int i, r; @@ -2496,7 +2514,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); cr0 = GET_SMSTATE(u64, smbase, 0x7f58); - ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50)); + cr3 = GET_SMSTATE(u64, smbase, 0x7f50); cr4 = GET_SMSTATE(u64, smbase, 0x7f48); ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); val = GET_SMSTATE(u64, smbase, 0x7ed0); @@ -2524,7 +2542,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) dt.address = GET_SMSTATE(u64, smbase, 0x7e68); ctxt->ops->set_gdt(ctxt, &dt); - r = rsm_enter_protected_mode(ctxt, cr0, cr4); + r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); if (r != X86EMUL_CONTINUE) return r; -- GitLab From 2635a64d0e94636c8600487d4424f8655502c3f2 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 18 Dec 2017 11:57:51 +0800 Subject: [PATCH 0655/1398] clk: sunxi: sun9i-mmc: Implement reset callback for reset controls commit 61d2f2a05765a5f57149efbd93e3e81a83cbc2c1 upstream. Our MMC host driver now issues a reset, instead of just deasserting the reset control, since commit c34eda69ad4c ("mmc: sunxi: Reset the device at probe time"). The sun9i-mmc clock driver does not support this, and will fail, which results in MMC not probing. This patch implements the reset callback by asserting the reset control, then deasserting it after a small delay. Fixes: 7a6fca879f59 ("clk: sunxi: Add driver for A80 MMC config clocks/resets") Signed-off-by: Chen-Yu Tsai Acked-by: Philipp Zabel Acked-by: Maxime Ripard Signed-off-by: Michael Turquette Link: lkml.kernel.org/r/20171218035751.20661-1-wens@csie.org Signed-off-by: Greg Kroah-Hartman --- drivers/clk/sunxi/clk-sun9i-mmc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c index 6041bdba2e97..f69f9e8c6f38 100644 --- a/drivers/clk/sunxi/clk-sun9i-mmc.c +++ b/drivers/clk/sunxi/clk-sun9i-mmc.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev, return 0; } +static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev, + unsigned long id) +{ + sun9i_mmc_reset_assert(rcdev, id); + udelay(10); + sun9i_mmc_reset_deassert(rcdev, id); + + return 0; +} + static const struct reset_control_ops sun9i_mmc_reset_ops = { .assert = sun9i_mmc_reset_assert, .deassert = sun9i_mmc_reset_deassert, + .reset = sun9i_mmc_reset_reset, }; static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev) -- GitLab From 77b318a4e55836c44429c7c8396847b67d5aaa7f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 12 Dec 2017 17:59:15 +0530 Subject: [PATCH 0656/1398] powerpc/perf: Dereference BHRB entries safely commit f41d84dddc66b164ac16acf3f584c276146f1c48 upstream. It's theoretically possible that branch instructions recorded in BHRB (Branch History Rolling Buffer) entries have already been unmapped before they are processed by the kernel. Hence, trying to dereference such memory location will result in a crash. eg: Unable to handle kernel paging request for data at address 0xd000000019c41764 Faulting instruction address: 0xc000000000084a14 NIP [c000000000084a14] branch_target+0x4/0x70 LR [c0000000000eb828] record_and_restart+0x568/0x5c0 Call Trace: [c0000000000eb3b4] record_and_restart+0xf4/0x5c0 (unreliable) [c0000000000ec378] perf_event_interrupt+0x298/0x460 [c000000000027964] performance_monitor_exception+0x54/0x70 [c000000000009ba4] performance_monitor_common+0x114/0x120 Fix it by deferefencing the addresses safely. Fixes: 691231846ceb ("powerpc/perf: Fix setting of "to" addresses for BHRB") Suggested-by: Naveen N. Rao Signed-off-by: Ravi Bangoria Reviewed-by: Naveen N. Rao [mpe: Use probe_kernel_read() which is clearer, tweak change log] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/core-book3s.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 72c27b8d2cf3..083f92746951 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -401,8 +401,12 @@ static __u64 power_pmu_bhrb_to(u64 addr) int ret; __u64 target; - if (is_kernel_addr(addr)) - return branch_target((unsigned int *)addr); + if (is_kernel_addr(addr)) { + if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + return 0; + + return branch_target(&instr); + } /* Userspace: need copy instruction here then translate it */ pagefault_disable(); -- GitLab From 423716cf28157287b4c7be21474aea796ba39b51 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 19 Dec 2017 15:07:10 -0800 Subject: [PATCH 0657/1398] libnvdimm, pfn: fix start_pad handling for aligned namespaces commit 19deaa217bc04e83b59b5e8c8229eb0e53ad9efc upstream. The alignment checks at pfn driver startup fail to properly account for the 'start_pad' in the case where the namespace is misaligned relative to its internal alignment. This is typically triggered in 1G aligned namespace, but could theoretically trigger with small namespace alignments. When this triggers the kernel reports messages of the form: dax2.1: bad offset: 0x3c000000 dax disabled align: 0x40000000 Fixes: 1ee6667cd8d1 ("libnvdimm, pfn, dax: fix initialization vs autodetect...") Reported-by: Jane Chu Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/pfn_devs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 71eb6c637b60..42abdd2391c9 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -352,9 +352,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region) int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { u64 checksum, offset; - unsigned long align; enum nd_pfn_mode mode; struct nd_namespace_io *nsio; + unsigned long align, start_pad; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; struct nd_namespace_common *ndns = nd_pfn->ndns; const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); @@ -398,6 +398,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) align = le32_to_cpu(pfn_sb->align); offset = le64_to_cpu(pfn_sb->dataoff); + start_pad = le32_to_cpu(pfn_sb->start_pad); if (align == 0) align = 1UL << ilog2(offset); mode = le32_to_cpu(pfn_sb->mode); @@ -456,7 +457,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -EBUSY; } - if ((align && !IS_ALIGNED(offset, align)) + if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align)) || !IS_ALIGNED(offset, PAGE_SIZE)) { dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled align: %#lx\n", -- GitLab From 405f3d7946fdebf71f81552fead04951c37946b2 Mon Sep 17 00:00:00 2001 From: Yelena Krivosheev Date: Tue, 19 Dec 2017 17:59:45 +0100 Subject: [PATCH 0658/1398] net: mvneta: clear interface link status on port disable commit 4423c18e466afdfb02a36ee8b9f901d144b3c607 upstream. When port connect to PHY in polling mode (with poll interval 1 sec), port and phy link status must be synchronize in order don't loss link change event. [gregory.clement@free-electrons.com: add fixes tag] Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Signed-off-by: Yelena Krivosheev Tested-by: Dmitri Epshtein Signed-off-by: Gregory CLEMENT Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 6ea10a9f33e8..2e2fb2d9f604 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1182,6 +1182,10 @@ static void mvneta_port_disable(struct mvneta_port *pp) val &= ~MVNETA_GMAC0_PORT_ENABLE; mvreg_write(pp, MVNETA_GMAC_CTRL_0, val); + pp->link = 0; + pp->duplex = -1; + pp->speed = 0; + udelay(200); } -- GitLab From a57f99f484e51f27db87e38dfbcecec6fd3cf689 Mon Sep 17 00:00:00 2001 From: Yelena Krivosheev Date: Tue, 19 Dec 2017 17:59:46 +0100 Subject: [PATCH 0659/1398] net: mvneta: use proper rxq_number in loop on rx queues commit ca5902a6547f662419689ca28b3c29a772446caa upstream. When adding the RX queue association with each CPU, a typo was made in the mvneta_cleanup_rxqs() function. This patch fixes it. [gregory.clement@free-electrons.com: add commit log and fixes tag] Fixes: 2dcf75e2793c ("net: mvneta: Associate RX queues with each CPU") Signed-off-by: Yelena Krivosheev Tested-by: Dmitri Epshtein Signed-off-by: Gregory CLEMENT Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 2e2fb2d9f604..0264b21aa640 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2926,7 +2926,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp) { int queue; - for (queue = 0; queue < txq_number; queue++) + for (queue = 0; queue < rxq_number; queue++) mvneta_rxq_deinit(pp, &pp->rxqs[queue]); } -- GitLab From 69cf72b2879167364d2a97211fbb26ca0e374bcf Mon Sep 17 00:00:00 2001 From: Yelena Krivosheev Date: Tue, 19 Dec 2017 17:59:47 +0100 Subject: [PATCH 0660/1398] net: mvneta: eliminate wrong call to handle rx descriptor error commit 2eecb2e04abb62ef8ea7b43e1a46bdb5b99d1bf8 upstream. There are few reasons in mvneta_rx_swbm() function when received packet is dropped. mvneta_rx_error() should be called only if error bit [16] is set in rx descriptor. [gregory.clement@free-electrons.com: add fixes tag] Fixes: dc35a10f68d3 ("net: mvneta: bm: add support for hardware buffer management") Signed-off-by: Yelena Krivosheev Tested-by: Dmitri Epshtein Signed-off-by: Gregory CLEMENT Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 0264b21aa640..fa463268d019 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1909,9 +1909,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo, if (!mvneta_rxq_desc_is_first_last(rx_status) || (rx_status & MVNETA_RXD_ERR_SUMMARY)) { + mvneta_rx_error(pp, rx_desc); err_drop_frame: dev->stats.rx_errors++; - mvneta_rx_error(pp, rx_desc); /* leave the descriptor untouched */ continue; } -- GitLab From 37435f7e80ef9adc32a69013c18f135e3f434244 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 23 Dec 2017 02:26:17 +0000 Subject: [PATCH 0661/1398] bpf/verifier: Fix states_equal() comparison of pointer and UNKNOWN An UNKNOWN_VALUE is not supposed to be derived from a pointer, unless pointer leaks are allowed. Therefore, states_equal() must not treat a state with a pointer in a register as "equal" to a state with an UNKNOWN_VALUE in that register. This was fixed differently upstream, but the code around here was largely rewritten in 4.14 by commit f1174f77b50c "bpf/verifier: rework value tracking". The bug can be detected by the bpf/verifier sub-test "pointer/scalar confusion in state equality check (way 1)". Signed-off-by: Ben Hutchings Cc: Edward Cree Cc: Jann Horn Cc: Alexei Starovoitov Cc: Daniel Borkmann --- kernel/bpf/verifier.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8b1ebe4c6aba..d7eeebfafe8d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2722,11 +2722,12 @@ static bool states_equal(struct bpf_verifier_env *env, /* If we didn't map access then again we don't care about the * mismatched range values and it's ok if our old type was - * UNKNOWN and we didn't go to a NOT_INIT'ed reg. + * UNKNOWN and we didn't go to a NOT_INIT'ed or pointer reg. */ if (rold->type == NOT_INIT || (!varlen_map_access && rold->type == UNKNOWN_VALUE && - rcur->type != NOT_INIT)) + rcur->type != NOT_INIT && + !__is_pointer_value(env->allow_ptr_leaks, rcur))) continue; /* Don't care about the reg->id in this case. */ -- GitLab From b3e88217e2f95b004da89a0ff931e1dc45d3d094 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 29 Dec 2017 17:43:00 +0100 Subject: [PATCH 0662/1398] Linux 4.9.73 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 78dde51d9d74..64eb0bf614ee 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 72 +SUBLEVEL = 73 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 3d16a1315add12386894e7dc7bc86454e4f4b79d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 31 Dec 2017 14:13:42 +0100 Subject: [PATCH 0663/1398] sync objtool's copy of x86-opcode-map.txt When building objtool, we get the warning: warning: objtool: x86 instruction decoder differs from kernel That's due to commit 2816c0455cea088f07a210f8a00701a82a78aa9c which was commit 12a78d43de767eaf8fb272facb7a7b6f2dc6a9df upstream that modified arch/x86/lib/x86-opcode-map.txt without also updating the objtool copy. The objtool copy was updated in a much larger patch upstream, but we don't need all of that here, so just update the single file. If this gets too annoying, I'll just end up doing what we did for 4.14 and backport the whole series to keep this from happening again, but as this seems to be rare in the 4.9-stable series, this single patch should be fine. Cc: Masami Hiramatsu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/arch/x86/insn/x86-opcode-map.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt index 767be7c76034..1754e094bc28 100644 --- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt +++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt @@ -896,7 +896,7 @@ EndTable GrpTable: Grp3_1 0: TEST Eb,Ib -1: +1: TEST Eb,Ib 2: NOT Eb 3: NEG Eb 4: MUL AL,Eb -- GitLab From 6edea15d120c6e18b6383969e35d6a9683b31d2c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 22 Dec 2017 20:38:57 -0500 Subject: [PATCH 0664/1398] tracing: Remove extra zeroing out of the ring buffer page commit 6b7e633fe9c24682df550e5311f47fb524701586 upstream. The ring_buffer_read_page() takes care of zeroing out any extra data in the page that it returns. There's no need to zero it out again from the consumer. It was removed from one consumer of this function, but read_buffers_splice_read() did not remove it, and worse, it contained a nasty bug because of it. Fixes: 2711ca237a084 ("ring-buffer: Move zeroing out excess in page to ring buffer code") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4214cd960b8e..a0ad5a2ad656 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6181,7 +6181,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, .spd_release = buffer_spd_release, }; struct buffer_ref *ref; - int entries, size, i; + int entries, i; ssize_t ret = 0; #ifdef CONFIG_TRACER_MAX_TRACE @@ -6232,14 +6232,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, break; } - /* - * zero out any left over data, this is going to - * user land. - */ - size = ring_buffer_page_len(ref->page); - if (size < PAGE_SIZE) - memset(ref->page + size, 0, PAGE_SIZE - size); - page = virt_to_page(ref->page); spd.pages[i] = page; -- GitLab From 5dc4cd2688e39b0286e9a4b815c9bf5e71b5a685 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 26 Dec 2017 20:07:34 -0500 Subject: [PATCH 0665/1398] tracing: Fix possible double free on failure of allocating trace buffer commit 4397f04575c44e1440ec2e49b6302785c95fd2f8 upstream. Jing Xia and Chunyan Zhang reported that on failing to allocate part of the tracing buffer, memory is freed, but the pointers that point to them are not initialized back to NULL, and later paths may try to free the freed memory again. Jing and Chunyan fixed one of the locations that does this, but missed a spot. Link: http://lkml.kernel.org/r/20171226071253.8968-1-chunyan.zhang@spreadtrum.com Fixes: 737223fbca3b1 ("tracing: Consolidate buffer allocation code") Reported-by: Jing Xia Reported-by: Chunyan Zhang Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a0ad5a2ad656..bbc117d73d19 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6955,6 +6955,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size buf->data = alloc_percpu(struct trace_array_cpu); if (!buf->data) { ring_buffer_free(buf->buffer); + buf->buffer = NULL; return -ENOMEM; } -- GitLab From 81e155e7b023dd8967ddead36edf8a6a0e2ba72b Mon Sep 17 00:00:00 2001 From: Jing Xia Date: Tue, 26 Dec 2017 15:12:53 +0800 Subject: [PATCH 0666/1398] tracing: Fix crash when it fails to alloc ring buffer commit 24f2aaf952ee0b59f31c3a18b8b36c9e3d3c2cf5 upstream. Double free of the ring buffer happens when it fails to alloc new ring buffer instance for max_buffer if TRACER_MAX_TRACE is configured. The root cause is that the pointer is not set to NULL after the buffer is freed in allocate_trace_buffers(), and the freeing of the ring buffer is invoked again later if the pointer is not equal to Null, as: instance_mkdir() |-allocate_trace_buffers() |-allocate_trace_buffer(tr, &tr->trace_buffer...) |-allocate_trace_buffer(tr, &tr->max_buffer...) // allocate fail(-ENOMEM),first free // and the buffer pointer is not set to null |-ring_buffer_free(tr->trace_buffer.buffer) // out_free_tr |-free_trace_buffers() |-free_trace_buffer(&tr->trace_buffer); //if trace_buffer is not null, free again |-ring_buffer_free(buf->buffer) |-rb_free_cpu_buffer(buffer->buffers[cpu]) // ring_buffer_per_cpu is null, and // crash in ring_buffer_per_cpu->pages Link: http://lkml.kernel.org/r/20171226071253.8968-1-chunyan.zhang@spreadtrum.com Fixes: 737223fbca3b1 ("tracing: Consolidate buffer allocation code") Signed-off-by: Jing Xia Signed-off-by: Chunyan Zhang Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bbc117d73d19..15b02645ce8b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6979,7 +6979,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) allocate_snapshot ? size : 1); if (WARN_ON(ret)) { ring_buffer_free(tr->trace_buffer.buffer); + tr->trace_buffer.buffer = NULL; free_percpu(tr->trace_buffer.data); + tr->trace_buffer.data = NULL; return -ENOMEM; } tr->allocated_snapshot = allocate_snapshot; -- GitLab From 2e0d458c315ab2bee425212725aae841e0f6dd6a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 22 Dec 2017 20:32:35 -0500 Subject: [PATCH 0667/1398] ring-buffer: Mask out the info bits when returning buffer page length commit 45d8b80c2ac5d21cd1e2954431fb676bc2b1e099 upstream. Two info bits were added to the "commit" part of the ring buffer data page when returned to be consumed. This was to inform the user space readers that events have been missed, and that the count may be stored at the end of the page. What wasn't handled, was the splice code that actually called a function to return the length of the data in order to zero out the rest of the page before sending it up to user space. These data bits were returned with the length making the value negative, and that negative value was not checked. It was compared to PAGE_SIZE, and only used if the size was less than PAGE_SIZE. Luckily PAGE_SIZE is unsigned long which made the compare an unsigned compare, meaning the negative size value did not end up causing a large portion of memory to be randomly zeroed out. Fixes: 66a8cb95ed040 ("ring-buffer: Add place holder recording of dropped events") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f5c016e8fc88..3e1d11f4fe44 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); /* Missed count stored at end */ #define RB_MISSED_STORED (1 << 30) +#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED) + struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ @@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage) */ size_t ring_buffer_page_len(void *page) { - return local_read(&((struct buffer_data_page *)page)->commit) + struct buffer_data_page *bpage = page; + + return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS) + BUF_PAGE_HDR_SIZE; } -- GitLab From 72d5e020c0ef77ba86f2a5ff37620b65ba7d5daf Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 18 Dec 2017 13:10:00 -0800 Subject: [PATCH 0668/1398] iw_cxgb4: Only validate the MSN for successful completions commit f55688c45442bc863f40ad678c638785b26cdce6 upstream. If the RECV CQE is in error, ignore the MSN check. This was causing recvs that were flushed into the sw cq to be completed with the wrong status (BAD_MSN instead of FLUSHED). Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/cxgb4/cq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 19c6477af19f..a856371bbe58 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -575,10 +575,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, ret = -EAGAIN; goto skip_cqe; } - if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) { + if (unlikely(!CQE_STATUS(hw_cqe) && + CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) { t4_set_wq_in_error(wq); - hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN)); - goto proc_cqe; + hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN)); } goto proc_cqe; } -- GitLab From 125e81b5afcb50dc48c7c2058631e8fd2b10a1ba Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 8 Dec 2017 16:15:20 +0000 Subject: [PATCH 0669/1398] ASoC: wm_adsp: Fix validation of firmware and coeff lengths commit 50dd2ea8ef67a1617e0c0658bcbec4b9fb03b936 upstream. The checks for whether another region/block header could be present are subtracting the size from the current offset. Obviously we should instead subtract the offset from the size. The checks for whether the region/block data fit in the file are adding the data size to the current offset and header size, without checking for integer overflow. Rearrange these so that overflow is impossible. Signed-off-by: Ben Hutchings Acked-by: Charles Keepax Tested-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm_adsp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 757af795cebd..c03c9da076c2 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1465,7 +1465,7 @@ static int wm_adsp_load(struct wm_adsp *dsp) le64_to_cpu(footer->timestamp)); while (pos < firmware->size && - pos - firmware->size > sizeof(*region)) { + sizeof(*region) < firmware->size - pos) { region = (void *)&(firmware->data[pos]); region_name = "Unknown"; reg = 0; @@ -1526,8 +1526,8 @@ static int wm_adsp_load(struct wm_adsp *dsp) regions, le32_to_cpu(region->len), offset, region_name); - if ((pos + le32_to_cpu(region->len) + sizeof(*region)) > - firmware->size) { + if (le32_to_cpu(region->len) > + firmware->size - pos - sizeof(*region)) { adsp_err(dsp, "%s.%d: %s region len %d bytes exceeds file length %zu\n", file, regions, region_name, @@ -1992,7 +1992,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) blocks = 0; while (pos < firmware->size && - pos - firmware->size > sizeof(*blk)) { + sizeof(*blk) < firmware->size - pos) { blk = (void *)(&firmware->data[pos]); type = le16_to_cpu(blk->type); @@ -2066,8 +2066,8 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) } if (reg) { - if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) > - firmware->size) { + if (le32_to_cpu(blk->len) > + firmware->size - pos - sizeof(*blk)) { adsp_err(dsp, "%s.%d: %s region len %d bytes exceeds file length %zu\n", file, blocks, region_name, -- GitLab From 35f87d45cba354936430f93feba7b46edaf35e5c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Nov 2017 12:12:55 +0100 Subject: [PATCH 0670/1398] ASoC: da7218: fix fix child-node lookup commit bc6476d6c1edcb9b97621b5131bd169aa81f27db upstream. Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent codec node was also prematurely freed. Fixes: 4d50934abd22 ("ASoC: da7218: Add da7218 codec driver") Signed-off-by: Johan Hovold Acked-by: Adam Thomson Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/da7218.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/da7218.c b/sound/soc/codecs/da7218.c index c69e97654fc6..f88632426c0a 100644 --- a/sound/soc/codecs/da7218.c +++ b/sound/soc/codecs/da7218.c @@ -2519,7 +2519,7 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec) } if (da7218->dev_id == DA7218_DEV_ID) { - hpldet_np = of_find_node_by_name(np, "da7218_hpldet"); + hpldet_np = of_get_child_by_name(np, "da7218_hpldet"); if (!hpldet_np) return pdata; -- GitLab From 00add00ed2c0d59aceaf83981afc00228f357387 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 20 Nov 2017 23:14:55 +0100 Subject: [PATCH 0671/1398] ASoC: fsl_ssi: AC'97 ops need regmap, clock and cleaning up on failure commit 695b78b548d8a26288f041e907ff17758df9e1d5 upstream. AC'97 ops (register read / write) need SSI regmap and clock, so they have to be set after them. We also need to set these ops back to NULL if we fail the probe. Signed-off-by: Maciej S. Szmigiero Acked-by: Nicolin Chen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/fsl/fsl_ssi.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index fde08660b63b..1c03490e1182 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -1467,12 +1467,6 @@ static int fsl_ssi_probe(struct platform_device *pdev) sizeof(fsl_ssi_ac97_dai)); fsl_ac97_data = ssi_private; - - ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); - if (ret) { - dev_err(&pdev->dev, "could not set AC'97 ops\n"); - return ret; - } } else { /* Initialize this copy of the CPU DAI driver structure */ memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template, @@ -1583,6 +1577,14 @@ static int fsl_ssi_probe(struct platform_device *pdev) return ret; } + if (fsl_ssi_is_ac97(ssi_private)) { + ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); + if (ret) { + dev_err(&pdev->dev, "could not set AC'97 ops\n"); + goto error_ac97_ops; + } + } + ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component, &ssi_private->cpu_dai_drv, 1); if (ret) { @@ -1666,6 +1668,10 @@ static int fsl_ssi_probe(struct platform_device *pdev) fsl_ssi_debugfs_remove(&ssi_private->dbg_stats); error_asoc_register: + if (fsl_ssi_is_ac97(ssi_private)) + snd_soc_set_ac97_ops(NULL); + +error_ac97_ops: if (ssi_private->soc->imx) fsl_ssi_imx_clean(pdev, ssi_private); -- GitLab From b04640a450d3306eb350ef758336ce7385471646 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Nov 2017 12:12:56 +0100 Subject: [PATCH 0672/1398] ASoC: twl4030: fix child-node lookup commit 15f8c5f2415bfac73f33a14bcd83422bcbfb5298 upstream. Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent codec node was also prematurely freed, while the child node was leaked. Fixes: 2d6d649a2e0f ("ASoC: twl4030: Support for DT booted kernel") Signed-off-by: Johan Hovold Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/twl4030.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index a2104d68169d..26fd6a664b9b 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -232,7 +232,7 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec) struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev); struct device_node *twl4030_codec_node = NULL; - twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node, + twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node, "codec"); if (!pdata && twl4030_codec_node) { @@ -241,9 +241,11 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec) GFP_KERNEL); if (!pdata) { dev_err(codec->dev, "Can not allocate memory\n"); + of_node_put(twl4030_codec_node); return NULL; } twl4030_setup_pdata_of(pdata, twl4030_codec_node); + of_node_put(twl4030_codec_node); } return pdata; -- GitLab From d30d1761bc433d5ab5b620129b0efa3d4a7c6e52 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 15:32:46 -0600 Subject: [PATCH 0673/1398] ASoC: tlv320aic31xx: Fix GPIO1 register definition commit 737e0b7b67bdfe24090fab2852044bb283282fc5 upstream. GPIO1 control register is number 51, fix this here. Fixes: bafcbfe429eb ("ASoC: tlv320aic31xx: Make the register values human readable") Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/tlv320aic31xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h index 5acd5b69fb83..f9b6c5a81b47 100644 --- a/sound/soc/codecs/tlv320aic31xx.h +++ b/sound/soc/codecs/tlv320aic31xx.h @@ -115,7 +115,7 @@ struct aic31xx_pdata { /* INT2 interrupt control */ #define AIC31XX_INT2CTRL AIC31XX_REG(0, 49) /* GPIO1 control */ -#define AIC31XX_GPIO1 AIC31XX_REG(0, 50) +#define AIC31XX_GPIO1 AIC31XX_REG(0, 51) #define AIC31XX_DACPRB AIC31XX_REG(0, 60) /* ADC Instruction Set Register */ -- GitLab From a1dbcd823a302fa90766ca4eb80374031d4dea72 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 22 Dec 2017 10:45:07 +0100 Subject: [PATCH 0674/1398] ALSA: hda: Drop useless WARN_ON() commit a36c2638380c0a4676647a1f553b70b20d3ebce1 upstream. Since the commit 97cc2ed27e5a ("ALSA: hda - Fix yet another i915 pointer leftover in error path") cleared hdac_acomp pointer, the WARN_ON() non-NULL check in snd_hdac_i915_register_notifier() may give a false-positive warning, as the function gets called no matter whether the component is registered or not. For fixing it, let's get rid of the spurious WARN_ON(). Fixes: 97cc2ed27e5a ("ALSA: hda - Fix yet another i915 pointer leftover in error path") Reported-by: Kouta Okamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/hda/hdac_i915.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index c9af022676c2..47c3e97c3136 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -319,7 +319,7 @@ static int hdac_component_master_match(struct device *dev, void *data) */ int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops) { - if (WARN_ON(!hdac_acomp)) + if (!hdac_acomp) return -ENODEV; hdac_acomp->audio_ops = aops; -- GitLab From 65ca46e5fe123ebd5128f0af1eaaef35251d4bc4 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 22 Dec 2017 11:17:45 +0800 Subject: [PATCH 0675/1398] ALSA: hda - fix headset mic detection issue on a Dell machine commit 285d5ddcffafa5d5e68c586f4c9eaa8b24a2897d upstream. It has the codec alc256, and add its pin definition to pin quirk table to let it apply ALC255_FIXUP_DELL1_MIC_NO_PRESENCE. Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ba40596b9d92..4ef3b0067876 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5971,6 +5971,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x1b, 0x01011020}, {0x21, 0x02211010}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x14, 0x90170110}, + {0x1b, 0x01011020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, -- GitLab From 72b812d5b874893070f81afa2d4b4ccc7f0efa5f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 22 Apr 2017 00:01:19 -0700 Subject: [PATCH 0676/1398] x86/vm86/32: Switch to flush_tlb_mm_range() in mark_screen_rdonly() commit 9ccee2373f0658f234727700e619df097ba57023 upstream. mark_screen_rdonly() is the last remaining caller of flush_tlb(). flush_tlb_mm_range() is potentially faster and isn't obsolete. Compile-tested only because I don't know whether software that uses this mechanism even exists. Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Michal Hocko Cc: Nadav Amit Cc: Peter Zijlstra Cc: Rik van Riel Cc: Sasha Levin Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/791a644076fc3577ba7f7b7cafd643cc089baa7d.1492844372.git.luto@kernel.org Signed-off-by: Ingo Molnar Cc: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/vm86_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 01f30e56f99e..4b3012888ada 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -191,7 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) pte_unmap_unlock(pte, ptl); out: up_write(&mm->mmap_sem); - flush_tlb(); + flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, 0UL); } -- GitLab From 219acedb061c56a46e5cf562cb072cd2815a1263 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 22 Apr 2017 00:01:20 -0700 Subject: [PATCH 0677/1398] x86/mm: Remove flush_tlb() and flush_tlb_current_task() commit 29961b59a51f8c6838a26a45e871a7ed6771809b upstream. I was trying to figure out what how flush_tlb_current_task() would possibly work correctly if current->mm != current->active_mm, but I realized I could spare myself the effort: it has no callers except the unused flush_tlb() macro. Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Michal Hocko Cc: Nadav Amit Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/e52d64c11690f85e9f1d69d7b48cc2269cd2e94b.1492844372.git.luto@kernel.org Signed-off-by: Ingo Molnar Cc: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/tlbflush.h | 9 --------- arch/x86/mm/tlb.c | 17 ----------------- 2 files changed, 26 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index fc5abff9b7fd..8e7ae9e6c59a 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -205,7 +205,6 @@ static inline void __flush_tlb_one(unsigned long addr) /* * TLB flushing: * - * - flush_tlb() flushes the current mm struct TLBs * - flush_tlb_all() flushes all processes TLBs * - flush_tlb_mm(mm) flushes the specified mm context TLB's * - flush_tlb_page(vma, vmaddr) flushes one page @@ -237,11 +236,6 @@ static inline void flush_tlb_all(void) __flush_tlb_all(); } -static inline void flush_tlb(void) -{ - __flush_tlb_up(); -} - static inline void local_flush_tlb(void) { __flush_tlb_up(); @@ -303,14 +297,11 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags) extern void flush_tlb_all(void); -extern void flush_tlb_current_task(void); extern void flush_tlb_page(struct vm_area_struct *, unsigned long); extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); -#define flush_tlb() flush_tlb_current_task() - void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long start, unsigned long end); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 75fb01109f94..30ed9a5a667e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -287,23 +287,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask, smp_call_function_many(cpumask, flush_tlb_func, &info, 1); } -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - - preempt_disable(); - - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - - /* This is an implicit full barrier that synchronizes with switch_mm. */ - local_flush_tlb(); - - trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); - if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); - preempt_enable(); -} - /* * See Documentation/x86/tlb.txt for details. We choose 33 * because it is large enough to cover the vast majority (at -- GitLab From 113980c002eb092a1f947389c933740193ada766 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 22 Apr 2017 00:01:21 -0700 Subject: [PATCH 0678/1398] x86/mm: Make flush_tlb_mm_range() more predictable commit ce27374fabf553153c3f53efcaa9bfab9216bd8c upstream. I'm about to rewrite the function almost completely, but first I want to get a functional change out of the way. Currently, if flush_tlb_mm_range() does not flush the local TLB at all, it will never do individual page flushes on remote CPUs. This seems to be an accident, and preserving it will be awkward. Let's change it first so that any regressions in the rewrite will be easier to bisect and so that the rewrite can attempt to change no visible behavior at all. The fix is simple: we can simply avoid short-circuiting the calculation of base_pages_to_flush. As a side effect, this also eliminates a potential corner case: if tlb_single_page_flush_ceiling == TLB_FLUSH_ALL, flush_tlb_mm_range() could have ended up flushing the entire address space one page at a time. Signed-off-by: Andy Lutomirski Acked-by: Dave Hansen Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Michal Hocko Cc: Nadav Amit Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4b29b771d9975aad7154c314534fec235618175a.1492844372.git.luto@kernel.org Signed-off-by: Ingo Molnar Cc: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/tlb.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 30ed9a5a667e..6e7bedf69af7 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -307,6 +307,12 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long base_pages_to_flush = TLB_FLUSH_ALL; preempt_disable(); + + if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) + base_pages_to_flush = (end - start) >> PAGE_SHIFT; + if (base_pages_to_flush > tlb_single_page_flush_ceiling) + base_pages_to_flush = TLB_FLUSH_ALL; + if (current->active_mm != mm) { /* Synchronize with switch_mm. */ smp_mb(); @@ -323,15 +329,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, goto out; } - if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) - base_pages_to_flush = (end - start) >> PAGE_SHIFT; - /* * Both branches below are implicit full barriers (MOV to CR or * INVLPG) that synchronize with switch_mm. */ - if (base_pages_to_flush > tlb_single_page_flush_ceiling) { - base_pages_to_flush = TLB_FLUSH_ALL; + if (base_pages_to_flush == TLB_FLUSH_ALL) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); } else { -- GitLab From a94af050080f3301eecb4e7f7bbe59899568446d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 22 May 2017 15:30:01 -0700 Subject: [PATCH 0679/1398] x86/mm: Reimplement flush_tlb_page() using flush_tlb_mm_range() commit ca6c99c0794875c6d1db6e22f246699691ab7e6b upstream. flush_tlb_page() was very similar to flush_tlb_mm_range() except that it had a couple of issues: - It was missing an smp_mb() in the case where current->active_mm != mm. (This is a longstanding bug reported by Nadav Amit) - It was missing tracepoints and vm counter updates. The only reason that I can see for keeping it at as a separate function is that it could avoid a few branches that flush_tlb_mm_range() needs to decide to flush just one page. This hardly seems worthwhile. If we decide we want to get rid of those branches again, a better way would be to introduce an __flush_tlb_mm_range() helper and make both flush_tlb_page() and flush_tlb_mm_range() use it. Signed-off-by: Andy Lutomirski Acked-by: Kees Cook Cc: Andrew Morton Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Mel Gorman Cc: Michal Hocko Cc: Nadav Amit Cc: Nadav Amit Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/3cc3847cf888d8907577569b8bac3f01992ef8f9.1495492063.git.luto@kernel.org Signed-off-by: Ingo Molnar Cc: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/tlbflush.h | 6 +++++- arch/x86/mm/tlb.c | 27 --------------------------- 2 files changed, 5 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 8e7ae9e6c59a..abcd615ea27e 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -297,11 +297,15 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags) extern void flush_tlb_all(void); -extern void flush_tlb_page(struct vm_area_struct *, unsigned long); extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) +{ + flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE); +} + void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long start, unsigned long end); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 6e7bedf69af7..fe6471132ea3 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -354,33 +354,6 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, preempt_enable(); } -void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) -{ - struct mm_struct *mm = vma->vm_mm; - - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) { - /* - * Implicit full barrier (INVLPG) that synchronizes - * with switch_mm. - */ - __flush_tlb_one(start); - } else { - leave_mm(smp_processor_id()); - - /* Synchronize with switch_mm. */ - smp_mb(); - } - } - - if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE); - - preempt_enable(); -} - static void do_flush_tlb_all(void *info) { count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); -- GitLab From 3e5daacf65173987436bad6ab9039a05f9545cdd Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 28 May 2017 10:00:14 -0700 Subject: [PATCH 0680/1398] x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code commit ce4a4e565f5264909a18c733b864c3f74467f69e upstream. The UP asm/tlbflush.h generates somewhat nicer code than the SMP version. Aside from that, it's fallen quite a bit behind the SMP code: - flush_tlb_mm_range() didn't flush individual pages if the range was small. - The lazy TLB code was much weaker. This usually wouldn't matter, but, if a kernel thread flushed its lazy "active_mm" more than once (due to reclaim or similar), it wouldn't be unlazied and would instead pointlessly flush repeatedly. - Tracepoints were missing. Aside from that, simply having the UP code around was a maintanence burden, since it means that any change to the TLB flush code had to make sure not to break it. Simplify everything by deleting the UP code. Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Mel Gorman Cc: Michal Hocko Cc: Nadav Amit Cc: Nadav Amit Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar Cc: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/hardirq.h | 2 +- arch/x86/include/asm/mmu.h | 6 --- arch/x86/include/asm/mmu_context.h | 2 - arch/x86/include/asm/tlbflush.h | 78 +----------------------------- arch/x86/mm/init.c | 2 - arch/x86/mm/tlb.c | 17 +------ 7 files changed, 5 insertions(+), 104 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b9c546a305a4..da8156fd3d58 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -45,7 +45,7 @@ config X86 select ARCH_USE_CMPXCHG_LOCKREF if X86_64 select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS - select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP + select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION if X86_32 diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 59405a248fc2..9b76cd331990 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -22,8 +22,8 @@ typedef struct { #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; - unsigned int irq_tlb_count; #endif + unsigned int irq_tlb_count; #ifdef CONFIG_X86_THERMAL_VECTOR unsigned int irq_thermal_count; #endif diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 72198c64e646..8b272a08d1a8 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -33,12 +33,6 @@ typedef struct { #endif } mm_context_t; -#ifdef CONFIG_SMP void leave_mm(int cpu); -#else -static inline void leave_mm(int cpu) -{ -} -#endif #endif /* _ASM_X86_MMU_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index f9dd22469388..d23e35584f15 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -99,10 +99,8 @@ static inline void load_mm_ldt(struct mm_struct *mm) static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { -#ifdef CONFIG_SMP if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); -#endif } static inline int init_new_context(struct task_struct *tsk, diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index abcd615ea27e..12dedd6c9e42 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -7,6 +7,7 @@ #include #include #include +#include static inline void __invpcid(unsigned long pcid, unsigned long addr, unsigned long type) @@ -65,10 +66,8 @@ static inline void invpcid_flush_all_nonglobals(void) #endif struct tlb_state { -#ifdef CONFIG_SMP struct mm_struct *active_mm; int state; -#endif /* * Access to this CR4 shadow and to H/W CR4 is protected by @@ -216,79 +215,6 @@ static inline void __flush_tlb_one(unsigned long addr) * and page-granular flushes are available only on i486 and up. */ -#ifndef CONFIG_SMP - -/* "_up" is for UniProcessor. - * - * This is a helper for other header functions. *Not* intended to be called - * directly. All global TLB flushes need to either call this, or to bump the - * vm statistics themselves. - */ -static inline void __flush_tlb_up(void) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - __flush_tlb(); -} - -static inline void flush_tlb_all(void) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - __flush_tlb_all(); -} - -static inline void local_flush_tlb(void) -{ - __flush_tlb_up(); -} - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - if (mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb_one(addr); -} - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void flush_tlb_mm_range(struct mm_struct *mm, - unsigned long start, unsigned long end, unsigned long vmflag) -{ - if (mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long start, - unsigned long end) -{ -} - -static inline void reset_lazy_tlbstate(void) -{ -} - -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - flush_tlb_all(); -} - -#else /* SMP */ - -#include - #define local_flush_tlb() __flush_tlb() #define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL) @@ -319,8 +245,6 @@ static inline void reset_lazy_tlbstate(void) this_cpu_write(cpu_tlbstate.active_mm, &init_mm); } -#endif /* SMP */ - #ifndef CONFIG_PARAVIRT #define flush_tlb_others(mask, mm, start, end) \ native_flush_tlb_others(mask, mm, start, end) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 889e7619a091..0381638168d1 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -764,10 +764,8 @@ void __init zone_sizes_init(void) } DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { -#ifdef CONFIG_SMP .active_mm = &init_mm, .state = 0, -#endif .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ }; EXPORT_SYMBOL_GPL(cpu_tlbstate); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index fe6471132ea3..53b72fb4e781 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -15,7 +15,7 @@ #include /* - * Smarter SMP flushing macros. + * TLB flushing, formerly SMP-only * c/o Linus Torvalds. * * These mean you can really definitely utterly forget about @@ -28,8 +28,6 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ -#ifdef CONFIG_SMP - struct flush_tlb_info { struct mm_struct *flush_mm; unsigned long flush_start; @@ -59,8 +57,6 @@ void leave_mm(int cpu) } EXPORT_SYMBOL_GPL(leave_mm); -#endif /* CONFIG_SMP */ - void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -91,10 +87,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, set_pgd(pgd, init_mm.pgd[stack_pgd_index]); } -#ifdef CONFIG_SMP this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); this_cpu_write(cpu_tlbstate.active_mm, next); -#endif cpumask_set_cpu(cpu, mm_cpumask(next)); @@ -152,9 +146,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (unlikely(prev->context.ldt != next->context.ldt)) load_mm_ldt(next); #endif - } -#ifdef CONFIG_SMP - else { + } else { this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next); @@ -181,11 +173,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, load_mm_ldt(next); } } -#endif } -#ifdef CONFIG_SMP - /* * The flush IPI assumes that a thread switch happens in this order: * [cpu0: the cpu that switches] @@ -438,5 +427,3 @@ static int __init create_tlb_single_page_flush_ceiling(void) return 0; } late_initcall(create_tlb_single_page_flush_ceiling); - -#endif /* CONFIG_SMP */ -- GitLab From 1e7f3d8875ee92983bd754982af7f4f044dd057d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 29 Jun 2017 08:53:19 -0700 Subject: [PATCH 0681/1398] x86/mm: Disable PCID on 32-bit kernels commit cba4671af7550e008f7a7835f06df0763825bf3e upstream. 32-bit kernels on new hardware will see PCID in CPUID, but PCID can only be used in 64-bit mode. Rather than making all PCID code conditional, just disable the feature on 32-bit builds. Signed-off-by: Andy Lutomirski Reviewed-by: Nadav Amit Reviewed-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Rik van Riel Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/2e391769192a4d31b808410c383c6bf0734bc6ea.1498751203.git.luto@kernel.org Signed-off-by: Ingo Molnar Cc: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/disabled-features.h | 4 +++- arch/x86/kernel/cpu/bugs.c | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 85599ad4d024..21c5ac15657b 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -21,11 +21,13 @@ # define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) # define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) # define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) +# define DISABLE_PCID 0 #else # define DISABLE_VME 0 # define DISABLE_K6_MTRR 0 # define DISABLE_CYRIX_ARR 0 # define DISABLE_CENTAUR_MCR 0 +# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) #endif /* CONFIG_X86_64 */ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS @@ -43,7 +45,7 @@ #define DISABLED_MASK1 0 #define DISABLED_MASK2 0 #define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) -#define DISABLED_MASK4 0 +#define DISABLED_MASK4 (DISABLE_PCID) #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 #define DISABLED_MASK7 0 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bd17db15a2c1..0b6124315441 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -19,6 +19,14 @@ void __init check_bugs(void) { +#ifdef CONFIG_X86_32 + /* + * Regardless of whether PCID is enumerated, the SDM says + * that it can't be enabled in 32-bit mode. + */ + setup_clear_cpu_cap(X86_FEATURE_PCID); +#endif + identify_boot_cpu(); #ifndef CONFIG_SMP pr_info("CPU: "); -- GitLab From e6a29320de005792c06f2deec2c77b19c42d1e6a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 29 Jun 2017 08:53:20 -0700 Subject: [PATCH 0682/1398] x86/mm: Add the 'nopcid' boot option to turn off PCID commit 0790c9aad84901ca1bdc14746175549c8b5da215 upstream. The parameter is only present on x86_64 systems to save a few bytes, as PCID is always disabled on x86_32. Signed-off-by: Andy Lutomirski Reviewed-by: Nadav Amit Reviewed-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Rik van Riel Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/8bbb2e65bcd249a5f18bfb8128b4689f08ac2b60.1498751203.git.luto@kernel.org Signed-off-by: Ingo Molnar Cc: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 2 ++ arch/x86/kernel/cpu/common.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 86a6746f6833..152ec4e87b57 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2795,6 +2795,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nopat [X86] Disable PAT (page attribute table extension of pagetables) support. + nopcid [X86-64] Disable the PCID cpu feature. + norandmaps Don't use address space randomization. Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4eece91ada37..81c8a535175d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -163,6 +163,24 @@ static int __init x86_mpx_setup(char *s) } __setup("nompx", x86_mpx_setup); +#ifdef CONFIG_X86_64 +static int __init x86_pcid_setup(char *s) +{ + /* require an exact match without trailing characters */ + if (strlen(s)) + return 0; + + /* do not emit a message if the feature is not present */ + if (!boot_cpu_has(X86_FEATURE_PCID)) + return 1; + + setup_clear_cpu_cap(X86_FEATURE_PCID); + pr_info("nopcid: PCID feature disabled\n"); + return 1; +} +__setup("nopcid", x86_pcid_setup); +#endif + static int __init x86_noinvpcid_setup(char *s) { /* noinvpcid doesn't accept parameters */ -- GitLab From b52f937eccd4c68000ba80cd03609bcf1f97c141 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 29 Jun 2017 08:53:21 -0700 Subject: [PATCH 0683/1398] x86/mm: Enable CR4.PCIDE on supported systems commit 660da7c9228f685b2ebe664f9fd69aaddcc420b5 upstream. We can use PCID if the CPU has PCID and PGE and we're not on Xen. By itself, this has no effect. A followup patch will start using PCID. Signed-off-by: Andy Lutomirski Reviewed-by: Nadav Amit Reviewed-by: Boris Ostrovsky Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dave Hansen Cc: Juergen Gross Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Rik van Riel Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/6327ecd907b32f79d5aa0d466f04503bbec5df88.1498751203.git.luto@kernel.org Signed-off-by: Ingo Molnar Cc: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/tlbflush.h | 8 ++++++++ arch/x86/kernel/cpu/common.c | 22 ++++++++++++++++++++++ arch/x86/xen/enlighten.c | 6 ++++++ 3 files changed, 36 insertions(+) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 12dedd6c9e42..7d2ea6b1f7d9 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -191,6 +191,14 @@ static inline void __flush_tlb_all(void) __flush_tlb_global(); else __flush_tlb(); + + /* + * Note: if we somehow had PCID but not PGE, then this wouldn't work -- + * we'd end up flushing kernel translations for the current ASID but + * we might fail to flush kernel translations for other cached ASIDs. + * + * To avoid this issue, we force PCID off if PGE is off. + */ } static inline void __flush_tlb_one(unsigned long addr) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 81c8a535175d..91588be529b9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -324,6 +324,25 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) } } +static void setup_pcid(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PCID)) { + if (cpu_has(c, X86_FEATURE_PGE)) { + cr4_set_bits(X86_CR4_PCIDE); + } else { + /* + * flush_tlb_all(), as currently implemented, won't + * work if PCID is on but PGE is not. Since that + * combination doesn't exist on real hardware, there's + * no reason to try to fully support it, but it's + * polite to avoid corrupting data if we're on + * an improperly configured VM. + */ + clear_cpu_cap(c, X86_FEATURE_PCID); + } + } +} + /* * Protection Keys are not available in 32-bit mode. */ @@ -1082,6 +1101,9 @@ static void identify_cpu(struct cpuinfo_x86 *c) setup_smep(c); setup_smap(c); + /* Set up PCID */ + setup_pcid(c); + /* * The vendor-specific functions might have changed features. * Now we do "generic changes." diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 8f1f7efa848c..2bea87cc0ff2 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -444,6 +444,12 @@ static void __init xen_init_cpuid_mask(void) ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */ (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + /* + * Xen PV would need some work to support PCID: CR3 handling as well + * as xen_flush_tlb_others() would need updating. + */ + cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_PCID % 32)); /* disable PCID */ + if (!xen_initial_domain()) cpuid_leaf1_edx_mask &= ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */ -- GitLab From 04bdf71d9f7412364e9bae93f321e6bf776cd1d4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 8 Oct 2017 21:53:05 -0700 Subject: [PATCH 0684/1398] x86/mm/64: Fix reboot interaction with CR4.PCIDE commit 924c6b900cfdf376b07bccfd80e62b21914f8a5a upstream. Trying to reboot via real mode fails with PCID on: long mode cannot be exited while CR4.PCIDE is set. (No, I have no idea why, but the SDM and actual CPUs are in agreement here.) The result is a GPF and a hang instead of a reboot. I didn't catch this in testing because neither my computer nor my VM reboots this way. I can trigger it with reboot=bios, though. Fixes: 660da7c9228f ("x86/mm: Enable CR4.PCIDE on supported systems") Reported-and-tested-by: Steven Rostedt (VMware) Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Link: https://lkml.kernel.org/r/f1e7d965998018450a7a70c2823873686a8b21c0.1507524746.git.luto@kernel.org Cc: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/reboot.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 067f9813fd2c..ce020a69bba9 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -106,6 +106,10 @@ void __noreturn machine_real_restart(unsigned int type) load_cr3(initial_page_table); #else write_cr3(real_mode_header->trampoline_pgd); + + /* Exiting long mode will fail if CR4.PCIDE is set. */ + if (static_cpu_has(X86_FEATURE_PCID)) + cr4_clear_bits(X86_CR4_PCIDE); #endif /* Jump to the identity-mapped low memory code */ -- GitLab From b929ccccbc8c22736d1fd722da181e275465f5a6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 29 Dec 2017 17:34:43 -0800 Subject: [PATCH 0685/1398] kbuild: add '-fno-stack-check' to kernel build options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3ce120b16cc548472f80cf8644f90eda958cf1b6 upstream. It appears that hardened gentoo enables "-fstack-check" by default for gcc. That doesn't work _at_all_ for the kernel, because the kernel stack doesn't act like a user stack at all: it's much smaller, and it doesn't auto-expand on use. So the extra "probe one page below the stack" code generated by -fstack-check just breaks the kernel in horrible ways, causing infinite double faults etc. [ I have to say, that the particular code gcc generates looks very stupid even for user space where it works, but that's a separate issue. ] Reported-and-tested-by: Alexander Tsoy Reported-and-tested-by: Toralf Förster Cc: Dave Hansen Cc: Jiri Kosina Cc: Andy Lutomirski Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 64eb0bf614ee..dc9012a6d14b 100644 --- a/Makefile +++ b/Makefile @@ -788,6 +788,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) +# Make sure -fstack-check isn't enabled (like gentoo apparently did) +KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,) + # conserve stack if available KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) -- GitLab From c2f78bf8ca3aa18e51f1de41093db5adae2c5f34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Dec 2017 07:17:39 -0800 Subject: [PATCH 0686/1398] ipv4: igmp: guard against silly MTU values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b5476022bbada3764609368f03329ca287528dc8 ] IPv4 stack reacts to changes to small MTU, by disabling itself under RTNL. But there is a window where threads not using RTNL can see a wrong device mtu. This can lead to surprises, in igmp code where it is assumed the mtu is suitable. Fix this by reading device mtu once and checking IPv4 minimal MTU. This patch adds missing IPV4_MIN_MTU define, to not abuse ETH_MIN_MTU anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip.h | 2 ++ net/ipv4/devinet.c | 2 +- net/ipv4/igmp.c | 24 +++++++++++++++--------- net/ipv4/ip_tunnel.c | 4 ++-- 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 51c6b9786c46..0e3dcd5a134d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -33,6 +33,8 @@ #include #include +#define IPV4_MIN_MTU 68 /* RFC 791 */ + struct sock; struct inet_skb_parm { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 062a67ca9a21..f08f984ebc56 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1380,7 +1380,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) static bool inetdev_valid_mtu(unsigned int mtu) { - return mtu >= 68; + return mtu >= IPV4_MIN_MTU; } static void inetdev_send_gratuitous_arp(struct net_device *dev, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 08575e3bd135..49e070425502 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -404,16 +404,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) } static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, - int type, struct igmpv3_grec **ppgr) + int type, struct igmpv3_grec **ppgr, unsigned int mtu) { struct net_device *dev = pmc->interface->dev; struct igmpv3_report *pih; struct igmpv3_grec *pgr; - if (!skb) - skb = igmpv3_newpack(dev, dev->mtu); - if (!skb) - return NULL; + if (!skb) { + skb = igmpv3_newpack(dev, mtu); + if (!skb) + return NULL; + } pgr = (struct igmpv3_grec *)skb_put(skb, sizeof(struct igmpv3_grec)); pgr->grec_type = type; pgr->grec_auxwords = 0; @@ -436,12 +437,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, struct igmpv3_grec *pgr = NULL; struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; int scount, stotal, first, isquery, truncate; + unsigned int mtu; if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) return skb; + mtu = READ_ONCE(dev->mtu); + if (mtu < IPV4_MIN_MTU) + return skb; + isquery = type == IGMPV3_MODE_IS_INCLUDE || type == IGMPV3_MODE_IS_EXCLUDE; truncate = type == IGMPV3_MODE_IS_EXCLUDE || @@ -462,7 +468,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) igmpv3_sendpack(skb); - skb = igmpv3_newpack(dev, dev->mtu); + skb = igmpv3_newpack(dev, mtu); } } first = 1; @@ -498,12 +504,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, pgr->grec_nsrcs = htons(scount); if (skb) igmpv3_sendpack(skb); - skb = igmpv3_newpack(dev, dev->mtu); + skb = igmpv3_newpack(dev, mtu); first = 1; scount = 0; } if (first) { - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); first = 0; } if (!skb) @@ -538,7 +544,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, igmpv3_sendpack(skb); skb = NULL; /* add_grhead will get a new one */ } - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); } } if (pgr) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index bd7f1836bb70..96536a0d6e2d 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -346,8 +346,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev) dev->needed_headroom = t_hlen + hlen; mtu -= (dev->hard_header_len + t_hlen); - if (mtu < 68) - mtu = 68; + if (mtu < IPV4_MIN_MTU) + mtu = IPV4_MIN_MTU; return mtu; } -- GitLab From f6d7cdbb028506040b951768565892a4eff0a330 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Dec 2017 07:03:38 -0800 Subject: [PATCH 0687/1398] ipv6: mcast: better catch silly mtu values [ Upstream commit b9b312a7a451e9c098921856e7cfbc201120e1a7 ] syzkaller reported crashes in IPv6 stack [1] Xin Long found that lo MTU was set to silly values. IPv6 stack reacts to changes to small MTU, by disabling itself under RTNL. But there is a window where threads not using RTNL can see a wrong device mtu. This can lead to surprises, in mld code where it is assumed the mtu is suitable. Fix this by reading device mtu once and checking IPv6 minimal MTU. [1] skbuff: skb_over_panic: text:0000000010b86b8d len:196 put:20 head:000000003b477e60 data:000000000e85441e tail:0xd4 end:0xc0 dev:lo ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:104! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.15.0-rc2-mm1+ #39 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:skb_panic+0x15c/0x1f0 net/core/skbuff.c:100 RSP: 0018:ffff8801db307508 EFLAGS: 00010286 RAX: 0000000000000082 RBX: ffff8801c517e840 RCX: 0000000000000000 RDX: 0000000000000082 RSI: 1ffff1003b660e61 RDI: ffffed003b660e95 RBP: ffff8801db307570 R08: 1ffff1003b660e23 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff85bd4020 R13: ffffffff84754ed2 R14: 0000000000000014 R15: ffff8801c4e26540 FS: 0000000000000000(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000463610 CR3: 00000001c6698000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_over_panic net/core/skbuff.c:109 [inline] skb_put+0x181/0x1c0 net/core/skbuff.c:1694 add_grhead.isra.24+0x42/0x3b0 net/ipv6/mcast.c:1695 add_grec+0xa55/0x1060 net/ipv6/mcast.c:1817 mld_send_cr net/ipv6/mcast.c:1903 [inline] mld_ifc_timer_expire+0x4d2/0x770 net/ipv6/mcast.c:2448 call_timer_fn+0x23b/0x840 kernel/time/timer.c:1320 expire_timers kernel/time/timer.c:1357 [inline] __run_timers+0x7e1/0xb60 kernel/time/timer.c:1660 run_timer_softirq+0x4c/0xb0 kernel/time/timer.c:1686 __do_softirq+0x29d/0xbb2 kernel/softirq.c:285 invoke_softirq kernel/softirq.c:365 [inline] irq_exit+0x1d3/0x210 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:540 [inline] smp_apic_timer_interrupt+0x16b/0x700 arch/x86/kernel/apic/apic.c:1052 apic_timer_interrupt+0xa9/0xb0 arch/x86/entry/entry_64.S:920 Signed-off-by: Eric Dumazet Reported-by: syzbot Tested-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/mcast.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 1bdc703cb966..ca8fac6e5a09 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1682,16 +1682,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel) } static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, - int type, struct mld2_grec **ppgr) + int type, struct mld2_grec **ppgr, unsigned int mtu) { - struct net_device *dev = pmc->idev->dev; struct mld2_report *pmr; struct mld2_grec *pgr; - if (!skb) - skb = mld_newpack(pmc->idev, dev->mtu); - if (!skb) - return NULL; + if (!skb) { + skb = mld_newpack(pmc->idev, mtu); + if (!skb) + return NULL; + } pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec)); pgr->grec_type = type; pgr->grec_auxwords = 0; @@ -1714,10 +1714,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, struct mld2_grec *pgr = NULL; struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; int scount, stotal, first, isquery, truncate; + unsigned int mtu; if (pmc->mca_flags & MAF_NOREPORT) return skb; + mtu = READ_ONCE(dev->mtu); + if (mtu < IPV6_MIN_MTU) + return skb; + isquery = type == MLD2_MODE_IS_INCLUDE || type == MLD2_MODE_IS_EXCLUDE; truncate = type == MLD2_MODE_IS_EXCLUDE || @@ -1738,7 +1743,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) mld_sendpack(skb); - skb = mld_newpack(idev, dev->mtu); + skb = mld_newpack(idev, mtu); } } first = 1; @@ -1774,12 +1779,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, pgr->grec_nsrcs = htons(scount); if (skb) mld_sendpack(skb); - skb = mld_newpack(idev, dev->mtu); + skb = mld_newpack(idev, mtu); first = 1; scount = 0; } if (first) { - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); first = 0; } if (!skb) @@ -1814,7 +1819,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, mld_sendpack(skb); skb = NULL; /* add_grhead will get a new one */ } - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); } } if (pgr) -- GitLab From 930882f8b83105eb6e4827bf87b38ecaa47c404a Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Fri, 22 Dec 2017 17:12:09 +0800 Subject: [PATCH 0688/1398] net: fec: unmap the xmit buffer that are not transferred by DMA [ Upstream commit 178e5f57a8d8f8fc5799a624b96fc31ef9a29ffa ] The enet IP only support 32 bit, it will use swiotlb buffer to do dma mapping when xmit buffer DMA memory address is bigger than 4G in i.MX platform. After stress suspend/resume test, it will print out: log: [12826.352864] fec 5b040000.ethernet: swiotlb buffer is full (sz: 191 bytes) [12826.359676] DMA: Out of SW-IOMMU space for 191 bytes at device 5b040000.ethernet [12826.367110] fec 5b040000.ethernet eth0: Tx DMA memory map failed The issue is that the ready xmit buffers that are dma mapped but DMA still don't copy them into fifo, once MAC restart, these DMA buffers are not unmapped. So it should check the dma mapping buffer and unmap them. Signed-off-by: Fugang Duan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/fec_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 849b8712ec81..60f686684c4c 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -813,6 +813,12 @@ static void fec_enet_bd_init(struct net_device *dev) for (i = 0; i < txq->bd.ring_size; i++) { /* Initialize the BD for every fragment in the page. */ bdp->cbd_sc = cpu_to_fec16(0); + if (bdp->cbd_bufaddr && + !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); if (txq->tx_skbuff[i]) { dev_kfree_skb_any(txq->tx_skbuff[i]); txq->tx_skbuff[i] = NULL; -- GitLab From 2c1a0b2e2bac6a35121d9ea9547b4b66d6f1380b Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Mon, 11 Dec 2017 11:13:45 -0800 Subject: [PATCH 0689/1398] net: igmp: Use correct source address on IGMPv3 reports [ Upstream commit a46182b00290839fa3fa159d54fd3237bd8669f0 ] Closing a multicast socket after the final IPv4 address is deleted from an interface can generate a membership report that uses the source IP from a different interface. The following test script, run from an isolated netns, reproduces the issue: #!/bin/bash ip link add dummy0 type dummy ip link add dummy1 type dummy ip link set dummy0 up ip link set dummy1 up ip addr add 10.1.1.1/24 dev dummy0 ip addr add 192.168.99.99/24 dev dummy1 tcpdump -U -i dummy0 & socat EXEC:"sleep 2" \ UDP4-DATAGRAM:239.101.1.68:8889,ip-add-membership=239.0.1.68:10.1.1.1 & sleep 1 ip addr del 10.1.1.1/24 dev dummy0 sleep 5 kill %tcpdump RFC 3376 specifies that the report must be sent with a valid IP source address from the destination subnet, or from address 0.0.0.0. Add an extra check to make sure this is the case. Signed-off-by: Kevin Cernekee Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/igmp.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 49e070425502..7bff0c65046f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include @@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) return scount; } +/* source address selection per RFC 3376 section 4.2.13 */ +static __be32 igmpv3_get_srcaddr(struct net_device *dev, + const struct flowi4 *fl4) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (!in_dev) + return htonl(INADDR_ANY); + + for_ifa(in_dev) { + if (inet_ifa_match(fl4->saddr, ifa)) + return fl4->saddr; + } endfor_ifa(in_dev); + + return htonl(INADDR_ANY); +} + static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) { struct sk_buff *skb; @@ -368,7 +386,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) pip->frag_off = htons(IP_DF); pip->ttl = 1; pip->daddr = fl4.daddr; - pip->saddr = fl4.saddr; + pip->saddr = igmpv3_get_srcaddr(dev, &fl4); pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ ip_select_ident(net, skb, NULL); -- GitLab From 0b18782288a2f1c2a25e85d2553c15ea83bb5802 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Wed, 6 Dec 2017 12:12:27 -0800 Subject: [PATCH 0690/1398] netlink: Add netns check on taps [ Upstream commit 93c647643b48f0131f02e45da3bd367d80443291 ] Currently, a nlmon link inside a child namespace can observe systemwide netlink activity. Filter the traffic so that nlmon can only sniff netlink messages from its own netns. Test case: vpnns -- bash -c "ip link add nlmon0 type nlmon; \ ip link set nlmon0 up; \ tcpdump -i nlmon0 -q -w /tmp/nlmon.pcap -U" & sudo ip xfrm state add src 10.1.1.1 dst 10.1.1.2 proto esp \ spi 0x1 mode transport \ auth sha1 0x6162633132330000000000000000000000000000 \ enc aes 0x00000000000000000000000000000000 grep --binary abc123 /tmp/nlmon.pcap Signed-off-by: Kevin Cernekee Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1ff497bd9c20..e1c123d4cdda 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -261,6 +261,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, struct sock *sk = skb->sk; int ret = -ENOMEM; + if (!net_eq(dev_net(dev), sock_net(sk))) + return 0; + dev_hold(dev); if (is_vmalloc_addr(skb->head)) -- GitLab From 8baa58c5d536462977907773d351d656b8b6dd1e Mon Sep 17 00:00:00 2001 From: Sebastian Sjoholm Date: Mon, 11 Dec 2017 21:51:14 +0100 Subject: [PATCH 0691/1398] net: qmi_wwan: add Sierra EM7565 1199:9091 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aceef61ee56898cfa7b6960fb60b9326c3860441 ] Sierra Wireless EM7565 is an Qualcomm MDM9x50 based M.2 modem. The USB id is added to qmi_wwan.c to allow QMI communication with the EM7565. Signed-off-by: Sebastian Sjoholm Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 105fbfb47e3a..db65d9ad4488 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -907,6 +907,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9091, 8)}, /* Sierra Wireless EM7565 */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ -- GitLab From b3b56038bab017847ac9e1c5610fc00567b51d00 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 20 Dec 2017 12:10:21 -0800 Subject: [PATCH 0692/1398] net: reevalulate autoflowlabel setting after sysctl setting [ Upstream commit 513674b5a2c9c7a67501506419da5c3c77ac6f08 ] sysctl.ip6.auto_flowlabels is default 1. In our hosts, we set it to 2. If sockopt doesn't set autoflowlabel, outcome packets from the hosts are supposed to not include flowlabel. This is true for normal packet, but not for reset packet. The reason is ipv6_pinfo.autoflowlabel is set in sock creation. Later if we change sysctl.ip6.auto_flowlabels, the ipv6_pinfo.autoflowlabel isn't changed, so the sock will keep the old behavior in terms of auto flowlabel. Reset packet is suffering from this problem, because reset packet is sent from a special control socket, which is created at boot time. Since sysctl.ipv6.auto_flowlabels is 1 by default, the control socket will always have its ipv6_pinfo.autoflowlabel set, even after user set sysctl.ipv6.auto_flowlabels to 1, so reset packset will always have flowlabel. Normal sock created before sysctl setting suffers from the same issue. We can't even turn off autoflowlabel unless we kill all socks in the hosts. To fix this, if IPV6_AUTOFLOWLABEL sockopt is used, we use the autoflowlabel setting from user, otherwise we always call ip6_default_np_autolabel() which has the new settings of sysctl. Note, this changes behavior a little bit. Before commit 42240901f7c4 (ipv6: Implement different admin modes for automatic flow labels), the autoflowlabel behavior of a sock isn't sticky, eg, if sysctl changes, existing connection will change autoflowlabel behavior. After that commit, autoflowlabel behavior is sticky in the whole life of the sock. With this patch, the behavior isn't sticky again. Cc: Martin KaFai Lau Cc: Eric Dumazet Cc: Tom Herbert Signed-off-by: Shaohua Li Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/ipv6.h | 3 ++- net/ipv6/af_inet6.c | 1 - net/ipv6/ip6_output.c | 12 ++++++++++-- net/ipv6/ipv6_sockglue.c | 1 + 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a0649973ee5b..b9dfca557a6c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -246,7 +246,8 @@ struct ipv6_pinfo { * 100: prefer care-of address */ dontfrag:1, - autoflowlabel:1; + autoflowlabel:1, + autoflowlabel_set:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8285a1c108c9..5cad76f87536 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -209,7 +209,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk)); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6e01c9a8dfd3..506efba33a89 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -156,6 +156,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } +static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +{ + if (!np->autoflowlabel_set) + return ip6_default_np_autolabel(net); + else + return np->autoflowlabel; +} + /* * xmit an sk_buff (used by TCP, SCTP and DCCP) * Note : socket lock is not held for SYNACK packets, but might be modified @@ -219,7 +227,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - np->autoflowlabel, fl6)); + ip6_autoflowlabel(net, np), fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -1691,7 +1699,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - np->autoflowlabel, fl6)); + ip6_autoflowlabel(net, np), fl6)); hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 38bee173dc2b..6e3871c7f8f7 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -874,6 +874,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; case IPV6_AUTOFLOWLABEL: np->autoflowlabel = valbool; + np->autoflowlabel_set = 1; retv = 0; break; } -- GitLab From 8b032bde28998122bd38cd9e7ee1e52cd15f4773 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 5 Dec 2017 21:29:37 +0200 Subject: [PATCH 0693/1398] ptr_ring: add barriers [ Upstream commit a8ceb5dbfde1092b466936bca0ff3be127ecf38e ] Users of ptr_ring expect that it's safe to give the data structure a pointer and have it be available to consumers, but that actually requires an smb_wmb or a stronger barrier. In absence of such barriers and on architectures that reorder writes, consumer might read an un=initialized value from an skb pointer stored in the skb array. This was observed causing crashes. To fix, add memory barriers. The barrier we use is a wmb, the assumption being that producers do not need to read the value so we do not need to order these reads. Reported-by: George Cherian Suggested-by: Jason Wang Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/ptr_ring.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index b83507c0640c..e38f471a5402 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -99,12 +99,18 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r) /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). Callers must hold producer_lock. + * Callers are responsible for making sure pointer that is being queued + * points to a valid data. */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { if (unlikely(!r->size) || r->queue[r->producer]) return -ENOSPC; + /* Make sure the pointer we are storing points to a valid data. */ + /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */ + smp_wmb(); + r->queue[r->producer++] = ptr; if (unlikely(r->producer >= r->size)) r->producer = 0; @@ -244,6 +250,9 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r) if (ptr) __ptr_ring_discard_one(r); + /* Make sure anyone accessing data through the pointer is up to date. */ + /* Pairs with smp_wmb in __ptr_ring_produce. */ + smp_read_barrier_depends(); return ptr; } -- GitLab From 53288d82188ba3f69731283c78430121c246e737 Mon Sep 17 00:00:00 2001 From: Avinash Repaka Date: Thu, 21 Dec 2017 20:17:04 -0800 Subject: [PATCH 0694/1398] RDS: Check cmsg_len before dereferencing CMSG_DATA [ Upstream commit 14e138a86f6347c6199f610576d2e11c03bec5f0 ] RDS currently doesn't check if the length of the control message is large enough to hold the required data, before dereferencing the control message data. This results in following crash: BUG: KASAN: stack-out-of-bounds in rds_rdma_bytes net/rds/send.c:1013 [inline] BUG: KASAN: stack-out-of-bounds in rds_sendmsg+0x1f02/0x1f90 net/rds/send.c:1066 Read of size 8 at addr ffff8801c928fb70 by task syzkaller455006/3157 CPU: 0 PID: 3157 Comm: syzkaller455006 Not tainted 4.15.0-rc3+ #161 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x25b/0x340 mm/kasan/report.c:409 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430 rds_rdma_bytes net/rds/send.c:1013 [inline] rds_sendmsg+0x1f02/0x1f90 net/rds/send.c:1066 sock_sendmsg_nosec net/socket.c:628 [inline] sock_sendmsg+0xca/0x110 net/socket.c:638 ___sys_sendmsg+0x320/0x8b0 net/socket.c:2018 __sys_sendmmsg+0x1ee/0x620 net/socket.c:2108 SYSC_sendmmsg net/socket.c:2139 [inline] SyS_sendmmsg+0x35/0x60 net/socket.c:2134 entry_SYSCALL_64_fastpath+0x1f/0x96 RIP: 0033:0x43fe49 RSP: 002b:00007fffbe244ad8 EFLAGS: 00000217 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fe49 RDX: 0000000000000001 RSI: 000000002020c000 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000217 R12: 00000000004017b0 R13: 0000000000401840 R14: 0000000000000000 R15: 0000000000000000 To fix this, we verify that the cmsg_len is large enough to hold the data to be read, before proceeding further. Reported-by: syzbot Signed-off-by: Avinash Repaka Acked-by: Santosh Shilimkar Reviewed-by: Yuval Shaia Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/send.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rds/send.c b/net/rds/send.c index ad247dc71ebb..ef53d164e146 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1006,6 +1006,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes) continue; if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) { + if (cmsg->cmsg_len < + CMSG_LEN(sizeof(struct rds_rdma_args))) + return -EINVAL; args = CMSG_DATA(cmsg); *rdma_bytes += args->remote_vec.bytes; } -- GitLab From a4bf8efd2bcb233c422adc663552515dc4ee0da8 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 7 Dec 2017 12:43:30 -0500 Subject: [PATCH 0695/1398] tcp_bbr: record "full bw reached" decision in new full_bw_reached bit [ Upstream commit c589e69b508d29ed8e644dfecda453f71c02ec27 ] This commit records the "full bw reached" decision in a new full_bw_reached bit. This is a pure refactor that does not change the current behavior, but enables subsequent fixes and improvements. In particular, this enables simple and clean fixes because the full_bw and full_bw_cnt can be unconditionally zeroed without worrying about forgetting that we estimated we filled the pipe in Startup. And it enables future improvements because multiple code paths can be used for estimating that we filled the pipe in Startup; any new code paths only need to set this bit when they think the pipe is full. Note that this fix intentionally reduces the width of the full_bw_cnt counter, since we have never used the most significant bit. Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index cb8db347c680..97f9cac98348 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -81,7 +81,8 @@ struct bbr { u32 lt_last_lost; /* LT intvl start: tp->lost */ u32 pacing_gain:10, /* current gain for setting pacing rate */ cwnd_gain:10, /* current gain for setting cwnd */ - full_bw_cnt:3, /* number of rounds without large bw gains */ + full_bw_reached:1, /* reached full bw in Startup? */ + full_bw_cnt:2, /* number of rounds without large bw gains */ cycle_idx:3, /* current index in pacing_gain cycle array */ has_seen_rtt:1, /* have we seen an RTT sample yet? */ unused_b:5; @@ -151,7 +152,7 @@ static bool bbr_full_bw_reached(const struct sock *sk) { const struct bbr *bbr = inet_csk_ca(sk); - return bbr->full_bw_cnt >= bbr_full_bw_cnt; + return bbr->full_bw_reached; } /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ @@ -688,6 +689,7 @@ static void bbr_check_full_bw_reached(struct sock *sk, return; } ++bbr->full_bw_cnt; + bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt; } /* If pipe is probably full, drain the queue and then enter steady-state. */ @@ -821,6 +823,7 @@ static void bbr_init(struct sock *sk) bbr->restore_cwnd = 0; bbr->round_start = 0; bbr->idle_restart = 0; + bbr->full_bw_reached = 0; bbr->full_bw = 0; bbr->full_bw_cnt = 0; bbr->cycle_mstamp.v64 = 0; -- GitLab From 7887a700ce613bcf200edda53ca824d691c6ee8f Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 11 Dec 2017 00:05:46 -0800 Subject: [PATCH 0696/1398] tcp md5sig: Use skb's saddr when replying to an incoming segment [ Upstream commit 30791ac41927ebd3e75486f9504b6d2280463bf0 ] The MD5-key that belongs to a connection is identified by the peer's IP-address. When we are in tcp_v4(6)_reqsk_send_ack(), we are replying to an incoming segment from tcp_check_req() that failed the seq-number checks. Thus, to find the correct key, we need to use the skb's saddr and not the daddr. This bug seems to have been there since quite a while, but probably got unnoticed because the consequences are not catastrophic. We will call tcp_v4_reqsk_send_ack only to send a challenge-ACK back to the peer, thus the connection doesn't really fail. Fixes: 9501f9722922 ("tcp md5sig: Let the caller pass appropriate key for tcp_v{4,6}_do_calc_md5_hash().") Signed-off-by: Christoph Paasch Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d577ec07a0d8..b3960738464e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -828,7 +828,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_time_stamp, req->ts_recent, 0, - tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, + tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr, AF_INET), inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, ip_hdr(skb)->tos); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7ac2365aa6fb..eb624547382f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -962,7 +962,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, - tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), + tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr), 0, 0); } -- GitLab From 484369ff9715ed4f7a2d7b3da27249674a25a523 Mon Sep 17 00:00:00 2001 From: Brian King Date: Fri, 15 Dec 2017 15:21:50 -0600 Subject: [PATCH 0697/1398] tg3: Fix rx hang on MTU change with 5717/5719 [ Upstream commit 748a240c589824e9121befb1cba5341c319885bc ] This fixes a hang issue seen when changing the MTU size from 1500 MTU to 9000 MTU on both 5717 and 5719 chips. In discussion with Broadcom, they've indicated that these chipsets have the same phy as the 57766 chipset, so the same workarounds apply. This has been tested by IBM on both Power 8 and Power 9 systems as well as by Broadcom on x86 hardware and has been confirmed to resolve the hang issue. Signed-off-by: Brian King Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index edae2dcc4927..bb22d325e965 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14226,7 +14226,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) /* Reset PHY, otherwise the read DMA engine will be in a mode that * breaks all requests to 256 bytes. */ - if (tg3_asic_rev(tp) == ASIC_REV_57766) + if (tg3_asic_rev(tp) == ASIC_REV_57766 || + tg3_asic_rev(tp) == ASIC_REV_5717 || + tg3_asic_rev(tp) == ASIC_REV_5719) reset_phy = true; err = tg3_restart_hw(tp, reset_phy); -- GitLab From f75f910ffa90af17eeca18714847eaa0f16cb4ad Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Sun, 10 Dec 2017 03:50:58 +0000 Subject: [PATCH 0698/1398] net: ipv4: fix for a race condition in raw_sendmsg [ Upstream commit 8f659a03a0ba9289b9aeb9b4470e6fb263d6f483 ] inet->hdrincl is racy, and could lead to uninitialized stack pointer usage, so its value should be read only once. Fixes: c008ba5bdc9f ("ipv4: Avoid reading user iov twice after raw_probe_proto_opt") Signed-off-by: Mohamed Ghannam Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/raw.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 9879b73d5565..59d8770055ed 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -502,11 +502,16 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int err; struct ip_options_data opt_copy; struct raw_frag_vec rfv; + int hdrincl; err = -EMSGSIZE; if (len > 0xFFFF) goto out; + /* hdrincl should be READ_ONCE(inet->hdrincl) + * but READ_ONCE() doesn't work with bit fields + */ + hdrincl = inet->hdrincl; /* * Check the flags. */ @@ -582,7 +587,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* Linux does not mangle headers on raw sockets, * so that IP options + IP_HDRINCL is non-sense. */ - if (inet->hdrincl) + if (hdrincl) goto done; if (ipc.opt->opt.srr) { if (!daddr) @@ -604,12 +609,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | - (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), + (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); - if (!inet->hdrincl) { + if (!hdrincl) { rfv.msg = msg; rfv.hlen = 0; @@ -634,7 +639,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto do_confirm; back_from_confirm: - if (inet->hdrincl) + if (hdrincl) err = raw_send_hdrinc(sk, &fl4, msg, len, &rt, msg->msg_flags, &ipc.sockc); -- GitLab From 99cf2daf0da31b0b1853d1ccec9fbabc9d5856ae Mon Sep 17 00:00:00 2001 From: Tobias Jordan Date: Wed, 6 Dec 2017 15:23:23 +0100 Subject: [PATCH 0699/1398] net: mvmdio: disable/unprepare clocks in EPROBE_DEFER case [ Upstream commit 589bf32f09852041fbd3b7ce1a9e703f95c230ba ] add appropriate calls to clk_disable_unprepare() by jumping to out_mdio in case orion_mdio_probe() returns -EPROBE_DEFER. Found by Linux Driver Verification project (linuxtesting.org). Fixes: 3d604da1e954 ("net: mvmdio: get and enable optional clock") Signed-off-by: Tobias Jordan Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvmdio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index a0d1b084ecec..7aeb7fedb364 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -232,7 +232,8 @@ static int orion_mdio_probe(struct platform_device *pdev) dev->regs + MVMDIO_ERR_INT_MASK); } else if (dev->err_interrupt == -EPROBE_DEFER) { - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto out_mdio; } mutex_init(&dev->lock); -- GitLab From ae67e5486b972610648cd8ad1abff4049e7e8b0c Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 22 Dec 2017 10:15:20 -0800 Subject: [PATCH 0700/1398] sctp: Replace use of sockets_allocated with specified macro. [ Upstream commit 8cb38a602478e9f806571f6920b0a3298aabf042 ] The patch(180d8cd942ce) replaces all uses of struct sock fields' memory_pressure, memory_allocated, sockets_allocated, and sysctl_mem to accessor macros. But the sockets_allocated field of sctp sock is not replaced at all. Then replace it now for unifying the code. Fixes: 180d8cd942ce ("foundations of per-cgroup memory pressure controlling.") Cc: Glauber Costa Signed-off-by: Tonghao Zhang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c2ab864da50d..7181ce6c62bf 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4246,7 +4246,7 @@ static int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); local_bh_disable(); - percpu_counter_inc(&sctp_sockets_allocated); + sk_sockets_allocated_inc(sk); sock_prot_inuse_add(net, sk->sk_prot, 1); /* Nothing can fail after this block, otherwise @@ -4290,7 +4290,7 @@ static void sctp_destroy_sock(struct sock *sk) } sctp_endpoint_free(sp->ep); local_bh_disable(); - percpu_counter_dec(&sctp_sockets_allocated); + sk_sockets_allocated_dec(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); } -- GitLab From e51abae8458a36b24b18162873578ef788412191 Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Wed, 6 Dec 2017 17:15:43 -0800 Subject: [PATCH 0701/1398] adding missing rcu_read_unlock in ipxip6_rcv [ Upstream commit 74c4b656c3d92ec4c824ea1a4afd726b7b6568c8 ] commit 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") introduced new exit point in ipxip6_rcv. however rcu_read_unlock is missing there. this diff is fixing this v1->v2: instead of doing rcu_read_unlock in place, we are going to "drop" section (to prevent skb leakage) Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") Signed-off-by: Nikita V. Shirokov Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 12b2fd512f32..11d22d642488 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -911,7 +911,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, if (t->parms.collect_md) { tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0); if (!tun_dst) - return 0; + goto drop; } ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, log_ecn_error); -- GitLab From e4f6698027751bdca656e90f568b9cc8f5e7f401 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 20 Dec 2017 19:34:19 +0200 Subject: [PATCH 0702/1398] ipv4: Fix use-after-free when flushing FIB tables [ Upstream commit b4681c2829e24943aadd1a7bb3a30d41d0a20050 ] Since commit 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") the local table uses the same trie allocated for the main table when custom rules are not in use. When a net namespace is dismantled, the main table is flushed and freed (via an RCU callback) before the local table. In case the callback is invoked before the local table is iterated, a use-after-free can occur. Fix this by iterating over the FIB tables in reverse order, so that the main table is always freed after the local table. v3: Reworded comment according to Alex's suggestion. v2: Add a comment to make the fix more explicit per Dave's and Alex's feedback. Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") Signed-off-by: Ido Schimmel Reported-by: Fengguang Wu Acked-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_frontend.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 968d8e165e3d..ffae472e250a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1253,14 +1253,19 @@ static int __net_init ip_fib_net_init(struct net *net) static void ip_fib_net_exit(struct net *net) { - unsigned int i; + int i; rtnl_lock(); #ifdef CONFIG_IP_MULTIPLE_TABLES RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); #endif - for (i = 0; i < FIB_TABLE_HASHSZ; i++) { + /* Destroy the tables in reverse order to guarantee that the + * local table, ID 255, is destroyed before the main table, ID + * 254. This is necessary as the local table may contain + * references to data contained in the main table. + */ + for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) { struct hlist_head *head = &net->ipv4.fib_table_hash[i]; struct hlist_node *tmp; struct fib_table *tb; -- GitLab From 243adaa4eaea5088aa5a8813b82bb6098ba6fb92 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 18 Dec 2017 17:35:09 +0200 Subject: [PATCH 0703/1398] net: bridge: fix early call to br_stp_change_bridge_id and plug newlink leaks [ Upstream commit 84aeb437ab98a2bce3d4b2111c79723aedfceb33 ] The early call to br_stp_change_bridge_id in bridge's newlink can cause a memory leak if an error occurs during the newlink because the fdb entries are not cleaned up if a different lladdr was specified, also another minor issue is that it generates fdb notifications with ifindex = 0. Another unrelated memory leak is the bridge sysfs entries which get added on NETDEV_REGISTER event, but are not cleaned up in the newlink error path. To remove this special case the call to br_stp_change_bridge_id is done after netdev register and we cleanup the bridge on changelink error via br_dev_delete to plug all leaks. This patch makes netlink bridge destruction on newlink error the same as dellink and ioctl del which is necessary since at that point we have a fully initialized bridge device. To reproduce the issue: $ ip l add br0 address 00:11:22:33:44:55 type bridge group_fwd_mask 1 RTNETLINK answers: Invalid argument $ rmmod bridge [ 1822.142525] ============================================================================= [ 1822.143640] BUG bridge_fdb_cache (Tainted: G O ): Objects remaining in bridge_fdb_cache on __kmem_cache_shutdown() [ 1822.144821] ----------------------------------------------------------------------------- [ 1822.145990] Disabling lock debugging due to kernel taint [ 1822.146732] INFO: Slab 0x0000000092a844b2 objects=32 used=2 fp=0x00000000fef011b0 flags=0x1ffff8000000100 [ 1822.147700] CPU: 2 PID: 13584 Comm: rmmod Tainted: G B O 4.15.0-rc2+ #87 [ 1822.148578] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 1822.150008] Call Trace: [ 1822.150510] dump_stack+0x78/0xa9 [ 1822.151156] slab_err+0xb1/0xd3 [ 1822.151834] ? __kmalloc+0x1bb/0x1ce [ 1822.152546] __kmem_cache_shutdown+0x151/0x28b [ 1822.153395] shutdown_cache+0x13/0x144 [ 1822.154126] kmem_cache_destroy+0x1c0/0x1fb [ 1822.154669] SyS_delete_module+0x194/0x244 [ 1822.155199] ? trace_hardirqs_on_thunk+0x1a/0x1c [ 1822.155773] entry_SYSCALL_64_fastpath+0x23/0x9a [ 1822.156343] RIP: 0033:0x7f929bd38b17 [ 1822.156859] RSP: 002b:00007ffd160e9a98 EFLAGS: 00000202 ORIG_RAX: 00000000000000b0 [ 1822.157728] RAX: ffffffffffffffda RBX: 00005578316ba090 RCX: 00007f929bd38b17 [ 1822.158422] RDX: 00007f929bd9ec60 RSI: 0000000000000800 RDI: 00005578316ba0f0 [ 1822.159114] RBP: 0000000000000003 R08: 00007f929bff5f20 R09: 00007ffd160e8a11 [ 1822.159808] R10: 00007ffd160e9860 R11: 0000000000000202 R12: 00007ffd160e8a80 [ 1822.160513] R13: 0000000000000000 R14: 0000000000000000 R15: 00005578316ba090 [ 1822.161278] INFO: Object 0x000000007645de29 @offset=0 [ 1822.161666] INFO: Object 0x00000000d5df2ab5 @offset=128 Fixes: 30313a3d5794 ("bridge: Handle IFLA_ADDRESS correctly when creating bridge device") Fixes: 5b8d5429daa0 ("bridge: netlink: register netdevice before executing changelink") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_netlink.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 5d4006e589cb..4f831225d34f 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1092,19 +1092,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev, struct net_bridge *br = netdev_priv(dev); int err; + err = register_netdevice(dev); + if (err) + return err; + if (tb[IFLA_ADDRESS]) { spin_lock_bh(&br->lock); br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); spin_unlock_bh(&br->lock); } - err = register_netdevice(dev); - if (err) - return err; - err = br_changelink(dev, tb, data); if (err) - unregister_netdevice(dev); + br_dev_delete(dev, NULL); + return err; } -- GitLab From 8c38f3190fd69ec1700f83078785b83e173f1499 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 30 Jul 2017 19:36:05 +0200 Subject: [PATCH 0704/1398] net: fec: Allow reception of frames bigger than 1522 bytes [ Upstream commit fbbeefdd21049fcf9437c809da3828b210577f36 ] The FEC Receive Control Register has a 14 bit field indicating the longest frame that may be received. It is being set to 1522. Frames longer than this are discarded, but counted as being in error. When using DSA, frames from the switch has an additional header, either 4 or 8 bytes if a Marvell switch is used. Thus a full MTU frame of 1522 bytes received by the switch on a port becomes 1530 bytes when passed to the host via the FEC interface. Change the maximum receive size to 2048 - 64, where 64 is the maximum rx_alignment applied on the receive buffer for AVB capable FEC cores. Use this value also for the maximum receive buffer size. The driver is already allocating a receive SKB of 2048 bytes, so this change should not have any significant effects. Tested on imx51, imx6, vf610. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/fec_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 60f686684c4c..917091871259 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -172,10 +172,12 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #endif /* CONFIG_M5272 */ /* The FEC stores dest/src/type/vlan, data, and checksum for receive packets. + * + * 2048 byte skbufs are allocated. However, alignment requirements + * varies between FEC variants. Worst case is 64, so round down by 64. */ -#define PKT_MAXBUF_SIZE 1522 +#define PKT_MAXBUF_SIZE (round_down(2048 - 64, 64)) #define PKT_MINBUF_SIZE 64 -#define PKT_MAXBLR_SIZE 1536 /* FEC receive acceleration */ #define FEC_RACC_IPDIS (1 << 1) @@ -853,7 +855,7 @@ static void fec_enet_enable_ring(struct net_device *ndev) for (i = 0; i < fep->num_rx_queues; i++) { rxq = fep->rx_queue[i]; writel(rxq->bd.dma, fep->hwp + FEC_R_DES_START(i)); - writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i)); + writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i)); /* enable DMA1/2 */ if (i) -- GitLab From 03c93293a83a603fc5b87fc15321f392d3cf0043 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 19 Dec 2017 11:27:56 -0600 Subject: [PATCH 0705/1398] net: Fix double free and memory corruption in get_net_ns_by_id() [ Upstream commit 21b5944350052d2583e82dd59b19a9ba94a007f0 ] (I can trivially verify that that idr_remove in cleanup_net happens after the network namespace count has dropped to zero --EWB) Function get_net_ns_by_id() does not check for net::count after it has found a peer in netns_ids idr. It may dereference a peer, after its count has already been finaly decremented. This leads to double free and memory corruption: put_net(peer) rtnl_lock() atomic_dec_and_test(&peer->count) [count=0] ... __put_net(peer) get_net_ns_by_id(net, id) spin_lock(&cleanup_list_lock) list_add(&net->cleanup_list, &cleanup_list) spin_unlock(&cleanup_list_lock) queue_work() peer = idr_find(&net->netns_ids, id) | get_net(peer) [count=1] | ... | (use after final put) v ... cleanup_net() ... spin_lock(&cleanup_list_lock) ... list_replace_init(&cleanup_list, ..) ... spin_unlock(&cleanup_list_lock) ... ... ... ... put_net(peer) ... atomic_dec_and_test(&peer->count) [count=0] ... spin_lock(&cleanup_list_lock) ... list_add(&net->cleanup_list, &cleanup_list) ... spin_unlock(&cleanup_list_lock) ... queue_work() ... rtnl_unlock() rtnl_lock() ... for_each_net(tmp) { ... id = __peernet2id(tmp, peer) ... spin_lock_irq(&tmp->nsid_lock) ... idr_remove(&tmp->netns_ids, id) ... ... ... net_drop_ns() ... net_free(peer) ... } ... | v cleanup_net() ... (Second free of peer) Also, put_net() on the right cpu may reorder with left's cpu list_replace_init(&cleanup_list, ..), and then cleanup_list will be corrupted. Since cleanup_net() is executed in worker thread, while put_net(peer) can happen everywhere, there should be enough time for concurrent get_net_ns_by_id() to pick the peer up, and the race does not seem to be unlikely. The patch fixes the problem in standard way. (Also, there is possible problem in peernet2id_alloc(), which requires check for net::count under nsid_lock and maybe_get_net(peer), but in current stable kernel it's used under rtnl_lock() and it has to be safe. Openswitch begun to use peernet2id_alloc(), and possibly it should be fixed too. While this is not in stable kernel yet, so I'll send a separate message to netdev@ later). Cc: Nicolas Dichtel Signed-off-by: Kirill Tkhai Fixes: 0c7aecd4bde4 "netns: add rtnl cmd to add and get peer netns ids" Reviewed-by: Andrey Ryabinin Reviewed-by: "Eric W. Biederman" Signed-off-by: Eric W. Biederman Reviewed-by: Eric Dumazet Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/net_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7001da910c6b..b7efe2f19f83 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -263,7 +263,7 @@ struct net *get_net_ns_by_id(struct net *net, int id) spin_lock_irqsave(&net->nsid_lock, flags); peer = idr_find(&net->netns_ids, id); if (peer) - get_net(peer); + peer = maybe_get_net(peer); spin_unlock_irqrestore(&net->nsid_lock, flags); rcu_read_unlock(); -- GitLab From a746fadd5e3187b814a66769355adcc9524839e6 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Wed, 20 Dec 2017 18:45:10 -0600 Subject: [PATCH 0706/1398] net: phy: micrel: ksz9031: reconfigure autoneg after phy autoneg workaround [ Upstream commit c1a8d0a3accf64a014d605e6806ce05d1c17adf1 ] Under some circumstances driver will perform PHY reset in ksz9031_read_status() to fix autoneg failure case (idle error count = 0xFF). When this happens ksz9031 will not detect link status change any more when connecting to Netgear 1G switch (link can be recovered sometimes by restarting netdevice "ifconfig down up"). Reproduced with TI am572x board equipped with ksz9031 PHY while connecting to Netgear 1G switch. Fix the issue by reconfiguring autonegotiation after PHY reset in ksz9031_read_status(). Fixes: d2fd719bcb0e ("net/phy: micrel: Add workaround for bad autoneg") Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/micrel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index fbf5945ce00d..2032a6de026b 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -624,6 +624,7 @@ static int ksz9031_read_status(struct phy_device *phydev) phydev->link = 0; if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) phydev->drv->config_intr(phydev); + return genphy_config_aneg(phydev); } return 0; -- GitLab From 58f6ebbd3424ef855cf28887f20965bfbef13b90 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 13 Dec 2017 14:41:06 -0500 Subject: [PATCH 0707/1398] sock: free skb in skb_complete_tx_timestamp on error [ Upstream commit 35b99dffc3f710cafceee6c8c6ac6a98eb2cb4bf ] skb_complete_tx_timestamp must ingest the skb it is passed. Call kfree_skb if the skb cannot be enqueued. Fixes: b245be1f4db1 ("net-timestamp: no-payload only sysctl") Fixes: 9ac25fc06375 ("net: fix socket refcounting in skb_complete_tx_timestamp()") Reported-by: Richard Cochran Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index aec5605944d3..a64515583bc1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3823,7 +3823,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, struct sock *sk = skb->sk; if (!skb_may_tx_timestamp(sk, false)) - return; + goto err; /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. @@ -3832,7 +3832,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, *skb_hwtstamps(skb) = *hwtstamps; __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); sock_put(sk); + return; } + +err: + kfree_skb(skb); } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); -- GitLab From e74fe7268e7eadb2880d3842fe167131220d5616 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Thu, 7 Dec 2017 13:41:34 -0800 Subject: [PATCH 0708/1398] tcp: invalidate rate samples during SACK reneging [ Upstream commit d4761754b4fb2ef8d9a1e9d121c4bec84e1fe292 ] Mark tcp_sock during a SACK reneging event and invalidate rate samples while marked. Such rate samples may overestimate bw by including packets that were SACKed before reneging. < ack 6001 win 10000 sack 7001:38001 < ack 7001 win 0 sack 8001:38001 // Reneg detected > seq 7001:8001 // RTO, SACK cleared. < ack 38001 win 10000 In above example the rate sample taken after the last ack will count 7001-38001 as delivered while the actual delivery rate likely could be much lower i.e. 7001-8001. This patch adds a new field tcp_sock.sack_reneg and marks it when we declare SACK reneging and entering TCP_CA_Loss, and unmarks it after the last rate sample was taken before moving back to TCP_CA_Open. This patch also invalidates rate samples taken while tcp_sock.is_sack_reneg is set. Fixes: b9f64820fb22 ("tcp: track data delivery rate for a TCP connection") Signed-off-by: Yousuk Seung Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Acked-by: Eric Dumazet Acked-by: Priyaranjan Jha Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/tcp.h | 3 ++- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 1 + net/ipv4/tcp_input.c | 10 ++++++++-- net/ipv4/tcp_rate.c | 10 +++++++--- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 647532b0eb03..f50b717ce644 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -219,7 +219,8 @@ struct tcp_sock { } rack; u16 advmss; /* Advertised MSS */ u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ - unused:7; + is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ + unused:6; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ diff --git a/include/net/tcp.h b/include/net/tcp.h index fba4fc46871d..caf35e062639 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1001,7 +1001,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct skb_mstamp *now, struct rate_sample *rs); + bool is_sack_reneg, struct skb_mstamp *now, struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); /* These functions determine how the current flow behaves in respect of SACK diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dd33c785ce16..05d2bde00864 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2297,6 +2297,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; tcp_clear_retrans(tp); inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 05255a286888..2f107e46355c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1966,6 +1966,8 @@ void tcp_enter_loss(struct sock *sk) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; tp->fackets_out = 0; + /* Mark SACK reneging until we recover from this loss event. */ + tp->is_sack_reneg = 1; } tcp_clear_all_retrans_hints(tp); @@ -2463,6 +2465,7 @@ static bool tcp_try_undo_recovery(struct sock *sk) return true; } tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; return false; } @@ -2494,8 +2497,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; - if (frto_undo || tcp_is_sack(tp)) + if (frto_undo || tcp_is_sack(tp)) { tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; + } return true; } return false; @@ -3589,6 +3594,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) struct tcp_sacktag_state sack_state; struct rate_sample rs = { .prior_delivered = 0 }; u32 prior_snd_una = tp->snd_una; + bool is_sack_reneg = tp->is_sack_reneg; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; @@ -3711,7 +3717,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_schedule_loss_probe(sk); delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ - tcp_rate_gen(sk, delivered, lost, &now, &rs); + tcp_rate_gen(sk, delivered, lost, is_sack_reneg, &now, &rs); tcp_cong_control(sk, ack, delivered, flag, &rs); tcp_xmit_recovery(sk, rexmit); return 1; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 9be1581a5a08..18309f58ab8d 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, /* Update the connection delivery information and generate a rate sample. */ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct skb_mstamp *now, struct rate_sample *rs) + bool is_sack_reneg, struct skb_mstamp *now, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; @@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ rs->losses = lost; /* freshly marked lost */ - /* Return an invalid sample if no timing information is available. */ - if (!rs->prior_mstamp.v64) { + /* Return an invalid sample if no timing information is available or + * in recovery from loss with SACK reneging. Rate samples taken during + * a SACK reneging event may overestimate bw by including packets that + * were SACKed before the reneg. + */ + if (!rs->prior_mstamp.v64 || is_sack_reneg) { rs->delivered = -1; rs->interval_us = -1; return; -- GitLab From 138723912343f74b23170a2f86728f70e65e4d6b Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 13 Nov 2017 10:11:27 +0200 Subject: [PATCH 0709/1398] net/mlx5: Fix rate limit packet pacing naming and struct [ Upstream commit 37e92a9d4fe38dc3e7308913575983a6a088c8d4 ] In mlx5_ifc, struct size was not complete, and thus driver was sending garbage after the last defined field. Fixed it by adding reserved field to complete the struct size. In addition, rename all set_rate_limit to set_pp_rate_limit to be compliant with the Firmware <-> Driver definition. Fixes: 7486216b3a0b ("{net,IB}/mlx5: mlx5_ifc updates") Fixes: 1466cc5b23d1 ("net/mlx5: Rate limit tables support") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/rl.c | 22 +++++++++---------- include/linux/mlx5/mlx5_ifc.h | 8 ++++--- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index f7fabecc104f..4c3f1cb7e2c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -367,7 +367,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_VPORT_COUNTER: case MLX5_CMD_OP_ALLOC_Q_COUNTER: case MLX5_CMD_OP_QUERY_Q_COUNTER: - case MLX5_CMD_OP_SET_RATE_LIMIT: + case MLX5_CMD_OP_SET_PP_RATE_LIMIT: case MLX5_CMD_OP_QUERY_RATE_LIMIT: case MLX5_CMD_OP_ALLOC_PD: case MLX5_CMD_OP_ALLOC_UAR: @@ -502,7 +502,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); - MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT); MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); MLX5_COMMAND_STR_CASE(ALLOC_PD); MLX5_COMMAND_STR_CASE(DEALLOC_PD); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index 104902a93a0b..2be9ec5fd651 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -60,16 +60,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, return ret_entry; } -static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev, +static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev, u32 rate, u16 index) { - u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0}; - MLX5_SET(set_rate_limit_in, in, opcode, - MLX5_CMD_OP_SET_RATE_LIMIT); - MLX5_SET(set_rate_limit_in, in, rate_limit_index, index); - MLX5_SET(set_rate_limit_in, in, rate_limit, rate); + MLX5_SET(set_pp_rate_limit_in, in, opcode, + MLX5_CMD_OP_SET_PP_RATE_LIMIT); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -108,7 +108,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index) entry->refcount++; } else { /* new rate limit */ - err = mlx5_set_rate_limit_cmd(dev, rate, entry->index); + err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index); if (err) { mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n", rate, err); @@ -144,7 +144,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate) entry->refcount--; if (!entry->refcount) { /* need to remove rate */ - mlx5_set_rate_limit_cmd(dev, 0, entry->index); + mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index); entry->rate = 0; } @@ -197,8 +197,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) /* Clear all configured rates */ for (i = 0; i < table->max_size; i++) if (table->rl_entry[i].rate) - mlx5_set_rate_limit_cmd(dev, 0, - table->rl_entry[i].index); + mlx5_set_pp_rate_limit_cmd(dev, 0, + table->rl_entry[i].index); kfree(dev->priv.rl_table.rl_entry); } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6045d4d58065..25ed105bbcfb 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -143,7 +143,7 @@ enum { MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, - MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, + MLX5_CMD_OP_SET_PP_RATE_LIMIT = 0x780, MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, MLX5_CMD_OP_ALLOC_PD = 0x800, MLX5_CMD_OP_DEALLOC_PD = 0x801, @@ -6689,7 +6689,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits { u8 vxlan_udp_port[0x10]; }; -struct mlx5_ifc_set_rate_limit_out_bits { +struct mlx5_ifc_set_pp_rate_limit_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6698,7 +6698,7 @@ struct mlx5_ifc_set_rate_limit_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_set_rate_limit_in_bits { +struct mlx5_ifc_set_pp_rate_limit_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -6711,6 +6711,8 @@ struct mlx5_ifc_set_rate_limit_in_bits { u8 reserved_at_60[0x20]; u8 rate_limit[0x20]; + + u8 reserved_at_a0[0x160]; }; struct mlx5_ifc_access_register_out_bits { -- GitLab From 9424a79ec11d5aac7fcb73583afc448be1ca6848 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 21 Nov 2017 17:49:36 +0200 Subject: [PATCH 0710/1398] net/mlx5e: Fix features check of IPv6 traffic [ Upstream commit 2989ad1ec03021ee6d2193c35414f1d970a243de ] The assumption that the next header field contains the transport protocol is wrong for IPv6 packets with extension headers. Instead, we should look the inner-most next header field in the buffer. This will fix TSO offload for tunnels over IPv6 with extension headers. Performance testing: 19.25x improvement, cool! Measuring bandwidth of 16 threads TCP traffic over IPv6 GRE tap. CPU: Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz NIC: Mellanox Technologies MT28800 Family [ConnectX-5 Ex] TSO: Enabled Before: 4,926.24 Mbps Now : 94,827.91 Mbps Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9d3722930c95..38981db43bc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3038,6 +3038,7 @@ static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, struct sk_buff *skb, netdev_features_t features) { + unsigned int offset = 0; struct udphdr *udph; u16 proto; u16 port = 0; @@ -3047,7 +3048,7 @@ static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, proto = ip_hdr(skb)->protocol; break; case htons(ETH_P_IPV6): - proto = ipv6_hdr(skb)->nexthdr; + proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); break; default: goto out; -- GitLab From d1614fd9cd1039f26aaf63cbb261d5ccc3bdf133 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Nov 2017 13:52:28 +0200 Subject: [PATCH 0711/1398] net/mlx5e: Fix possible deadlock of VXLAN lock [ Upstream commit 6323514116404cc651df1b7fffa1311ddf8ce647 ] mlx5e_vxlan_lookup_port is called both from mlx5e_add_vxlan_port (user context) and mlx5e_features_check (softirq), but the lock acquired does not disable bottom half and might result in deadlock. Fix it by simply replacing spin_lock() with spin_lock_bh(). While at it, replace all unnecessary spin_lock_irq() to spin_lock_bh(). lockdep's WARNING: inconsistent lock state [ 654.028136] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 654.028229] swapper/5/0 [HC0[0]:SC1[9]:HE1:SE0] takes: [ 654.028321] (&(&vxlan_db->lock)->rlock){+.?.}, at: [] mlx5e_vxlan_lookup_port+0x1e/0x50 [mlx5_core] [ 654.028528] {SOFTIRQ-ON-W} state was registered at: [ 654.028607] _raw_spin_lock+0x3c/0x70 [ 654.028689] mlx5e_vxlan_lookup_port+0x1e/0x50 [mlx5_core] [ 654.028794] mlx5e_vxlan_add_port+0x2e/0x120 [mlx5_core] [ 654.028878] process_one_work+0x1e9/0x640 [ 654.028942] worker_thread+0x4a/0x3f0 [ 654.029002] kthread+0x141/0x180 [ 654.029056] ret_from_fork+0x24/0x30 [ 654.029114] irq event stamp: 579088 [ 654.029174] hardirqs last enabled at (579088): [] ip6_finish_output2+0x49a/0x8c0 [ 654.029309] hardirqs last disabled at (579087): [] ip6_finish_output2+0x44e/0x8c0 [ 654.029446] softirqs last enabled at (579030): [] irq_enter+0x6d/0x80 [ 654.029567] softirqs last disabled at (579031): [] irq_exit+0xb5/0xc0 [ 654.029684] other info that might help us debug this: [ 654.029781] Possible unsafe locking scenario: [ 654.029868] CPU0 [ 654.029908] ---- [ 654.029947] lock(&(&vxlan_db->lock)->rlock); [ 654.030045] [ 654.030090] lock(&(&vxlan_db->lock)->rlock); [ 654.030162] *** DEADLOCK *** Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlx5/core/vxlan.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index 07a9ba6cfc70..f8238275759f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port) struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; struct mlx5e_vxlan *vxlan; - spin_lock(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); vxlan = radix_tree_lookup(&vxlan_db->tree, port); - spin_unlock(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); return vxlan; } @@ -100,9 +100,9 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) vxlan->udp_port = port; - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan); - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); if (err) goto err_free; @@ -121,9 +121,9 @@ static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port) struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; struct mlx5e_vxlan *vxlan; - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); vxlan = radix_tree_delete(&vxlan_db->tree, port); - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); if (!vxlan) return; @@ -171,12 +171,12 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv) struct mlx5e_vxlan *vxlan; unsigned int port = 0; - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) { port = vxlan->udp_port; - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); __mlx5e_vxlan_core_del_port(priv, (u16)port); - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); } - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); } -- GitLab From 604225824f063bb6a737af564cd5261fba7d8221 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 3 Dec 2017 13:58:50 +0200 Subject: [PATCH 0712/1398] net/mlx5e: Add refcount to VXLAN structure [ Upstream commit 23f4cc2cd9ed92570647220aca60d0197d8c1fa9 ] A refcount mechanism must be implemented in order to prevent unwanted scenarios such as: - Open an IPv4 VXLAN interface - Open an IPv6 VXLAN interface (different socket) - Remove one of the interfaces With current implementation, the UDP port will be removed from our VXLAN database and turn off the offloads for the other interface, which is still active. The reference count mechanism will only allow UDP port removals once all consumers are gone. Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlx5/core/vxlan.c | 50 ++++++++++--------- .../net/ethernet/mellanox/mlx5/core/vxlan.h | 1 + 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index f8238275759f..25f782344667 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -88,8 +88,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) struct mlx5e_vxlan *vxlan; int err; - if (mlx5e_vxlan_lookup_port(priv, port)) + vxlan = mlx5e_vxlan_lookup_port(priv, port); + if (vxlan) { + atomic_inc(&vxlan->refcount); goto free_work; + } if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port)) goto free_work; @@ -99,6 +102,7 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) goto err_delete_port; vxlan->udp_port = port; + atomic_set(&vxlan->refcount, 1); spin_lock_bh(&vxlan_db->lock); err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan); @@ -116,32 +120,33 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) kfree(vxlan_work); } -static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port) +static void mlx5e_vxlan_del_port(struct work_struct *work) { + struct mlx5e_vxlan_work *vxlan_work = + container_of(work, struct mlx5e_vxlan_work, work); + struct mlx5e_priv *priv = vxlan_work->priv; struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; + u16 port = vxlan_work->port; struct mlx5e_vxlan *vxlan; + bool remove = false; spin_lock_bh(&vxlan_db->lock); - vxlan = radix_tree_delete(&vxlan_db->tree, port); - spin_unlock_bh(&vxlan_db->lock); - + vxlan = radix_tree_lookup(&vxlan_db->tree, port); if (!vxlan) - return; - - mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port); - - kfree(vxlan); -} + goto out_unlock; -static void mlx5e_vxlan_del_port(struct work_struct *work) -{ - struct mlx5e_vxlan_work *vxlan_work = - container_of(work, struct mlx5e_vxlan_work, work); - struct mlx5e_priv *priv = vxlan_work->priv; - u16 port = vxlan_work->port; + if (atomic_dec_and_test(&vxlan->refcount)) { + radix_tree_delete(&vxlan_db->tree, port); + remove = true; + } - __mlx5e_vxlan_core_del_port(priv, port); +out_unlock: + spin_unlock_bh(&vxlan_db->lock); + if (remove) { + mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); + kfree(vxlan); + } kfree(vxlan_work); } @@ -171,12 +176,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv) struct mlx5e_vxlan *vxlan; unsigned int port = 0; - spin_lock_bh(&vxlan_db->lock); + /* Lockless since we are the only radix-tree consumers, wq is disabled */ while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) { port = vxlan->udp_port; - spin_unlock_bh(&vxlan_db->lock); - __mlx5e_vxlan_core_del_port(priv, (u16)port); - spin_lock_bh(&vxlan_db->lock); + radix_tree_delete(&vxlan_db->tree, port); + mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); + kfree(vxlan); } - spin_unlock_bh(&vxlan_db->lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h index 5def12c048e3..5ef6ae7d568a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h @@ -36,6 +36,7 @@ #include "en.h" struct mlx5e_vxlan { + atomic_t refcount; u16 udp_port; }; -- GitLab From c844a458948711c36105bf0b4e8771b104e1906b Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 4 Dec 2017 09:57:43 +0200 Subject: [PATCH 0713/1398] net/mlx5e: Prevent possible races in VXLAN control flow [ Upstream commit 0c1cc8b2215f5122ca614b5adca60346018758c3 ] When calling add/remove VXLAN port, a lock must be held in order to prevent race scenarios when more than one add/remove happens at the same time. Fix by holding our state_lock (mutex) as done by all other parts of the driver. Note that the spinlock protecting the radix-tree is still needed in order to synchronize radix-tree access from softirq context. Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index 25f782344667..2f74953e4561 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -88,6 +88,7 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) struct mlx5e_vxlan *vxlan; int err; + mutex_lock(&priv->state_lock); vxlan = mlx5e_vxlan_lookup_port(priv, port); if (vxlan) { atomic_inc(&vxlan->refcount); @@ -117,6 +118,7 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) err_delete_port: mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); free_work: + mutex_unlock(&priv->state_lock); kfree(vxlan_work); } @@ -130,6 +132,7 @@ static void mlx5e_vxlan_del_port(struct work_struct *work) struct mlx5e_vxlan *vxlan; bool remove = false; + mutex_lock(&priv->state_lock); spin_lock_bh(&vxlan_db->lock); vxlan = radix_tree_lookup(&vxlan_db->tree, port); if (!vxlan) @@ -147,6 +150,7 @@ static void mlx5e_vxlan_del_port(struct work_struct *work) mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); kfree(vxlan); } + mutex_unlock(&priv->state_lock); kfree(vxlan_work); } -- GitLab From 7493d98ea84c1fb92feef5b2e863d696a1634448 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Mon, 4 Dec 2017 08:59:25 +0200 Subject: [PATCH 0714/1398] net/mlx5: Fix error flow in CREATE_QP command [ Upstream commit dbff26e44dc3ec4de6578733b054a0114652a764 ] In error flow, when DESTROY_QP command should be executed, the wrong mailbox was set with data, not the one that is written to hardware, Fix that. Fixes: 09a7d9eca1a6 '{net,IB}/mlx5: QP/XRCD commands via mlx5 ifc' Signed-off-by: Moni Shoua Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index d0a4005fe63a..9346f3985edf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -303,8 +303,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, err_cmd: memset(din, 0, sizeof(din)); memset(dout, 0, sizeof(dout)); - MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); - MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, qpn, qp->qpn); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } -- GitLab From 6ed7c48e93bb800077b1bd7a24b5b86e675f3764 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 13 Dec 2017 18:56:29 +0100 Subject: [PATCH 0715/1398] s390/qeth: apply takeover changes when mode is toggled [ Upstream commit 7fbd9493f0eeae8cef58300505a9ef5c8fce6313 ] Just as for an explicit enable/disable, toggling the takeover mode also requires that the IP addresses get updated. Otherwise all IPs that were added to the table before the mode-toggle, get registered with the old settings. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core.h | 2 +- drivers/s390/net/qeth_core_main.c | 2 +- drivers/s390/net/qeth_l3_sys.c | 35 +++++++++++++++---------------- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index e72234efb648..48eee3c671a8 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -576,7 +576,7 @@ enum qeth_cq { }; struct qeth_ipato { - int enabled; + bool enabled; int invert4; int invert6; struct list_head entries; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 838ed6213118..606b506d46ef 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1475,7 +1475,7 @@ static int qeth_setup_card(struct qeth_card *card) qeth_set_intial_options(card); /* IP address takeover */ INIT_LIST_HEAD(&card->ipato.entries); - card->ipato.enabled = 0; + card->ipato.enabled = false; card->ipato.invert4 = 0; card->ipato.invert6 = 0; /* init QDIO stuff */ diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index cffe42f5775d..b595a2bc6b37 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -374,6 +374,7 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, struct qeth_card *card = dev_get_drvdata(dev); struct qeth_ipaddr *addr; int i, rc = 0; + bool enable; if (!card) return -EINVAL; @@ -386,25 +387,23 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, } if (sysfs_streq(buf, "toggle")) { - card->ipato.enabled = (card->ipato.enabled)? 0 : 1; - } else if (sysfs_streq(buf, "1")) { - card->ipato.enabled = 1; - hash_for_each(card->ip_htable, i, addr, hnode) { - if ((addr->type == QETH_IP_TYPE_NORMAL) && - qeth_l3_is_addr_covered_by_ipato(card, addr)) - addr->set_flags |= - QETH_IPA_SETIP_TAKEOVER_FLAG; - } - } else if (sysfs_streq(buf, "0")) { - card->ipato.enabled = 0; - hash_for_each(card->ip_htable, i, addr, hnode) { - if (addr->set_flags & - QETH_IPA_SETIP_TAKEOVER_FLAG) - addr->set_flags &= - ~QETH_IPA_SETIP_TAKEOVER_FLAG; - } - } else + enable = !card->ipato.enabled; + } else if (kstrtobool(buf, &enable)) { rc = -EINVAL; + goto out; + } + + if (card->ipato.enabled == enable) + goto out; + card->ipato.enabled = enable; + + hash_for_each(card->ip_htable, i, addr, hnode) { + if (!enable) + addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG; + else if (addr->type == QETH_IP_TYPE_NORMAL && + qeth_l3_is_addr_covered_by_ipato(card, addr)) + addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; + } out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; -- GitLab From 475018c79742ecd5806a9fde88582f13b2d5a5c7 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 13 Dec 2017 18:56:30 +0100 Subject: [PATCH 0716/1398] s390/qeth: don't apply takeover changes to RXIP [ Upstream commit b22d73d6689fd902a66c08ebe71ab2f3b351e22f ] When takeover is switched off, current code clears the 'TAKEOVER' flag on all IPs. But the flag is also used for RXIP addresses, and those should not be affected by the takeover mode. Fix the behaviour by consistenly applying takover logic to NORMAL addresses only. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_l3_main.c | 5 +++-- drivers/s390/net/qeth_l3_sys.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index f91e70c369ed..04240cc55e2e 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -178,6 +178,8 @@ int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, if (!card->ipato.enabled) return 0; + if (addr->type != QETH_IP_TYPE_NORMAL) + return 0; qeth_l3_convert_addr_to_bits((u8 *) &addr->u, addr_bits, (addr->proto == QETH_PROT_IPV4)? 4:16); @@ -293,8 +295,7 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) memcpy(addr, tmp_addr, sizeof(struct qeth_ipaddr)); addr->ref_counter = 1; - if (addr->type == QETH_IP_TYPE_NORMAL && - qeth_l3_is_addr_covered_by_ipato(card, addr)) { + if (qeth_l3_is_addr_covered_by_ipato(card, addr)) { QETH_CARD_TEXT(card, 2, "tkovaddr"); addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; } diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index b595a2bc6b37..85214439d175 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -398,10 +398,11 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, card->ipato.enabled = enable; hash_for_each(card->ip_htable, i, addr, hnode) { + if (addr->type != QETH_IP_TYPE_NORMAL) + continue; if (!enable) addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG; - else if (addr->type == QETH_IP_TYPE_NORMAL && - qeth_l3_is_addr_covered_by_ipato(card, addr)) + else if (qeth_l3_is_addr_covered_by_ipato(card, addr)) addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; } out: -- GitLab From 476d7d6932953a3ca8d7cde01e75284268f4f52e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 13 Dec 2017 18:56:31 +0100 Subject: [PATCH 0717/1398] s390/qeth: lock IP table while applying takeover changes [ Upstream commit 8a03a3692b100d84785ee7a834e9215e304c9e00 ] Modifying the flags of an IP addr object needs to be protected against eg. concurrent removal of the same object from the IP table. Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_l3_sys.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index 85214439d175..f95135f197da 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -397,6 +397,7 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, goto out; card->ipato.enabled = enable; + spin_lock_bh(&card->ip_lock); hash_for_each(card->ip_htable, i, addr, hnode) { if (addr->type != QETH_IP_TYPE_NORMAL) continue; @@ -405,6 +406,7 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, else if (qeth_l3_is_addr_covered_by_ipato(card, addr)) addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; } + spin_unlock_bh(&card->ip_lock); out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; -- GitLab From 67b539cab43cf3e46896a747004c923abf33ae17 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 13 Dec 2017 18:56:32 +0100 Subject: [PATCH 0718/1398] s390/qeth: update takeover IPs after configuration change [ Upstream commit 02f510f326501470348a5df341e8232c3497bbbb ] Any modification to the takeover IP-ranges requires that we re-evaluate which IP addresses are takeover-eligible. Otherwise we might do takeover for some addresses when we no longer should, or vice-versa. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core.h | 4 +- drivers/s390/net/qeth_core_main.c | 4 +- drivers/s390/net/qeth_l3.h | 2 +- drivers/s390/net/qeth_l3_main.c | 31 +++++++++++++-- drivers/s390/net/qeth_l3_sys.c | 63 +++++++++++++++++-------------- 5 files changed, 67 insertions(+), 37 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 48eee3c671a8..9b5fc502f6a1 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -577,8 +577,8 @@ enum qeth_cq { struct qeth_ipato { bool enabled; - int invert4; - int invert6; + bool invert4; + bool invert6; struct list_head entries; }; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 606b506d46ef..df8f74cb1406 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1476,8 +1476,8 @@ static int qeth_setup_card(struct qeth_card *card) /* IP address takeover */ INIT_LIST_HEAD(&card->ipato.entries); card->ipato.enabled = false; - card->ipato.invert4 = 0; - card->ipato.invert6 = 0; + card->ipato.invert4 = false; + card->ipato.invert6 = false; /* init QDIO stuff */ qeth_init_qdio_info(card); INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work); diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h index 26f79533e62e..eedf9b01a496 100644 --- a/drivers/s390/net/qeth_l3.h +++ b/drivers/s390/net/qeth_l3.h @@ -80,7 +80,7 @@ void qeth_l3_del_vipa(struct qeth_card *, enum qeth_prot_versions, const u8 *); int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *); void qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions, const u8 *); -int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *, struct qeth_ipaddr *); +void qeth_l3_update_ipato(struct qeth_card *card); struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions); int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *); int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 04240cc55e2e..1487f8a0c575 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -168,8 +168,8 @@ static void qeth_l3_convert_addr_to_bits(u8 *addr, u8 *bits, int len) } } -int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, - struct qeth_ipaddr *addr) +static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, + struct qeth_ipaddr *addr) { struct qeth_ipato_entry *ipatoe; u8 addr_bits[128] = {0, }; @@ -608,6 +608,27 @@ int qeth_l3_setrouting_v6(struct qeth_card *card) /* * IP address takeover related functions */ + +/** + * qeth_l3_update_ipato() - Update 'takeover' property, for all NORMAL IPs. + * + * Caller must hold ip_lock. + */ +void qeth_l3_update_ipato(struct qeth_card *card) +{ + struct qeth_ipaddr *addr; + unsigned int i; + + hash_for_each(card->ip_htable, i, addr, hnode) { + if (addr->type != QETH_IP_TYPE_NORMAL) + continue; + if (qeth_l3_is_addr_covered_by_ipato(card, addr)) + addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; + else + addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG; + } +} + static void qeth_l3_clear_ipato_list(struct qeth_card *card) { struct qeth_ipato_entry *ipatoe, *tmp; @@ -619,6 +640,7 @@ static void qeth_l3_clear_ipato_list(struct qeth_card *card) kfree(ipatoe); } + qeth_l3_update_ipato(card); spin_unlock_bh(&card->ip_lock); } @@ -643,8 +665,10 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card, } } - if (!rc) + if (!rc) { list_add_tail(&new->entry, &card->ipato.entries); + qeth_l3_update_ipato(card); + } spin_unlock_bh(&card->ip_lock); @@ -667,6 +691,7 @@ void qeth_l3_del_ipato_entry(struct qeth_card *card, (proto == QETH_PROT_IPV4)? 4:16) && (ipatoe->mask_bits == mask_bits)) { list_del(&ipatoe->entry); + qeth_l3_update_ipato(card); kfree(ipatoe); } } diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index f95135f197da..d6bdfc6e905a 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -372,9 +372,8 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); - struct qeth_ipaddr *addr; - int i, rc = 0; bool enable; + int rc = 0; if (!card) return -EINVAL; @@ -393,20 +392,12 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, goto out; } - if (card->ipato.enabled == enable) - goto out; - card->ipato.enabled = enable; - - spin_lock_bh(&card->ip_lock); - hash_for_each(card->ip_htable, i, addr, hnode) { - if (addr->type != QETH_IP_TYPE_NORMAL) - continue; - if (!enable) - addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG; - else if (qeth_l3_is_addr_covered_by_ipato(card, addr)) - addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; + if (card->ipato.enabled != enable) { + card->ipato.enabled = enable; + spin_lock_bh(&card->ip_lock); + qeth_l3_update_ipato(card); + spin_unlock_bh(&card->ip_lock); } - spin_unlock_bh(&card->ip_lock); out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; @@ -432,20 +423,27 @@ static ssize_t qeth_l3_dev_ipato_invert4_store(struct device *dev, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); + bool invert; int rc = 0; if (!card) return -EINVAL; mutex_lock(&card->conf_mutex); - if (sysfs_streq(buf, "toggle")) - card->ipato.invert4 = (card->ipato.invert4)? 0 : 1; - else if (sysfs_streq(buf, "1")) - card->ipato.invert4 = 1; - else if (sysfs_streq(buf, "0")) - card->ipato.invert4 = 0; - else + if (sysfs_streq(buf, "toggle")) { + invert = !card->ipato.invert4; + } else if (kstrtobool(buf, &invert)) { rc = -EINVAL; + goto out; + } + + if (card->ipato.invert4 != invert) { + card->ipato.invert4 = invert; + spin_lock_bh(&card->ip_lock); + qeth_l3_update_ipato(card); + spin_unlock_bh(&card->ip_lock); + } +out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; } @@ -611,20 +609,27 @@ static ssize_t qeth_l3_dev_ipato_invert6_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); + bool invert; int rc = 0; if (!card) return -EINVAL; mutex_lock(&card->conf_mutex); - if (sysfs_streq(buf, "toggle")) - card->ipato.invert6 = (card->ipato.invert6)? 0 : 1; - else if (sysfs_streq(buf, "1")) - card->ipato.invert6 = 1; - else if (sysfs_streq(buf, "0")) - card->ipato.invert6 = 0; - else + if (sysfs_streq(buf, "toggle")) { + invert = !card->ipato.invert6; + } else if (kstrtobool(buf, &invert)) { rc = -EINVAL; + goto out; + } + + if (card->ipato.invert6 != invert) { + card->ipato.invert6 = invert; + spin_lock_bh(&card->ip_lock); + qeth_l3_update_ipato(card); + spin_unlock_bh(&card->ip_lock); + } +out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; } -- GitLab From 3c579d0b4f0f07ab5c57dd3329ca07d70168879b Mon Sep 17 00:00:00 2001 From: Juan Zea Date: Fri, 15 Dec 2017 10:21:20 +0100 Subject: [PATCH 0719/1398] usbip: fix usbip bind writing random string after command in match_busid commit 544c4605acc5ae4afe7dd5914147947db182f2fb upstream. usbip bind writes commands followed by random string when writing to match_busid attribute in sysfs, caused by using full variable size instead of string length. Signed-off-by: Juan Zea Acked-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/utils.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/usb/usbip/src/utils.c b/tools/usb/usbip/src/utils.c index 2b3d6d235015..3d7b42e77299 100644 --- a/tools/usb/usbip/src/utils.c +++ b/tools/usb/usbip/src/utils.c @@ -30,6 +30,7 @@ int modify_match_busid(char *busid, int add) char command[SYSFS_BUS_ID_SIZE + 4]; char match_busid_attr_path[SYSFS_PATH_MAX]; int rc; + int cmd_size; snprintf(match_busid_attr_path, sizeof(match_busid_attr_path), "%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME, @@ -37,12 +38,14 @@ int modify_match_busid(char *busid, int add) attr_name); if (add) - snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", busid); + cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", + busid); else - snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", busid); + cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", + busid); rc = write_sysfs_attribute(match_busid_attr_path, command, - sizeof(command)); + cmd_size); if (rc < 0) { dbg("failed to write match_busid: %s", strerror(errno)); return -1; -- GitLab From 1ef5c433b3b9deaa81095d7065df04877567b778 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 15 Dec 2017 10:50:09 -0700 Subject: [PATCH 0720/1398] usbip: prevent leaking socket pointer address in messages commit 90120d15f4c397272aaf41077960a157fc4212bf upstream. usbip driver is leaking socket pointer address in messages. Remove the messages that aren't useful and print sockfd in the ones that are useful for debugging. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 3 +-- drivers/usb/usbip/usbip_common.c | 14 ++++---------- drivers/usb/usbip/vhci_hcd.c | 2 +- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index c653ce533430..1886d8e4f14e 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -163,8 +163,7 @@ static void stub_shutdown_connection(struct usbip_device *ud) * step 1? */ if (ud->tcp_socket) { - dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n", - ud->tcp_socket); + dev_dbg(&sdev->udev->dev, "shutdown sockfd\n"); kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR); } diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index 8b232290be6b..e24b24fa0f16 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -335,13 +335,10 @@ int usbip_recv(struct socket *sock, void *buf, int size) char *bp = buf; int osize = size; - usbip_dbg_xmit("enter\n"); - - if (!sock || !buf || !size) { - pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf, - size); + if (!sock || !buf || !size) return -EINVAL; - } + + usbip_dbg_xmit("enter\n"); do { sock->sk->sk_allocation = GFP_NOIO; @@ -354,11 +351,8 @@ int usbip_recv(struct socket *sock, void *buf, int size) msg.msg_flags = MSG_NOSIGNAL; result = kernel_recvmsg(sock, &msg, &iov, 1, size, MSG_WAITALL); - if (result <= 0) { - pr_debug("receive sock %p buf %p size %u ret %d total %d\n", - sock, buf, size, result, total); + if (result <= 0) goto err; - } size -= result; buf += result; diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index d6dc165e924b..ab148cf136a7 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -823,7 +823,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud) /* need this? see stub_dev.c */ if (ud->tcp_socket) { - pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket); + pr_debug("shutdown tcp_socket\n"); kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR); } -- GitLab From 9e9f4255c0d3465c11fe6262e008ccf832db5f26 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 18 Dec 2017 17:23:37 -0700 Subject: [PATCH 0721/1398] usbip: stub: stop printing kernel pointer addresses in messages commit 248a22044366f588d46754c54dfe29ffe4f8b4df upstream. Remove and/or change debug, info. and error messages to not print kernel pointer addresses. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_main.c | 5 +++-- drivers/usb/usbip/stub_rx.c | 7 ++----- drivers/usb/usbip/stub_tx.c | 6 +++--- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index af10f7b131a4..325b4c05acdd 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -252,11 +252,12 @@ void stub_device_cleanup_urbs(struct stub_device *sdev) struct stub_priv *priv; struct urb *urb; - dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev); + dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n"); while ((priv = stub_priv_pop(sdev))) { urb = priv->urb; - dev_dbg(&sdev->udev->dev, "free urb %p\n", urb); + dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n", + priv->seqnum); usb_kill_urb(urb); kmem_cache_free(stub_priv_cache, priv); diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 283a9be77a22..5b807185f79e 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -225,9 +225,6 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev, if (priv->seqnum != pdu->u.cmd_unlink.seqnum) continue; - dev_info(&priv->urb->dev->dev, "unlink urb %p\n", - priv->urb); - /* * This matched urb is not completed yet (i.e., be in * flight in usb hcd hardware/driver). Now we are @@ -266,8 +263,8 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev, ret = usb_unlink_urb(priv->urb); if (ret != -EINPROGRESS) dev_err(&priv->urb->dev->dev, - "failed to unlink a urb %p, ret %d\n", - priv->urb, ret); + "failed to unlink a urb # %lu, ret %d\n", + priv->seqnum, ret); return 0; } diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c index 87ff94be4235..96aa375b80d9 100644 --- a/drivers/usb/usbip/stub_tx.c +++ b/drivers/usb/usbip/stub_tx.c @@ -102,7 +102,7 @@ void stub_complete(struct urb *urb) /* link a urb to the queue of tx. */ spin_lock_irqsave(&sdev->priv_lock, flags); if (sdev->ud.tcp_socket == NULL) { - usbip_dbg_stub_tx("ignore urb for closed connection %p", urb); + usbip_dbg_stub_tx("ignore urb for closed connection\n"); /* It will be freed in stub_device_cleanup_urbs(). */ } else if (priv->unlinking) { stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status); @@ -204,8 +204,8 @@ static int stub_send_ret_submit(struct stub_device *sdev) /* 1. setup usbip_header */ setup_ret_submit_pdu(&pdu_header, urb); - usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n", - pdu_header.base.seqnum, urb); + usbip_dbg_stub_tx("setup txdata seqnum: %d\n", + pdu_header.base.seqnum); usbip_header_correct_endian(&pdu_header, 1); iov[iovnum].iov_base = &pdu_header; -- GitLab From 03dce0573d38a17ca5ffc798611a9026f9792477 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 18 Dec 2017 17:24:22 -0700 Subject: [PATCH 0722/1398] usbip: vhci: stop printing kernel pointer addresses in messages commit 8272d099d05f7ab2776cf56a2ab9f9443be18907 upstream. Remove and/or change debug, info. and error messages to not print kernel pointer addresses. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 10 ---------- drivers/usb/usbip/vhci_rx.c | 23 +++++++++++------------ drivers/usb/usbip/vhci_tx.c | 3 ++- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index ab148cf136a7..7f161b095176 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -506,9 +506,6 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, struct vhci_device *vdev; unsigned long flags; - usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n", - hcd, urb, mem_flags); - if (portnum > VHCI_HC_PORTS) { pr_err("invalid port number %d\n", portnum); return -ENODEV; @@ -671,8 +668,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) struct vhci_device *vdev; unsigned long flags; - pr_info("dequeue a urb %p\n", urb); - spin_lock_irqsave(&vhci->lock, flags); priv = urb->hcpriv; @@ -700,7 +695,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) /* tcp connection is closed */ spin_lock(&vdev->priv_lock); - pr_info("device %p seems to be disconnected\n", vdev); list_del(&priv->list); kfree(priv); urb->hcpriv = NULL; @@ -712,8 +706,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) * vhci_rx will receive RET_UNLINK and give back the URB. * Otherwise, we give back it here. */ - pr_info("gives back urb %p\n", urb); - usb_hcd_unlink_urb_from_ep(hcd, urb); spin_unlock_irqrestore(&vhci->lock, flags); @@ -741,8 +733,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) unlink->unlink_seqnum = priv->seqnum; - pr_info("device %p seems to be still connected\n", vdev); - /* send cmd_unlink and try to cancel the pending URB in the * peer */ list_add_tail(&unlink->list, &vdev->unlink_tx); diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c index fc2d319e2360..5943deeec115 100644 --- a/drivers/usb/usbip/vhci_rx.c +++ b/drivers/usb/usbip/vhci_rx.c @@ -37,24 +37,23 @@ struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum) urb = priv->urb; status = urb->status; - usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n", - urb, priv, seqnum); + usbip_dbg_vhci_rx("find urb seqnum %u\n", seqnum); switch (status) { case -ENOENT: /* fall through */ case -ECONNRESET: - dev_info(&urb->dev->dev, - "urb %p was unlinked %ssynchronuously.\n", urb, - status == -ENOENT ? "" : "a"); + dev_dbg(&urb->dev->dev, + "urb seq# %u was unlinked %ssynchronuously\n", + seqnum, status == -ENOENT ? "" : "a"); break; case -EINPROGRESS: /* no info output */ break; default: - dev_info(&urb->dev->dev, - "urb %p may be in a error, status %d\n", urb, - status); + dev_dbg(&urb->dev->dev, + "urb seq# %u may be in a error, status %d\n", + seqnum, status); } list_del(&priv->list); @@ -80,8 +79,8 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev, spin_unlock_irqrestore(&vdev->priv_lock, flags); if (!urb) { - pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum); - pr_info("max seqnum %d\n", + pr_err("cannot find a urb of seqnum %u max seqnum %d\n", + pdu->base.seqnum, atomic_read(&vhci->seqnum)); usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); return; @@ -104,7 +103,7 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev, if (usbip_dbg_flag_vhci_rx) usbip_dump_urb(urb); - usbip_dbg_vhci_rx("now giveback urb %p\n", urb); + usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum); spin_lock_irqsave(&vhci->lock, flags); usb_hcd_unlink_urb_from_ep(vhci_to_hcd(vhci), urb); @@ -170,7 +169,7 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev, pr_info("the urb (seqnum %d) was already given back\n", pdu->base.seqnum); } else { - usbip_dbg_vhci_rx("now giveback urb %p\n", urb); + usbip_dbg_vhci_rx("now giveback urb %d\n", pdu->base.seqnum); /* If unlink is successful, status is -ECONNRESET */ urb->status = pdu->u.ret_unlink.status; diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c index 3e7878fe2fd4..a9a663a578b6 100644 --- a/drivers/usb/usbip/vhci_tx.c +++ b/drivers/usb/usbip/vhci_tx.c @@ -83,7 +83,8 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev) memset(&msg, 0, sizeof(msg)); memset(&iov, 0, sizeof(iov)); - usbip_dbg_vhci_tx("setup txdata urb %p\n", urb); + usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n", + priv->seqnum); /* 1. setup usbip_header */ setup_cmd_submit_pdu(&pdu_header, urb); -- GitLab From 0af1aebb6a8b0d9816777b4fea304c4272e067fe Mon Sep 17 00:00:00 2001 From: Max Schulze Date: Wed, 20 Dec 2017 20:47:44 +0100 Subject: [PATCH 0723/1398] USB: serial: ftdi_sio: add id for Airbus DS P8GR commit c6a36ad383559a60a249aa6016cebf3cb8b6c485 upstream. Add AIRBUS_DS_P8GR device IDs to ftdi_sio driver. Signed-off-by: Max Schulze Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 3249f42b4b93..0c743e4cca1e 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1017,6 +1017,7 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, + { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index f9d15bd62785..543d2801632b 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -913,6 +913,12 @@ #define ICPDAS_I7561U_PID 0x0104 #define ICPDAS_I7563U_PID 0x0105 +/* + * Airbus Defence and Space + */ +#define AIRBUS_DS_VID 0x1e8e /* Vendor ID */ +#define AIRBUS_DS_P8GR 0x6001 /* Tetra P8GR */ + /* * RT Systems programming cables for various ham radios */ -- GitLab From 6ab3d87ad702941243b53be7e1c45393dc5df4c6 Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Fri, 15 Dec 2017 00:39:27 +0100 Subject: [PATCH 0724/1398] USB: serial: qcserial: add Sierra Wireless EM7565 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 92a18a657fb2e2ffbfa0659af32cc18fd2346516 upstream. Sierra Wireless EM7565 devices use the QCSERIAL_SWI layout for their serial ports T: Bus=01 Lev=03 Prnt=29 Port=01 Cnt=02 Dev#= 31 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1199 ProdID=9091 Rev= 0.06 S: Manufacturer=Sierra Wireless, Incorporated S: Product=Sierra Wireless EM7565 Qualcomm Snapdragon X16 LTE-A S: SerialNumber=xxxxxxxx C:* #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=qcserial E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=qcserial E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=qcserial E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=86(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms but need sendsetup = true for the NMEA port to make it work properly. Simplify the patch compared to v1 as suggested by Bjørn Mork by taking advantage of the fact that existing devices work with sendsetup = true too. Use sendsetup = true for the NMEA interface of QCSERIAL_SWI and add DEVICE_SWI entries for the EM7565 PID 0x9091 and the EM7565 QDL PID 0x9090. Tests with several MC73xx/MC74xx/MC77xx devices have been performed in order to verify backward compatibility. Signed-off-by: Reinhard Speyerer Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 4516291df1b8..fb6dc16c754a 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -166,6 +166,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x1199, 0x907a)}, /* Sierra Wireless EM74xx QDL */ {DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */ + {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */ + {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ @@ -346,6 +348,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) break; case 2: dev_dbg(dev, "NMEA GPS interface found\n"); + sendsetup = true; break; case 3: dev_dbg(dev, "Modem port found\n"); -- GitLab From 192cdf5ecaf841d172e2c3a6bb6b5aaed530b68b Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 14 Dec 2017 16:54:45 +0100 Subject: [PATCH 0725/1398] USB: serial: option: add support for Telit ME910 PID 0x1101 commit 08933099e6404f588f81c2050bfec7313e06eeaf upstream. This patch adds support for PID 0x1101 of Telit ME910. Signed-off-by: Daniele Palmas Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ffa8ec917ff5..9a8c5a160a3d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -283,6 +283,7 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 #define TELIT_PRODUCT_ME910 0x1100 +#define TELIT_PRODUCT_ME910_DUAL_MODEM 0x1101 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 #define TELIT_PRODUCT_LE910_USBCFG4 0x1206 @@ -648,6 +649,11 @@ static const struct option_blacklist_info telit_me910_blacklist = { .reserved = BIT(1) | BIT(3), }; +static const struct option_blacklist_info telit_me910_dual_modem_blacklist = { + .sendsetup = BIT(0), + .reserved = BIT(3), +}; + static const struct option_blacklist_info telit_le910_blacklist = { .sendsetup = BIT(0), .reserved = BIT(1) | BIT(2), @@ -1247,6 +1253,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = (kernel_ulong_t)&telit_me910_blacklist }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), + .driver_info = (kernel_ulong_t)&telit_me910_dual_modem_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), -- GitLab From d98f4d4d028393aac5113c943c289ef3ad1c36f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZ=20Lin=20=28=E6=9E=97=E4=B8=8A=E6=99=BA=29?= Date: Tue, 19 Dec 2017 17:40:32 +0800 Subject: [PATCH 0726/1398] USB: serial: option: adding support for YUGA CLM920-NC5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3920bb713038810f25770e7545b79f204685c8f2 upstream. This patch adds support for YUGA CLM920-NC5 PID 0x9625 USB modem to option driver. Interface layout: 0: QCDM/DIAG 1: ADB 2: MODEM 3: AT 4: RMNET Signed-off-by: Taiyi Wu Signed-off-by: SZ Lin (林上智) Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 9a8c5a160a3d..a818c43a02ec 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -236,6 +236,8 @@ static void option_instat_callback(struct urb *urb); /* These Quectel products use Qualcomm's vendor ID */ #define QUECTEL_PRODUCT_UC20 0x9003 #define QUECTEL_PRODUCT_UC15 0x9090 +/* These Yuga products use Qualcomm's vendor ID */ +#define YUGA_PRODUCT_CLM920_NC5 0x9625 #define QUECTEL_VENDOR_ID 0x2c7c /* These Quectel products use Quectel's vendor ID */ @@ -683,6 +685,10 @@ static const struct option_blacklist_info cinterion_rmnet2_blacklist = { .reserved = BIT(4) | BIT(5), }; +static const struct option_blacklist_info yuga_clm920_nc5_blacklist = { + .reserved = BIT(1) | BIT(4), +}; + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, @@ -1187,6 +1193,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)}, { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + /* Yuga products use Qualcomm vendor ID */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5), + .driver_info = (kernel_ulong_t)&yuga_clm920_nc5_blacklist }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, -- GitLab From 0f2e9cbc231c0de2dfd5785e7b83b10145ace76b Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Dmitry Fleytman Date: Tue, 19 Dec 2017 06:02:04 +0200 Subject: [PATCH 0727/1398] usb: Add device quirk for Logitech HD Pro Webcam C925e commit 7f038d256c723dd390d2fca942919573995f4cfd upstream. Commit e0429362ab15 ("usb: Add device quirk for Logitech HD Pro Webcams C920 and C930e") introduced quirk to workaround an issue with some Logitech webcams. There is one more model that has the same issue - C925e, so applying the same quirk as well. See aforementioned commit message for detailed explanation of the problem. Signed-off-by: Dmitry Fleytman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 50010282c010..60674a932c77 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -57,10 +57,11 @@ static const struct usb_device_id usb_quirk_list[] = { /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, - /* Logitech HD Pro Webcams C920, C920-C and C930e */ + /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT }, /* Logitech ConferenceCam CC3000e */ { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT }, -- GitLab From 425d2f15338d9a65ea3bd57b481c8a55a1deebbd Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 12 Dec 2017 16:11:30 +0100 Subject: [PATCH 0728/1398] usb: add RESET_RESUME for ELSA MicroLink 56K commit b9096d9f15c142574ebebe8fbb137012bb9d99c2 upstream. This modem needs this quirk to operate. It produces timeouts when resumed without reset. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 60674a932c77..c05c4f877750 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -155,6 +155,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */ { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM }, + /* ELSA MicroLink 56K */ + { USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */ { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM }, -- GitLab From ab1fbfecd3300325c69b6dd2434329fadec26257 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 19 Dec 2017 11:14:42 +0200 Subject: [PATCH 0729/1398] USB: Fix off by one in type-specific length check of BOS SSP capability commit 07b9f12864d16c3a861aef4817eb1efccbc5d0e6 upstream. USB 3.1 devices are not detected as 3.1 capable since 4.15-rc3 due to a off by one in commit 81cf4a45360f ("USB: core: Add type-specific length check of BOS descriptors") It uses USB_DT_USB_SSP_CAP_SIZE() to get SSP capability size which takes the zero based SSAC as argument, not the actual count of sublink speed attributes. USB3 spec 9.6.2.5 says "The number of Sublink Speed Attributes = SSAC + 1." The type-specific length check patch was added to stable and needs to be fixed there as well Fixes: 81cf4a45360f ("USB: core: Add type-specific length check of BOS descriptors") CC: Masakazu Mokuno Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index ba9b29bc441f..7c54a19b20e0 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -1002,7 +1002,7 @@ int usb_get_bos_descriptor(struct usb_device *dev) case USB_SSP_CAP_TYPE: ssp_cap = (struct usb_ssp_cap_descriptor *)buffer; ssac = (le32_to_cpu(ssp_cap->bmAttributes) & - USB_SSP_SUBLINK_SPEED_ATTRIBS) + 1; + USB_SSP_SUBLINK_SPEED_ATTRIBS); if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac)) dev->bos->ssp_cap = ssp_cap; break; -- GitLab From 09d3e69305b32a43de75d36430f9856df8055022 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 21 Dec 2017 15:06:15 +0200 Subject: [PATCH 0730/1398] usb: xhci: Add XHCI_TRUST_TX_LENGTH for Renesas uPD720201 commit da99706689481717998d1d48edd389f339eea979 upstream. When plugging in a USB webcam I see the following message: xhci_hcd 0000:04:00.0: WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk? handle_tx_event: 913 callbacks suppressed All is quiet again with this patch (and I've done a fair but of soak testing with the camera since). Signed-off-by: Daniel Thompson Acked-by: Ard Biesheuvel Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index c87ef38e7416..f6782a347cde 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -189,6 +189,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_TRUST_TX_LENGTH; xhci->quirks |= XHCI_BROKEN_STREAMS; } + if (pdev->vendor == PCI_VENDOR_ID_RENESAS && + pdev->device == 0x0014) + xhci->quirks |= XHCI_TRUST_TX_LENGTH; if (pdev->vendor == PCI_VENDOR_ID_RENESAS && pdev->device == 0x0015) xhci->quirks |= XHCI_RESET_ON_RESUME; -- GitLab From d840687aa8a3ca7b8219b1a207a1c55e47c90225 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Fri, 22 Dec 2017 15:51:12 +0100 Subject: [PATCH 0731/1398] timers: Use deferrable base independent of base::nohz_active commit ced6d5c11d3e7b342f1a80f908e6756ebd4b8ddd upstream. During boot and before base::nohz_active is set in the timer bases, deferrable timers are enqueued into the standard timer base. This works correctly as long as base::nohz_active is false. Once it base::nohz_active is set and a timer which was enqueued before that is accessed the lock selector code choses the lock of the deferred base. This causes unlocked access to the standard base and in case the timer is removed it does not clear the pending flag in the standard base bitmap which causes get_next_timer_interrupt() to return bogus values. To prevent that, the deferrable timers must be enqueued in the deferrable base, even when base::nohz_active is not set. Those deferrable timers also need to be expired unconditional. Fixes: 500462a9de65 ("timers: Switch to a non-cascading wheel") Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: rt@linutronix.de Cc: Paul McKenney Link: https://lkml.kernel.org/r/20171222145337.633328378@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/time/timer.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 7d670362891a..d1cc71d5d5ba 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -849,11 +849,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu) struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); /* - * If the timer is deferrable and nohz is active then we need to use - * the deferrable base. + * If the timer is deferrable and NO_HZ_COMMON is set then we need + * to use the deferrable base. */ - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && - (tflags & TIMER_DEFERRABLE)) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); return base; } @@ -863,11 +862,10 @@ static inline struct timer_base *get_timer_this_cpu_base(u32 tflags) struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); /* - * If the timer is deferrable and nohz is active then we need to use - * the deferrable base. + * If the timer is deferrable and NO_HZ_COMMON is set then we need + * to use the deferrable base. */ - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && - (tflags & TIMER_DEFERRABLE)) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = this_cpu_ptr(&timer_bases[BASE_DEF]); return base; } @@ -1684,7 +1682,7 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) base->must_forward_clk = false; __run_timers(base); - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); } -- GitLab From 574e543ff970ea208d6d97524e0373d3741a6a34 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Dec 2017 15:51:14 +0100 Subject: [PATCH 0732/1398] timers: Invoke timer_start_debug() where it makes sense commit fd45bb77ad682be728d1002431d77b8c73342836 upstream. The timer start debug function is called before the proper timer base is set. As a consequence the trace data contains the stale CPU and flags values. Call the debug function after setting the new base and flags. Fixes: 500462a9de65 ("timers: Switch to a non-cascading wheel") Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Sebastian Siewior Cc: rt@linutronix.de Cc: Paul McKenney Cc: Anna-Maria Gleixner Link: https://lkml.kernel.org/r/20171222145337.792907137@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/time/timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index d1cc71d5d5ba..26c2f659a40f 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1019,8 +1019,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) if (!ret && pending_only) goto out_unlock; - debug_activate(timer, expires); - new_base = get_target_base(base, timer->flags); if (base != new_base) { @@ -1044,6 +1042,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } } + debug_activate(timer, expires); + timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance -- GitLab From 249d4a9b3246f4ec92433ba8ea3bae5ceb4dc1ed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Dec 2017 21:37:25 +0100 Subject: [PATCH 0733/1398] timers: Reinitialize per cpu bases on hotplug commit 26456f87aca7157c057de65c9414b37f1ab881d1 upstream. The timer wheel bases are not (re)initialized on CPU hotplug. That leaves them with a potentially stale clk and next_expiry valuem, which can cause trouble then the CPU is plugged. Add a prepare callback which forwards the clock, sets next_expiry to far in the future and reset the control flags to a known state. Set base->must_forward_clk so the first timer which is queued will try to forward the clock to current jiffies. Fixes: 500462a9de65 ("timers: Switch to a non-cascading wheel") Reported-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Sebastian Siewior Cc: Anna-Maria Gleixner Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1712272152200.2431@nanos Signed-off-by: Greg Kroah-Hartman --- include/linux/cpuhotplug.h | 2 +- include/linux/timer.h | 4 +++- kernel/cpu.c | 4 ++-- kernel/time/timer.c | 15 +++++++++++++++ 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 965cc5693a46..c9447a689522 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -48,7 +48,7 @@ enum cpuhp_state { CPUHP_ARM_SHMOBILE_SCU_PREPARE, CPUHP_SH_SH3X_PREPARE, CPUHP_BLK_MQ_PREPARE, - CPUHP_TIMERS_DEAD, + CPUHP_TIMERS_PREPARE, CPUHP_NOTF_ERR_INJ_PREPARE, CPUHP_MIPS_SOC_PREPARE, CPUHP_BRINGUP_CPU, diff --git a/include/linux/timer.h b/include/linux/timer.h index 51d601f192d4..ec86e4e55ea3 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -274,9 +274,11 @@ unsigned long round_jiffies_up(unsigned long j); unsigned long round_jiffies_up_relative(unsigned long j); #ifdef CONFIG_HOTPLUG_CPU +int timers_prepare_cpu(unsigned int cpu); int timers_dead_cpu(unsigned int cpu); #else -#define timers_dead_cpu NULL +#define timers_prepare_cpu NULL +#define timers_dead_cpu NULL #endif #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index e1436ca4aed0..802eb3361a0a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1309,9 +1309,9 @@ static struct cpuhp_step cpuhp_bp_states[] = { * before blk_mq_queue_reinit_notify() from notify_dead(), * otherwise a RCU stall occurs. */ - [CPUHP_TIMERS_DEAD] = { + [CPUHP_TIMERS_PREPARE] = { .name = "timers:dead", - .startup.single = NULL, + .startup.single = timers_prepare_cpu, .teardown.single = timers_dead_cpu, }, /* Kicks the plugged cpu into life */ diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 26c2f659a40f..e872f7f05e8a 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1851,6 +1851,21 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h } } +int timers_prepare_cpu(unsigned int cpu) +{ + struct timer_base *base; + int b; + + for (b = 0; b < NR_BASES; b++) { + base = per_cpu_ptr(&timer_bases[b], cpu); + base->clk = jiffies; + base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; + base->is_idle = false; + base->must_forward_clk = true; + } + return 0; +} + int timers_dead_cpu(unsigned int cpu) { struct timer_base *old_base; -- GitLab From e8119ac05d7160cce59ce1ff04c210c22e147a6c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Dec 2017 15:51:13 +0100 Subject: [PATCH 0734/1398] nohz: Prevent a timer interrupt storm in tick_nohz_stop_sched_tick() commit 5d62c183f9e9df1deeea0906d099a94e8a43047a upstream. The conditions in irq_exit() to invoke tick_nohz_irq_exit() which subsequently invokes tick_nohz_stop_sched_tick() are: if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) If need_resched() is not set, but a timer softirq is pending then this is an indication that the softirq code punted and delegated the execution to softirqd. need_resched() is not true because the current interrupted task takes precedence over softirqd. Invoking tick_nohz_irq_exit() in this case can cause an endless loop of timer interrupts because the timer wheel contains an expired timer, but softirqs are not yet executed. So it returns an immediate expiry request, which causes the timer to fire immediately again. Lather, rinse and repeat.... Prevent that by adding a check for a pending timer soft interrupt to the conditions in tick_nohz_stop_sched_tick() which avoid calling get_next_timer_interrupt(). That keeps the tick sched timer on the tick and prevents a repetitive programming of an already expired timer. Reported-by: Sebastian Siewior Signed-off-by: Thomas Gleixner Acked-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul McKenney Cc: Anna-Maria Gleixner Cc: Sebastian Siewior Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1712272156050.2431@nanos Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-sched.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3bcb61b52f6c..dae1a45be504 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -663,6 +663,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); } +static inline bool local_timer_softirq_pending(void) +{ + return local_softirq_pending() & TIMER_SOFTIRQ; +} + static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, ktime_t now, int cpu) { @@ -679,8 +684,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, } while (read_seqretry(&jiffies_lock, seq)); ts->last_jiffies = basejiff; - if (rcu_needs_cpu(basemono, &next_rcu) || - arch_needs_cpu() || irq_work_needs_cpu()) { + /* + * Keep the periodic tick, when RCU, architecture or irq_work + * requests it. + * Aside of that check whether the local timer softirq is + * pending. If so its a bad idea to call get_next_timer_interrupt() + * because there is an already expired timer, so it will request + * immeditate expiry, which rearms the hardware timer with a + * minimal delta which brings us back to this place + * immediately. Lather, rinse and repeat... + */ + if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() || + irq_work_needs_cpu() || local_timer_softirq_pending()) { next_tick = basemono + TICK_NSEC; } else { /* -- GitLab From 404ae546c7d1927b877d24bf447a462a5c5a5ad7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 30 Dec 2017 22:13:53 +0100 Subject: [PATCH 0735/1398] x86/smpboot: Remove stale TLB flush invocations commit 322f8b8b340c824aef891342b0f5795d15e11562 upstream. smpboot_setup_warm_reset_vector() and smpboot_restore_warm_reset_vector() invoke local_flush_tlb() for no obvious reason. Digging in history revealed that the original code in the 2.1 era added those because the code manipulated a swapper_pg_dir pagetable entry. The pagetable manipulation was removed long ago in the 2.3 timeframe, but the TLB flush invocations stayed around forever. Remove them along with the pointless pr_debug()s which come from the same 2.1 change. Reported-by: Dominik Brodowski Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171230211829.586548655@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/smpboot.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9fe7b9e1ae30..e803d72ef525 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -115,25 +115,16 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) spin_lock_irqsave(&rtc_lock, flags); CMOS_WRITE(0xa, 0xf); spin_unlock_irqrestore(&rtc_lock, flags); - local_flush_tlb(); - pr_debug("1.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = start_eip >> 4; - pr_debug("2.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = start_eip & 0xf; - pr_debug("3.\n"); } static inline void smpboot_restore_warm_reset_vector(void) { unsigned long flags; - /* - * Install writable page 0 entry to set BIOS data area. - */ - local_flush_tlb(); - /* * Paranoid: Set warm reset code and vector here back * to default values. -- GitLab From 00fc57ae06c3519a5e32e39882549e4e1f6328fb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 20 Dec 2017 17:57:06 -0800 Subject: [PATCH 0736/1398] n_tty: fix EXTPROC vs ICANON interaction with TIOCINQ (aka FIONREAD) commit 966031f340185eddd05affcf72b740549f056348 upstream. We added support for EXTPROC back in 2010 in commit 26df6d13406d ("tty: Add EXTPROC support for LINEMODE") and the intent was to allow it to override some (all?) ICANON behavior. Quoting from that original commit message: There is a new bit in the termios local flag word, EXTPROC. When this bit is set, several aspects of the terminal driver are disabled. Input line editing, character echo, and mapping of signals are all disabled. This allows the telnetd to turn off these functions when in linemode, but still keep track of what state the user wants the terminal to be in. but the problem turns out that "several aspects of the terminal driver are disabled" is a bit ambiguous, and you can really confuse the n_tty layer by setting EXTPROC and then causing some of the ICANON invariants to no longer be maintained. This fixes at least one such case (TIOCINQ) becoming unhappy because of the confusion over whether ICANON really means ICANON when EXTPROC is set. This basically makes TIOCINQ match the case of read: if EXTPROC is set, we ignore ICANON. Also, make sure to reset the ICANON state ie EXTPROC changes, not just if ICANON changes. Fixes: 26df6d13406d ("tty: Add EXTPROC support for LINEMODE") Reported-by: Tetsuo Handa Reported-by: syzkaller Cc: Jiri Slaby Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index bdf0e6e89991..faf50df81622 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1764,7 +1764,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) { struct n_tty_data *ldata = tty->disc_data; - if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) { + if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) { bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE); ldata->line_start = ldata->read_tail; if (!L_ICANON(tty) || !read_cnt(ldata)) { @@ -2427,7 +2427,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file, return put_user(tty_chars_in_buffer(tty), (int __user *) arg); case TIOCINQ: down_write(&tty->termios_rwsem); - if (L_ICANON(tty)) + if (L_ICANON(tty) && !L_EXTPROC(tty)) retval = inq_canon(ldata); else retval = read_cnt(ldata); -- GitLab From d76dabb5af066e8da87924825f725f585a41daa0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Nov 2017 15:18:05 +0100 Subject: [PATCH 0737/1398] tty: fix tty_ldisc_receive_buf() documentation commit e7e51dcf3b8a5f65c5653a054ad57eb2492a90d0 upstream. The tty_ldisc_receive_buf() helper returns the number of bytes processed so drop the bogus "not" from the kernel doc comment. Fixes: 8d082cd300ab ("tty: Unify receive_buf() code paths") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index c220c2c0893f..e99f1c5b1df6 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -446,7 +446,7 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string); * Callers other than flush_to_ldisc() need to exclude the kworker * from concurrent use of the line discipline, see paste_selection(). * - * Returns the number of bytes not processed + * Returns the number of bytes processed */ int tty_ldisc_receive_buf(struct tty_ldisc *ld, unsigned char *p, char *f, int count) -- GitLab From 181a832c2e26ac7ff1e3b3c8bd6b7e9b8d70f870 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 5 Jun 2017 07:40:25 -0700 Subject: [PATCH 0738/1398] mm/vmstat: Make NR_TLB_REMOTE_FLUSH_RECEIVED available even on UP commit 5dd0b16cdaff9b94da06074d5888b03235c0bf17 upstream. This fixes CONFIG_SMP=n, CONFIG_DEBUG_TLBFLUSH=y without introducing further #ifdef soup. Caught by a Kbuild bot randconfig build. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: ce4a4e565f52 ("x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code") Link: http://lkml.kernel.org/r/76da9a3cc4415996f2ad2c905b93414add322021.1496673616.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/vm_event_item.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 4d6ec58a8d45..2edb150f1a4d 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -89,10 +89,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #endif #endif #ifdef CONFIG_DEBUG_TLBFLUSH -#ifdef CONFIG_SMP NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */ NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */ -#endif /* CONFIG_SMP */ NR_TLB_LOCAL_FLUSH_ALL, NR_TLB_LOCAL_FLUSH_ONE, #endif /* CONFIG_DEBUG_TLBFLUSH */ -- GitLab From 07bcb2489b96b2bd8b030822b4495e4a18c7b5da Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 2 Jan 2018 20:35:18 +0100 Subject: [PATCH 0739/1398] Linux 4.9.74 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dc9012a6d14b..075e429732e7 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 73 +SUBLEVEL = 74 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 4406671f9ec830ed2d41bffe28b801daab0bff3b Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Thu, 2 Nov 2017 16:16:07 +0100 Subject: [PATCH 0740/1398] UPSTREAM: netfilter: conntrack: use power efficient workqueue conntrack uses the bounded system_long_wq workqueue for its works that don't have to run on the cpu they have been queued. Using bounded workqueue prevents the scheduler to make smart decision about the best place to schedule the work. This patch replaces system_long_wq with system_power_efficient_wq. the work stays bounded to a cpu by default unless the CONFIG_WQ_POWER_EFFICIENT is enable. In the latter case, the work can be scheduled on the best cpu from a power or a performance point of view. Change-Id: I12f862e7476b8e77c50e989ce16bd49b52063c1a (cherry picked from commit 0984d427c1d3cb2aa882c9ad9e787ba973cf2915) Signed-off-by: Vincent Guittot Signed-off-by: Pablo Neira Ayuso Signed-off-by: Viresh Kumar --- net/netfilter/nf_conntrack_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 750b8bf13e60..8da893c397da 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1023,7 +1023,7 @@ static void gc_worker(struct work_struct *work) next_run = gc_work->next_gc_run; gc_work->last_bucket = i; - queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); + queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) @@ -1922,7 +1922,7 @@ int nf_conntrack_init_start(void) nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); conntrack_gc_work_init(&conntrack_gc_work); - queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ); + queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ); return 0; -- GitLab From 11537d5ac91a51571027818b98a99f43caaacb1b Mon Sep 17 00:00:00 2001 From: Hyojun Kim Date: Thu, 21 Dec 2017 09:57:41 -0800 Subject: [PATCH 0741/1398] blkdev: Refactoring block io latency histogram codes The current io_latency_state structure includes entries for read and write requests. There are special types of write commands such as sync and discard (trim) commands, and the current implementation is not general enough if we want to separate latency histogram for such special commands. This change makes io_latency_state structure request-type neutral. It also changes to print the latency average. Signed-off-by: Hyojun Kim --- block/blk-core.c | 93 +++++++++++++-------------------------- drivers/mmc/core/core.c | 22 ++++++--- drivers/scsi/ufs/ufshcd.c | 23 ++++++---- drivers/scsi/ufs/ufshcd.h | 3 +- include/linux/blkdev.h | 39 +++++----------- include/linux/mmc/host.h | 3 +- 6 files changed, 75 insertions(+), 108 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 4c090a0d8812..b2e57642dbb9 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3583,76 +3583,43 @@ int __init blk_dev_init(void) * TODO : If necessary, we can make the histograms per-cpu and aggregate * them when printing them out. */ -void -blk_zero_latency_hist(struct io_latency_state *s) -{ - memset(s->latency_y_axis_read, 0, - sizeof(s->latency_y_axis_read)); - memset(s->latency_y_axis_write, 0, - sizeof(s->latency_y_axis_write)); - s->latency_reads_elems = 0; - s->latency_writes_elems = 0; -} -EXPORT_SYMBOL(blk_zero_latency_hist); - ssize_t -blk_latency_hist_show(struct io_latency_state *s, char *buf) +blk_latency_hist_show(char* name, struct io_latency_state *s, char *buf, + int buf_size) { int i; int bytes_written = 0; u_int64_t num_elem, elem; int pct; - - num_elem = s->latency_reads_elems; - if (num_elem > 0) { - bytes_written += scnprintf(buf + bytes_written, - PAGE_SIZE - bytes_written, - "IO svc_time Read Latency Histogram (n = %llu):\n", - num_elem); - for (i = 0; - i < ARRAY_SIZE(latency_x_axis_us); - i++) { - elem = s->latency_y_axis_read[i]; - pct = div64_u64(elem * 100, num_elem); - bytes_written += scnprintf(buf + bytes_written, - PAGE_SIZE - bytes_written, - "\t< %5lluus%15llu%15d%%\n", - latency_x_axis_us[i], - elem, pct); - } - /* Last element in y-axis table is overflow */ - elem = s->latency_y_axis_read[i]; - pct = div64_u64(elem * 100, num_elem); - bytes_written += scnprintf(buf + bytes_written, - PAGE_SIZE - bytes_written, - "\t> %5dms%15llu%15d%%\n", 10, - elem, pct); - } - num_elem = s->latency_writes_elems; - if (num_elem > 0) { - bytes_written += scnprintf(buf + bytes_written, - PAGE_SIZE - bytes_written, - "IO svc_time Write Latency Histogram (n = %llu):\n", - num_elem); - for (i = 0; - i < ARRAY_SIZE(latency_x_axis_us); - i++) { - elem = s->latency_y_axis_write[i]; - pct = div64_u64(elem * 100, num_elem); - bytes_written += scnprintf(buf + bytes_written, - PAGE_SIZE - bytes_written, - "\t< %5lluus%15llu%15d%%\n", - latency_x_axis_us[i], - elem, pct); - } - /* Last element in y-axis table is overflow */ - elem = s->latency_y_axis_write[i]; - pct = div64_u64(elem * 100, num_elem); - bytes_written += scnprintf(buf + bytes_written, - PAGE_SIZE - bytes_written, - "\t> %5dms%15llu%15d%%\n", 10, - elem, pct); + u_int64_t average; + + num_elem = s->latency_elems; + if (num_elem > 0) { + average = div64_u64(s->latency_sum, s->latency_elems); + bytes_written += scnprintf(buf + bytes_written, + buf_size - bytes_written, + "IO svc_time %s Latency Histogram (n = %llu," + " average = %llu):\n", name, num_elem, average); + for (i = 0; + i < ARRAY_SIZE(latency_x_axis_us); + i++) { + elem = s->latency_y_axis[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t< %6lluus%15llu%15d%%\n", + latency_x_axis_us[i], + elem, pct); + } + /* Last element in y-axis table is overflow */ + elem = s->latency_y_axis[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t>=%6lluus%15llu%15d%%\n", + latency_x_axis_us[i - 1], elem, pct); } + return bytes_written; } EXPORT_SYMBOL(blk_latency_hist_show); diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 40ddc3e69a4d..00b9d7ef392f 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -209,9 +209,10 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq) completion = ktime_get(); delta_us = ktime_us_delta(completion, mrq->io_start); - blk_update_latency_hist(&host->io_lat_s, - (mrq->data->flags & MMC_DATA_READ), - delta_us); + blk_update_latency_hist( + (mrq->data->flags & MMC_DATA_READ) ? + &host->io_lat_read : + &host->io_lat_write, delta_us); } #endif } @@ -3100,8 +3101,14 @@ static ssize_t latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mmc_host *host = cls_dev_to_mmc_host(dev); + size_t written_bytes; - return blk_latency_hist_show(&host->io_lat_s, buf); + written_bytes = blk_latency_hist_show("Read", &host->io_lat_read, + buf, PAGE_SIZE); + written_bytes += blk_latency_hist_show("Write", &host->io_lat_write, + buf + written_bytes, PAGE_SIZE - written_bytes); + + return written_bytes; } /* @@ -3119,9 +3126,10 @@ latency_hist_store(struct device *dev, struct device_attribute *attr, if (kstrtol(buf, 0, &value)) return -EINVAL; - if (value == BLK_IO_LAT_HIST_ZERO) - blk_zero_latency_hist(&host->io_lat_s); - else if (value == BLK_IO_LAT_HIST_ENABLE || + if (value == BLK_IO_LAT_HIST_ZERO) { + memset(&host->io_lat_read, 0, sizeof(host->io_lat_read)); + memset(&host->io_lat_write, 0, sizeof(host->io_lat_write)); + } else if (value == BLK_IO_LAT_HIST_ENABLE || value == BLK_IO_LAT_HIST_DISABLE) host->latency_hist_enabled = value; return count; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index f0a015aca608..fe893f23d693 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -3622,10 +3622,10 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, completion = ktime_get(); delta_us = ktime_us_delta(completion, req->lat_hist_io_start); - /* rq_data_dir() => true if WRITE */ - blk_update_latency_hist(&hba->io_lat_s, - (rq_data_dir(req) == READ), - delta_us); + blk_update_latency_hist( + (rq_data_dir(req) == READ) ? + &hba->io_lat_read : + &hba->io_lat_write, delta_us); } } /* Do not touch lrbp after scsi done */ @@ -6371,9 +6371,10 @@ latency_hist_store(struct device *dev, struct device_attribute *attr, if (kstrtol(buf, 0, &value)) return -EINVAL; - if (value == BLK_IO_LAT_HIST_ZERO) - blk_zero_latency_hist(&hba->io_lat_s); - else if (value == BLK_IO_LAT_HIST_ENABLE || + if (value == BLK_IO_LAT_HIST_ZERO) { + memset(&hba->io_lat_read, 0, sizeof(hba->io_lat_read)); + memset(&hba->io_lat_write, 0, sizeof(hba->io_lat_write)); + } else if (value == BLK_IO_LAT_HIST_ENABLE || value == BLK_IO_LAT_HIST_DISABLE) hba->latency_hist_enabled = value; return count; @@ -6384,8 +6385,14 @@ latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf) { struct ufs_hba *hba = dev_get_drvdata(dev); + size_t written_bytes; - return blk_latency_hist_show(&hba->io_lat_s, buf); + written_bytes = blk_latency_hist_show("Read", &hba->io_lat_read, + buf, PAGE_SIZE); + written_bytes += blk_latency_hist_show("Write", &hba->io_lat_write, + buf + written_bytes, PAGE_SIZE - written_bytes); + + return written_bytes; } static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR, diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index dbe68b27c6cb..b35a5b999412 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -564,7 +564,8 @@ struct ufs_hba { enum bkops_status urgent_bkops_lvl; bool is_urgent_bkops_lvl_checked; int latency_hist_enabled; - struct io_latency_state io_lat_s; + struct io_latency_state io_lat_read; + struct io_latency_state io_lat_write; }; /* Returns true if clocks can be gated. Otherwise false */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e47a7f7025a0..fe8dd2797b1f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1738,43 +1738,26 @@ static const u_int64_t latency_x_axis_us[] = { #define BLK_IO_LAT_HIST_ZERO 2 struct io_latency_state { - u_int64_t latency_y_axis_read[ARRAY_SIZE(latency_x_axis_us) + 1]; - u_int64_t latency_reads_elems; - u_int64_t latency_y_axis_write[ARRAY_SIZE(latency_x_axis_us) + 1]; - u_int64_t latency_writes_elems; + u_int64_t latency_y_axis[ARRAY_SIZE(latency_x_axis_us) + 1]; + u_int64_t latency_elems; + u_int64_t latency_sum; }; static inline void -blk_update_latency_hist(struct io_latency_state *s, - int read, - u_int64_t delta_us) +blk_update_latency_hist(struct io_latency_state *s, u_int64_t delta_us) { int i; - for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++) { - if (delta_us < (u_int64_t)latency_x_axis_us[i]) { - if (read) - s->latency_y_axis_read[i]++; - else - s->latency_y_axis_write[i]++; + for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++) + if (delta_us < (u_int64_t)latency_x_axis_us[i]) break; - } - } - if (i == ARRAY_SIZE(latency_x_axis_us)) { - /* Overflowed the histogram */ - if (read) - s->latency_y_axis_read[i]++; - else - s->latency_y_axis_write[i]++; - } - if (read) - s->latency_reads_elems++; - else - s->latency_writes_elems++; + s->latency_y_axis[i]++; + s->latency_elems++; + s->latency_sum += delta_us; } -void blk_zero_latency_hist(struct io_latency_state *s); -ssize_t blk_latency_hist_show(struct io_latency_state *s, char *buf); +ssize_t blk_latency_hist_show(char* name, struct io_latency_state *s, + char *buf, int buf_size); #else /* CONFIG_BLOCK */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index fac3b5c27f4f..1808b7895dac 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -409,7 +409,8 @@ struct mmc_host { #ifdef CONFIG_BLOCK int latency_hist_enabled; - struct io_latency_state io_lat_s; + struct io_latency_state io_lat_read; + struct io_latency_state io_lat_write; #endif unsigned long private[0] ____cacheline_aligned; -- GitLab From f18c44dc552e2ed0655ed5ec49b578da4dd30588 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 20 Dec 2017 16:59:11 -0800 Subject: [PATCH 0742/1398] ANDROID: sdcardfs: notify lower file of opens fsnotify_open is not called within dentry_open, so we need to call it ourselves. Change-Id: Ia7f323b3d615e6ca5574e114e8a5d7973fb4c119 Signed-off-by: Daniel Rosenberg Bug: 70706497 --- fs/sdcardfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index 5ac0b0bbb0ec..dd76ecf33cf3 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -18,6 +18,7 @@ * General Public License. */ +#include #include "sdcardfs.h" #ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE #include @@ -259,6 +260,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file) fput(lower_file); /* fput calls dput for lower_dentry */ } } else { + fsnotify_open(lower_file); sdcardfs_set_lower_file(file, lower_file); } -- GitLab From 173c52eae9283363f730c69bfc8694f331571fd3 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 2 Jan 2018 14:44:49 -0800 Subject: [PATCH 0743/1398] ANDROID: sdcardfs: Add default_normal option The default_normal option causes mounts with the gid set to AID_SDCARD_RW to have user specific gids, as in the normal case. Signed-off-by: Daniel Rosenberg Change-Id: I9619b8ac55f41415df943484dc8db1ea986cef6f Bug: 64672411 --- fs/sdcardfs/main.c | 6 ++++++ fs/sdcardfs/sdcardfs.h | 3 ++- fs/sdcardfs/super.c | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 0a2b5167e9a2..3d1023a7ff7c 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -33,6 +33,7 @@ enum { Opt_userid, Opt_reserved_mb, Opt_gid_derivation, + Opt_default_normal, Opt_err, }; @@ -45,6 +46,7 @@ static const match_table_t sdcardfs_tokens = { {Opt_userid, "userid=%d"}, {Opt_multiuser, "multiuser"}, {Opt_gid_derivation, "derive_gid"}, + {Opt_default_normal, "default_normal"}, {Opt_reserved_mb, "reserved_mb=%u"}, {Opt_err, NULL} }; @@ -68,6 +70,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, opts->reserved_mb = 0; /* by default, gid derivation is off */ opts->gid_derivation = false; + vfsopts->default_normal = false; *debug = 0; @@ -122,6 +125,8 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_gid_derivation: opts->gid_derivation = true; break; + case Opt_default_normal: + vfsopts->default_normal = true; /* unknown option */ default: if (!silent) @@ -175,6 +180,7 @@ int parse_options_remount(struct super_block *sb, char *options, int silent, return 0; vfsopts->mask = option; break; + case Opt_default_normal: case Opt_multiuser: case Opt_userid: case Opt_fsuid: diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index d1d8bab00fe5..365b302149c5 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -226,6 +226,7 @@ struct sdcardfs_mount_options { struct sdcardfs_vfsmount_options { gid_t gid; mode_t mask; + bool default_normal; }; extern int parse_options_remount(struct super_block *sb, char *options, int silent, @@ -417,7 +418,7 @@ static inline int get_gid(struct vfsmount *mnt, { struct sdcardfs_vfsmount_options *opts = mnt->data; - if (opts->gid == AID_SDCARD_RW) + if (opts->gid == AID_SDCARD_RW && !opts->default_normal) /* As an optimization, certain trusted system components only run * as owner but operate across all users. Since we're now handing * out the sdcard_rw GID only to trusted apps, we're okay relaxing diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index b89947d878e3..a28b40f5adc8 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -304,6 +304,8 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, seq_printf(m, ",userid=%u", opts->fs_user_id); if (opts->gid_derivation) seq_puts(m, ",derive_gid"); + if (vfsopts->default_normal) + seq_puts(m, ",default_normal"); if (opts->reserved_mb != 0) seq_printf(m, ",reserved=%uMB", opts->reserved_mb); -- GitLab From 61c51da2b4bd8abb2920f4dec0861a6fe5b8b486 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 7 Dec 2017 12:43:31 -0500 Subject: [PATCH 0744/1398] tcp_bbr: reset full pipe detection on loss recovery undo commit 2f6c498e4f15d27852c04ed46d804a39137ba364 upstream. Fix BBR so that upon notification of a loss recovery undo BBR resets the full pipe detection (STARTUP exit) state machine. Under high reordering, reordering events can be interpreted as loss. If the reordering and spurious loss estimates are high enough, this could previously cause BBR to spuriously estimate that the pipe is full. Since spurious loss recovery means that our overall sending will have slowed down spuriously, this commit gives a flow more time to probe robustly for bandwidth and decide the pipe is really full. Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 97f9cac98348..64ff4966aad6 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -843,6 +843,10 @@ static u32 bbr_sndbuf_expand(struct sock *sk) */ static u32 bbr_undo_cwnd(struct sock *sk) { + struct bbr *bbr = inet_csk_ca(sk); + + bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ + bbr->full_bw_cnt = 0; return tcp_sk(sk)->snd_cwnd; } -- GitLab From 8824b2d7abfb113e33eae268ea22b11eb15b30b4 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 7 Dec 2017 12:43:32 -0500 Subject: [PATCH 0745/1398] tcp_bbr: reset long-term bandwidth sampling on loss recovery undo commit 600647d467c6d04b3954b41a6ee1795b5ae00550 upstream. Fix BBR so that upon notification of a loss recovery undo BBR resets long-term bandwidth sampling. Under high reordering, reordering events can be interpreted as loss. If the reordering and spurious loss estimates are high enough, this can cause BBR to spuriously estimate that we are seeing loss rates high enough to trigger long-term bandwidth estimation. To avoid that problem, this commit resets long-term bandwidth sampling on loss recovery undo events. Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 64ff4966aad6..e86a34fd5484 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -847,6 +847,7 @@ static u32 bbr_undo_cwnd(struct sock *sk) bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ bbr->full_bw_cnt = 0; + bbr_reset_lt_bw_sampling(sk); return tcp_sk(sk)->snd_cwnd; } -- GitLab From b5fd58e997cf6294d2c027d585f7dcbd1e096bce Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 Jul 2017 16:10:33 -0500 Subject: [PATCH 0746/1398] x86/boot: Add early cmdline parsing for options with arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e505371dd83963caae1a37ead9524e8d997341be upstream. Add a cmdline_find_option() function to look for cmdline options that take arguments. The argument is returned in a supplied buffer and the argument length (regardless of whether it fits in the supplied buffer) is returned, with -1 indicating not found. Signed-off-by: Tom Lendacky Reviewed-by: Thomas Gleixner Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brijesh Singh Cc: Dave Young Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Konrad Rzeszutek Wilk Cc: Larry Woodman Cc: Linus Torvalds Cc: Matt Fleming Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Toshimitsu Kani Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/36b5f97492a9745dce27682305f990fc20e5cf8a.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cmdline.h | 2 + arch/x86/lib/cmdline.c | 105 +++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h index e01f7f7ccb0c..84ae170bc3d0 100644 --- a/arch/x86/include/asm/cmdline.h +++ b/arch/x86/include/asm/cmdline.h @@ -2,5 +2,7 @@ #define _ASM_X86_CMDLINE_H int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); +int cmdline_find_option(const char *cmdline_ptr, const char *option, + char *buffer, int bufsize); #endif /* _ASM_X86_CMDLINE_H */ diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 5cc78bf57232..3261abb21ef4 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c @@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size, return 0; /* Buffer overrun */ } +/* + * Find a non-boolean option (i.e. option=argument). In accordance with + * standard Linux practice, if this option is repeated, this returns the + * last instance on the command line. + * + * @cmdline: the cmdline string + * @max_cmdline_size: the maximum size of cmdline + * @option: option string to look for + * @buffer: memory buffer to return the option argument + * @bufsize: size of the supplied memory buffer + * + * Returns the length of the argument (regardless of if it was + * truncated to fit in the buffer), or -1 on not found. + */ +static int +__cmdline_find_option(const char *cmdline, int max_cmdline_size, + const char *option, char *buffer, int bufsize) +{ + char c; + int pos = 0, len = -1; + const char *opptr = NULL; + char *bufptr = buffer; + enum { + st_wordstart = 0, /* Start of word/after whitespace */ + st_wordcmp, /* Comparing this word */ + st_wordskip, /* Miscompare, skip */ + st_bufcpy, /* Copying this to buffer */ + } state = st_wordstart; + + if (!cmdline) + return -1; /* No command line */ + + /* + * This 'pos' check ensures we do not overrun + * a non-NULL-terminated 'cmdline' + */ + while (pos++ < max_cmdline_size) { + c = *(char *)cmdline++; + if (!c) + break; + + switch (state) { + case st_wordstart: + if (myisspace(c)) + break; + + state = st_wordcmp; + opptr = option; + /* fall through */ + + case st_wordcmp: + if ((c == '=') && !*opptr) { + /* + * We matched all the way to the end of the + * option we were looking for, prepare to + * copy the argument. + */ + len = 0; + bufptr = buffer; + state = st_bufcpy; + break; + } else if (c == *opptr++) { + /* + * We are currently matching, so continue + * to the next character on the cmdline. + */ + break; + } + state = st_wordskip; + /* fall through */ + + case st_wordskip: + if (myisspace(c)) + state = st_wordstart; + break; + + case st_bufcpy: + if (myisspace(c)) { + state = st_wordstart; + } else { + /* + * Increment len, but don't overrun the + * supplied buffer and leave room for the + * NULL terminator. + */ + if (++len < bufsize) + *bufptr++ = c; + } + break; + } + } + + if (bufsize) + *bufptr = '\0'; + + return len; +} + int cmdline_find_option_bool(const char *cmdline, const char *option) { return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); } + +int cmdline_find_option(const char *cmdline, const char *option, char *buffer, + int bufsize) +{ + return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, + buffer, bufsize); +} -- GitLab From 13be4483bb487176c48732b887780630a141ae96 Mon Sep 17 00:00:00 2001 From: Richard Fellner Date: Thu, 4 May 2017 14:26:50 +0200 Subject: [PATCH 0747/1398] KAISER: Kernel Address Isolation This patch introduces our implementation of KAISER (Kernel Address Isolation to have Side-channels Efficiently Removed), a kernel isolation technique to close hardware side channels on kernel address information. More information about the patch can be found on: https://github.com/IAIK/KAISER From: Richard Fellner From: Daniel Gruss Subject: [RFC, PATCH] x86_64: KAISER - do not map kernel in user mode Date: Thu, 4 May 2017 14:26:50 +0200 Link: http://marc.info/?l=linux-kernel&m=149390087310405&w=2 Kaiser-4.10-SHA1: c4b1831d44c6144d3762ccc72f0c4e71a0c713e5 To: To: Cc: Cc: Cc: Michael Schwarz Cc: Richard Fellner Cc: Ingo Molnar Cc: Cc: After several recent works [1,2,3] KASLR on x86_64 was basically considered dead by many researchers. We have been working on an efficient but effective fix for this problem and found that not mapping the kernel space when running in user mode is the solution to this problem [4] (the corresponding paper [5] will be presented at ESSoS17). With this RFC patch we allow anybody to configure their kernel with the flag CONFIG_KAISER to add our defense mechanism. If there are any questions we would love to answer them. We also appreciate any comments! Cheers, Daniel (+ the KAISER team from Graz University of Technology) [1] http://www.ieee-security.org/TC/SP2013/papers/4977a191.pdf [2] https://www.blackhat.com/docs/us-16/materials/us-16-Fogh-Using-Undocumented-CPU-Behaviour-To-See-Into-Kernel-Mode-And-Break-KASLR-In-The-Process.pdf [3] https://www.blackhat.com/docs/us-16/materials/us-16-Jang-Breaking-Kernel-Address-Space-Layout-Randomization-KASLR-With-Intel-TSX.pdf [4] https://github.com/IAIK/KAISER [5] https://gruss.cc/files/kaiser.pdf [patch based also on https://raw.githubusercontent.com/IAIK/KAISER/master/KAISER/0001-KAISER-Kernel-Address-Isolation.patch] Signed-off-by: Richard Fellner Signed-off-by: Moritz Lipp Signed-off-by: Daniel Gruss Signed-off-by: Michael Schwarz Acked-by: Jiri Kosina Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 17 +++ arch/x86/entry/entry_64_compat.S | 7 +- arch/x86/include/asm/hw_irq.h | 2 +- arch/x86/include/asm/kaiser.h | 113 +++++++++++++++++++ arch/x86/include/asm/pgtable.h | 4 + arch/x86/include/asm/pgtable_64.h | 21 ++++ arch/x86/include/asm/pgtable_types.h | 12 +- arch/x86/include/asm/processor.h | 7 +- arch/x86/kernel/cpu/common.c | 4 +- arch/x86/kernel/espfix_64.c | 6 + arch/x86/kernel/head_64.S | 16 ++- arch/x86/kernel/irqinit.c | 2 +- arch/x86/kernel/process.c | 2 +- arch/x86/mm/Makefile | 2 +- arch/x86/mm/kaiser.c | 160 +++++++++++++++++++++++++++ arch/x86/mm/pageattr.c | 2 +- arch/x86/mm/pgtable.c | 26 +++++ include/asm-generic/vmlinux.lds.h | 11 +- include/linux/percpu-defs.h | 30 +++++ init/main.c | 6 + kernel/fork.c | 8 ++ security/Kconfig | 7 ++ 22 files changed, 449 insertions(+), 16 deletions(-) create mode 100644 arch/x86/include/asm/kaiser.h create mode 100644 arch/x86/mm/kaiser.c diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e7b0e7ff4c58..9467a2c4bc60 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -36,6 +36,7 @@ #include #include #include +#include #include /* Avoid __ASSEMBLER__'ifying just for this. */ @@ -146,6 +147,7 @@ ENTRY(entry_SYSCALL_64) * it is too small to ever cause noticeable irq latency. */ SWAPGS_UNSAFE_STACK + SWITCH_KERNEL_CR3_NO_STACK /* * A hypervisor implementation might want to use a label * after the swapgs, so that it can do the swapgs @@ -228,6 +230,7 @@ entry_SYSCALL_64_fastpath: movq RIP(%rsp), %rcx movq EFLAGS(%rsp), %r11 RESTORE_C_REGS_EXCEPT_RCX_R11 + SWITCH_USER_CR3 movq RSP(%rsp), %rsp USERGS_SYSRET64 @@ -323,10 +326,12 @@ return_from_SYSCALL_64: syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ RESTORE_C_REGS_EXCEPT_RCX_R11 + SWITCH_USER_CR3 movq RSP(%rsp), %rsp USERGS_SYSRET64 opportunistic_sysret_failed: + SWITCH_USER_CR3 SWAPGS jmp restore_c_regs_and_iret END(entry_SYSCALL_64) @@ -424,6 +429,7 @@ ENTRY(ret_from_fork) movq %rsp, %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ + SWITCH_USER_CR3 SWAPGS jmp restore_regs_and_iret @@ -478,6 +484,7 @@ END(irq_entries_start) * tracking that we're in kernel mode. */ SWAPGS + SWITCH_KERNEL_CR3 /* * We need to tell lockdep that IRQs are off. We can't do this until @@ -535,6 +542,7 @@ GLOBAL(retint_user) mov %rsp,%rdi call prepare_exit_to_usermode TRACE_IRQS_IRETQ + SWITCH_USER_CR3 SWAPGS jmp restore_regs_and_iret @@ -612,6 +620,7 @@ native_irq_return_ldt: pushq %rdi /* Stash user RDI */ SWAPGS + SWITCH_KERNEL_CR3 movq PER_CPU_VAR(espfix_waddr), %rdi movq %rax, (0*8)(%rdi) /* user RAX */ movq (1*8)(%rsp), %rax /* user RIP */ @@ -638,6 +647,7 @@ native_irq_return_ldt: * still points to an RO alias of the ESPFIX stack. */ orq PER_CPU_VAR(espfix_stack), %rax + SWITCH_USER_CR3 SWAPGS movq %rax, %rsp @@ -1034,6 +1044,7 @@ ENTRY(paranoid_entry) testl %edx, %edx js 1f /* negative -> in kernel */ SWAPGS + SWITCH_KERNEL_CR3 xorl %ebx, %ebx 1: ret END(paranoid_entry) @@ -1056,6 +1067,7 @@ ENTRY(paranoid_exit) testl %ebx, %ebx /* swapgs needed? */ jnz paranoid_exit_no_swapgs TRACE_IRQS_IRETQ + SWITCH_USER_CR3_NO_STACK SWAPGS_UNSAFE_STACK jmp paranoid_exit_restore paranoid_exit_no_swapgs: @@ -1084,6 +1096,7 @@ ENTRY(error_entry) * from user mode due to an IRET fault. */ SWAPGS + SWITCH_KERNEL_CR3 .Lerror_entry_from_usermode_after_swapgs: /* @@ -1135,6 +1148,7 @@ ENTRY(error_entry) * Switch to kernel gsbase: */ SWAPGS + SWITCH_KERNEL_CR3 /* * Pretend that the exception came from user mode: set up pt_regs @@ -1235,6 +1249,7 @@ ENTRY(nmi) */ SWAPGS_UNSAFE_STACK + SWITCH_KERNEL_CR3_NO_STACK cld movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp @@ -1275,6 +1290,7 @@ ENTRY(nmi) * work, because we don't want to enable interrupts. Fortunately, * do_nmi doesn't modify pt_regs. */ + SWITCH_USER_CR3 SWAPGS jmp restore_c_regs_and_iret @@ -1486,6 +1502,7 @@ end_repeat_nmi: testl %ebx, %ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: + SWITCH_USER_CR3_NO_STACK SWAPGS_UNSAFE_STACK nmi_restore: RESTORE_EXTRA_REGS diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index e1721dafbcb1..f0e384ee8fc6 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -48,6 +49,7 @@ ENTRY(entry_SYSENTER_compat) /* Interrupts are off on entry. */ SWAPGS_UNSAFE_STACK + SWITCH_KERNEL_CR3_NO_STACK movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* @@ -184,6 +186,7 @@ ENDPROC(entry_SYSENTER_compat) ENTRY(entry_SYSCALL_compat) /* Interrupts are off on entry. */ SWAPGS_UNSAFE_STACK + SWITCH_KERNEL_CR3_NO_STACK /* Stash user ESP and switch to the kernel stack. */ movl %esp, %r8d @@ -259,6 +262,7 @@ sysret32_from_system_call: xorq %r8, %r8 xorq %r9, %r9 xorq %r10, %r10 + SWITCH_USER_CR3 movq RSP-ORIG_RAX(%rsp), %rsp swapgs sysretl @@ -297,7 +301,7 @@ ENTRY(entry_INT80_compat) PARAVIRT_ADJUST_EXCEPTION_FRAME ASM_CLAC /* Do this early to minimize exposure */ SWAPGS - + SWITCH_KERNEL_CR3_NO_STACK /* * User tracing code (ptrace or signal handlers) might assume that * the saved RAX contains a 32-bit number when we're invoking a 32-bit @@ -338,6 +342,7 @@ ENTRY(entry_INT80_compat) /* Go back to user mode. */ TRACE_IRQS_ON + SWITCH_USER_CR3_NO_STACK SWAPGS jmp restore_regs_and_iret END(entry_INT80_compat) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b90e1053049b..0817d63bce41 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -178,7 +178,7 @@ extern char irq_entries_start[]; #define VECTOR_RETRIGGERED ((void *)~0UL) typedef struct irq_desc* vector_irq_t[NR_VECTORS]; -DECLARE_PER_CPU(vector_irq_t, vector_irq); +DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq); #endif /* !ASSEMBLY_ */ diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h new file mode 100644 index 000000000000..63ee8309b35b --- /dev/null +++ b/arch/x86/include/asm/kaiser.h @@ -0,0 +1,113 @@ +#ifndef _ASM_X86_KAISER_H +#define _ASM_X86_KAISER_H + +/* This file includes the definitions for the KAISER feature. + * KAISER is a counter measure against x86_64 side channel attacks on the kernel virtual memory. + * It has a shodow-pgd for every process. the shadow-pgd has a minimalistic kernel-set mapped, + * but includes the whole user memory. Within a kernel context switch, or when an interrupt is handled, + * the pgd is switched to the normal one. When the system switches to user mode, the shadow pgd is enabled. + * By this, the virtual memory chaches are freed, and the user may not attack the whole kernel memory. + * + * A minimalistic kernel mapping holds the parts needed to be mapped in user mode, as the entry/exit functions + * of the user space, or the stacks. + */ +#ifdef __ASSEMBLY__ +#ifdef CONFIG_KAISER + +.macro _SWITCH_TO_KERNEL_CR3 reg +movq %cr3, \reg +andq $(~0x1000), \reg +movq \reg, %cr3 +.endm + +.macro _SWITCH_TO_USER_CR3 reg +movq %cr3, \reg +orq $(0x1000), \reg +movq \reg, %cr3 +.endm + +.macro SWITCH_KERNEL_CR3 +pushq %rax +_SWITCH_TO_KERNEL_CR3 %rax +popq %rax +.endm + +.macro SWITCH_USER_CR3 +pushq %rax +_SWITCH_TO_USER_CR3 %rax +popq %rax +.endm + +.macro SWITCH_KERNEL_CR3_NO_STACK +movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) +_SWITCH_TO_KERNEL_CR3 %rax +movq PER_CPU_VAR(unsafe_stack_register_backup), %rax +.endm + + +.macro SWITCH_USER_CR3_NO_STACK + +movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) +_SWITCH_TO_USER_CR3 %rax +movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + +.endm + +#else /* CONFIG_KAISER */ + +.macro SWITCH_KERNEL_CR3 reg +.endm +.macro SWITCH_USER_CR3 reg +.endm +.macro SWITCH_USER_CR3_NO_STACK +.endm +.macro SWITCH_KERNEL_CR3_NO_STACK +.endm + +#endif /* CONFIG_KAISER */ +#else /* __ASSEMBLY__ */ + + +#ifdef CONFIG_KAISER +// Upon kernel/user mode switch, it may happen that +// the address space has to be switched before the registers have been stored. +// To change the address space, another register is needed. +// A register therefore has to be stored/restored. +// +DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + +#endif /* CONFIG_KAISER */ + +/** + * shadowmem_add_mapping - map a virtual memory part to the shadow mapping + * @addr: the start address of the range + * @size: the size of the range + * @flags: The mapping flags of the pages + * + * the mapping is done on a global scope, so no bigger synchronization has to be done. + * the pages have to be manually unmapped again when they are not needed any longer. + */ +extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); + + +/** + * shadowmem_remove_mapping - unmap a virtual memory part of the shadow mapping + * @addr: the start address of the range + * @size: the size of the range + */ +extern void kaiser_remove_mapping(unsigned long start, unsigned long size); + +/** + * shadowmem_initialize_mapping - Initalize the shadow mapping + * + * most parts of the shadow mapping can be mapped upon boot time. + * only the thread stacks have to be mapped on runtime. + * the mapped regions are not unmapped at all. + */ +extern void kaiser_init(void); + +#endif + + + +#endif /* _ASM_X86_KAISER_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 437feb436efa..4b479c9b064f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -904,6 +904,10 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { memcpy(dst, src, count * sizeof(pgd_t)); +#ifdef CONFIG_KAISER + // clone the shadow pgd part as well + memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t)); +#endif } #define PTE_SHIFT ilog2(PTRS_PER_PTE) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 1cc82ece9ac1..e6ea39fa6d0b 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -106,9 +106,30 @@ static inline void native_pud_clear(pud_t *pud) native_set_pud(pud, native_make_pud(0)); } +#ifdef CONFIG_KAISER +static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) { + return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE); +} + +static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) { + return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE); +} +#endif /* CONFIG_KAISER */ + static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { +#ifdef CONFIG_KAISER + // We know that a pgd is page aligned. + // Therefore the lower indices have to be mapped to user space. + // These pages are mapped to the shadow mapping. + if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) { + native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; + } + + pgdp->pgd = pgd.pgd & ~_PAGE_USER; +#else /* CONFIG_KAISER */ *pgdp = pgd; +#endif } static inline void native_pgd_clear(pgd_t *pgd) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8b4de22d6429..00fecbb153ac 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -45,7 +45,11 @@ #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) -#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +#ifdef CONFIG_KAISER +#define _PAGE_GLOBAL (_AT(pteval_t, 0)) +#else +#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +#endif #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) @@ -119,7 +123,11 @@ #define _PAGE_DEVMAP (_AT(pteval_t, 0)) #endif -#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) +#ifdef CONFIG_KAISER +#define _PAGE_PROTNONE (_AT(pteval_t, 0)) +#else +#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) +#endif #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ _PAGE_ACCESSED | _PAGE_DIRTY) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 83db0eae9979..3d4784e2f862 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -308,7 +308,7 @@ struct tss_struct { } ____cacheline_aligned; -DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); +DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss); #ifdef CONFIG_X86_32 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); @@ -335,6 +335,11 @@ union irq_stack_union { char gs_base[40]; unsigned long stack_canary; }; + + struct { + char irq_stack_pointer[64]; + char unused[IRQ_STACK_SIZE - 64]; + }; }; DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 91588be529b9..3efde13eaed0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -93,7 +93,7 @@ static const struct cpu_dev default_cpu = { static const struct cpu_dev *this_cpu = &default_cpu; -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { +DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 /* * We need valid kernel segments for data and code in long mode too @@ -1365,7 +1365,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { [DEBUG_STACK - 1] = DEBUG_STKSZ }; -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks +DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); /* May not be marked __init: used by software suspend */ diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 04f89caef9c4..9ff875a1aa24 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -41,6 +41,7 @@ #include #include #include +#include /* * Note: we only need 6*8 = 48 bytes for the espfix stack, but round @@ -126,6 +127,11 @@ void __init init_espfix_bsp(void) /* Install the espfix pud into the kernel page directory */ pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); +#ifdef CONFIG_KAISER + // add the esp stack pud to the shadow mapping here. + // This can be done directly, because the fixup stack has its own pud + set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page))); +#endif /* Randomize the locations */ init_espfix_random(); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b4421cc191b0..9e849b520780 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -405,6 +405,14 @@ GLOBAL(early_recursion_flag) .balign PAGE_SIZE; \ GLOBAL(name) +#ifdef CONFIG_KAISER +#define NEXT_PGD_PAGE(name) \ + .balign 2 * PAGE_SIZE; \ +GLOBAL(name) +#else +#define NEXT_PGD_PAGE(name) NEXT_PAGE(name) +#endif + /* Automate the creation of 1 to 1 mapping pmd entries */ #define PMDS(START, PERM, COUNT) \ i = 0 ; \ @@ -414,7 +422,7 @@ GLOBAL(name) .endr __INITDATA -NEXT_PAGE(early_level4_pgt) +NEXT_PGD_PAGE(early_level4_pgt) .fill 511,8,0 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE @@ -424,10 +432,10 @@ NEXT_PAGE(early_dynamic_pgts) .data #ifndef CONFIG_XEN -NEXT_PAGE(init_level4_pgt) - .fill 512,8,0 +NEXT_PGD_PAGE(init_level4_pgt) + .fill 2*512,8,0 #else -NEXT_PAGE(init_level4_pgt) +NEXT_PGD_PAGE(init_level4_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 1423ab1b0312..f480b38a03c3 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -51,7 +51,7 @@ static struct irqaction irq2 = { .flags = IRQF_NO_THREAD, }; -DEFINE_PER_CPU(vector_irq_t, vector_irq) = { +DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = { [0 ... NR_VECTORS - 1] = VECTOR_UNUSED, }; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8e10e72bf6ee..a55b32007785 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -41,7 +41,7 @@ * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { +__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = { .x86_tss = { .sp0 = TOP_OF_INIT_STACK, #ifdef CONFIG_X86_32 diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 96d2b847e09e..682c162333ba 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -38,4 +38,4 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o obj-$(CONFIG_X86_INTEL_MPX) += mpx.o obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o - +obj-$(CONFIG_KAISER) += kaiser.o diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c new file mode 100644 index 000000000000..cf1bb922d467 --- /dev/null +++ b/arch/x86/mm/kaiser.c @@ -0,0 +1,160 @@ + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#ifdef CONFIG_KAISER + +__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + +/** + * Get the real ppn from a address in kernel mapping. + * @param address The virtual adrress + * @return the physical address + */ +static inline unsigned long get_pa_from_mapping (unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset_k(address); + BUG_ON(pgd_none(*pgd) || pgd_large(*pgd)); + + pud = pud_offset(pgd, address); + BUG_ON(pud_none(*pud)); + + if (pud_large(*pud)) { + return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK); + } + + pmd = pmd_offset(pud, address); + BUG_ON(pmd_none(*pmd)); + + if (pmd_large(*pmd)) { + return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK); + } + + pte = pte_offset_kernel(pmd, address); + BUG_ON(pte_none(*pte)); + + return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK); +} + +void _kaiser_copy (unsigned long start_addr, unsigned long size, + unsigned long flags) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long address; + unsigned long end_addr = start_addr + size; + unsigned long target_address; + + for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1)); + address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) { + target_address = get_pa_from_mapping(address); + + pgd = native_get_shadow_pgd(pgd_offset_k(address)); + + BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n"); + BUG_ON(pgd_large(*pgd)); + + pud = pud_offset(pgd, address); + if (pud_none(*pud)) { + set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address)))); + } + BUG_ON(pud_large(*pud)); + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd)) { + set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address)))); + } + BUG_ON(pmd_large(*pmd)); + + pte = pte_offset_kernel(pmd, address); + if (pte_none(*pte)) { + set_pte(pte, __pte(flags | target_address)); + } else { + BUG_ON(__pa(pte_page(*pte)) != target_address); + } + } +} + +// at first, add a pmd for every pgd entry in the shadowmem-kernel-part of the kernel mapping +static inline void __init _kaiser_init(void) +{ + pgd_t *pgd; + int i = 0; + + pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0)); + for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) { + set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0)))); + } +} + +extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; +spinlock_t shadow_table_lock; +void __init kaiser_init(void) +{ + int cpu; + spin_lock_init(&shadow_table_lock); + + spin_lock(&shadow_table_lock); + + _kaiser_init(); + + for_each_possible_cpu(cpu) { + // map the per cpu user variables + _kaiser_copy( + (unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)), + (unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start, + __PAGE_KERNEL); + } + + // map the entry/exit text section, which is responsible to switch between user- and kernel mode + _kaiser_copy( + (unsigned long) __entry_text_start, + (unsigned long) __entry_text_end - (unsigned long) __entry_text_start, + __PAGE_KERNEL_RX); + + // the fixed map address of the idt_table + _kaiser_copy( + (unsigned long) idt_descr.address, + sizeof(gate_desc) * NR_VECTORS, + __PAGE_KERNEL_RO); + + spin_unlock(&shadow_table_lock); +} + +// add a mapping to the shadow-mapping, and synchronize the mappings +void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) +{ + spin_lock(&shadow_table_lock); + _kaiser_copy(addr, size, flags); + spin_unlock(&shadow_table_lock); +} + +extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end); +void kaiser_remove_mapping(unsigned long start, unsigned long size) +{ + pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start)); + spin_lock(&shadow_table_lock); + do { + unmap_pud_range(pgd, start, start + size); + } while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size))); + spin_unlock(&shadow_table_lock); +} +#endif /* CONFIG_KAISER */ diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e3353c97d086..c17412f92d77 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -823,7 +823,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) pud_clear(pud); } -static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) +void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) { pud_t *pud = pud_offset(pgd, start); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 3feec5af4e67..27d218b38538 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -346,12 +346,38 @@ static inline void _pgd_free(pgd_t *pgd) #else static inline pgd_t *_pgd_alloc(void) { +#ifdef CONFIG_KAISER + // Instead of one PML4, we aquire two PML4s and, thus, an 8kb-aligned memory + // block. Therefore, we have to allocate at least 3 pages. However, the + // __get_free_pages returns us 4 pages. Hence, we store the base pointer at + // the beginning of the page of our 8kb-aligned memory block in order to + // correctly free it afterwars. + + unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE)); + + if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages) + { + *((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages; + return (pgd_t *) pages; + } + else + { + *((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages; + return (pgd_t *) (pages + PAGE_SIZE); + } +#else return (pgd_t *)__get_free_page(PGALLOC_GFP); +#endif } static inline void _pgd_free(pgd_t *pgd) { +#ifdef CONFIG_KAISER + unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE)); + free_pages(pages, get_order(4*PAGE_SIZE)); +#else free_page((unsigned long)pgd); +#endif } #endif /* CONFIG_X86_PAE */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index dc81e5287ebf..d6ab14414776 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -778,7 +778,16 @@ */ #define PERCPU_INPUT(cacheline) \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ - *(.data..percpu..first) \ + \ + VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \ + *(.data..percpu..first) \ + . = ALIGN(cacheline); \ + *(.data..percpu..user_mapped) \ + *(.data..percpu..user_mapped..shared_aligned) \ + . = ALIGN(PAGE_SIZE); \ + *(.data..percpu..user_mapped..page_aligned) \ + VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \ + \ . = ALIGN(PAGE_SIZE); \ *(.data..percpu..page_aligned) \ . = ALIGN(cacheline); \ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 8f16299ca068..8ea945f63a05 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -35,6 +35,12 @@ #endif +#ifdef CONFIG_KAISER +#define USER_MAPPED_SECTION "..user_mapped" +#else +#define USER_MAPPED_SECTION "" +#endif + /* * Base implementations of per-CPU variable declarations and definitions, where * the section in which the variable is to be placed is provided by the @@ -115,6 +121,12 @@ #define DEFINE_PER_CPU(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "") +#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) + +#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) + /* * Declaration/definition used for per-CPU variables that must come first in * the set of variables. @@ -144,6 +156,14 @@ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ ____cacheline_aligned_in_smp +#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + #define DECLARE_PER_CPU_ALIGNED(type, name) \ DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \ ____cacheline_aligned @@ -162,6 +182,16 @@ #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ __aligned(PAGE_SIZE) +/* + * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode. + */ +#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ + __aligned(PAGE_SIZE) + +#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ + __aligned(PAGE_SIZE) /* * Declaration/definition used for per-CPU variables that must be read mostly. diff --git a/init/main.c b/init/main.c index 25bac88bc66e..2c009f77e655 100644 --- a/init/main.c +++ b/init/main.c @@ -86,6 +86,9 @@ #include #include #include +#ifdef CONFIG_KAISER +#include +#endif static int kernel_init(void *); @@ -473,6 +476,9 @@ static void __init mm_init(void) pgtable_init(); vmalloc_init(); ioremap_huge_init(); +#ifdef CONFIG_KAISER + kaiser_init(); +#endif } asmlinkage __visible void __init start_kernel(void) diff --git a/kernel/fork.c b/kernel/fork.c index 9321b1ad3335..4014be1dd2b6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -211,8 +211,12 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) #endif } +extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size); static inline void free_thread_stack(struct task_struct *tsk) { +#ifdef CONFIG_KAISER + kaiser_remove_mapping((unsigned long)tsk->stack, THREAD_SIZE); +#endif #ifdef CONFIG_VMAP_STACK if (task_stack_vm_area(tsk)) { unsigned long flags; @@ -468,6 +472,7 @@ void set_task_stack_end_magic(struct task_struct *tsk) *stackend = STACK_END_MAGIC; /* for overflow detection */ } +extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); static struct task_struct *dup_task_struct(struct task_struct *orig, int node) { struct task_struct *tsk; @@ -495,6 +500,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) * functions again. */ tsk->stack = stack; +#ifdef CONFIG_KAISER + kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL); +#endif #ifdef CONFIG_VMAP_STACK tsk->stack_vm_area = stack_vm_area; #endif diff --git a/security/Kconfig b/security/Kconfig index 118f4549404e..f515ac302257 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -30,6 +30,13 @@ config SECURITY model will be used. If you are unsure how to answer this question, answer N. +config KAISER + bool "Remove the kernel mapping in user mode" + depends on X86_64 + depends on !PARAVIRT + help + This enforces a strict kernel and user space isolation in order to close + hardware side channels on kernel address information. config SECURITYFS bool "Enable the securityfs filesystem" -- GitLab From 8f0baadf2bea3861217763734b57e1dd2db703dd Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 30 Aug 2017 16:23:00 -0700 Subject: [PATCH 0748/1398] kaiser: merged update Merged fixes and cleanups, rebased to 4.9.51 tree (no 5-level paging). Signed-off-by: Dave Hansen Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 105 +++++++-- arch/x86/include/asm/kaiser.h | 43 ++-- arch/x86/include/asm/pgtable.h | 18 +- arch/x86/include/asm/pgtable_64.h | 48 +++- arch/x86/include/asm/pgtable_types.h | 6 +- arch/x86/kernel/espfix_64.c | 13 +- arch/x86/kernel/head_64.S | 19 +- arch/x86/kernel/ldt.c | 27 ++- arch/x86/kernel/tracepoint.c | 2 + arch/x86/mm/kaiser.c | 313 ++++++++++++++++++++------- arch/x86/mm/pageattr.c | 63 ++++-- arch/x86/mm/pgtable.c | 40 ++-- include/linux/kaiser.h | 26 +++ kernel/fork.c | 9 +- security/Kconfig | 5 + 15 files changed, 549 insertions(+), 188 deletions(-) create mode 100644 include/linux/kaiser.h diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9467a2c4bc60..2869fc178658 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -230,6 +230,13 @@ entry_SYSCALL_64_fastpath: movq RIP(%rsp), %rcx movq EFLAGS(%rsp), %r11 RESTORE_C_REGS_EXCEPT_RCX_R11 + /* + * This opens a window where we have a user CR3, but are + * running in the kernel. This makes using the CS + * register useless for telling whether or not we need to + * switch CR3 in NMIs. Normal interrupts are OK because + * they are off here. + */ SWITCH_USER_CR3 movq RSP(%rsp), %rsp USERGS_SYSRET64 @@ -326,11 +333,25 @@ return_from_SYSCALL_64: syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ RESTORE_C_REGS_EXCEPT_RCX_R11 + /* + * This opens a window where we have a user CR3, but are + * running in the kernel. This makes using the CS + * register useless for telling whether or not we need to + * switch CR3 in NMIs. Normal interrupts are OK because + * they are off here. + */ SWITCH_USER_CR3 movq RSP(%rsp), %rsp USERGS_SYSRET64 opportunistic_sysret_failed: + /* + * This opens a window where we have a user CR3, but are + * running in the kernel. This makes using the CS + * register useless for telling whether or not we need to + * switch CR3 in NMIs. Normal interrupts are OK because + * they are off here. + */ SWITCH_USER_CR3 SWAPGS jmp restore_c_regs_and_iret @@ -1087,6 +1108,13 @@ ENTRY(error_entry) cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 + /* + * error_entry() always returns with a kernel gsbase and + * CR3. We must also have a kernel CR3/gsbase before + * calling TRACE_IRQS_*. Just unconditionally switch to + * the kernel CR3 here. + */ + SWITCH_KERNEL_CR3 xorl %ebx, %ebx testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1096,7 +1124,6 @@ ENTRY(error_entry) * from user mode due to an IRET fault. */ SWAPGS - SWITCH_KERNEL_CR3 .Lerror_entry_from_usermode_after_swapgs: /* @@ -1148,7 +1175,6 @@ ENTRY(error_entry) * Switch to kernel gsbase: */ SWAPGS - SWITCH_KERNEL_CR3 /* * Pretend that the exception came from user mode: set up pt_regs @@ -1249,7 +1275,10 @@ ENTRY(nmi) */ SWAPGS_UNSAFE_STACK - SWITCH_KERNEL_CR3_NO_STACK + /* + * percpu variables are mapped with user CR3, so no need + * to switch CR3 here. + */ cld movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp @@ -1283,14 +1312,33 @@ ENTRY(nmi) movq %rsp, %rdi movq $-1, %rsi +#ifdef CONFIG_KAISER + /* Unconditionally use kernel CR3 for do_nmi() */ + /* %rax is saved above, so OK to clobber here */ + movq %cr3, %rax + pushq %rax +#ifdef CONFIG_KAISER_REAL_SWITCH + andq $(~0x1000), %rax +#endif + movq %rax, %cr3 +#endif call do_nmi + /* + * Unconditionally restore CR3. I know we return to + * kernel code that needs user CR3, but do we ever return + * to "user mode" where we need the kernel CR3? + */ +#ifdef CONFIG_KAISER + popq %rax + mov %rax, %cr3 +#endif /* * Return back to user mode. We must *not* do the normal exit - * work, because we don't want to enable interrupts. Fortunately, - * do_nmi doesn't modify pt_regs. + * work, because we don't want to enable interrupts. Do not + * switch to user CR3: we might be going back to kernel code + * that had a user CR3 set. */ - SWITCH_USER_CR3 SWAPGS jmp restore_c_regs_and_iret @@ -1486,23 +1534,54 @@ end_repeat_nmi: ALLOC_PT_GPREGS_ON_STACK /* - * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit - * as we should not be calling schedule in NMI context. - * Even with normal interrupts enabled. An NMI should not be - * setting NEED_RESCHED or anything that normal interrupts and - * exceptions might do. + * Use the same approach as paranoid_entry to handle SWAPGS, but + * without CR3 handling since we do that differently in NMIs. No + * need to use paranoid_exit as we should not be calling schedule + * in NMI context. Even with normal interrupts enabled. An NMI + * should not be setting NEED_RESCHED or anything that normal + * interrupts and exceptions might do. */ - call paranoid_entry + cld + SAVE_C_REGS + SAVE_EXTRA_REGS + movl $1, %ebx + movl $MSR_GS_BASE, %ecx + rdmsr + testl %edx, %edx + js 1f /* negative -> in kernel */ + SWAPGS + xorl %ebx, %ebx +1: +#ifdef CONFIG_KAISER + /* Unconditionally use kernel CR3 for do_nmi() */ + /* %rax is saved above, so OK to clobber here */ + movq %cr3, %rax + pushq %rax +#ifdef CONFIG_KAISER_REAL_SWITCH + andq $(~0x1000), %rax +#endif + movq %rax, %cr3 +#endif /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp, %rdi + addq $8, %rdi /* point %rdi at ptregs, fixed up for CR3 */ movq $-1, %rsi call do_nmi + /* + * Unconditionally restore CR3. We might be returning to + * kernel code that needs user CR3, like just just before + * a sysret. + */ +#ifdef CONFIG_KAISER + popq %rax + mov %rax, %cr3 +#endif testl %ebx, %ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: - SWITCH_USER_CR3_NO_STACK + /* We fixed up CR3 above, so no need to switch it here */ SWAPGS_UNSAFE_STACK nmi_restore: RESTORE_EXTRA_REGS diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index 63ee8309b35b..0703f48777f3 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -16,13 +16,17 @@ .macro _SWITCH_TO_KERNEL_CR3 reg movq %cr3, \reg +#ifdef CONFIG_KAISER_REAL_SWITCH andq $(~0x1000), \reg +#endif movq \reg, %cr3 .endm .macro _SWITCH_TO_USER_CR3 reg movq %cr3, \reg +#ifdef CONFIG_KAISER_REAL_SWITCH orq $(0x1000), \reg +#endif movq \reg, %cr3 .endm @@ -65,48 +69,53 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax .endm #endif /* CONFIG_KAISER */ + #else /* __ASSEMBLY__ */ #ifdef CONFIG_KAISER -// Upon kernel/user mode switch, it may happen that -// the address space has to be switched before the registers have been stored. -// To change the address space, another register is needed. -// A register therefore has to be stored/restored. -// -DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); +/* + * Upon kernel/user mode switch, it may happen that the address + * space has to be switched before the registers have been + * stored. To change the address space, another register is + * needed. A register therefore has to be stored/restored. +*/ -#endif /* CONFIG_KAISER */ +DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); /** - * shadowmem_add_mapping - map a virtual memory part to the shadow mapping + * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping * @addr: the start address of the range * @size: the size of the range * @flags: The mapping flags of the pages * - * the mapping is done on a global scope, so no bigger synchronization has to be done. - * the pages have to be manually unmapped again when they are not needed any longer. + * The mapping is done on a global scope, so no bigger + * synchronization has to be done. the pages have to be + * manually unmapped again when they are not needed any longer. */ -extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); +extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); /** - * shadowmem_remove_mapping - unmap a virtual memory part of the shadow mapping + * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping * @addr: the start address of the range * @size: the size of the range */ extern void kaiser_remove_mapping(unsigned long start, unsigned long size); /** - * shadowmem_initialize_mapping - Initalize the shadow mapping + * kaiser_initialize_mapping - Initalize the shadow mapping * - * most parts of the shadow mapping can be mapped upon boot time. - * only the thread stacks have to be mapped on runtime. - * the mapped regions are not unmapped at all. + * Most parts of the shadow mapping can be mapped upon boot + * time. Only per-process things like the thread stacks + * or a new LDT have to be mapped at runtime. These boot- + * time mappings are permanent and nevertunmapped. */ extern void kaiser_init(void); -#endif +#endif /* CONFIG_KAISER */ + +#endif /* __ASSEMBLY */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 4b479c9b064f..1cee98e182b7 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -690,7 +690,17 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) static inline int pgd_bad(pgd_t pgd) { - return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; + pgdval_t ignore_flags = _PAGE_USER; + /* + * We set NX on KAISER pgds that map userspace memory so + * that userspace can not meaningfully use the kernel + * page table by accident; it will fault on the first + * instruction it tries to run. See native_set_pgd(). + */ + if (IS_ENABLED(CONFIG_KAISER)) + ignore_flags |= _PAGE_NX; + + return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; } static inline int pgd_none(pgd_t pgd) @@ -905,8 +915,10 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { memcpy(dst, src, count * sizeof(pgd_t)); #ifdef CONFIG_KAISER - // clone the shadow pgd part as well - memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t)); + /* Clone the shadow pgd part as well */ + memcpy(native_get_shadow_pgd(dst), + native_get_shadow_pgd(src), + count * sizeof(pgd_t)); #endif } diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index e6ea39fa6d0b..000265c8b74e 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -107,26 +107,58 @@ static inline void native_pud_clear(pud_t *pud) } #ifdef CONFIG_KAISER -static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) { +static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) +{ return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE); } -static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) { +static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) +{ return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE); } +#else +static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) +{ + BUILD_BUG_ON(1); + return NULL; +} +static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) +{ + return pgdp; +} #endif /* CONFIG_KAISER */ +/* + * Page table pages are page-aligned. The lower half of the top + * level is used for userspace and the top half for the kernel. + * This returns true for user pages that need to get copied into + * both the user and kernel copies of the page tables, and false + * for kernel pages that should only be in the kernel copy. + */ +static inline bool is_userspace_pgd(void *__ptr) +{ + unsigned long ptr = (unsigned long)__ptr; + + return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2)); +} + static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { #ifdef CONFIG_KAISER - // We know that a pgd is page aligned. - // Therefore the lower indices have to be mapped to user space. - // These pages are mapped to the shadow mapping. - if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) { + pteval_t extra_kern_pgd_flags = 0; + /* Do we need to also populate the shadow pgd? */ + if (is_userspace_pgd(pgdp)) { native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; + /* + * Even if the entry is *mapping* userspace, ensure + * that userspace can not use it. This way, if we + * get out to userspace running on the kernel CR3, + * userspace will crash instead of running. + */ + extra_kern_pgd_flags = _PAGE_NX; } - - pgdp->pgd = pgd.pgd & ~_PAGE_USER; + pgdp->pgd = pgd.pgd; + pgdp->pgd |= extra_kern_pgd_flags; #else /* CONFIG_KAISER */ *pgdp = pgd; #endif diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 00fecbb153ac..8bc8d02fb4b1 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -48,7 +48,7 @@ #ifdef CONFIG_KAISER #define _PAGE_GLOBAL (_AT(pteval_t, 0)) #else -#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) #endif #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) @@ -123,11 +123,7 @@ #define _PAGE_DEVMAP (_AT(pteval_t, 0)) #endif -#ifdef CONFIG_KAISER -#define _PAGE_PROTNONE (_AT(pteval_t, 0)) -#else #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) -#endif #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ _PAGE_ACCESSED | _PAGE_DIRTY) diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 9ff875a1aa24..560c2fd73b28 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -127,11 +127,14 @@ void __init init_espfix_bsp(void) /* Install the espfix pud into the kernel page directory */ pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); -#ifdef CONFIG_KAISER - // add the esp stack pud to the shadow mapping here. - // This can be done directly, because the fixup stack has its own pud - set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page))); -#endif + /* + * Just copy the top-level PGD that is mapping the espfix + * area to ensure it is mapped into the shadow user page + * tables. + */ + if (IS_ENABLED(CONFIG_KAISER)) + set_pgd(native_get_shadow_pgd(pgd_p), + __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page))); /* Randomize the locations */ init_espfix_random(); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 9e849b520780..5775379237f7 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -406,11 +406,24 @@ GLOBAL(early_recursion_flag) GLOBAL(name) #ifdef CONFIG_KAISER +/* + * Each PGD needs to be 8k long and 8k aligned. We do not + * ever go out to userspace with these, so we do not + * strictly *need* the second page, but this allows us to + * have a single set_pgd() implementation that does not + * need to worry about whether it has 4k or 8k to work + * with. + * + * This ensures PGDs are 8k long: + */ +#define KAISER_USER_PGD_FILL 512 +/* This ensures they are 8k-aligned: */ #define NEXT_PGD_PAGE(name) \ .balign 2 * PAGE_SIZE; \ GLOBAL(name) #else #define NEXT_PGD_PAGE(name) NEXT_PAGE(name) +#define KAISER_USER_PGD_FILL 0 #endif /* Automate the creation of 1 to 1 mapping pmd entries */ @@ -425,6 +438,7 @@ GLOBAL(name) NEXT_PGD_PAGE(early_level4_pgt) .fill 511,8,0 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE + .fill KAISER_USER_PGD_FILL,8,0 NEXT_PAGE(early_dynamic_pgts) .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 @@ -433,7 +447,8 @@ NEXT_PAGE(early_dynamic_pgts) #ifndef CONFIG_XEN NEXT_PGD_PAGE(init_level4_pgt) - .fill 2*512,8,0 + .fill 512,8,0 + .fill KAISER_USER_PGD_FILL,8,0 #else NEXT_PGD_PAGE(init_level4_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE @@ -442,6 +457,7 @@ NEXT_PGD_PAGE(init_level4_pgt) .org init_level4_pgt + L4_START_KERNEL*8, 0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE + .fill KAISER_USER_PGD_FILL,8,0 NEXT_PAGE(level3_ident_pgt) .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE @@ -452,6 +468,7 @@ NEXT_PAGE(level2_ident_pgt) */ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) #endif + .fill KAISER_USER_PGD_FILL,8,0 NEXT_PAGE(level3_kernel_pgt) .fill L3_START_KERNEL,8,0 diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 5f70014ca602..dd31f9f11f01 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -34,11 +35,21 @@ static void flush_ldt(void *current_mm) set_ldt(pc->ldt->entries, pc->ldt->size); } +static void __free_ldt_struct(struct ldt_struct *ldt) +{ + if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(ldt->entries); + else + free_page((unsigned long)ldt->entries); + kfree(ldt); +} + /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */ static struct ldt_struct *alloc_ldt_struct(int size) { struct ldt_struct *new_ldt; int alloc_size; + int ret = 0; if (size > LDT_ENTRIES) return NULL; @@ -66,6 +77,14 @@ static struct ldt_struct *alloc_ldt_struct(int size) return NULL; } + // FIXME: make kaiser_add_mapping() return an error code + // when it fails + kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size, + __PAGE_KERNEL); + if (ret) { + __free_ldt_struct(new_ldt); + return NULL; + } new_ldt->size = size; return new_ldt; } @@ -92,12 +111,10 @@ static void free_ldt_struct(struct ldt_struct *ldt) if (likely(!ldt)) return; + kaiser_remove_mapping((unsigned long)ldt->entries, + ldt->size * LDT_ENTRY_SIZE); paravirt_free_ldt(ldt->entries, ldt->size); - if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(ldt->entries); - else - free_page((unsigned long)ldt->entries); - kfree(ldt); + __free_ldt_struct(ldt); } /* diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c index 1c113db9ed57..2bb5ee464df3 100644 --- a/arch/x86/kernel/tracepoint.c +++ b/arch/x86/kernel/tracepoint.c @@ -9,10 +9,12 @@ #include atomic_t trace_idt_ctr = ATOMIC_INIT(0); +__aligned(PAGE_SIZE) struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) trace_idt_table }; /* No need to be aligned, but done to keep all IDTs defined the same way. */ +__aligned(PAGE_SIZE) gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss; static int trace_irq_vector_refcount; diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index cf1bb922d467..7270a293175d 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -1,160 +1,305 @@ - - +#include #include #include #include #include #include #include +#include #include #include - #include + +#include #include #include #include #ifdef CONFIG_KAISER __visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); +/* + * At runtime, the only things we map are some things for CPU + * hotplug, and stacks for new processes. No two CPUs will ever + * be populating the same addresses, so we only need to ensure + * that we protect between two CPUs trying to allocate and + * populate the same page table page. + * + * Only take this lock when doing a set_p[4um]d(), but it is not + * needed for doing a set_pte(). We assume that only the *owner* + * of a given allocation will be doing this for _their_ + * allocation. + * + * This ensures that once a system has been running for a while + * and there have been stacks all over and these page tables + * are fully populated, there will be no further acquisitions of + * this lock. + */ +static DEFINE_SPINLOCK(shadow_table_allocation_lock); -/** - * Get the real ppn from a address in kernel mapping. - * @param address The virtual adrress - * @return the physical address +/* + * Returns -1 on error. */ -static inline unsigned long get_pa_from_mapping (unsigned long address) +static inline unsigned long get_pa_from_mapping(unsigned long vaddr) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; - pgd = pgd_offset_k(address); - BUG_ON(pgd_none(*pgd) || pgd_large(*pgd)); - - pud = pud_offset(pgd, address); - BUG_ON(pud_none(*pud)); + pgd = pgd_offset_k(vaddr); + /* + * We made all the kernel PGDs present in kaiser_init(). + * We expect them to stay that way. + */ + BUG_ON(pgd_none(*pgd)); + /* + * PGDs are either 512GB or 128TB on all x86_64 + * configurations. We don't handle these. + */ + BUG_ON(pgd_large(*pgd)); - if (pud_large(*pud)) { - return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK); + pud = pud_offset(pgd, vaddr); + if (pud_none(*pud)) { + WARN_ON_ONCE(1); + return -1; } - pmd = pmd_offset(pud, address); - BUG_ON(pmd_none(*pmd)); + if (pud_large(*pud)) + return (pud_pfn(*pud) << PAGE_SHIFT) | (vaddr & ~PUD_PAGE_MASK); - if (pmd_large(*pmd)) { - return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK); + pmd = pmd_offset(pud, vaddr); + if (pmd_none(*pmd)) { + WARN_ON_ONCE(1); + return -1; } - pte = pte_offset_kernel(pmd, address); - BUG_ON(pte_none(*pte)); + if (pmd_large(*pmd)) + return (pmd_pfn(*pmd) << PAGE_SHIFT) | (vaddr & ~PMD_PAGE_MASK); - return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK); + pte = pte_offset_kernel(pmd, vaddr); + if (pte_none(*pte)) { + WARN_ON_ONCE(1); + return -1; + } + + return (pte_pfn(*pte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK); } -void _kaiser_copy (unsigned long start_addr, unsigned long size, - unsigned long flags) +/* + * This is a relatively normal page table walk, except that it + * also tries to allocate page tables pages along the way. + * + * Returns a pointer to a PTE on success, or NULL on failure. + */ +static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) { - pgd_t *pgd; - pud_t *pud; pmd_t *pmd; - pte_t *pte; - unsigned long address; - unsigned long end_addr = start_addr + size; - unsigned long target_address; + pud_t *pud; + pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); - for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1)); - address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) { - target_address = get_pa_from_mapping(address); + might_sleep(); + if (is_atomic) { + gfp &= ~GFP_KERNEL; + gfp |= __GFP_HIGH | __GFP_ATOMIC; + } - pgd = native_get_shadow_pgd(pgd_offset_k(address)); + if (pgd_none(*pgd)) { + WARN_ONCE(1, "All shadow pgds should have been populated"); + return NULL; + } + BUILD_BUG_ON(pgd_large(*pgd) != 0); - BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n"); - BUG_ON(pgd_large(*pgd)); + pud = pud_offset(pgd, address); + /* The shadow page tables do not use large mappings: */ + if (pud_large(*pud)) { + WARN_ON(1); + return NULL; + } + if (pud_none(*pud)) { + unsigned long new_pmd_page = __get_free_page(gfp); + if (!new_pmd_page) + return NULL; + spin_lock(&shadow_table_allocation_lock); + if (pud_none(*pud)) + set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); + else + free_page(new_pmd_page); + spin_unlock(&shadow_table_allocation_lock); + } - pud = pud_offset(pgd, address); - if (pud_none(*pud)) { - set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address)))); - } - BUG_ON(pud_large(*pud)); + pmd = pmd_offset(pud, address); + /* The shadow page tables do not use large mappings: */ + if (pmd_large(*pmd)) { + WARN_ON(1); + return NULL; + } + if (pmd_none(*pmd)) { + unsigned long new_pte_page = __get_free_page(gfp); + if (!new_pte_page) + return NULL; + spin_lock(&shadow_table_allocation_lock); + if (pmd_none(*pmd)) + set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); + else + free_page(new_pte_page); + spin_unlock(&shadow_table_allocation_lock); + } - pmd = pmd_offset(pud, address); - if (pmd_none(*pmd)) { - set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address)))); - } - BUG_ON(pmd_large(*pmd)); + return pte_offset_kernel(pmd, address); +} - pte = pte_offset_kernel(pmd, address); +int kaiser_add_user_map(const void *__start_addr, unsigned long size, + unsigned long flags) +{ + int ret = 0; + pte_t *pte; + unsigned long start_addr = (unsigned long )__start_addr; + unsigned long address = start_addr & PAGE_MASK; + unsigned long end_addr = PAGE_ALIGN(start_addr + size); + unsigned long target_address; + + for (;address < end_addr; address += PAGE_SIZE) { + target_address = get_pa_from_mapping(address); + if (target_address == -1) { + ret = -EIO; + break; + } + pte = kaiser_pagetable_walk(address, false); if (pte_none(*pte)) { set_pte(pte, __pte(flags | target_address)); } else { - BUG_ON(__pa(pte_page(*pte)) != target_address); + pte_t tmp; + set_pte(&tmp, __pte(flags | target_address)); + WARN_ON_ONCE(!pte_same(*pte, tmp)); } } + return ret; +} + +static int kaiser_add_user_map_ptrs(const void *start, const void *end, unsigned long flags) +{ + unsigned long size = end - start; + + return kaiser_add_user_map(start, size, flags); } -// at first, add a pmd for every pgd entry in the shadowmem-kernel-part of the kernel mapping -static inline void __init _kaiser_init(void) +/* + * Ensure that the top level of the (shadow) page tables are + * entirely populated. This ensures that all processes that get + * forked have the same entries. This way, we do not have to + * ever go set up new entries in older processes. + * + * Note: we never free these, so there are no updates to them + * after this. + */ +static void __init kaiser_init_all_pgds(void) { pgd_t *pgd; int i = 0; pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0)); for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) { - set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0)))); + pgd_t new_pgd; + pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE); + if (!pud) { + WARN_ON(1); + break; + } + new_pgd = __pgd(_KERNPG_TABLE |__pa(pud)); + /* + * Make sure not to stomp on some other pgd entry. + */ + if (!pgd_none(pgd[i])) { + WARN_ON(1); + continue; + } + set_pgd(pgd + i, new_pgd); } } +#define kaiser_add_user_map_early(start, size, flags) do { \ + int __ret = kaiser_add_user_map(start, size, flags); \ + WARN_ON(__ret); \ +} while (0) + +#define kaiser_add_user_map_ptrs_early(start, end, flags) do { \ + int __ret = kaiser_add_user_map_ptrs(start, end, flags); \ + WARN_ON(__ret); \ +} while (0) + extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; -spinlock_t shadow_table_lock; +/* + * If anything in here fails, we will likely die on one of the + * first kernel->user transitions and init will die. But, we + * will have most of the kernel up by then and should be able to + * get a clean warning out of it. If we BUG_ON() here, we run + * the risk of being before we have good console output. + */ void __init kaiser_init(void) { int cpu; - spin_lock_init(&shadow_table_lock); - - spin_lock(&shadow_table_lock); - _kaiser_init(); + kaiser_init_all_pgds(); for_each_possible_cpu(cpu) { - // map the per cpu user variables - _kaiser_copy( - (unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)), - (unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start, - __PAGE_KERNEL); + void *percpu_vaddr = __per_cpu_user_mapped_start + + per_cpu_offset(cpu); + unsigned long percpu_sz = __per_cpu_user_mapped_end - + __per_cpu_user_mapped_start; + kaiser_add_user_map_early(percpu_vaddr, percpu_sz, + __PAGE_KERNEL); } - // map the entry/exit text section, which is responsible to switch between user- and kernel mode - _kaiser_copy( - (unsigned long) __entry_text_start, - (unsigned long) __entry_text_end - (unsigned long) __entry_text_start, - __PAGE_KERNEL_RX); + /* + * Map the entry/exit text section, which is needed at + * switches from user to and from kernel. + */ + kaiser_add_user_map_ptrs_early(__entry_text_start, __entry_text_end, + __PAGE_KERNEL_RX); - // the fixed map address of the idt_table - _kaiser_copy( - (unsigned long) idt_descr.address, - sizeof(gate_desc) * NR_VECTORS, - __PAGE_KERNEL_RO); - - spin_unlock(&shadow_table_lock); +#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) + kaiser_add_user_map_ptrs_early(__irqentry_text_start, + __irqentry_text_end, + __PAGE_KERNEL_RX); +#endif + kaiser_add_user_map_early((void *)idt_descr.address, + sizeof(gate_desc) * NR_VECTORS, + __PAGE_KERNEL_RO); +#ifdef CONFIG_TRACING + kaiser_add_user_map_early(&trace_idt_descr, + sizeof(trace_idt_descr), + __PAGE_KERNEL); + kaiser_add_user_map_early(&trace_idt_table, + sizeof(gate_desc) * NR_VECTORS, + __PAGE_KERNEL); +#endif + kaiser_add_user_map_early(&debug_idt_descr, sizeof(debug_idt_descr), + __PAGE_KERNEL); + kaiser_add_user_map_early(&debug_idt_table, + sizeof(gate_desc) * NR_VECTORS, + __PAGE_KERNEL); } +extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end); // add a mapping to the shadow-mapping, and synchronize the mappings -void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) +int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) { - spin_lock(&shadow_table_lock); - _kaiser_copy(addr, size, flags); - spin_unlock(&shadow_table_lock); + return kaiser_add_user_map((const void *)addr, size, flags); } -extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end); void kaiser_remove_mapping(unsigned long start, unsigned long size) { - pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start)); - spin_lock(&shadow_table_lock); - do { - unmap_pud_range(pgd, start, start + size); - } while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size))); - spin_unlock(&shadow_table_lock); + unsigned long end = start + size; + unsigned long addr; + + for (addr = start; addr < end; addr += PGDIR_SIZE) { + pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr)); + /* + * unmap_p4d_range() handles > P4D_SIZE unmaps, + * so no need to trim 'end'. + */ + unmap_pud_range_nofree(pgd, addr, end); + } } #endif /* CONFIG_KAISER */ diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index c17412f92d77..73dcb0e18c1b 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -52,6 +52,7 @@ static DEFINE_SPINLOCK(cpa_lock); #define CPA_FLUSHTLB 1 #define CPA_ARRAY 2 #define CPA_PAGES_ARRAY 4 +#define CPA_FREE_PAGETABLES 8 #ifdef CONFIG_PROC_FS static unsigned long direct_pages_count[PG_LEVEL_NUM]; @@ -729,10 +730,13 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte, return 0; } -static bool try_to_free_pte_page(pte_t *pte) +static bool try_to_free_pte_page(struct cpa_data *cpa, pte_t *pte) { int i; + if (!(cpa->flags & CPA_FREE_PAGETABLES)) + return false; + for (i = 0; i < PTRS_PER_PTE; i++) if (!pte_none(pte[i])) return false; @@ -741,10 +745,13 @@ static bool try_to_free_pte_page(pte_t *pte) return true; } -static bool try_to_free_pmd_page(pmd_t *pmd) +static bool try_to_free_pmd_page(struct cpa_data *cpa, pmd_t *pmd) { int i; + if (!(cpa->flags & CPA_FREE_PAGETABLES)) + return false; + for (i = 0; i < PTRS_PER_PMD; i++) if (!pmd_none(pmd[i])) return false; @@ -753,7 +760,9 @@ static bool try_to_free_pmd_page(pmd_t *pmd) return true; } -static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) +static bool unmap_pte_range(struct cpa_data *cpa, pmd_t *pmd, + unsigned long start, + unsigned long end) { pte_t *pte = pte_offset_kernel(pmd, start); @@ -764,22 +773,23 @@ static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) pte++; } - if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) { + if (try_to_free_pte_page(cpa, (pte_t *)pmd_page_vaddr(*pmd))) { pmd_clear(pmd); return true; } return false; } -static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd, +static void __unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, pmd_t *pmd, unsigned long start, unsigned long end) { - if (unmap_pte_range(pmd, start, end)) - if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) + if (unmap_pte_range(cpa, pmd, start, end)) + if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud))) pud_clear(pud); } -static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) +static void unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, + unsigned long start, unsigned long end) { pmd_t *pmd = pmd_offset(pud, start); @@ -790,7 +800,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; unsigned long pre_end = min_t(unsigned long, end, next_page); - __unmap_pmd_range(pud, pmd, start, pre_end); + __unmap_pmd_range(cpa, pud, pmd, start, pre_end); start = pre_end; pmd++; @@ -803,7 +813,8 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) if (pmd_large(*pmd)) pmd_clear(pmd); else - __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE); + __unmap_pmd_range(cpa, pud, pmd, + start, start + PMD_SIZE); start += PMD_SIZE; pmd++; @@ -813,17 +824,19 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) * 4K leftovers? */ if (start < end) - return __unmap_pmd_range(pud, pmd, start, end); + return __unmap_pmd_range(cpa, pud, pmd, start, end); /* * Try again to free the PMD page if haven't succeeded above. */ if (!pud_none(*pud)) - if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) + if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud))) pud_clear(pud); } -void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) +static void __unmap_pud_range(struct cpa_data *cpa, pgd_t *pgd, + unsigned long start, + unsigned long end) { pud_t *pud = pud_offset(pgd, start); @@ -834,7 +847,7 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; unsigned long pre_end = min_t(unsigned long, end, next_page); - unmap_pmd_range(pud, start, pre_end); + unmap_pmd_range(cpa, pud, start, pre_end); start = pre_end; pud++; @@ -848,7 +861,7 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) if (pud_large(*pud)) pud_clear(pud); else - unmap_pmd_range(pud, start, start + PUD_SIZE); + unmap_pmd_range(cpa, pud, start, start + PUD_SIZE); start += PUD_SIZE; pud++; @@ -858,7 +871,7 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) * 2M leftovers? */ if (start < end) - unmap_pmd_range(pud, start, end); + unmap_pmd_range(cpa, pud, start, end); /* * No need to try to free the PUD page because we'll free it in @@ -866,6 +879,24 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) */ } +static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) +{ + struct cpa_data cpa = { + .flags = CPA_FREE_PAGETABLES, + }; + + __unmap_pud_range(&cpa, pgd, start, end); +} + +void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end) +{ + struct cpa_data cpa = { + .flags = 0, + }; + + __unmap_pud_range(&cpa, pgd, start, end); +} + static int alloc_pte_page(pmd_t *pmd) { pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 27d218b38538..352fd015bc63 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -344,40 +344,26 @@ static inline void _pgd_free(pgd_t *pgd) kmem_cache_free(pgd_cache, pgd); } #else -static inline pgd_t *_pgd_alloc(void) -{ + #ifdef CONFIG_KAISER - // Instead of one PML4, we aquire two PML4s and, thus, an 8kb-aligned memory - // block. Therefore, we have to allocate at least 3 pages. However, the - // __get_free_pages returns us 4 pages. Hence, we store the base pointer at - // the beginning of the page of our 8kb-aligned memory block in order to - // correctly free it afterwars. - - unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE)); - - if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages) - { - *((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages; - return (pgd_t *) pages; - } - else - { - *((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages; - return (pgd_t *) (pages + PAGE_SIZE); - } +/* + * Instead of one pmd, we aquire two pmds. Being order-1, it is + * both 8k in size and 8k-aligned. That lets us just flip bit 12 + * in a pointer to swap between the two 4k halves. + */ +#define PGD_ALLOCATION_ORDER 1 #else - return (pgd_t *)__get_free_page(PGALLOC_GFP); +#define PGD_ALLOCATION_ORDER 0 #endif + +static inline pgd_t *_pgd_alloc(void) +{ + return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); } static inline void _pgd_free(pgd_t *pgd) { -#ifdef CONFIG_KAISER - unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE)); - free_pages(pages, get_order(4*PAGE_SIZE)); -#else - free_page((unsigned long)pgd); -#endif + free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); } #endif /* CONFIG_X86_PAE */ diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h new file mode 100644 index 000000000000..9db5433c2284 --- /dev/null +++ b/include/linux/kaiser.h @@ -0,0 +1,26 @@ +#ifndef _INCLUDE_KAISER_H +#define _INCLUDE_KAISER_H + +#ifdef CONFIG_KAISER +#include +#else + +/* + * These stubs are used whenever CONFIG_KAISER is off, which + * includes architectures that support KAISER, but have it + * disabled. + */ + +static inline void kaiser_init(void) +{ +} +static inline void kaiser_remove_mapping(unsigned long start, unsigned long size) +{ +} +static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) +{ + return 0; +} + +#endif /* !CONFIG_KAISER */ +#endif /* _INCLUDE_KAISER_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 4014be1dd2b6..0b9688d91eaa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -472,7 +473,6 @@ void set_task_stack_end_magic(struct task_struct *tsk) *stackend = STACK_END_MAGIC; /* for overflow detection */ } -extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); static struct task_struct *dup_task_struct(struct task_struct *orig, int node) { struct task_struct *tsk; @@ -500,9 +500,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) * functions again. */ tsk->stack = stack; -#ifdef CONFIG_KAISER - kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL); -#endif + + err= kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL); + if (err) + goto free_stack; #ifdef CONFIG_VMAP_STACK tsk->stack_vm_area = stack_vm_area; #endif diff --git a/security/Kconfig b/security/Kconfig index f515ac302257..334d2e85fa7c 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -32,12 +32,17 @@ config SECURITY If you are unsure how to answer this question, answer N. config KAISER bool "Remove the kernel mapping in user mode" + default y depends on X86_64 depends on !PARAVIRT help This enforces a strict kernel and user space isolation in order to close hardware side channels on kernel address information. +config KAISER_REAL_SWITCH + bool "KAISER: actually switch page tables" + default y + config SECURITYFS bool "Enable the securityfs filesystem" help -- GitLab From ac2f1018ac210cfedcfab82dbafbda4e2db7ed08 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 5 Sep 2017 12:05:01 -0700 Subject: [PATCH 0749/1398] kaiser: do not set _PAGE_NX on pgd_none native_pgd_clear() uses native_set_pgd(), so native_set_pgd() must avoid setting the _PAGE_NX bit on an otherwise pgd_none() entry: usually that just generated a warning on exit, but sometimes more mysterious and damaging failures (our production machines could not complete booting). The original fix to this just avoided adding _PAGE_NX to an empty entry; but eventually more problems surfaced with kexec, and EFI mapping expected to be a problem too. So now instead change native_set_pgd() to update shadow only if _PAGE_USER: A few places (kernel/machine_kexec_64.c, platform/efi/efi_64.c for sure) use set_pgd() to set up a temporary internal virtual address space, with physical pages remapped at what Kaiser regards as userspace addresses: Kaiser then assumes a shadow pgd follows, which it will try to corrupt. This appears to be responsible for the recent kexec and kdump failures; though it's unclear how those did not manifest as a problem before. Ah, the shadow pgd will only be assumed to "follow" if the requested pgd is on an even-numbered page: so I suppose it was going wrong 50% of the time all along. What we need is a flag to set_pgd(), to tell it we're dealing with userspace. Er, isn't that what the pgd's _PAGE_USER bit is saying? Add a test for that. But we cannot do the same for pgd_clear() (which may be called to clear corrupted entries - set aside the question of "corrupt in which pgd?" until later), so there just rely on pgd_clear() not being called in the problematic cases - with a WARN_ON_ONCE() which should fire half the time if it is. But this is getting too big for an inline function: move it into arch/x86/mm/kaiser.c (which then demands a boot/compressed mod); and de-void and de-space native_get_shadow/normal_pgd() while here. Also make an unnecessary change to KASLR's init_trampoline(): it was using set_pgd() to assign a pgd-value to a global variable (not in a pg directory page), which was rather scary given Kaiser's previous set_pgd() implementation: not a problem now, but too scary to leave as was, it could easily blow up if we have to change set_pgd() again. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/misc.h | 1 + arch/x86/include/asm/pgtable_64.h | 51 ++++++++----------------------- arch/x86/mm/kaiser.c | 42 +++++++++++++++++++++++++ arch/x86/mm/kaslr.c | 4 +-- 4 files changed, 58 insertions(+), 40 deletions(-) diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 766a5211f827..80e2cce110b9 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -9,6 +9,7 @@ */ #undef CONFIG_PARAVIRT #undef CONFIG_PARAVIRT_SPINLOCKS +#undef CONFIG_KAISER #undef CONFIG_KASAN #include diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 000265c8b74e..177caf3f7ab1 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -107,61 +107,36 @@ static inline void native_pud_clear(pud_t *pud) } #ifdef CONFIG_KAISER -static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) +extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd); + +static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) { - return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE); + return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE); } -static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) +static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) { - return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE); + return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE); } #else -static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) +static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) +{ + return pgd; +} +static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) { BUILD_BUG_ON(1); return NULL; } -static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) +static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) { return pgdp; } #endif /* CONFIG_KAISER */ -/* - * Page table pages are page-aligned. The lower half of the top - * level is used for userspace and the top half for the kernel. - * This returns true for user pages that need to get copied into - * both the user and kernel copies of the page tables, and false - * for kernel pages that should only be in the kernel copy. - */ -static inline bool is_userspace_pgd(void *__ptr) -{ - unsigned long ptr = (unsigned long)__ptr; - - return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2)); -} - static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { -#ifdef CONFIG_KAISER - pteval_t extra_kern_pgd_flags = 0; - /* Do we need to also populate the shadow pgd? */ - if (is_userspace_pgd(pgdp)) { - native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; - /* - * Even if the entry is *mapping* userspace, ensure - * that userspace can not use it. This way, if we - * get out to userspace running on the kernel CR3, - * userspace will crash instead of running. - */ - extra_kern_pgd_flags = _PAGE_NX; - } - pgdp->pgd = pgd.pgd; - pgdp->pgd |= extra_kern_pgd_flags; -#else /* CONFIG_KAISER */ - *pgdp = pgd; -#endif + *pgdp = kaiser_set_shadow_pgd(pgdp, pgd); } static inline void native_pgd_clear(pgd_t *pgd) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index 7270a293175d..8d6061c2bc79 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -302,4 +302,46 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size) unmap_pud_range_nofree(pgd, addr, end); } } + +/* + * Page table pages are page-aligned. The lower half of the top + * level is used for userspace and the top half for the kernel. + * This returns true for user pages that need to get copied into + * both the user and kernel copies of the page tables, and false + * for kernel pages that should only be in the kernel copy. + */ +static inline bool is_userspace_pgd(pgd_t *pgdp) +{ + return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2); +} + +pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) +{ + /* + * Do we need to also populate the shadow pgd? Check _PAGE_USER to + * skip cases like kexec and EFI which make temporary low mappings. + */ + if (pgd.pgd & _PAGE_USER) { + if (is_userspace_pgd(pgdp)) { + native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; + /* + * Even if the entry is *mapping* userspace, ensure + * that userspace can not use it. This way, if we + * get out to userspace running on the kernel CR3, + * userspace will crash instead of running. + */ + pgd.pgd |= _PAGE_NX; + } + } else if (!pgd.pgd) { + /* + * pgd_clear() cannot check _PAGE_USER, and is even used to + * clear corrupted pgd entries: so just rely on cases like + * kexec and EFI never to be using pgd_clear(). + */ + if (!WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE) && + is_userspace_pgd(pgdp)) + native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; + } + return pgd; +} #endif /* CONFIG_KAISER */ diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index aed206475aa7..9284ec1e8aa9 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -189,6 +189,6 @@ void __meminit init_trampoline(void) *pud_tramp = *pud; } - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); + /* Avoid set_pgd(), in case it's complicated by CONFIG_KAISER */ + trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)); } -- GitLab From 0994a2cf8fe4e884bad4810681117a7d0096c8e7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 3 Sep 2017 18:57:03 -0700 Subject: [PATCH 0750/1398] kaiser: stack map PAGE_SIZE at THREAD_SIZE-PAGE_SIZE Kaiser only needs to map one page of the stack; and kernel/fork.c did not build on powerpc (no __PAGE_KERNEL). It's all cleaner if linux/kaiser.h provides kaiser_map_thread_stack() and kaiser_unmap_thread_stack() wrappers around asm/kaiser.h's kaiser_add_mapping() and kaiser_remove_mapping(). And use linux/kaiser.h in init/main.c to avoid the #ifdefs there. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- include/linux/kaiser.h | 40 +++++++++++++++++++++++++++++++++------- init/main.c | 6 +----- kernel/fork.c | 7 ++----- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h index 9db5433c2284..4a4d6d911a14 100644 --- a/include/linux/kaiser.h +++ b/include/linux/kaiser.h @@ -1,26 +1,52 @@ -#ifndef _INCLUDE_KAISER_H -#define _INCLUDE_KAISER_H +#ifndef _LINUX_KAISER_H +#define _LINUX_KAISER_H #ifdef CONFIG_KAISER #include + +static inline int kaiser_map_thread_stack(void *stack) +{ + /* + * Map that page of kernel stack on which we enter from user context. + */ + return kaiser_add_mapping((unsigned long)stack + + THREAD_SIZE - PAGE_SIZE, PAGE_SIZE, __PAGE_KERNEL); +} + +static inline void kaiser_unmap_thread_stack(void *stack) +{ + /* + * Note: may be called even when kaiser_map_thread_stack() failed. + */ + kaiser_remove_mapping((unsigned long)stack + + THREAD_SIZE - PAGE_SIZE, PAGE_SIZE); +} #else /* * These stubs are used whenever CONFIG_KAISER is off, which - * includes architectures that support KAISER, but have it - * disabled. + * includes architectures that support KAISER, but have it disabled. */ static inline void kaiser_init(void) { } -static inline void kaiser_remove_mapping(unsigned long start, unsigned long size) +static inline int kaiser_add_mapping(unsigned long addr, + unsigned long size, unsigned long flags) +{ + return 0; +} +static inline void kaiser_remove_mapping(unsigned long start, + unsigned long size) { } -static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) +static inline int kaiser_map_thread_stack(void *stack) { return 0; } +static inline void kaiser_unmap_thread_stack(void *stack) +{ +} #endif /* !CONFIG_KAISER */ -#endif /* _INCLUDE_KAISER_H */ +#endif /* _LINUX_KAISER_H */ diff --git a/init/main.c b/init/main.c index 2c009f77e655..99f026565608 100644 --- a/init/main.c +++ b/init/main.c @@ -80,15 +80,13 @@ #include #include #include +#include #include #include #include #include #include -#ifdef CONFIG_KAISER -#include -#endif static int kernel_init(void *); @@ -476,9 +474,7 @@ static void __init mm_init(void) pgtable_init(); vmalloc_init(); ioremap_huge_init(); -#ifdef CONFIG_KAISER kaiser_init(); -#endif } asmlinkage __visible void __init start_kernel(void) diff --git a/kernel/fork.c b/kernel/fork.c index 0b9688d91eaa..70e10cb49be0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -212,12 +212,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) #endif } -extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size); static inline void free_thread_stack(struct task_struct *tsk) { -#ifdef CONFIG_KAISER - kaiser_remove_mapping((unsigned long)tsk->stack, THREAD_SIZE); -#endif + kaiser_unmap_thread_stack(tsk->stack); #ifdef CONFIG_VMAP_STACK if (task_stack_vm_area(tsk)) { unsigned long flags; @@ -501,7 +498,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) */ tsk->stack = stack; - err= kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL); + err= kaiser_map_thread_stack(tsk->stack); if (err) goto free_stack; #ifdef CONFIG_VMAP_STACK -- GitLab From 7a92e20d157f02d0259e2799dea43c9fa1a4541a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 3 Sep 2017 17:09:44 -0700 Subject: [PATCH 0751/1398] kaiser: fix build and FIXME in alloc_ldt_struct() Include linux/kaiser.h instead of asm/kaiser.h to build ldt.c without CONFIG_KAISER. kaiser_add_mapping() does already return an error code, so fix the FIXME. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/ldt.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index dd31f9f11f01..567bdbd7d917 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -16,9 +16,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -49,7 +49,7 @@ static struct ldt_struct *alloc_ldt_struct(int size) { struct ldt_struct *new_ldt; int alloc_size; - int ret = 0; + int ret; if (size > LDT_ENTRIES) return NULL; @@ -77,10 +77,8 @@ static struct ldt_struct *alloc_ldt_struct(int size) return NULL; } - // FIXME: make kaiser_add_mapping() return an error code - // when it fails - kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size, - __PAGE_KERNEL); + ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size, + __PAGE_KERNEL); if (ret) { __free_ldt_struct(new_ldt); return NULL; -- GitLab From 639c005daeebab077596b034fecd6b8902a88024 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 13 Sep 2017 14:03:10 -0700 Subject: [PATCH 0752/1398] kaiser: KAISER depends on SMP It is absurd that KAISER should depend on SMP, but apparently nobody has tried a UP build before: which breaks on implicit declaration of function 'per_cpu_offset' in arch/x86/mm/kaiser.c. Now, you would expect that to be trivially fixed up; but looking at the System.map when that block is #ifdef'ed out of kaiser_init(), I see that in a UP build __per_cpu_user_mapped_end is precisely at __per_cpu_user_mapped_start, and the items carefully gathered into that section for user-mapping on SMP, dispersed elsewhere on UP. So, some other kind of section assignment will be needed on UP, but implementing that is not a priority: just make KAISER depend on SMP for now. Also inserted a blank line before the option, tidied up the brief Kconfig help message, and added an "If unsure, Y". Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- security/Kconfig | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/security/Kconfig b/security/Kconfig index 334d2e85fa7c..dc78671a5a70 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -30,14 +30,16 @@ config SECURITY model will be used. If you are unsure how to answer this question, answer N. + config KAISER bool "Remove the kernel mapping in user mode" default y - depends on X86_64 - depends on !PARAVIRT + depends on X86_64 && SMP && !PARAVIRT help - This enforces a strict kernel and user space isolation in order to close - hardware side channels on kernel address information. + This enforces a strict kernel and user space isolation, in order + to close hardware side channels on kernel address information. + + If you are unsure how to answer this question, answer Y. config KAISER_REAL_SWITCH bool "KAISER: actually switch page tables" -- GitLab From 19377944317f7d1f706d7da2c7cea5c4bcb7440b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 21 Sep 2017 20:39:56 -0700 Subject: [PATCH 0753/1398] kaiser: fix regs to do_nmi() ifndef CONFIG_KAISER pjt has observed that nmi's second (nmi_from_kernel) call to do_nmi() adjusted the %rdi regs arg, rightly when CONFIG_KAISER, but wrongly when not CONFIG_KAISER. Although the minimal change is to add an #ifdef CONFIG_KAISER around the addq line, that looks cluttered, and I prefer how the first call to do_nmi() handled it: prepare args in %rdi and %rsi before getting into the CONFIG_KAISER block, since it does not touch them at all. And while we're here, place the "#ifdef CONFIG_KAISER" that follows each, to enclose the "Unconditionally restore CR3" comment: matching how the "Unconditionally use kernel CR3" comment above is enclosed. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2869fc178658..756dcbb41ccf 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1323,12 +1323,13 @@ ENTRY(nmi) movq %rax, %cr3 #endif call do_nmi + +#ifdef CONFIG_KAISER /* * Unconditionally restore CR3. I know we return to * kernel code that needs user CR3, but do we ever return * to "user mode" where we need the kernel CR3? */ -#ifdef CONFIG_KAISER popq %rax mov %rax, %cr3 #endif @@ -1552,6 +1553,8 @@ end_repeat_nmi: SWAPGS xorl %ebx, %ebx 1: + movq %rsp, %rdi + movq $-1, %rsi #ifdef CONFIG_KAISER /* Unconditionally use kernel CR3 for do_nmi() */ /* %rax is saved above, so OK to clobber here */ @@ -1564,16 +1567,14 @@ end_repeat_nmi: #endif /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ - movq %rsp, %rdi - addq $8, %rdi /* point %rdi at ptregs, fixed up for CR3 */ - movq $-1, %rsi call do_nmi + +#ifdef CONFIG_KAISER /* * Unconditionally restore CR3. We might be returning to * kernel code that needs user CR3, like just just before * a sysret. */ -#ifdef CONFIG_KAISER popq %rax mov %rax, %cr3 #endif -- GitLab From f881e626849c62641a91a13cb5344908c6613d11 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 23 Aug 2017 14:21:14 -0700 Subject: [PATCH 0754/1398] kaiser: fix perf crashes Avoid perf crashes: place debug_store in the user-mapped per-cpu area instead of allocating, and use page allocator plus kaiser_add_mapping() to keep the BTS and PEBS buffers user-mapped (that is, present in the user mapping, though visible only to kernel and hardware). The PEBS fixup buffer does not need this treatment. The need for a user-mapped struct debug_store showed up before doing any conscious perf testing: in a couple of kernel paging oopses on Westmere, implicating the debug_store offset of the per-cpu area. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/ds.c | 57 ++++++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 9dfeeeca0ea8..51170f69682d 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2,11 +2,15 @@ #include #include +#include #include #include #include "../perf_event.h" +static +DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store); + /* The size of a BTS record in bytes: */ #define BTS_RECORD_SIZE 24 @@ -268,6 +272,39 @@ void fini_debug_store_on_cpu(int cpu) static DEFINE_PER_CPU(void *, insn_buffer); +static void *dsalloc(size_t size, gfp_t flags, int node) +{ +#ifdef CONFIG_KAISER + unsigned int order = get_order(size); + struct page *page; + unsigned long addr; + + page = __alloc_pages_node(node, flags | __GFP_ZERO, order); + if (!page) + return NULL; + addr = (unsigned long)page_address(page); + if (kaiser_add_mapping(addr, size, __PAGE_KERNEL) < 0) { + __free_pages(page, order); + addr = 0; + } + return (void *)addr; +#else + return kmalloc_node(size, flags | __GFP_ZERO, node); +#endif +} + +static void dsfree(const void *buffer, size_t size) +{ +#ifdef CONFIG_KAISER + if (!buffer) + return; + kaiser_remove_mapping((unsigned long)buffer, size); + free_pages((unsigned long)buffer, get_order(size)); +#else + kfree(buffer); +#endif +} + static int alloc_pebs_buffer(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; @@ -278,7 +315,7 @@ static int alloc_pebs_buffer(int cpu) if (!x86_pmu.pebs) return 0; - buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); + buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); if (unlikely(!buffer)) return -ENOMEM; @@ -289,7 +326,7 @@ static int alloc_pebs_buffer(int cpu) if (x86_pmu.intel_cap.pebs_format < 2) { ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); if (!ibuffer) { - kfree(buffer); + dsfree(buffer, x86_pmu.pebs_buffer_size); return -ENOMEM; } per_cpu(insn_buffer, cpu) = ibuffer; @@ -315,7 +352,8 @@ static void release_pebs_buffer(int cpu) kfree(per_cpu(insn_buffer, cpu)); per_cpu(insn_buffer, cpu) = NULL; - kfree((void *)(unsigned long)ds->pebs_buffer_base); + dsfree((void *)(unsigned long)ds->pebs_buffer_base, + x86_pmu.pebs_buffer_size); ds->pebs_buffer_base = 0; } @@ -329,7 +367,7 @@ static int alloc_bts_buffer(int cpu) if (!x86_pmu.bts) return 0; - buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); + buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); if (unlikely(!buffer)) { WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); return -ENOMEM; @@ -355,19 +393,15 @@ static void release_bts_buffer(int cpu) if (!ds || !x86_pmu.bts) return; - kfree((void *)(unsigned long)ds->bts_buffer_base); + dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE); ds->bts_buffer_base = 0; } static int alloc_ds_buffer(int cpu) { - int node = cpu_to_node(cpu); - struct debug_store *ds; - - ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node); - if (unlikely(!ds)) - return -ENOMEM; + struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu); + memset(ds, 0, sizeof(*ds)); per_cpu(cpu_hw_events, cpu).ds = ds; return 0; @@ -381,7 +415,6 @@ static void release_ds_buffer(int cpu) return; per_cpu(cpu_hw_events, cpu).ds = NULL; - kfree(ds); } void release_ds_buffers(void) -- GitLab From f43f386f0bf0380fad5483be3ac678eeee934e95 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 3 Sep 2017 18:48:02 -0700 Subject: [PATCH 0755/1398] kaiser: ENOMEM if kaiser_pagetable_walk() NULL kaiser_add_user_map() took no notice when kaiser_pagetable_walk() failed. And avoid its might_sleep() when atomic (though atomic at present unused). Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kaiser.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index 8d6061c2bc79..ba6fc2c88c82 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -98,11 +98,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); - might_sleep(); if (is_atomic) { gfp &= ~GFP_KERNEL; gfp |= __GFP_HIGH | __GFP_ATOMIC; - } + } else + might_sleep(); if (pgd_none(*pgd)) { WARN_ONCE(1, "All shadow pgds should have been populated"); @@ -159,13 +159,17 @@ int kaiser_add_user_map(const void *__start_addr, unsigned long size, unsigned long end_addr = PAGE_ALIGN(start_addr + size); unsigned long target_address; - for (;address < end_addr; address += PAGE_SIZE) { + for (; address < end_addr; address += PAGE_SIZE) { target_address = get_pa_from_mapping(address); if (target_address == -1) { ret = -EIO; break; } pte = kaiser_pagetable_walk(address, false); + if (!pte) { + ret = -ENOMEM; + break; + } if (pte_none(*pte)) { set_pte(pte, __pte(flags | target_address)); } else { -- GitLab From 67fab0d4acb3556134127ac285b625f715b12ca1 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 3 Sep 2017 19:18:07 -0700 Subject: [PATCH 0756/1398] kaiser: tidied up asm/kaiser.h somewhat Mainly deleting a surfeit of blank lines, and reflowing header comment. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kaiser.h | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index 0703f48777f3..7394ba9f9951 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -1,15 +1,17 @@ #ifndef _ASM_X86_KAISER_H #define _ASM_X86_KAISER_H - -/* This file includes the definitions for the KAISER feature. - * KAISER is a counter measure against x86_64 side channel attacks on the kernel virtual memory. - * It has a shodow-pgd for every process. the shadow-pgd has a minimalistic kernel-set mapped, - * but includes the whole user memory. Within a kernel context switch, or when an interrupt is handled, - * the pgd is switched to the normal one. When the system switches to user mode, the shadow pgd is enabled. - * By this, the virtual memory chaches are freed, and the user may not attack the whole kernel memory. +/* + * This file includes the definitions for the KAISER feature. + * KAISER is a counter measure against x86_64 side channel attacks on + * the kernel virtual memory. It has a shadow pgd for every process: the + * shadow pgd has a minimalistic kernel-set mapped, but includes the whole + * user memory. Within a kernel context switch, or when an interrupt is handled, + * the pgd is switched to the normal one. When the system switches to user mode, + * the shadow pgd is enabled. By this, the virtual memory caches are freed, + * and the user may not attack the whole kernel memory. * - * A minimalistic kernel mapping holds the parts needed to be mapped in user mode, as the entry/exit functions - * of the user space, or the stacks. + * A minimalistic kernel mapping holds the parts needed to be mapped in user + * mode, such as the entry/exit functions of the user space, or the stacks. */ #ifdef __ASSEMBLY__ #ifdef CONFIG_KAISER @@ -48,13 +50,10 @@ _SWITCH_TO_KERNEL_CR3 %rax movq PER_CPU_VAR(unsafe_stack_register_backup), %rax .endm - .macro SWITCH_USER_CR3_NO_STACK - movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) _SWITCH_TO_USER_CR3 %rax movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - .endm #else /* CONFIG_KAISER */ @@ -72,7 +71,6 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax #else /* __ASSEMBLY__ */ - #ifdef CONFIG_KAISER /* * Upon kernel/user mode switch, it may happen that the address @@ -80,7 +78,6 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax * stored. To change the address space, another register is * needed. A register therefore has to be stored/restored. */ - DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); /** @@ -95,7 +92,6 @@ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); */ extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); - /** * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping * @addr: the start address of the range @@ -104,12 +100,12 @@ extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned l extern void kaiser_remove_mapping(unsigned long start, unsigned long size); /** - * kaiser_initialize_mapping - Initalize the shadow mapping + * kaiser_init - Initialize the shadow mapping * * Most parts of the shadow mapping can be mapped upon boot * time. Only per-process things like the thread stacks * or a new LDT have to be mapped at runtime. These boot- - * time mappings are permanent and nevertunmapped. + * time mappings are permanent and never unmapped. */ extern void kaiser_init(void); @@ -117,6 +113,4 @@ extern void kaiser_init(void); #endif /* __ASSEMBLY */ - - #endif /* _ASM_X86_KAISER_H */ -- GitLab From be6bf01f4caa523433030a2267df57d6afefa53d Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 3 Sep 2017 19:23:08 -0700 Subject: [PATCH 0757/1398] kaiser: tidied up kaiser_add/remove_mapping slightly Yes, unmap_pud_range_nofree()'s declaration ought to be in a header file really, but I'm not sure we want to use it anyway: so for now just declare it inside kaiser_remove_mapping(). And there doesn't seem to be such a thing as unmap_p4d_range(), even in a 5-level paging tree. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kaiser.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index ba6fc2c88c82..7a7e850b8381 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -285,8 +285,7 @@ void __init kaiser_init(void) __PAGE_KERNEL); } -extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end); -// add a mapping to the shadow-mapping, and synchronize the mappings +/* Add a mapping to the shadow mapping, and synchronize the mappings */ int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) { return kaiser_add_user_map((const void *)addr, size, flags); @@ -294,15 +293,13 @@ int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long fla void kaiser_remove_mapping(unsigned long start, unsigned long size) { + extern void unmap_pud_range_nofree(pgd_t *pgd, + unsigned long start, unsigned long end); unsigned long end = start + size; unsigned long addr; for (addr = start; addr < end; addr += PGDIR_SIZE) { pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr)); - /* - * unmap_p4d_range() handles > P4D_SIZE unmaps, - * so no need to trim 'end'. - */ unmap_pud_range_nofree(pgd, addr, end); } } -- GitLab From 604db49610853bffcc65029886688ef151469fa8 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 3 Sep 2017 19:51:10 -0700 Subject: [PATCH 0758/1398] kaiser: align addition to x86/mm/Makefile Use tab not space so they line up properly, kaslr.o also. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 682c162333ba..c505569e3835 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -37,5 +37,5 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o obj-$(CONFIG_X86_INTEL_MPX) += mpx.o obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o -obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o -obj-$(CONFIG_KAISER) += kaiser.o +obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +obj-$(CONFIG_KAISER) += kaiser.o -- GitLab From 61b7a404fa132ee0ffad04b0018b8d01ba5cf026 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 21 Aug 2017 20:11:43 -0700 Subject: [PATCH 0759/1398] kaiser: cleanups while trying for gold link While trying to get our gold link to work, four cleanups: matched the gdt_page declaration to its definition; in fiddling unsuccessfully with PERCPU_INPUT(), lined up backslashes; lined up the backslashes according to convention in percpu-defs.h; deleted the unused irq_stack_pointer addition to irq_stack_union. Sad to report that aligning backslashes does not appear to help gold align to 8192: but while these did not help, they are worth keeping. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/desc.h | 2 +- arch/x86/include/asm/processor.h | 5 ----- include/asm-generic/vmlinux.lds.h | 18 ++++++++---------- include/linux/percpu-defs.h | 22 +++++++++++----------- 4 files changed, 20 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 12080d87da3b..2ed5a2b3f8f7 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -43,7 +43,7 @@ struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); -DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); +DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page); static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3d4784e2f862..8cb52ee3ade6 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -335,11 +335,6 @@ union irq_stack_union { char gs_base[40]; unsigned long stack_canary; }; - - struct { - char irq_stack_pointer[64]; - char unused[IRQ_STACK_SIZE - 64]; - }; }; DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index d6ab14414776..2e6000a4eb2c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -778,16 +778,14 @@ */ #define PERCPU_INPUT(cacheline) \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ - \ - VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \ - *(.data..percpu..first) \ - . = ALIGN(cacheline); \ - *(.data..percpu..user_mapped) \ - *(.data..percpu..user_mapped..shared_aligned) \ - . = ALIGN(PAGE_SIZE); \ - *(.data..percpu..user_mapped..page_aligned) \ - VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \ - \ + VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \ + *(.data..percpu..first) \ + . = ALIGN(cacheline); \ + *(.data..percpu..user_mapped) \ + *(.data..percpu..user_mapped..shared_aligned) \ + . = ALIGN(PAGE_SIZE); \ + *(.data..percpu..user_mapped..page_aligned) \ + VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \ . = ALIGN(PAGE_SIZE); \ *(.data..percpu..page_aligned) \ . = ALIGN(cacheline); \ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 8ea945f63a05..cfe13cb4ec63 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -121,10 +121,10 @@ #define DEFINE_PER_CPU(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "") -#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ +#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) -#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ +#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) /* @@ -156,11 +156,11 @@ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ ____cacheline_aligned_in_smp -#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ +#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ ____cacheline_aligned_in_smp -#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ +#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ ____cacheline_aligned_in_smp @@ -185,18 +185,18 @@ /* * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode. */ -#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ - __aligned(PAGE_SIZE) +#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ + __aligned(PAGE_SIZE) -#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ - __aligned(PAGE_SIZE) +#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ + __aligned(PAGE_SIZE) /* * Declaration/definition used for per-CPU variables that must be read mostly. */ -#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ +#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ DECLARE_PER_CPU_SECTION(type, name, "..read_mostly") #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ -- GitLab From c27cdea56c546d0a6aa9114d9f5166404f3f14dd Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 9 Sep 2017 17:31:18 -0700 Subject: [PATCH 0760/1398] kaiser: name that 0x1000 KAISER_SHADOW_PGD_OFFSET There's a 0x1000 in various places, which looks better with a name. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 4 ++-- arch/x86/include/asm/kaiser.h | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 756dcbb41ccf..299375379b05 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1318,7 +1318,7 @@ ENTRY(nmi) movq %cr3, %rax pushq %rax #ifdef CONFIG_KAISER_REAL_SWITCH - andq $(~0x1000), %rax + andq $(~KAISER_SHADOW_PGD_OFFSET), %rax #endif movq %rax, %cr3 #endif @@ -1561,7 +1561,7 @@ end_repeat_nmi: movq %cr3, %rax pushq %rax #ifdef CONFIG_KAISER_REAL_SWITCH - andq $(~0x1000), %rax + andq $(~KAISER_SHADOW_PGD_OFFSET), %rax #endif movq %rax, %cr3 #endif diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index 7394ba9f9951..051acf678cda 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -13,13 +13,16 @@ * A minimalistic kernel mapping holds the parts needed to be mapped in user * mode, such as the entry/exit functions of the user space, or the stacks. */ + +#define KAISER_SHADOW_PGD_OFFSET 0x1000 + #ifdef __ASSEMBLY__ #ifdef CONFIG_KAISER .macro _SWITCH_TO_KERNEL_CR3 reg movq %cr3, \reg #ifdef CONFIG_KAISER_REAL_SWITCH -andq $(~0x1000), \reg +andq $(~KAISER_SHADOW_PGD_OFFSET), \reg #endif movq \reg, %cr3 .endm @@ -27,7 +30,7 @@ movq \reg, %cr3 .macro _SWITCH_TO_USER_CR3 reg movq %cr3, \reg #ifdef CONFIG_KAISER_REAL_SWITCH -orq $(0x1000), \reg +orq $(KAISER_SHADOW_PGD_OFFSET), \reg #endif movq \reg, %cr3 .endm -- GitLab From 1ce27de4011e57460d454f9bedd301e5b7757e68 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 3 Sep 2017 18:30:43 -0700 Subject: [PATCH 0761/1398] kaiser: delete KAISER_REAL_SWITCH option We fail to see what CONFIG_KAISER_REAL_SWITCH is for: it seems to be left over from early development, and now just obscures tricky parts of the code. Delete it before adding PCIDs, or nokaiser boot option. (Or if there is some good reason to keep the option, then it needs a help text - and a "depends on KAISER", so that all those without KAISER are not asked the question. But we'd much rather delete it.) Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 4 ---- arch/x86/include/asm/kaiser.h | 4 ---- security/Kconfig | 4 ---- 3 files changed, 12 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 299375379b05..4214ac1f1c0f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1317,9 +1317,7 @@ ENTRY(nmi) /* %rax is saved above, so OK to clobber here */ movq %cr3, %rax pushq %rax -#ifdef CONFIG_KAISER_REAL_SWITCH andq $(~KAISER_SHADOW_PGD_OFFSET), %rax -#endif movq %rax, %cr3 #endif call do_nmi @@ -1560,9 +1558,7 @@ end_repeat_nmi: /* %rax is saved above, so OK to clobber here */ movq %cr3, %rax pushq %rax -#ifdef CONFIG_KAISER_REAL_SWITCH andq $(~KAISER_SHADOW_PGD_OFFSET), %rax -#endif movq %rax, %cr3 #endif diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index 051acf678cda..e0fc45e77aee 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -21,17 +21,13 @@ .macro _SWITCH_TO_KERNEL_CR3 reg movq %cr3, \reg -#ifdef CONFIG_KAISER_REAL_SWITCH andq $(~KAISER_SHADOW_PGD_OFFSET), \reg -#endif movq \reg, %cr3 .endm .macro _SWITCH_TO_USER_CR3 reg movq %cr3, \reg -#ifdef CONFIG_KAISER_REAL_SWITCH orq $(KAISER_SHADOW_PGD_OFFSET), \reg -#endif movq \reg, %cr3 .endm diff --git a/security/Kconfig b/security/Kconfig index dc78671a5a70..d8ae933471ad 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -41,10 +41,6 @@ config KAISER If you are unsure how to answer this question, answer Y. -config KAISER_REAL_SWITCH - bool "KAISER: actually switch page tables" - default y - config SECURITYFS bool "Enable the securityfs filesystem" help -- GitLab From 1972bb9d92066fcf7deb0d798e02c88caa45e035 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 9 Sep 2017 21:27:32 -0700 Subject: [PATCH 0762/1398] kaiser: vmstat show NR_KAISERTABLE as nr_overhead The kaiser update made an interesting choice, never to free any shadow page tables. Contention on global spinlock was worrying, particularly with it held across page table scans when freeing. Something had to be done: I was going to add refcounting; but simply never to free them is an appealing choice, minimizing contention without complicating the code (the more a page table is found already, the less the spinlock is used). But leaking pages in this way is also a worry: can we get away with it? At the very least, we need a count to show how bad it actually gets: in principle, one might end up wasting about 1/256 of memory that way (1/512 for when direct-mapped pages have to be user-mapped, plus 1/512 for when they are user-mapped from the vmalloc area on another occasion (but we don't have vmalloc'ed stacks, so only large ldts are vmalloc'ed). Add per-cpu stat NR_KAISERTABLE: including 256 at startup for the shared pgd entries, and 1 for each intermediate page table added thereafter for user-mapping - but leave out the 1 per mm, for its shadow pgd, because that distracts from the monotonic increase. Shown in /proc/vmstat as nr_overhead (0 if kaiser not enabled). In practice, it doesn't look so bad so far: more like 1/12000 after nine hours of gtests below; and movable pageblock segregation should tend to cluster the kaiser tables into a subset of the address space (if not, they will be bad for compaction too). But production may tell a different story: keep an eye on this number, and bring back lighter freeing if it gets out of control (maybe a shrinker). ["nr_overhead" should of course say "nr_kaisertable", if it needs to stay; but for the moment we are being coy, preferring that when Joe Blow notices a new line in his /proc/vmstat, he does not get too curious about what this "kaiser" stuff might be.] Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kaiser.c | 16 +++++++++++----- include/linux/mmzone.h | 3 ++- mm/vmstat.c | 1 + 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index 7a7e850b8381..bd22ef51aa2d 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -121,9 +121,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) if (!new_pmd_page) return NULL; spin_lock(&shadow_table_allocation_lock); - if (pud_none(*pud)) + if (pud_none(*pud)) { set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); - else + __inc_zone_page_state(virt_to_page((void *) + new_pmd_page), NR_KAISERTABLE); + } else free_page(new_pmd_page); spin_unlock(&shadow_table_allocation_lock); } @@ -139,9 +141,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) if (!new_pte_page) return NULL; spin_lock(&shadow_table_allocation_lock); - if (pmd_none(*pmd)) + if (pmd_none(*pmd)) { set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); - else + __inc_zone_page_state(virt_to_page((void *) + new_pte_page), NR_KAISERTABLE); + } else free_page(new_pte_page); spin_unlock(&shadow_table_allocation_lock); } @@ -205,11 +209,13 @@ static void __init kaiser_init_all_pgds(void) pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0)); for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) { pgd_t new_pgd; - pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE); + pud_t *pud = pud_alloc_one(&init_mm, + PAGE_OFFSET + i * PGDIR_SIZE); if (!pud) { WARN_ON(1); break; } + inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE); new_pgd = __pgd(_KERNPG_TABLE |__pa(pud)); /* * Make sure not to stomp on some other pgd entry. diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fff21a82780c..490f5a83f947 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -124,8 +124,9 @@ enum zone_stat_item { NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ NR_KERNEL_STACK_KB, /* measured in KiB */ - /* Second 128 byte cacheline */ + NR_KAISERTABLE, NR_BOUNCE, + /* Second 128 byte cacheline */ #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif diff --git a/mm/vmstat.c b/mm/vmstat.c index 604f26a4f696..6a088df04b29 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -932,6 +932,7 @@ const char * const vmstat_text[] = { "nr_slab_unreclaimable", "nr_page_table_pages", "nr_kernel_stack", + "nr_overhead", "nr_bounce", #if IS_ENABLED(CONFIG_ZSMALLOC) "nr_zspages", -- GitLab From 2684b12a169ee244ffc05d34234b0a3dec238c40 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 30 Aug 2017 16:23:00 -0700 Subject: [PATCH 0763/1398] kaiser: enhanced by kernel and user PCIDs Merged performance improvements to Kaiser, using distinct kernel and user Process Context Identifiers to minimize the TLB flushing. [This work actually all from Dave Hansen 2017-08-30: still omitting trackswitch mods, and KAISER_REAL_SWITCH deleted.] Signed-off-by: Dave Hansen Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 10 +++- arch/x86/entry/entry_64_compat.S | 1 + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/kaiser.h | 15 +++++- arch/x86/include/asm/pgtable_types.h | 26 +++++++++++ arch/x86/include/asm/tlbflush.h | 52 +++++++++++++++++---- arch/x86/include/uapi/asm/processor-flags.h | 3 +- arch/x86/kernel/cpu/common.c | 34 ++++++++++++++ arch/x86/kvm/x86.c | 3 +- arch/x86/mm/kaiser.c | 7 +++ arch/x86/mm/tlb.c | 46 ++++++++++++++++-- 11 files changed, 181 insertions(+), 17 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4214ac1f1c0f..01ff191830f4 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1317,7 +1317,10 @@ ENTRY(nmi) /* %rax is saved above, so OK to clobber here */ movq %cr3, %rax pushq %rax - andq $(~KAISER_SHADOW_PGD_OFFSET), %rax + /* mask off "user" bit of pgd address and 12 PCID bits: */ + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax + /* Add back kernel PCID and "no flush" bit */ + orq X86_CR3_PCID_KERN_VAR, %rax movq %rax, %cr3 #endif call do_nmi @@ -1558,7 +1561,10 @@ end_repeat_nmi: /* %rax is saved above, so OK to clobber here */ movq %cr3, %rax pushq %rax - andq $(~KAISER_SHADOW_PGD_OFFSET), %rax + /* mask off "user" bit of pgd address and 12 PCID bits: */ + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax + /* Add back kernel PCID and "no flush" bit */ + orq X86_CR3_PCID_KERN_VAR, %rax movq %rax, %cr3 #endif diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index f0e384ee8fc6..0eb5801947ff 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ed10b5bf9b93..dc508830d0a1 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -189,6 +189,7 @@ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index e0fc45e77aee..360ff3bc44a9 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -1,5 +1,8 @@ #ifndef _ASM_X86_KAISER_H #define _ASM_X86_KAISER_H + +#include /* For PCID constants */ + /* * This file includes the definitions for the KAISER feature. * KAISER is a counter measure against x86_64 side channel attacks on @@ -21,13 +24,21 @@ .macro _SWITCH_TO_KERNEL_CR3 reg movq %cr3, \reg -andq $(~KAISER_SHADOW_PGD_OFFSET), \reg +andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg +orq X86_CR3_PCID_KERN_VAR, \reg movq \reg, %cr3 .endm .macro _SWITCH_TO_USER_CR3 reg movq %cr3, \reg -orq $(KAISER_SHADOW_PGD_OFFSET), \reg +andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg +/* + * This can obviously be one instruction by putting the + * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR. + * But, just leave it now for simplicity. + */ +orq X86_CR3_PCID_USER_VAR, \reg +orq $(KAISER_SHADOW_PGD_OFFSET), \reg movq \reg, %cr3 .endm diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8bc8d02fb4b1..ada77fdc283f 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -141,6 +141,32 @@ _PAGE_SOFT_DIRTY) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) +/* The ASID is the lower 12 bits of CR3 */ +#define X86_CR3_PCID_ASID_MASK (_AC((1<<12)-1,UL)) + +/* Mask for all the PCID-related bits in CR3: */ +#define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK) +#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64) +#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL)) +#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL)) + +#define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN) +#define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER) +#define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN) +#define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER) +#else +#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) +#define X86_CR3_PCID_ASID_USER (_AC(0x0,UL)) +/* + * PCIDs are unsupported on 32-bit and none of these bits can be + * set in CR3: + */ +#define X86_CR3_PCID_KERN_FLUSH (0) +#define X86_CR3_PCID_USER_FLUSH (0) +#define X86_CR3_PCID_KERN_NOFLUSH (0) +#define X86_CR3_PCID_USER_NOFLUSH (0) +#endif + /* * The cache modes defined here are used to translate between pure SW usage * and the HW defined cache mode bits and/or PAT entries. diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 7d2ea6b1f7d9..5d0c0b504729 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -13,7 +13,6 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr, unsigned long type) { struct { u64 d[2]; } desc = { { pcid, addr } }; - /* * The memory clobber is because the whole point is to invalidate * stale TLB entries and, especially if we're flushing global @@ -134,14 +133,25 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) static inline void __native_flush_tlb(void) { + if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) { + /* + * If current->mm == NULL then we borrow a mm which may change during a + * task switch and therefore we must not be preempted while we write CR3 + * back: + */ + preempt_disable(); + native_write_cr3(native_read_cr3()); + preempt_enable(); + return; + } /* - * If current->mm == NULL then we borrow a mm which may change during a - * task switch and therefore we must not be preempted while we write CR3 - * back: + * We are no longer using globals with KAISER, so a + * "nonglobals" flush would work too. But, this is more + * conservative. + * + * Note, this works with CR4.PCIDE=0 or 1. */ - preempt_disable(); - native_write_cr3(native_read_cr3()); - preempt_enable(); + invpcid_flush_all(); } static inline void __native_flush_tlb_global_irq_disabled(void) @@ -163,6 +173,8 @@ static inline void __native_flush_tlb_global(void) /* * Using INVPCID is considerably faster than a pair of writes * to CR4 sandwiched inside an IRQ flag save/restore. + * + * Note, this works with CR4.PCIDE=0 or 1. */ invpcid_flush_all(); return; @@ -182,7 +194,31 @@ static inline void __native_flush_tlb_global(void) static inline void __native_flush_tlb_single(unsigned long addr) { - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + /* + * SIMICS #GP's if you run INVPCID with type 2/3 + * and X86_CR4_PCIDE clear. Shame! + * + * The ASIDs used below are hard-coded. But, we must not + * call invpcid(type=1/2) before CR4.PCIDE=1. Just call + * invpcid in the case we are called early. + */ + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + return; + } + /* Flush the address out of both PCIDs. */ + /* + * An optimization here might be to determine addresses + * that are only kernel-mapped and only flush the kernel + * ASID. But, userspace flushes are probably much more + * important performance-wise. + * + * Make sure to do only a single invpcid when KAISER is + * disabled and we have only a single ASID. + */ + if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER) + invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); + invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); } static inline void __flush_tlb_all(void) diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index 567de50a4c2a..6768d1321016 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -77,7 +77,8 @@ #define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT) #define X86_CR3_PCD_BIT 4 /* Page Cache Disable */ #define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT) -#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */ +#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */ +#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT) /* * Intel CPU features in CR4 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3efde13eaed0..b4c0ae5495f3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -324,11 +324,45 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) } } +/* + * These can have bit 63 set, so we can not just use a plain "or" + * instruction to get their value or'd into CR3. It would take + * another register. So, we use a memory reference to these + * instead. + * + * This is also handy because systems that do not support + * PCIDs just end up or'ing a 0 into their CR3, which does + * no harm. + */ +__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0; +__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0; + static void setup_pcid(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_PCID)) { if (cpu_has(c, X86_FEATURE_PGE)) { cr4_set_bits(X86_CR4_PCIDE); + /* + * These variables are used by the entry/exit + * code to change PCIDs. + */ +#ifdef CONFIG_KAISER + X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH; + X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH; +#endif + /* + * INVPCID has two "groups" of types: + * 1/2: Invalidate an individual address + * 3/4: Invalidate all contexts + * + * 1/2 take a PCID, but 3/4 do not. So, 3/4 + * ignore the PCID argument in the descriptor. + * But, we have to be careful not to call 1/2 + * with an actual non-zero PCID in them before + * we do the above cr4_set_bits(). + */ + if (cpu_has(c, X86_FEATURE_INVPCID)) + set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE); } else { /* * flush_tlb_all(), as currently implemented, won't diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7e28e6c877d9..73304b1a03cc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -773,7 +773,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ - if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu)) + if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) || + !is_long_mode(vcpu)) return 1; } diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index bd22ef51aa2d..f5c75f73bb71 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -239,6 +239,8 @@ static void __init kaiser_init_all_pgds(void) } while (0) extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; +extern unsigned long X86_CR3_PCID_KERN_VAR; +extern unsigned long X86_CR3_PCID_USER_VAR; /* * If anything in here fails, we will likely die on one of the * first kernel->user transitions and init will die. But, we @@ -289,6 +291,11 @@ void __init kaiser_init(void) kaiser_add_user_map_early(&debug_idt_table, sizeof(gate_desc) * NR_VECTORS, __PAGE_KERNEL); + + kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE, + __PAGE_KERNEL); + kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE, + __PAGE_KERNEL); } /* Add a mapping to the shadow mapping, and synchronize the mappings */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 53b72fb4e781..43ce5d316eae 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -34,6 +34,46 @@ struct flush_tlb_info { unsigned long flush_end; }; +static void load_new_mm_cr3(pgd_t *pgdir) +{ + unsigned long new_mm_cr3 = __pa(pgdir); + + /* + * KAISER, plus PCIDs needs some extra work here. But, + * if either of features is not present, we need no + * PCIDs here and just do a normal, full TLB flush with + * the write_cr3() + */ + if (!IS_ENABLED(CONFIG_KAISER) || + !cpu_feature_enabled(X86_FEATURE_PCID)) + goto out_set_cr3; + /* + * We reuse the same PCID for different tasks, so we must + * flush all the entires for the PCID out when we change + * tasks. + */ + new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir); + + /* + * The flush from load_cr3() may leave old TLB entries + * for userspace in place. We must flush that context + * separately. We can theoretically delay doing this + * until we actually load up the userspace CR3, but + * that's a bit tricky. We have to have the "need to + * flush userspace PCID" bit per-cpu and check it in the + * exit-to-userspace paths. + */ + invpcid_flush_single_context(X86_CR3_PCID_ASID_USER); + +out_set_cr3: + /* + * Caution: many callers of this function expect + * that load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask writes. + */ + write_cr3(new_mm_cr3); +} + /* * We cannot call mmdrop() because we are in interrupt context, * instead update mm->cpu_vm_mask. @@ -45,7 +85,7 @@ void leave_mm(int cpu) BUG(); if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); - load_cr3(swapper_pg_dir); + load_new_mm_cr3(swapper_pg_dir); /* * This gets called in the idle path where RCU * functions differently. Tracing normally @@ -120,7 +160,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * ordering guarantee we need. * */ - load_cr3(next->pgd); + load_new_mm_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); @@ -167,7 +207,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * As above, load_cr3() is serializing and orders TLB * fills with respect to the mm_cpumask write. */ - load_cr3(next->pgd); + load_new_mm_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); load_mm_cr4(next); load_mm_ldt(next); -- GitLab From 0b5ca9d99599087971a3cd7634a0b61d4e2653e3 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 17 Aug 2017 15:00:37 -0700 Subject: [PATCH 0764/1398] kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush user We have many machines (Westmere, Sandybridge, Ivybridge) supporting PCID but not INVPCID: on these load_new_mm_cr3() simply crashed. Flushing user context inside load_new_mm_cr3() without the use of invpcid is difficult: momentarily switch from kernel to user context and back to do so? I'm not sure whether that can be safely done at all, and would risk polluting user context with kernel internals, and kernel context with stale user externals. Instead, follow the hint in the comment that was there: change X86_CR3_PCID_USER_VAR to be a per-cpu variable, then load_new_mm_cr3() can leave a note in it, for SWITCH_USER_CR3 on return to userspace to flush user context TLB, instead of default X86_CR3_PCID_USER_NOFLUSH. Which works well enough that there's no need to do it this way only when invpcid is unsupported: it's a good alternative to invpcid here. But there's a couple of inlines in asm/tlbflush.h that need to do the same trick, so it's best to localize all this per-cpu business in mm/kaiser.c: moving that part of the initialization from setup_pcid() to kaiser_setup_pcid(); with kaiser_flush_tlb_on_return_to_user() the function for noting an X86_CR3_PCID_USER_FLUSH. And let's keep a KAISER_SHADOW_PGD_OFFSET in there, to avoid the extra OR on exit. I did try to make the feature tests in asm/tlbflush.h more consistent with each other: there seem to be far too many ways of performing such tests, and I don't have a good grasp of their differences. At first I converted them all to be static_cpu_has(): but that proved to be a mistake, as the comment in __native_flush_tlb_single() hints; so then I reversed and made them all this_cpu_has(). Probably all gratuitous change, but that's the way it's working at present. I am slightly bothered by the way non-per-cpu X86_CR3_PCID_KERN_VAR gets re-initialized by each cpu (before and after these changes): no problem when (as usual) all cpus on a machine have the same features, but in principle incorrect. However, my experiment to per-cpu-ify that one did not end well... Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kaiser.h | 18 ++++++----- arch/x86/include/asm/tlbflush.h | 56 +++++++++++++++++++++++---------- arch/x86/kernel/cpu/common.c | 22 +------------ arch/x86/mm/kaiser.c | 50 +++++++++++++++++++++++++---- arch/x86/mm/tlb.c | 46 +++++++++++---------------- 5 files changed, 113 insertions(+), 79 deletions(-) diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index 360ff3bc44a9..009bca514c20 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -32,13 +32,12 @@ movq \reg, %cr3 .macro _SWITCH_TO_USER_CR3 reg movq %cr3, \reg andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg -/* - * This can obviously be one instruction by putting the - * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR. - * But, just leave it now for simplicity. - */ -orq X86_CR3_PCID_USER_VAR, \reg -orq $(KAISER_SHADOW_PGD_OFFSET), \reg +orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg +js 9f +// FLUSH this time, reset to NOFLUSH for next time +// But if nopcid? Consider using 0x80 for user pcid? +movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) +9: movq \reg, %cr3 .endm @@ -90,6 +89,11 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax */ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); +extern unsigned long X86_CR3_PCID_KERN_VAR; +DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); + +extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; + /** * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping * @addr: the start address of the range diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 5d0c0b504729..01584d488754 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -13,6 +13,7 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr, unsigned long type) { struct { u64 d[2]; } desc = { { pcid, addr } }; + /* * The memory clobber is because the whole point is to invalidate * stale TLB entries and, especially if we're flushing global @@ -131,27 +132,42 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) cr4_set_bits(mask); } +/* + * Declare a couple of kaiser interfaces here for convenience, + * to avoid the need for asm/kaiser.h in unexpected places. + */ +#ifdef CONFIG_KAISER +extern void kaiser_setup_pcid(void); +extern void kaiser_flush_tlb_on_return_to_user(void); +#else +static inline void kaiser_setup_pcid(void) +{ +} +static inline void kaiser_flush_tlb_on_return_to_user(void) +{ +} +#endif + static inline void __native_flush_tlb(void) { - if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) { + if (this_cpu_has(X86_FEATURE_INVPCID)) { /* - * If current->mm == NULL then we borrow a mm which may change during a - * task switch and therefore we must not be preempted while we write CR3 - * back: + * Note, this works with CR4.PCIDE=0 or 1. */ - preempt_disable(); - native_write_cr3(native_read_cr3()); - preempt_enable(); + invpcid_flush_all_nonglobals(); return; } + /* - * We are no longer using globals with KAISER, so a - * "nonglobals" flush would work too. But, this is more - * conservative. - * - * Note, this works with CR4.PCIDE=0 or 1. + * If current->mm == NULL then we borrow a mm which may change during a + * task switch and therefore we must not be preempted while we write CR3 + * back: */ - invpcid_flush_all(); + preempt_disable(); + if (this_cpu_has(X86_FEATURE_PCID)) + kaiser_flush_tlb_on_return_to_user(); + native_write_cr3(native_read_cr3()); + preempt_enable(); } static inline void __native_flush_tlb_global_irq_disabled(void) @@ -167,9 +183,13 @@ static inline void __native_flush_tlb_global_irq_disabled(void) static inline void __native_flush_tlb_global(void) { +#ifdef CONFIG_KAISER + /* Globals are not used at all */ + __native_flush_tlb(); +#else unsigned long flags; - if (static_cpu_has(X86_FEATURE_INVPCID)) { + if (this_cpu_has(X86_FEATURE_INVPCID)) { /* * Using INVPCID is considerably faster than a pair of writes * to CR4 sandwiched inside an IRQ flag save/restore. @@ -186,10 +206,9 @@ static inline void __native_flush_tlb_global(void) * be called from deep inside debugging code.) */ raw_local_irq_save(flags); - __native_flush_tlb_global_irq_disabled(); - raw_local_irq_restore(flags); +#endif } static inline void __native_flush_tlb_single(unsigned long addr) @@ -200,9 +219,12 @@ static inline void __native_flush_tlb_single(unsigned long addr) * * The ASIDs used below are hard-coded. But, we must not * call invpcid(type=1/2) before CR4.PCIDE=1. Just call - * invpcid in the case we are called early. + * invlpg in the case we are called early. */ + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { + if (this_cpu_has(X86_FEATURE_PCID)) + kaiser_flush_tlb_on_return_to_user(); asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); return; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b4c0ae5495f3..e6be5f3b1259 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -324,32 +324,11 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) } } -/* - * These can have bit 63 set, so we can not just use a plain "or" - * instruction to get their value or'd into CR3. It would take - * another register. So, we use a memory reference to these - * instead. - * - * This is also handy because systems that do not support - * PCIDs just end up or'ing a 0 into their CR3, which does - * no harm. - */ -__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0; -__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0; - static void setup_pcid(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_PCID)) { if (cpu_has(c, X86_FEATURE_PGE)) { cr4_set_bits(X86_CR4_PCIDE); - /* - * These variables are used by the entry/exit - * code to change PCIDs. - */ -#ifdef CONFIG_KAISER - X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH; - X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH; -#endif /* * INVPCID has two "groups" of types: * 1/2: Invalidate an individual address @@ -375,6 +354,7 @@ static void setup_pcid(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_PCID); } } + kaiser_setup_pcid(); } /* diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index f5c75f73bb71..7056840e2739 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -11,12 +11,26 @@ #include #include +#include /* to verify its kaiser declarations */ #include #include #include + #ifdef CONFIG_KAISER +__visible +DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + +/* + * These can have bit 63 set, so we can not just use a plain "or" + * instruction to get their value or'd into CR3. It would take + * another register. So, we use a memory reference to these instead. + * + * This is also handy because systems that do not support PCIDs + * just end up or'ing a 0 into their CR3, which does no harm. + */ +__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR; +DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); -__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); /* * At runtime, the only things we map are some things for CPU * hotplug, and stacks for new processes. No two CPUs will ever @@ -238,9 +252,6 @@ static void __init kaiser_init_all_pgds(void) WARN_ON(__ret); \ } while (0) -extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; -extern unsigned long X86_CR3_PCID_KERN_VAR; -extern unsigned long X86_CR3_PCID_USER_VAR; /* * If anything in here fails, we will likely die on one of the * first kernel->user transitions and init will die. But, we @@ -294,8 +305,6 @@ void __init kaiser_init(void) kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE, __PAGE_KERNEL); - kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE, - __PAGE_KERNEL); } /* Add a mapping to the shadow mapping, and synchronize the mappings */ @@ -358,4 +367,33 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) } return pgd; } + +void kaiser_setup_pcid(void) +{ + unsigned long kern_cr3 = 0; + unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET; + + if (this_cpu_has(X86_FEATURE_PCID)) { + kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH; + user_cr3 |= X86_CR3_PCID_USER_NOFLUSH; + } + /* + * These variables are used by the entry/exit + * code to change PCID and pgd and TLB flushing. + */ + X86_CR3_PCID_KERN_VAR = kern_cr3; + this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3); +} + +/* + * Make a note that this cpu will need to flush USER tlb on return to user. + * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling: + * if cpu does not, then the NOFLUSH bit will never have been set. + */ +void kaiser_flush_tlb_on_return_to_user(void) +{ + this_cpu_write(X86_CR3_PCID_USER_VAR, + X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); +} +EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); #endif /* CONFIG_KAISER */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 43ce5d316eae..4c841c9c4d9c 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -6,13 +6,14 @@ #include #include #include +#include #include #include #include #include #include -#include +#include /* * TLB flushing, formerly SMP-only @@ -38,34 +39,23 @@ static void load_new_mm_cr3(pgd_t *pgdir) { unsigned long new_mm_cr3 = __pa(pgdir); - /* - * KAISER, plus PCIDs needs some extra work here. But, - * if either of features is not present, we need no - * PCIDs here and just do a normal, full TLB flush with - * the write_cr3() - */ - if (!IS_ENABLED(CONFIG_KAISER) || - !cpu_feature_enabled(X86_FEATURE_PCID)) - goto out_set_cr3; - /* - * We reuse the same PCID for different tasks, so we must - * flush all the entires for the PCID out when we change - * tasks. - */ - new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir); - - /* - * The flush from load_cr3() may leave old TLB entries - * for userspace in place. We must flush that context - * separately. We can theoretically delay doing this - * until we actually load up the userspace CR3, but - * that's a bit tricky. We have to have the "need to - * flush userspace PCID" bit per-cpu and check it in the - * exit-to-userspace paths. - */ - invpcid_flush_single_context(X86_CR3_PCID_ASID_USER); +#ifdef CONFIG_KAISER + if (this_cpu_has(X86_FEATURE_PCID)) { + /* + * We reuse the same PCID for different tasks, so we must + * flush all the entries for the PCID out when we change tasks. + * Flush KERN below, flush USER when returning to userspace in + * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro. + * + * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could + * do it here, but can only be used if X86_FEATURE_INVPCID is + * available - and many machines support pcid without invpcid. + */ + new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; + kaiser_flush_tlb_on_return_to_user(); + } +#endif /* CONFIG_KAISER */ -out_set_cr3: /* * Caution: many callers of this function expect * that load_cr3() is serializing and orders TLB -- GitLab From 6a2b4117614c5d4239d1120b08591b2d296f4f53 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 8 Sep 2017 19:26:30 -0700 Subject: [PATCH 0765/1398] kaiser: PCID 0 for kernel and 128 for user Why was 4 chosen for kernel PCID and 6 for user PCID? No good reason in a backport where PCIDs are only used for Kaiser. If we continue with those, then we shall need to add Andy Lutomirski's 4.13 commit 6c690ee1039b ("x86/mm: Split read_cr3() into read_cr3_pa() and __read_cr3()"), which deals with the problem of read_cr3() callers finding stray bits in the cr3 that they expected to be page-aligned; and for hibernation, his 4.14 commit f34902c5c6c0 ("x86/hibernate/64: Mask off CR3's PCID bits in the saved CR3"). But if 0 is used for kernel PCID, then there's no need to add in those commits - whenever the kernel looks, it sees 0 in the lower bits; and 0 for kernel seems an obvious choice. And I naughtily propose 128 for user PCID. Because there's a place in _SWITCH_TO_USER_CR3 where it takes note of the need for TLB FLUSH, but needs to reset that to NOFLUSH for the next occasion. Currently it does so with a "movb $(0x80)" into the high byte of the per-cpu quadword, but that will cause a machine without PCID support to crash. Now, if %al just happened to have 0x80 in it at that point, on a machine with PCID support, but 0 on a machine without PCID support... (That will go badly wrong once the pgd can be at a physical address above 2^56, but even with 5-level paging, physical goes up to 2^52.) Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kaiser.h | 19 ++++++++++++------- arch/x86/include/asm/pgtable_types.h | 7 ++++--- arch/x86/mm/tlb.c | 3 +++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index 009bca514c20..110a73e0572d 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -29,14 +29,19 @@ orq X86_CR3_PCID_KERN_VAR, \reg movq \reg, %cr3 .endm -.macro _SWITCH_TO_USER_CR3 reg +.macro _SWITCH_TO_USER_CR3 reg regb +/* + * regb must be the low byte portion of reg: because we have arranged + * for the low byte of the user PCID to serve as the high byte of NOFLUSH + * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are + * not enabled): so that the one register can update both memory and cr3. + */ movq %cr3, \reg andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg js 9f -// FLUSH this time, reset to NOFLUSH for next time -// But if nopcid? Consider using 0x80 for user pcid? -movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) +/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */ +movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) 9: movq \reg, %cr3 .endm @@ -49,7 +54,7 @@ popq %rax .macro SWITCH_USER_CR3 pushq %rax -_SWITCH_TO_USER_CR3 %rax +_SWITCH_TO_USER_CR3 %rax %al popq %rax .endm @@ -61,7 +66,7 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax .macro SWITCH_USER_CR3_NO_STACK movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) -_SWITCH_TO_USER_CR3 %rax +_SWITCH_TO_USER_CR3 %rax %al movq PER_CPU_VAR(unsafe_stack_register_backup), %rax .endm @@ -69,7 +74,7 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax .macro SWITCH_KERNEL_CR3 reg .endm -.macro SWITCH_USER_CR3 reg +.macro SWITCH_USER_CR3 reg regb .endm .macro SWITCH_USER_CR3_NO_STACK .endm diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index ada77fdc283f..7cf2883d7bc4 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -146,16 +146,17 @@ /* Mask for all the PCID-related bits in CR3: */ #define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK) +#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) + #if defined(CONFIG_KAISER) && defined(CONFIG_X86_64) -#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL)) -#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL)) +/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */ +#define X86_CR3_PCID_ASID_USER (_AC(0x80,UL)) #define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN) #define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER) #define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN) #define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER) #else -#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) #define X86_CR3_PCID_ASID_USER (_AC(0x0,UL)) /* * PCIDs are unsupported on 32-bit and none of these bits can be diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 4c841c9c4d9c..7759b650d9da 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -50,6 +50,9 @@ static void load_new_mm_cr3(pgd_t *pgdir) * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could * do it here, but can only be used if X86_FEATURE_INVPCID is * available - and many machines support pcid without invpcid. + * + * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0; + * but keep that line in there in case something changes. */ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; kaiser_flush_tlb_on_return_to_user(); -- GitLab From d0142ceb7926b81738e1fc122b109f67128d2762 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 27 Aug 2017 16:24:27 -0700 Subject: [PATCH 0766/1398] kaiser: x86_cr3_pcid_noflush and x86_cr3_pcid_user Mostly this commit is just unshouting X86_CR3_PCID_KERN_VAR and X86_CR3_PCID_USER_VAR: we usually name variables in lower-case. But why does x86_cr3_pcid_noflush need to be __aligned(PAGE_SIZE)? Ah, it's a leftover from when kaiser_add_user_map() once complained about mapping the same page twice. Make it __read_mostly instead. (I'm a little uneasy about all the unrelated data which shares its page getting user-mapped too, but that was so before, and not a big deal: though we call it user-mapped, it's not mapped with _PAGE_USER.) And there is a little change around the two calls to do_nmi(). Previously they set the NOFLUSH bit (if PCID supported) when forcing to kernel context before do_nmi(); now they also have the NOFLUSH bit set (if PCID supported) when restoring context after: nothing done in do_nmi() should require a TLB to be flushed here. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 8 ++++---- arch/x86/include/asm/kaiser.h | 11 +++++------ arch/x86/mm/kaiser.c | 13 +++++++------ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 01ff191830f4..c0e142c6c16c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1316,11 +1316,11 @@ ENTRY(nmi) /* Unconditionally use kernel CR3 for do_nmi() */ /* %rax is saved above, so OK to clobber here */ movq %cr3, %rax + /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ + orq x86_cr3_pcid_noflush, %rax pushq %rax /* mask off "user" bit of pgd address and 12 PCID bits: */ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax - /* Add back kernel PCID and "no flush" bit */ - orq X86_CR3_PCID_KERN_VAR, %rax movq %rax, %cr3 #endif call do_nmi @@ -1560,11 +1560,11 @@ end_repeat_nmi: /* Unconditionally use kernel CR3 for do_nmi() */ /* %rax is saved above, so OK to clobber here */ movq %cr3, %rax + /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ + orq x86_cr3_pcid_noflush, %rax pushq %rax /* mask off "user" bit of pgd address and 12 PCID bits: */ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax - /* Add back kernel PCID and "no flush" bit */ - orq X86_CR3_PCID_KERN_VAR, %rax movq %rax, %cr3 #endif diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index 110a73e0572d..48d8d70dd8c7 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -25,7 +25,7 @@ .macro _SWITCH_TO_KERNEL_CR3 reg movq %cr3, \reg andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg -orq X86_CR3_PCID_KERN_VAR, \reg +orq x86_cr3_pcid_noflush, \reg movq \reg, %cr3 .endm @@ -37,11 +37,10 @@ movq \reg, %cr3 * not enabled): so that the one register can update both memory and cr3. */ movq %cr3, \reg -andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg -orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg +orq PER_CPU_VAR(x86_cr3_pcid_user), \reg js 9f /* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */ -movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) +movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7) 9: movq \reg, %cr3 .endm @@ -94,8 +93,8 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax */ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); -extern unsigned long X86_CR3_PCID_KERN_VAR; -DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); +extern unsigned long x86_cr3_pcid_noflush; +DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index 7056840e2739..fa1cb0990551 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -28,8 +28,8 @@ DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); * This is also handy because systems that do not support PCIDs * just end up or'ing a 0 into their CR3, which does no harm. */ -__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR; -DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); +unsigned long x86_cr3_pcid_noflush __read_mostly; +DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user); /* * At runtime, the only things we map are some things for CPU @@ -303,7 +303,8 @@ void __init kaiser_init(void) sizeof(gate_desc) * NR_VECTORS, __PAGE_KERNEL); - kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE, + kaiser_add_user_map_early(&x86_cr3_pcid_noflush, + sizeof(x86_cr3_pcid_noflush), __PAGE_KERNEL); } @@ -381,8 +382,8 @@ void kaiser_setup_pcid(void) * These variables are used by the entry/exit * code to change PCID and pgd and TLB flushing. */ - X86_CR3_PCID_KERN_VAR = kern_cr3; - this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3); + x86_cr3_pcid_noflush = kern_cr3; + this_cpu_write(x86_cr3_pcid_user, user_cr3); } /* @@ -392,7 +393,7 @@ void kaiser_setup_pcid(void) */ void kaiser_flush_tlb_on_return_to_user(void) { - this_cpu_write(X86_CR3_PCID_USER_VAR, + this_cpu_write(x86_cr3_pcid_user, X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); } EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); -- GitLab From 05ddad146d02b2cb05f8c06b2c3b3c365372a66c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 26 Sep 2017 18:43:07 -0700 Subject: [PATCH 0767/1398] kaiser: paranoid_entry pass cr3 need to paranoid_exit Neel Natu points out that paranoid_entry() was wrong to assume that an entry that did not need swapgs would not need SWITCH_KERNEL_CR3: paranoid_entry (used for debug breakpoint, int3, double fault or MCE; though I think it's only the MCE case that is cause for concern here) can break in at an awkward time, between cr3 switch and swapgs, but its handling always needs kernel gs and kernel cr3. Easy to fix in itself, but paranoid_entry() also needs to convey to paranoid_exit() (and my reading of macro idtentry says paranoid_entry and paranoid_exit are always paired) how to restore the prior state. The swapgs state is already conveyed by %ebx (0 or 1), so extend that also to convey when SWITCH_USER_CR3 will be needed (2 or 3). (Yes, I'd much prefer that 0 meant no swapgs, whereas it's the other way round: and a convention shared with error_entry() and error_exit(), which I don't want to touch. Perhaps I should have inverted the bit for switch cr3 too, but did not.) paranoid_exit() would be straightforward, except for TRACE_IRQS: it did TRACE_IRQS_IRETQ when doing swapgs, but TRACE_IRQS_IRETQ_DEBUG when not: which is it supposed to use when SWITCH_USER_CR3 is split apart from that? As best as I can determine, commit 5963e317b1e9 ("ftrace/x86: Do not change stacks in DEBUG when calling lockdep") missed the swapgs case, and should have used TRACE_IRQS_IRETQ_DEBUG there too (the discrepancy has nothing to do with the liberal use of _NO_STACK and _UNSAFE_STACK hereabouts: TRACE_IRQS_OFF_DEBUG has just been used in all cases); discrepancy lovingly preserved across several paranoid_exit() cleanups, but I'm now removing it. Neel further indicates that to use SWITCH_USER_CR3_NO_STACK there in paranoid_exit() is now not only unnecessary but unsafe: might corrupt syscall entry's unsafe_stack_register_backup of %rax. Just use SWITCH_USER_CR3: and delete SWITCH_USER_CR3_NO_STACK altogether, before we make the mistake of using it again. hughd adds: this commit fixes an issue in the Kaiser-without-PCIDs part of the series, and ought to be moved earlier, if you decided to make a release of Kaiser-without-PCIDs. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 46 +++++++++++++++++++++++++------- arch/x86/entry/entry_64_compat.S | 2 +- arch/x86/include/asm/kaiser.h | 8 ------ 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c0e142c6c16c..34a172facf17 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1053,7 +1053,11 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vec /* * Save all registers in pt_regs, and switch gs if needed. * Use slow, but surefire "are we in kernel?" check. - * Return: ebx=0: need swapgs on exit, ebx=1: otherwise + * + * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit + * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit + * ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit + * ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit */ ENTRY(paranoid_entry) cld @@ -1065,9 +1069,26 @@ ENTRY(paranoid_entry) testl %edx, %edx js 1f /* negative -> in kernel */ SWAPGS - SWITCH_KERNEL_CR3 xorl %ebx, %ebx -1: ret +1: +#ifdef CONFIG_KAISER + /* + * We might have come in between a swapgs and a SWITCH_KERNEL_CR3 + * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit. + * Do a conditional SWITCH_KERNEL_CR3: this could safely be done + * unconditionally, but we need to find out whether the reverse + * should be done on return (conveyed to paranoid_exit in %ebx). + */ + movq %cr3, %rax + testl $KAISER_SHADOW_PGD_OFFSET, %eax + jz 2f + orl $2, %ebx + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax + orq x86_cr3_pcid_noflush, %rax + movq %rax, %cr3 +2: +#endif + ret END(paranoid_entry) /* @@ -1080,20 +1101,25 @@ END(paranoid_entry) * be complicated. Fortunately, we there's no good reason * to try to handle preemption here. * - * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) + * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3 + * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 + * ebx=2: needs both swapgs and SWITCH_USER_CR3 + * ebx=3: needs SWITCH_USER_CR3 but not swapgs */ ENTRY(paranoid_exit) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF_DEBUG - testl %ebx, %ebx /* swapgs needed? */ + TRACE_IRQS_IRETQ_DEBUG +#ifdef CONFIG_KAISER + testl $2, %ebx /* SWITCH_USER_CR3 needed? */ + jz paranoid_exit_no_switch + SWITCH_USER_CR3 +paranoid_exit_no_switch: +#endif + testl $1, %ebx /* swapgs needed? */ jnz paranoid_exit_no_swapgs - TRACE_IRQS_IRETQ - SWITCH_USER_CR3_NO_STACK SWAPGS_UNSAFE_STACK - jmp paranoid_exit_restore paranoid_exit_no_swapgs: - TRACE_IRQS_IRETQ_DEBUG -paranoid_exit_restore: RESTORE_EXTRA_REGS RESTORE_C_REGS REMOVE_PT_GPREGS_FROM_STACK 8 diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 0eb5801947ff..d76a97653980 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -343,7 +343,7 @@ ENTRY(entry_INT80_compat) /* Go back to user mode. */ TRACE_IRQS_ON - SWITCH_USER_CR3_NO_STACK + SWITCH_USER_CR3 SWAPGS jmp restore_regs_and_iret END(entry_INT80_compat) diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index 48d8d70dd8c7..3dc5f4c39b3e 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -63,20 +63,12 @@ _SWITCH_TO_KERNEL_CR3 %rax movq PER_CPU_VAR(unsafe_stack_register_backup), %rax .endm -.macro SWITCH_USER_CR3_NO_STACK -movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) -_SWITCH_TO_USER_CR3 %rax %al -movq PER_CPU_VAR(unsafe_stack_register_backup), %rax -.endm - #else /* CONFIG_KAISER */ .macro SWITCH_KERNEL_CR3 reg .endm .macro SWITCH_USER_CR3 reg regb .endm -.macro SWITCH_USER_CR3_NO_STACK -.endm .macro SWITCH_KERNEL_CR3_NO_STACK .endm -- GitLab From 3df146178706bf39cfcb22d7fe1a378af6dc5896 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 2 Oct 2017 10:57:24 -0700 Subject: [PATCH 0768/1398] kaiser: kaiser_remove_mapping() move along the pgd When removing the bogus comment from kaiser_remove_mapping(), I really ought to have checked the extent of its bogosity: as Neel points out, there is nothing to stop unmap_pud_range_nofree() from continuing beyond the end of a pud (and starting in the wrong position on the next). Fix kaiser_remove_mapping() to constrain the extent and advance pgd pointer correctly: use pgd_addr_end() macro as used throughout base mm (but don't assume page-rounded start and size in this case). But this bug was very unlikely to trigger in this backport: since any buddy allocation is contained within a single pud extent, and we are not using vmapped stacks (and are only mapping one page of stack anyway): the only way to hit this bug here would be when freeing a large modified ldt. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kaiser.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index fa1cb0990551..cc0950f756d5 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -319,11 +319,13 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size) extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end); unsigned long end = start + size; - unsigned long addr; + unsigned long addr, next; + pgd_t *pgd; - for (addr = start; addr < end; addr += PGDIR_SIZE) { - pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr)); - unmap_pud_range_nofree(pgd, addr, end); + pgd = native_get_shadow_pgd(pgd_offset_k(start)); + for (addr = start; addr < end; pgd++, addr = next) { + next = pgd_addr_end(addr, end); + unmap_pud_range_nofree(pgd, addr, next); } } -- GitLab From cb7d8d7e6737cad77d5eaba9f9c95f3322706198 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 4 Dec 2017 20:13:35 -0800 Subject: [PATCH 0769/1398] kaiser: fix unlikely error in alloc_ldt_struct() An error from kaiser_add_mapping() here is not at all likely, but Eric Biggers rightly points out that __free_ldt_struct() relies on new_ldt->size being initialized: move that up. Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/ldt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 567bdbd7d917..8bc68cfc0d33 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -79,11 +79,11 @@ static struct ldt_struct *alloc_ldt_struct(int size) ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size, __PAGE_KERNEL); + new_ldt->size = size; if (ret) { __free_ldt_struct(new_ldt); return NULL; } - new_ldt->size = size; return new_ldt; } -- GitLab From 23e09439aa460e4e194c8d2542a16977ee8ed142 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 24 Sep 2017 16:59:49 -0700 Subject: [PATCH 0770/1398] kaiser: add "nokaiser" boot option, using ALTERNATIVE Added "nokaiser" boot option: an early param like "noinvpcid". Most places now check int kaiser_enabled (#defined 0 when not CONFIG_KAISER) instead of #ifdef CONFIG_KAISER; but entry_64.S and entry_64_compat.S are using the ALTERNATIVE technique, which patches in the preferred instructions at runtime. That technique is tied to x86 cpu features, so X86_FEATURE_KAISER is fabricated. Prior to "nokaiser", Kaiser #defined _PAGE_GLOBAL 0: revert that, but be careful with both _PAGE_GLOBAL and CR4.PGE: setting them when nokaiser like when !CONFIG_KAISER, but not setting either when kaiser - neither matters on its own, but it's hard to be sure that _PAGE_GLOBAL won't get set in some obscure corner, or something add PGE into CR4. By omitting _PAGE_GLOBAL from __supported_pte_mask when kaiser_enabled, all page table setup which uses pte_pfn() masks it out of the ptes. It's slightly shameful that the same declaration versus definition of kaiser_enabled appears in not one, not two, but in three header files (asm/kaiser.h, asm/pgtable.h, asm/tlbflush.h). I felt safer that way, than with #including any of those in any of the others; and did not feel it worth an asm/kaiser_enabled.h - kernel/cpu/common.c includes them all, so we shall hear about it if they get out of synch. Cleanups while in the area: removed the silly #ifdef CONFIG_KAISER from kaiser.c; removed the unused native_get_normal_pgd(); removed the spurious reg clutter from SWITCH_*_CR3 macro stubs; corrected some comments. But more interestingly, set CR4.PSE in secondary_startup_64: the manual is clear that it does not matter whether it's 0 or 1 when 4-level-pts are enabled, but I was distracted to find cr4 different on BSP and auxiliaries - BSP alone was adding PSE, in probe_page_size_mask(). Signed-off-by: Hugh Dickins Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 2 ++ arch/x86/entry/entry_64.S | 15 ++++----- arch/x86/include/asm/cpufeatures.h | 3 ++ arch/x86/include/asm/kaiser.h | 27 +++++++++++----- arch/x86/include/asm/pgtable.h | 20 ++++++++---- arch/x86/include/asm/pgtable_64.h | 13 +++----- arch/x86/include/asm/pgtable_types.h | 4 --- arch/x86/include/asm/tlbflush.h | 39 +++++++++++++++--------- arch/x86/kernel/cpu/common.c | 28 ++++++++++++++++- arch/x86/kernel/espfix_64.c | 3 +- arch/x86/kernel/head_64.S | 4 +-- arch/x86/mm/init.c | 2 +- arch/x86/mm/init_64.c | 10 ++++++ arch/x86/mm/kaiser.c | 26 +++++++++++++--- arch/x86/mm/pgtable.c | 8 ++--- arch/x86/mm/tlb.c | 4 +-- tools/arch/x86/include/asm/cpufeatures.h | 3 ++ 17 files changed, 146 insertions(+), 65 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 152ec4e87b57..887f0eb7943d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2763,6 +2763,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nojitter [IA-64] Disables jitter checking for ITC timers. + nokaiser [X86-64] Disable KAISER isolation of kernel from user. + no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 34a172facf17..4e5c41005dd0 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1079,7 +1079,7 @@ ENTRY(paranoid_entry) * unconditionally, but we need to find out whether the reverse * should be done on return (conveyed to paranoid_exit in %ebx). */ - movq %cr3, %rax + ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER testl $KAISER_SHADOW_PGD_OFFSET, %eax jz 2f orl $2, %ebx @@ -1111,6 +1111,7 @@ ENTRY(paranoid_exit) TRACE_IRQS_OFF_DEBUG TRACE_IRQS_IRETQ_DEBUG #ifdef CONFIG_KAISER + /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */ testl $2, %ebx /* SWITCH_USER_CR3 needed? */ jz paranoid_exit_no_switch SWITCH_USER_CR3 @@ -1341,13 +1342,14 @@ ENTRY(nmi) #ifdef CONFIG_KAISER /* Unconditionally use kernel CR3 for do_nmi() */ /* %rax is saved above, so OK to clobber here */ - movq %cr3, %rax + ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ orq x86_cr3_pcid_noflush, %rax pushq %rax /* mask off "user" bit of pgd address and 12 PCID bits: */ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax movq %rax, %cr3 +2: #endif call do_nmi @@ -1357,8 +1359,7 @@ ENTRY(nmi) * kernel code that needs user CR3, but do we ever return * to "user mode" where we need the kernel CR3? */ - popq %rax - mov %rax, %cr3 + ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER #endif /* @@ -1585,13 +1586,14 @@ end_repeat_nmi: #ifdef CONFIG_KAISER /* Unconditionally use kernel CR3 for do_nmi() */ /* %rax is saved above, so OK to clobber here */ - movq %cr3, %rax + ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ orq x86_cr3_pcid_noflush, %rax pushq %rax /* mask off "user" bit of pgd address and 12 PCID bits: */ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax movq %rax, %cr3 +2: #endif /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ @@ -1603,8 +1605,7 @@ end_repeat_nmi: * kernel code that needs user CR3, like just just before * a sysret. */ - popq %rax - mov %rax, %cr3 + ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER #endif testl %ebx, %ebx /* swapgs needed? */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index dc508830d0a1..20271d6b0eb9 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -198,6 +198,9 @@ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ +#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index 3dc5f4c39b3e..96643a9c194c 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -46,28 +46,33 @@ movq \reg, %cr3 .endm .macro SWITCH_KERNEL_CR3 -pushq %rax +ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER _SWITCH_TO_KERNEL_CR3 %rax popq %rax +8: .endm .macro SWITCH_USER_CR3 -pushq %rax +ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER _SWITCH_TO_USER_CR3 %rax %al popq %rax +8: .endm .macro SWITCH_KERNEL_CR3_NO_STACK -movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) +ALTERNATIVE "jmp 8f", \ + __stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \ + X86_FEATURE_KAISER _SWITCH_TO_KERNEL_CR3 %rax movq PER_CPU_VAR(unsafe_stack_register_backup), %rax +8: .endm #else /* CONFIG_KAISER */ -.macro SWITCH_KERNEL_CR3 reg +.macro SWITCH_KERNEL_CR3 .endm -.macro SWITCH_USER_CR3 reg regb +.macro SWITCH_USER_CR3 .endm .macro SWITCH_KERNEL_CR3_NO_STACK .endm @@ -90,6 +95,16 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; +extern int kaiser_enabled; +#else +#define kaiser_enabled 0 +#endif /* CONFIG_KAISER */ + +/* + * Kaiser function prototypes are needed even when CONFIG_KAISER is not set, + * so as to build with tests on kaiser_enabled instead of #ifdefs. + */ + /** * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping * @addr: the start address of the range @@ -119,8 +134,6 @@ extern void kaiser_remove_mapping(unsigned long start, unsigned long size); */ extern void kaiser_init(void); -#endif /* CONFIG_KAISER */ - #endif /* __ASSEMBLY */ #endif /* _ASM_X86_KAISER_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1cee98e182b7..217e83a657e2 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -18,6 +18,12 @@ #ifndef __ASSEMBLY__ #include +#ifdef CONFIG_KAISER +extern int kaiser_enabled; +#else +#define kaiser_enabled 0 +#endif + void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); void ptdump_walk_pgd_level_checkwx(void); @@ -697,7 +703,7 @@ static inline int pgd_bad(pgd_t pgd) * page table by accident; it will fault on the first * instruction it tries to run. See native_set_pgd(). */ - if (IS_ENABLED(CONFIG_KAISER)) + if (kaiser_enabled) ignore_flags |= _PAGE_NX; return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; @@ -913,12 +919,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, */ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { - memcpy(dst, src, count * sizeof(pgd_t)); + memcpy(dst, src, count * sizeof(pgd_t)); #ifdef CONFIG_KAISER - /* Clone the shadow pgd part as well */ - memcpy(native_get_shadow_pgd(dst), - native_get_shadow_pgd(src), - count * sizeof(pgd_t)); + if (kaiser_enabled) { + /* Clone the shadow pgd part as well */ + memcpy(native_get_shadow_pgd(dst), + native_get_shadow_pgd(src), + count * sizeof(pgd_t)); + } #endif } diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 177caf3f7ab1..cf68b5c1cb74 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -111,13 +111,12 @@ extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd); static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) { +#ifdef CONFIG_DEBUG_VM + /* linux/mmdebug.h may not have been included at this point */ + BUG_ON(!kaiser_enabled); +#endif return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE); } - -static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) -{ - return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE); -} #else static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) { @@ -128,10 +127,6 @@ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) BUILD_BUG_ON(1); return NULL; } -static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) -{ - return pgdp; -} #endif /* CONFIG_KAISER */ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 7cf2883d7bc4..f0d9a1aa3f68 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -45,11 +45,7 @@ #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) -#ifdef CONFIG_KAISER -#define _PAGE_GLOBAL (_AT(pteval_t, 0)) -#else #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) -#endif #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 01584d488754..bdd69a3158dd 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -137,9 +137,11 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) * to avoid the need for asm/kaiser.h in unexpected places. */ #ifdef CONFIG_KAISER +extern int kaiser_enabled; extern void kaiser_setup_pcid(void); extern void kaiser_flush_tlb_on_return_to_user(void); #else +#define kaiser_enabled 0 static inline void kaiser_setup_pcid(void) { } @@ -164,7 +166,7 @@ static inline void __native_flush_tlb(void) * back: */ preempt_disable(); - if (this_cpu_has(X86_FEATURE_PCID)) + if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) kaiser_flush_tlb_on_return_to_user(); native_write_cr3(native_read_cr3()); preempt_enable(); @@ -175,20 +177,30 @@ static inline void __native_flush_tlb_global_irq_disabled(void) unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); - /* clear PGE */ - native_write_cr4(cr4 & ~X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); + if (cr4 & X86_CR4_PGE) { + /* clear PGE and flush TLB of all entries */ + native_write_cr4(cr4 & ~X86_CR4_PGE); + /* restore PGE as it was before */ + native_write_cr4(cr4); + } else { + /* + * x86_64 microcode update comes this way when CR4.PGE is not + * enabled, and it's safer for all callers to allow this case. + */ + native_write_cr3(native_read_cr3()); + } } static inline void __native_flush_tlb_global(void) { -#ifdef CONFIG_KAISER - /* Globals are not used at all */ - __native_flush_tlb(); -#else unsigned long flags; + if (kaiser_enabled) { + /* Globals are not used at all */ + __native_flush_tlb(); + return; + } + if (this_cpu_has(X86_FEATURE_INVPCID)) { /* * Using INVPCID is considerably faster than a pair of writes @@ -208,7 +220,6 @@ static inline void __native_flush_tlb_global(void) raw_local_irq_save(flags); __native_flush_tlb_global_irq_disabled(); raw_local_irq_restore(flags); -#endif } static inline void __native_flush_tlb_single(unsigned long addr) @@ -223,7 +234,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) */ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { - if (this_cpu_has(X86_FEATURE_PCID)) + if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) kaiser_flush_tlb_on_return_to_user(); asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); return; @@ -238,9 +249,9 @@ static inline void __native_flush_tlb_single(unsigned long addr) * Make sure to do only a single invpcid when KAISER is * disabled and we have only a single ASID. */ - if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER) - invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); - invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); + if (kaiser_enabled) + invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); + invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); } static inline void __flush_tlb_all(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e6be5f3b1259..8b03874396b9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -179,6 +179,20 @@ static int __init x86_pcid_setup(char *s) return 1; } __setup("nopcid", x86_pcid_setup); + +static int __init x86_nokaiser_setup(char *s) +{ + /* nokaiser doesn't accept parameters */ + if (s) + return -EINVAL; +#ifdef CONFIG_KAISER + kaiser_enabled = 0; + setup_clear_cpu_cap(X86_FEATURE_KAISER); + pr_info("nokaiser: KAISER feature disabled\n"); +#endif + return 0; +} +early_param("nokaiser", x86_nokaiser_setup); #endif static int __init x86_noinvpcid_setup(char *s) @@ -327,7 +341,7 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) static void setup_pcid(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_PCID)) { - if (cpu_has(c, X86_FEATURE_PGE)) { + if (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled) { cr4_set_bits(X86_CR4_PCIDE); /* * INVPCID has two "groups" of types: @@ -799,6 +813,10 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); init_scattered_cpuid_features(c); +#ifdef CONFIG_KAISER + if (kaiser_enabled) + set_cpu_cap(c, X86_FEATURE_KAISER); +#endif } static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) @@ -1537,6 +1555,14 @@ void cpu_init(void) * try to read it. */ cr4_init_shadow(); + if (!kaiser_enabled) { + /* + * secondary_startup_64() deferred setting PGE in cr4: + * probe_page_size_mask() sets it on the boot cpu, + * but it needs to be set on each secondary cpu. + */ + cr4_set_bits(X86_CR4_PGE); + } /* * Load microcode on this cpu if a valid microcode is available. diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 560c2fd73b28..e33b38541be3 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -132,9 +132,10 @@ void __init init_espfix_bsp(void) * area to ensure it is mapped into the shadow user page * tables. */ - if (IS_ENABLED(CONFIG_KAISER)) + if (kaiser_enabled) { set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page))); + } /* Randomize the locations */ init_espfix_random(); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 5775379237f7..d04479b4a8ee 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -190,8 +190,8 @@ ENTRY(secondary_startup_64) movq $(init_level4_pgt - __START_KERNEL_map), %rax 1: - /* Enable PAE mode and PGE */ - movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx + /* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */ + movl $(X86_CR4_PAE | X86_CR4_PSE), %ecx movq %rcx, %cr4 /* Setup early boot stage 4 level pagetables. */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 0381638168d1..1e779bca4f3e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -177,7 +177,7 @@ static void __init probe_page_size_mask(void) cr4_set_bits_and_update_boot(X86_CR4_PSE); /* Enable PGE if available */ - if (boot_cpu_has(X86_FEATURE_PGE)) { + if (boot_cpu_has(X86_FEATURE_PGE) && !kaiser_enabled) { cr4_set_bits_and_update_boot(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; } else diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3e27ded6ac65..7df8e3a79dc0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -324,6 +324,16 @@ void __init cleanup_highmap(void) continue; if (vaddr < (unsigned long) _text || vaddr > end) set_pmd(pmd, __pmd(0)); + else if (kaiser_enabled) { + /* + * level2_kernel_pgt is initialized with _PAGE_GLOBAL: + * clear that now. This is not important, so long as + * CR4.PGE remains clear, but it removes an anomaly. + * Physical mapping setup below avoids _PAGE_GLOBAL + * by use of massage_pgprot() inside pfn_pte() etc. + */ + set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL)); + } } } diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index cc0950f756d5..11032dcf9af4 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -16,7 +16,9 @@ #include #include -#ifdef CONFIG_KAISER +int kaiser_enabled __read_mostly = 1; +EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */ + __visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); @@ -167,8 +169,8 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) return pte_offset_kernel(pmd, address); } -int kaiser_add_user_map(const void *__start_addr, unsigned long size, - unsigned long flags) +static int kaiser_add_user_map(const void *__start_addr, unsigned long size, + unsigned long flags) { int ret = 0; pte_t *pte; @@ -177,6 +179,15 @@ int kaiser_add_user_map(const void *__start_addr, unsigned long size, unsigned long end_addr = PAGE_ALIGN(start_addr + size); unsigned long target_address; + /* + * It is convenient for callers to pass in __PAGE_KERNEL etc, + * and there is no actual harm from setting _PAGE_GLOBAL, so + * long as CR4.PGE is not set. But it is nonetheless troubling + * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser" + * requires that not to be #defined to 0): so mask it off here. + */ + flags &= ~_PAGE_GLOBAL; + for (; address < end_addr; address += PAGE_SIZE) { target_address = get_pa_from_mapping(address); if (target_address == -1) { @@ -263,6 +274,8 @@ void __init kaiser_init(void) { int cpu; + if (!kaiser_enabled) + return; kaiser_init_all_pgds(); for_each_possible_cpu(cpu) { @@ -311,6 +324,8 @@ void __init kaiser_init(void) /* Add a mapping to the shadow mapping, and synchronize the mappings */ int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) { + if (!kaiser_enabled) + return 0; return kaiser_add_user_map((const void *)addr, size, flags); } @@ -322,6 +337,8 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size) unsigned long addr, next; pgd_t *pgd; + if (!kaiser_enabled) + return; pgd = native_get_shadow_pgd(pgd_offset_k(start)); for (addr = start; addr < end; pgd++, addr = next) { next = pgd_addr_end(addr, end); @@ -343,6 +360,8 @@ static inline bool is_userspace_pgd(pgd_t *pgdp) pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) { + if (!kaiser_enabled) + return pgd; /* * Do we need to also populate the shadow pgd? Check _PAGE_USER to * skip cases like kexec and EFI which make temporary low mappings. @@ -399,4 +418,3 @@ void kaiser_flush_tlb_on_return_to_user(void) X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); } EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); -#endif /* CONFIG_KAISER */ diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 352fd015bc63..5aaec8effc5f 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -345,16 +345,12 @@ static inline void _pgd_free(pgd_t *pgd) } #else -#ifdef CONFIG_KAISER /* - * Instead of one pmd, we aquire two pmds. Being order-1, it is + * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is * both 8k in size and 8k-aligned. That lets us just flip bit 12 * in a pointer to swap between the two 4k halves. */ -#define PGD_ALLOCATION_ORDER 1 -#else -#define PGD_ALLOCATION_ORDER 0 -#endif +#define PGD_ALLOCATION_ORDER kaiser_enabled static inline pgd_t *_pgd_alloc(void) { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 7759b650d9da..e7934aed8d67 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -39,8 +39,7 @@ static void load_new_mm_cr3(pgd_t *pgdir) { unsigned long new_mm_cr3 = __pa(pgdir); -#ifdef CONFIG_KAISER - if (this_cpu_has(X86_FEATURE_PCID)) { + if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) { /* * We reuse the same PCID for different tasks, so we must * flush all the entries for the PCID out when we change tasks. @@ -57,7 +56,6 @@ static void load_new_mm_cr3(pgd_t *pgdir) new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; kaiser_flush_tlb_on_return_to_user(); } -#endif /* CONFIG_KAISER */ /* * Caution: many callers of this function expect diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index a39629206864..67c93d9f5ccd 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -197,6 +197,9 @@ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ +#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ -- GitLab From 50624dd12d6df936f80cb9f02e9a0e40197b5d91 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 2 Jan 2018 14:19:48 +0100 Subject: [PATCH 0771/1398] x86/kaiser: Rename and simplify X86_FEATURE_KAISER handling Concentrate it in arch/x86/mm/kaiser.c and use the upstream string "nopti". Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 2 +- arch/x86/kernel/cpu/common.c | 18 ------------------ arch/x86/mm/kaiser.c | 20 +++++++++++++++++++- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 887f0eb7943d..f8c9d9168ca3 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2763,7 +2763,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nojitter [IA-64] Disables jitter checking for ITC timers. - nokaiser [X86-64] Disable KAISER isolation of kernel from user. + nopti [X86-64] Disable KAISER isolation of kernel from user. no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8b03874396b9..918e44772b04 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -179,20 +179,6 @@ static int __init x86_pcid_setup(char *s) return 1; } __setup("nopcid", x86_pcid_setup); - -static int __init x86_nokaiser_setup(char *s) -{ - /* nokaiser doesn't accept parameters */ - if (s) - return -EINVAL; -#ifdef CONFIG_KAISER - kaiser_enabled = 0; - setup_clear_cpu_cap(X86_FEATURE_KAISER); - pr_info("nokaiser: KAISER feature disabled\n"); -#endif - return 0; -} -early_param("nokaiser", x86_nokaiser_setup); #endif static int __init x86_noinvpcid_setup(char *s) @@ -813,10 +799,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); init_scattered_cpuid_features(c); -#ifdef CONFIG_KAISER - if (kaiser_enabled) - set_cpu_cap(c, X86_FEATURE_KAISER); -#endif } static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index 11032dcf9af4..87cae72dccb0 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -274,8 +274,13 @@ void __init kaiser_init(void) { int cpu; - if (!kaiser_enabled) + if (!kaiser_enabled) { + setup_clear_cpu_cap(X86_FEATURE_KAISER); return; + } + + setup_force_cpu_cap(X86_FEATURE_KAISER); + kaiser_init_all_pgds(); for_each_possible_cpu(cpu) { @@ -418,3 +423,16 @@ void kaiser_flush_tlb_on_return_to_user(void) X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); } EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); + +static int __init x86_nokaiser_setup(char *s) +{ + /* nopti doesn't accept parameters */ + if (s) + return -EINVAL; + + kaiser_enabled = 0; + pr_info("Kernel/User page tables isolation: disabled\n"); + + return 0; +} +early_param("nopti", x86_nokaiser_setup); -- GitLab From 8018307a45a90ab2eecfd03d48b7efb31707df37 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 2 Jan 2018 14:19:48 +0100 Subject: [PATCH 0772/1398] x86/kaiser: Check boottime cmdline params AMD (and possibly other vendors) are not affected by the leak KAISER is protecting against. Keep the "nopti" for traditional reasons and add pti= like upstream. Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 6 +++ arch/x86/mm/kaiser.c | 59 ++++++++++++++++++++--------- 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f8c9d9168ca3..5d2676d043de 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3327,6 +3327,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. pt. [PARIDE] See Documentation/blockdev/paride.txt. + pti= [X86_64] + Control KAISER user/kernel address space isolation: + on - enable + off - disable + auto - default setting + pty.legacy_count= [KNL] Number of legacy pty's. Overwrites compiled-in default number. diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index 87cae72dccb0..1840aa0a052d 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -15,6 +15,7 @@ #include #include #include +#include int kaiser_enabled __read_mostly = 1; EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */ @@ -263,6 +264,43 @@ static void __init kaiser_init_all_pgds(void) WARN_ON(__ret); \ } while (0) +void __init kaiser_check_boottime_disable(void) +{ + bool enable = true; + char arg[5]; + int ret; + + ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); + if (ret > 0) { + if (!strncmp(arg, "on", 2)) + goto enable; + + if (!strncmp(arg, "off", 3)) + goto disable; + + if (!strncmp(arg, "auto", 4)) + goto skip; + } + + if (cmdline_find_option_bool(boot_command_line, "nopti")) + goto disable; + +skip: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + goto disable; + +enable: + if (enable) + setup_force_cpu_cap(X86_FEATURE_KAISER); + + return; + +disable: + pr_info("Kernel/User page tables isolation: disabled\n"); + kaiser_enabled = 0; + setup_clear_cpu_cap(X86_FEATURE_KAISER); +} + /* * If anything in here fails, we will likely die on one of the * first kernel->user transitions and init will die. But, we @@ -274,12 +312,10 @@ void __init kaiser_init(void) { int cpu; - if (!kaiser_enabled) { - setup_clear_cpu_cap(X86_FEATURE_KAISER); - return; - } + kaiser_check_boottime_disable(); - setup_force_cpu_cap(X86_FEATURE_KAISER); + if (!kaiser_enabled) + return; kaiser_init_all_pgds(); @@ -423,16 +459,3 @@ void kaiser_flush_tlb_on_return_to_user(void) X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); } EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); - -static int __init x86_nokaiser_setup(char *s) -{ - /* nopti doesn't accept parameters */ - if (s) - return -EINVAL; - - kaiser_enabled = 0; - pr_info("Kernel/User page tables isolation: disabled\n"); - - return 0; -} -early_param("nopti", x86_nokaiser_setup); -- GitLab From 169b369f99affebb24db16f4fed58a2f8ff4060f Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 3 Oct 2017 20:49:04 -0700 Subject: [PATCH 0773/1398] kaiser: use ALTERNATIVE instead of x86_cr3_pcid_noflush Now that we're playing the ALTERNATIVE game, use that more efficient method: instead of user-mapping an extra page, and reading an extra cacheline each time for x86_cr3_pcid_noflush. Neel has found that __stringify(bts $X86_CR3_PCID_NOFLUSH_BIT, %rax) is a working substitute for the "bts $63, %rax" in these ALTERNATIVEs; but the one line with $63 in looks clearer, so let's stick with that. Worried about what happens with an ALTERNATIVE between the jump and jump label in another ALTERNATIVE? I was, but have checked the combinations in SWITCH_KERNEL_CR3_NO_STACK at entry_SYSCALL_64, and it does a good job. Signed-off-by: Hugh Dickins Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 7 ++++--- arch/x86/include/asm/kaiser.h | 6 +++--- arch/x86/mm/kaiser.c | 11 +---------- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4e5c41005dd0..f273fed682aa 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1084,7 +1084,8 @@ ENTRY(paranoid_entry) jz 2f orl $2, %ebx andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax - orq x86_cr3_pcid_noflush, %rax + /* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */ + ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID movq %rax, %cr3 2: #endif @@ -1344,7 +1345,7 @@ ENTRY(nmi) /* %rax is saved above, so OK to clobber here */ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ - orq x86_cr3_pcid_noflush, %rax + ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID pushq %rax /* mask off "user" bit of pgd address and 12 PCID bits: */ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax @@ -1588,7 +1589,7 @@ end_repeat_nmi: /* %rax is saved above, so OK to clobber here */ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ - orq x86_cr3_pcid_noflush, %rax + ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID pushq %rax /* mask off "user" bit of pgd address and 12 PCID bits: */ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index 96643a9c194c..906150d6094e 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -25,7 +25,8 @@ .macro _SWITCH_TO_KERNEL_CR3 reg movq %cr3, \reg andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg -orq x86_cr3_pcid_noflush, \reg +/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */ +ALTERNATIVE "", "bts $63, \reg", X86_FEATURE_PCID movq \reg, %cr3 .endm @@ -39,7 +40,7 @@ movq \reg, %cr3 movq %cr3, \reg orq PER_CPU_VAR(x86_cr3_pcid_user), \reg js 9f -/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */ +/* If PCID enabled, FLUSH this time, reset to NOFLUSH for next time */ movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7) 9: movq \reg, %cr3 @@ -90,7 +91,6 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax */ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); -extern unsigned long x86_cr3_pcid_noflush; DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index 1840aa0a052d..b8aa9ad59c0f 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -31,7 +31,6 @@ DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); * This is also handy because systems that do not support PCIDs * just end up or'ing a 0 into their CR3, which does no harm. */ -unsigned long x86_cr3_pcid_noflush __read_mostly; DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user); /* @@ -356,10 +355,6 @@ void __init kaiser_init(void) kaiser_add_user_map_early(&debug_idt_table, sizeof(gate_desc) * NR_VECTORS, __PAGE_KERNEL); - - kaiser_add_user_map_early(&x86_cr3_pcid_noflush, - sizeof(x86_cr3_pcid_noflush), - __PAGE_KERNEL); } /* Add a mapping to the shadow mapping, and synchronize the mappings */ @@ -433,18 +428,14 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) void kaiser_setup_pcid(void) { - unsigned long kern_cr3 = 0; unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET; - if (this_cpu_has(X86_FEATURE_PCID)) { - kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH; + if (this_cpu_has(X86_FEATURE_PCID)) user_cr3 |= X86_CR3_PCID_USER_NOFLUSH; - } /* * These variables are used by the entry/exit * code to change PCID and pgd and TLB flushing. */ - x86_cr3_pcid_noflush = kern_cr3; this_cpu_write(x86_cr3_pcid_user, user_cr3); } -- GitLab From 8c2f8a5cc15b90674e0144e453544b7bfb1340df Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 29 Oct 2017 11:36:19 -0700 Subject: [PATCH 0774/1398] kaiser: drop is_atomic arg to kaiser_pagetable_walk() I have not observed a might_sleep() warning from setup_fixmap_gdt()'s use of kaiser_add_mapping() in our tree (why not?), but like upstream we have not provided a way for that to pass is_atomic true down to kaiser_pagetable_walk(), and at startup it's far from a likely source of trouble: so just delete the walk's is_atomic arg and might_sleep(). Signed-off-by: Hugh Dickins Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kaiser.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index b8aa9ad59c0f..65ac3fddb4f6 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -107,19 +107,13 @@ static inline unsigned long get_pa_from_mapping(unsigned long vaddr) * * Returns a pointer to a PTE on success, or NULL on failure. */ -static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) +static pte_t *kaiser_pagetable_walk(unsigned long address) { pmd_t *pmd; pud_t *pud; pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); - if (is_atomic) { - gfp &= ~GFP_KERNEL; - gfp |= __GFP_HIGH | __GFP_ATOMIC; - } else - might_sleep(); - if (pgd_none(*pgd)) { WARN_ONCE(1, "All shadow pgds should have been populated"); return NULL; @@ -194,7 +188,7 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size, ret = -EIO; break; } - pte = kaiser_pagetable_walk(address, false); + pte = kaiser_pagetable_walk(address); if (!pte) { ret = -ENOMEM; break; -- GitLab From b72c26e911c566c78743d7973103925007d103c7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 4 Nov 2017 18:23:24 -0700 Subject: [PATCH 0775/1398] kaiser: asm/tlbflush.h handle noPGE at lower level I found asm/tlbflush.h too twisty, and think it safer not to avoid __native_flush_tlb_global_irq_disabled() in the kaiser_enabled case, but instead let it handle kaiser_enabled along with cr3: it can just use __native_flush_tlb() for that, no harm in re-disabling preemption. (This is not the same change as Kirill and Dave have suggested for upstream, flipping PGE in cr4: that's neat, but needs a cpu_has_pge check; cr3 is enough for kaiser, and thought to be cheaper than cr4.) Also delete the X86_FEATURE_INVPCID invpcid_flush_all_nonglobals() preference from __native_flush_tlb(): unlike the invpcid_flush_all() preference in __native_flush_tlb_global(), it's not seen in upstream 4.14, and was recently reported to be surprisingly slow. Signed-off-by: Hugh Dickins Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/tlbflush.h | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index bdd69a3158dd..3d863528f942 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -152,14 +152,6 @@ static inline void kaiser_flush_tlb_on_return_to_user(void) static inline void __native_flush_tlb(void) { - if (this_cpu_has(X86_FEATURE_INVPCID)) { - /* - * Note, this works with CR4.PCIDE=0 or 1. - */ - invpcid_flush_all_nonglobals(); - return; - } - /* * If current->mm == NULL then we borrow a mm which may change during a * task switch and therefore we must not be preempted while we write CR3 @@ -183,11 +175,8 @@ static inline void __native_flush_tlb_global_irq_disabled(void) /* restore PGE as it was before */ native_write_cr4(cr4); } else { - /* - * x86_64 microcode update comes this way when CR4.PGE is not - * enabled, and it's safer for all callers to allow this case. - */ - native_write_cr3(native_read_cr3()); + /* do it with cr3, letting kaiser flush user PCID */ + __native_flush_tlb(); } } @@ -195,12 +184,6 @@ static inline void __native_flush_tlb_global(void) { unsigned long flags; - if (kaiser_enabled) { - /* Globals are not used at all */ - __native_flush_tlb(); - return; - } - if (this_cpu_has(X86_FEATURE_INVPCID)) { /* * Using INVPCID is considerably faster than a pair of writes @@ -256,11 +239,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) static inline void __flush_tlb_all(void) { - if (boot_cpu_has(X86_FEATURE_PGE)) - __flush_tlb_global(); - else - __flush_tlb(); - + __flush_tlb_global(); /* * Note: if we somehow had PCID but not PGE, then this wouldn't work -- * we'd end up flushing kernel translations for the current ASID but -- GitLab From fe5cb75fd2dd51746fd391c7f6d18485e6a44f76 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 4 Nov 2017 18:43:06 -0700 Subject: [PATCH 0776/1398] kaiser: kaiser_flush_tlb_on_return_to_user() check PCID Let kaiser_flush_tlb_on_return_to_user() do the X86_FEATURE_PCID check, instead of each caller doing it inline first: nobody needs to optimize for the noPCID case, it's clearer this way, and better suits later changes. Replace those no-op X86_CR3_PCID_KERN_FLUSH lines by a BUILD_BUG_ON() in load_new_mm_cr3(), in case something changes. Signed-off-by: Hugh Dickins Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/tlbflush.h | 4 ++-- arch/x86/mm/kaiser.c | 6 +++--- arch/x86/mm/tlb.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 3d863528f942..5b423d5db40a 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -158,7 +158,7 @@ static inline void __native_flush_tlb(void) * back: */ preempt_disable(); - if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) + if (kaiser_enabled) kaiser_flush_tlb_on_return_to_user(); native_write_cr3(native_read_cr3()); preempt_enable(); @@ -217,7 +217,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) */ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { - if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) + if (kaiser_enabled) kaiser_flush_tlb_on_return_to_user(); asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); return; diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index 65ac3fddb4f6..8600663ded3e 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -435,12 +435,12 @@ void kaiser_setup_pcid(void) /* * Make a note that this cpu will need to flush USER tlb on return to user. - * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling: - * if cpu does not, then the NOFLUSH bit will never have been set. + * If cpu does not have PCID, then the NOFLUSH bit will never have been set. */ void kaiser_flush_tlb_on_return_to_user(void) { - this_cpu_write(x86_cr3_pcid_user, + if (this_cpu_has(X86_FEATURE_PCID)) + this_cpu_write(x86_cr3_pcid_user, X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); } EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index e7934aed8d67..41205de487e7 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -39,7 +39,7 @@ static void load_new_mm_cr3(pgd_t *pgdir) { unsigned long new_mm_cr3 = __pa(pgdir); - if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) { + if (kaiser_enabled) { /* * We reuse the same PCID for different tasks, so we must * flush all the entries for the PCID out when we change tasks. @@ -50,10 +50,10 @@ static void load_new_mm_cr3(pgd_t *pgdir) * do it here, but can only be used if X86_FEATURE_INVPCID is * available - and many machines support pcid without invpcid. * - * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0; - * but keep that line in there in case something changes. + * If X86_CR3_PCID_KERN_FLUSH actually added something, then it + * would be needed in the write_cr3() below - if PCIDs enabled. */ - new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; + BUILD_BUG_ON(X86_CR3_PCID_KERN_FLUSH); kaiser_flush_tlb_on_return_to_user(); } -- GitLab From 1817d2c2fac1b277d97c5a54316e6bfb430d0268 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:30 +0100 Subject: [PATCH 0777/1398] x86/paravirt: Dont patch flush_tlb_single commit a035795499ca1c2bd1928808d1a156eda1420383 upstream native_flush_tlb_single() will be changed with the upcoming PAGE_TABLE_ISOLATION feature. This requires to have more code in there than INVLPG. Remove the paravirt patching for it. Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Reviewed-by: Juergen Gross Acked-by: Peter Zijlstra Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Cc: michael.schwarz@iaik.tugraz.at Cc: moritz.lipp@iaik.tugraz.at Cc: richard.fellner@student.tugraz.at Link: https://lkml.kernel.org/r/20171204150606.828111617@linutronix.de Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/paravirt_patch_64.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index bb3840cedb4f..ee43b36075c7 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -9,7 +9,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); -DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); @@ -59,7 +58,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); PATCH_SITE(pv_cpu_ops, clts); - PATCH_SITE(pv_mmu_ops, flush_tlb_single); PATCH_SITE(pv_cpu_ops, wbinvd); #if defined(CONFIG_PARAVIRT_SPINLOCKS) case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): -- GitLab From 2c2721754a7f193c98188d07ee335b124ae2df77 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 2 Jan 2018 14:19:49 +0100 Subject: [PATCH 0778/1398] x86/kaiser: Reenable PARAVIRT Now that the required bits have been addressed, reenable PARAVIRT. Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- security/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/Kconfig b/security/Kconfig index d8ae933471ad..fd2ceebe126a 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -34,7 +34,7 @@ config SECURITY config KAISER bool "Remove the kernel mapping in user mode" default y - depends on X86_64 && SMP && !PARAVIRT + depends on X86_64 && SMP help This enforces a strict kernel and user space isolation, in order to close hardware side channels on kernel address information. -- GitLab From 402e63de94afdf7cd64e4eb209a8a77310e02d2c Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 2 Jan 2018 14:19:49 +0100 Subject: [PATCH 0779/1398] kaiser: disabled on Xen PV Kaiser cannot be used on paravirtualized MMUs (namely reading and writing CR3). This does not work with KAISER as the CR3 switch from and to user space PGD would require to map the whole XEN_PV machinery into both. More importantly, enabling KAISER on Xen PV doesn't make too much sense, as PV guests use distinct %cr3 values for kernel and user already. Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kaiser.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index 8600663ded3e..27688548c3eb 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -263,6 +263,9 @@ void __init kaiser_check_boottime_disable(void) char arg[5]; int ret; + if (boot_cpu_has(X86_FEATURE_XENPV)) + goto silent_disable; + ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); if (ret > 0) { if (!strncmp(arg, "on", 2)) @@ -290,6 +293,8 @@ void __init kaiser_check_boottime_disable(void) disable: pr_info("Kernel/User page tables isolation: disabled\n"); + +silent_disable: kaiser_enabled = 0; setup_clear_cpu_cap(X86_FEATURE_KAISER); } -- GitLab From 59094faf3f618b2d2b2a45acb916437d611cede6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 25 Dec 2017 13:57:16 +0100 Subject: [PATCH 0780/1398] x86/kaiser: Move feature detection up ... before the first use of kaiser_enabled as otherwise funky things happen: about to get started... (XEN) d0v0 Unhandled page fault fault/trap [#14, ec=0000] (XEN) Pagetable walk from ffff88022a449090: (XEN) L4[0x110] = 0000000229e0e067 0000000000001e0e (XEN) L3[0x008] = 0000000000000000 ffffffffffffffff (XEN) domain_crash_sync called from entry.S: fault at ffff82d08033fd08 entry.o#create_bounce_frame+0x135/0x14d (XEN) Domain 0 (vcpu#0) crashed on cpu#0: (XEN) ----[ Xen-4.9.1_02-3.21 x86_64 debug=n Not tainted ]---- (XEN) CPU: 0 (XEN) RIP: e033:[] (XEN) RFLAGS: 0000000000000286 EM: 1 CONTEXT: pv guest (d0v0) Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kaiser.h | 2 ++ arch/x86/kernel/setup.c | 7 +++++++ arch/x86/mm/kaiser.c | 2 -- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index 906150d6094e..b5e46aa683f4 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -96,8 +96,10 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; extern int kaiser_enabled; +extern void __init kaiser_check_boottime_disable(void); #else #define kaiser_enabled 0 +static inline void __init kaiser_check_boottime_disable(void) {} #endif /* CONFIG_KAISER */ /* diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index feaab07fa124..6b55012d02a3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -114,6 +114,7 @@ #include #include #include +#include /* * max_low_pfn_mapped: highest direct mapped pfn under 4GB @@ -1019,6 +1020,12 @@ void __init setup_arch(char **cmdline_p) */ init_hypervisor_platform(); + /* + * This needs to happen right after XENPV is set on xen and + * kaiser_enabled is checked below in cleanup_highmap(). + */ + kaiser_check_boottime_disable(); + x86_init.resources.probe_roms(); /* after parse_early_param, so could debug it */ diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index 27688548c3eb..d43f36912219 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -310,8 +310,6 @@ void __init kaiser_init(void) { int cpu; - kaiser_check_boottime_disable(); - if (!kaiser_enabled) return; -- GitLab From e71fac01727a9495f08de9db5259089bee311766 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Jan 2018 10:17:35 -0800 Subject: [PATCH 0781/1398] KPTI: Rename to PAGE_TABLE_ISOLATION This renames CONFIG_KAISER to CONFIG_PAGE_TABLE_ISOLATION. Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/misc.h | 2 +- arch/x86/entry/entry_64.S | 12 ++++++------ arch/x86/events/intel/ds.c | 4 ++-- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/kaiser.h | 12 ++++++------ arch/x86/include/asm/pgtable.h | 4 ++-- arch/x86/include/asm/pgtable_64.h | 4 ++-- arch/x86/include/asm/pgtable_types.h | 2 +- arch/x86/include/asm/tlbflush.h | 2 +- arch/x86/kernel/head_64.S | 2 +- arch/x86/mm/Makefile | 2 +- arch/x86/mm/kaslr.c | 2 +- include/linux/kaiser.h | 6 +++--- include/linux/percpu-defs.h | 2 +- security/Kconfig | 2 +- tools/arch/x86/include/asm/cpufeatures.h | 2 +- 16 files changed, 31 insertions(+), 31 deletions(-) diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 80e2cce110b9..2728e1b7e4a6 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -9,7 +9,7 @@ */ #undef CONFIG_PARAVIRT #undef CONFIG_PARAVIRT_SPINLOCKS -#undef CONFIG_KAISER +#undef CONFIG_PAGE_TABLE_ISOLATION #undef CONFIG_KASAN #include diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f273fed682aa..af4e58132d91 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1071,7 +1071,7 @@ ENTRY(paranoid_entry) SWAPGS xorl %ebx, %ebx 1: -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION /* * We might have come in between a swapgs and a SWITCH_KERNEL_CR3 * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit. @@ -1111,7 +1111,7 @@ ENTRY(paranoid_exit) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF_DEBUG TRACE_IRQS_IRETQ_DEBUG -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */ testl $2, %ebx /* SWITCH_USER_CR3 needed? */ jz paranoid_exit_no_switch @@ -1340,7 +1340,7 @@ ENTRY(nmi) movq %rsp, %rdi movq $-1, %rsi -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION /* Unconditionally use kernel CR3 for do_nmi() */ /* %rax is saved above, so OK to clobber here */ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER @@ -1354,7 +1354,7 @@ ENTRY(nmi) #endif call do_nmi -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION /* * Unconditionally restore CR3. I know we return to * kernel code that needs user CR3, but do we ever return @@ -1584,7 +1584,7 @@ end_repeat_nmi: 1: movq %rsp, %rdi movq $-1, %rsi -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION /* Unconditionally use kernel CR3 for do_nmi() */ /* %rax is saved above, so OK to clobber here */ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER @@ -1600,7 +1600,7 @@ end_repeat_nmi: /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ call do_nmi -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION /* * Unconditionally restore CR3. We might be returning to * kernel code that needs user CR3, like just just before diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 51170f69682d..8e7a3f1df3a5 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -274,7 +274,7 @@ static DEFINE_PER_CPU(void *, insn_buffer); static void *dsalloc(size_t size, gfp_t flags, int node) { -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION unsigned int order = get_order(size); struct page *page; unsigned long addr; @@ -295,7 +295,7 @@ static void *dsalloc(size_t size, gfp_t flags, int node) static void dsfree(const void *buffer, size_t size) { -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION if (!buffer) return; kaiser_remove_mapping((unsigned long)buffer, size); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 20271d6b0eb9..454a37adb823 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -199,7 +199,7 @@ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ -#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ +#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index b5e46aa683f4..802bbbdfe143 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -20,7 +20,7 @@ #define KAISER_SHADOW_PGD_OFFSET 0x1000 #ifdef __ASSEMBLY__ -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION .macro _SWITCH_TO_KERNEL_CR3 reg movq %cr3, \reg @@ -69,7 +69,7 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax 8: .endm -#else /* CONFIG_KAISER */ +#else /* CONFIG_PAGE_TABLE_ISOLATION */ .macro SWITCH_KERNEL_CR3 .endm @@ -78,11 +78,11 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax .macro SWITCH_KERNEL_CR3_NO_STACK .endm -#endif /* CONFIG_KAISER */ +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ #else /* __ASSEMBLY__ */ -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION /* * Upon kernel/user mode switch, it may happen that the address * space has to be switched before the registers have been @@ -100,10 +100,10 @@ extern void __init kaiser_check_boottime_disable(void); #else #define kaiser_enabled 0 static inline void __init kaiser_check_boottime_disable(void) {} -#endif /* CONFIG_KAISER */ +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ /* - * Kaiser function prototypes are needed even when CONFIG_KAISER is not set, + * Kaiser function prototypes are needed even when CONFIG_PAGE_TABLE_ISOLATION is not set, * so as to build with tests on kaiser_enabled instead of #ifdefs. */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 217e83a657e2..2536f90cd30c 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -18,7 +18,7 @@ #ifndef __ASSEMBLY__ #include -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION extern int kaiser_enabled; #else #define kaiser_enabled 0 @@ -920,7 +920,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { memcpy(dst, src, count * sizeof(pgd_t)); -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION if (kaiser_enabled) { /* Clone the shadow pgd part as well */ memcpy(native_get_shadow_pgd(dst), diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index cf68b5c1cb74..ce97c8c6a310 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -106,7 +106,7 @@ static inline void native_pud_clear(pud_t *pud) native_set_pud(pud, native_make_pud(0)); } -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd); static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) @@ -127,7 +127,7 @@ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) BUILD_BUG_ON(1); return NULL; } -#endif /* CONFIG_KAISER */ +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index f0d9a1aa3f68..f1c8ac468292 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -144,7 +144,7 @@ #define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK) #define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) -#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64) +#if defined(CONFIG_PAGE_TABLE_ISOLATION) && defined(CONFIG_X86_64) /* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */ #define X86_CR3_PCID_ASID_USER (_AC(0x80,UL)) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 5b423d5db40a..94146f665a3c 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -136,7 +136,7 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) * Declare a couple of kaiser interfaces here for convenience, * to avoid the need for asm/kaiser.h in unexpected places. */ -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION extern int kaiser_enabled; extern void kaiser_setup_pcid(void); extern void kaiser_flush_tlb_on_return_to_user(void); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d04479b4a8ee..67cd7c1b99da 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -405,7 +405,7 @@ GLOBAL(early_recursion_flag) .balign PAGE_SIZE; \ GLOBAL(name) -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION /* * Each PGD needs to be 8k long and 8k aligned. We do not * ever go out to userspace with these, so we do not diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index c505569e3835..c548b46100cb 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -38,4 +38,4 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o obj-$(CONFIG_X86_INTEL_MPX) += mpx.o obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o -obj-$(CONFIG_KAISER) += kaiser.o +obj-$(CONFIG_PAGE_TABLE_ISOLATION) += kaiser.o diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 9284ec1e8aa9..319183d93602 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -189,6 +189,6 @@ void __meminit init_trampoline(void) *pud_tramp = *pud; } - /* Avoid set_pgd(), in case it's complicated by CONFIG_KAISER */ + /* Avoid set_pgd(), in case it's complicated by CONFIG_PAGE_TABLE_ISOLATION */ trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)); } diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h index 4a4d6d911a14..58c55b1589d0 100644 --- a/include/linux/kaiser.h +++ b/include/linux/kaiser.h @@ -1,7 +1,7 @@ #ifndef _LINUX_KAISER_H #define _LINUX_KAISER_H -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION #include static inline int kaiser_map_thread_stack(void *stack) @@ -24,7 +24,7 @@ static inline void kaiser_unmap_thread_stack(void *stack) #else /* - * These stubs are used whenever CONFIG_KAISER is off, which + * These stubs are used whenever CONFIG_PAGE_TABLE_ISOLATION is off, which * includes architectures that support KAISER, but have it disabled. */ @@ -48,5 +48,5 @@ static inline void kaiser_unmap_thread_stack(void *stack) { } -#endif /* !CONFIG_KAISER */ +#endif /* !CONFIG_PAGE_TABLE_ISOLATION */ #endif /* _LINUX_KAISER_H */ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index cfe13cb4ec63..8902f23bb770 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -35,7 +35,7 @@ #endif -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION #define USER_MAPPED_SECTION "..user_mapped" #else #define USER_MAPPED_SECTION "" diff --git a/security/Kconfig b/security/Kconfig index fd2ceebe126a..32f36b40e9f0 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -31,7 +31,7 @@ config SECURITY If you are unsure how to answer this question, answer N. -config KAISER +config PAGE_TABLE_ISOLATION bool "Remove the kernel mapping in user mode" default y depends on X86_64 && SMP diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 67c93d9f5ccd..f79669a38c0c 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -198,7 +198,7 @@ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ -#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ +#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -- GitLab From ea6cd39d230f71e27facc0667c1986504e5b0f54 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Jan 2018 10:18:01 -0800 Subject: [PATCH 0782/1398] KPTI: Report when enabled Make sure dmesg reports when KPTI is enabled. Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kaiser.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index d43f36912219..b6b0f3aa4f77 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -10,6 +10,9 @@ #include #include +#undef pr_fmt +#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt + #include #include /* to verify its kaiser declarations */ #include @@ -292,7 +295,7 @@ void __init kaiser_check_boottime_disable(void) return; disable: - pr_info("Kernel/User page tables isolation: disabled\n"); + pr_info("disabled\n"); silent_disable: kaiser_enabled = 0; @@ -352,6 +355,8 @@ void __init kaiser_init(void) kaiser_add_user_map_early(&debug_idt_table, sizeof(gate_desc) * NR_VECTORS, __PAGE_KERNEL); + + pr_info("enabled\n"); } /* Add a mapping to the shadow mapping, and synchronize the mappings */ -- GitLab From 92fd81f772673ee51381337f07b1da94187de542 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 4 Jan 2018 13:41:55 -0800 Subject: [PATCH 0783/1398] kaiser: Set _PAGE_NX only if supported This resolves a crash if loaded under qemu + haxm under windows. See https://www.spinics.net/lists/kernel/msg2689835.html for details. Here is a boot log (the log is from chromeos-4.4, but Tao Wu says that the same log is also seen with vanilla v4.4.110-rc1). [ 0.712750] Freeing unused kernel memory: 552K [ 0.721821] init: Corrupted page table at address 57b029b332e0 [ 0.722761] PGD 80000000bb238067 PUD bc36a067 PMD bc369067 PTE 45d2067 [ 0.722761] Bad pagetable: 000b [#1] PREEMPT SMP [ 0.722761] Modules linked in: [ 0.722761] CPU: 1 PID: 1 Comm: init Not tainted 4.4.96 #31 [ 0.722761] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org 04/01/2014 [ 0.722761] task: ffff8800bc290000 ti: ffff8800bc28c000 task.ti: ffff8800bc28c000 [ 0.722761] RIP: 0010:[] [] __clear_user+0x42/0x67 [ 0.722761] RSP: 0000:ffff8800bc28fcf8 EFLAGS: 00010202 [ 0.722761] RAX: 0000000000000000 RBX: 00000000000001a4 RCX: 00000000000001a4 [ 0.722761] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 000057b029b332e0 [ 0.722761] RBP: ffff8800bc28fd08 R08: ffff8800bc290000 R09: ffff8800bb2f4000 [ 0.722761] R10: ffff8800bc290000 R11: ffff8800bb2f4000 R12: 000057b029b332e0 [ 0.722761] R13: 0000000000000000 R14: 000057b029b33340 R15: ffff8800bb1e2a00 [ 0.722761] FS: 0000000000000000(0000) GS:ffff8800bfb00000(0000) knlGS:0000000000000000 [ 0.722761] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 0.722761] CR2: 000057b029b332e0 CR3: 00000000bb2f8000 CR4: 00000000000006e0 [ 0.722761] Stack: [ 0.722761] 000057b029b332e0 ffff8800bb95fa80 ffff8800bc28fd18 ffffffff83f4120c [ 0.722761] ffff8800bc28fe18 ffffffff83e9e7a1 ffff8800bc28fd68 0000000000000000 [ 0.722761] ffff8800bc290000 ffff8800bc290000 ffff8800bc290000 ffff8800bc290000 [ 0.722761] Call Trace: [ 0.722761] [] clear_user+0x2e/0x30 [ 0.722761] [] load_elf_binary+0xa7f/0x18f7 [ 0.722761] [] search_binary_handler+0x86/0x19c [ 0.722761] [] do_execveat_common.isra.26+0x909/0xf98 [ 0.722761] [] ? rest_init+0x87/0x87 [ 0.722761] [] do_execve+0x23/0x25 [ 0.722761] [] run_init_process+0x2b/0x2d [ 0.722761] [] kernel_init+0x6d/0xda [ 0.722761] [] ret_from_fork+0x3f/0x70 [ 0.722761] [] ? rest_init+0x87/0x87 [ 0.722761] Code: 86 84 be 12 00 00 00 e8 87 0d e8 ff 66 66 90 48 89 d8 48 c1 eb 03 4c 89 e7 83 e0 07 48 89 d9 be 08 00 00 00 31 d2 48 85 c9 74 0a <48> 89 17 48 01 f7 ff c9 75 f6 48 89 c1 85 c9 74 09 88 17 48 ff [ 0.722761] RIP [] __clear_user+0x42/0x67 [ 0.722761] RSP [ 0.722761] ---[ end trace def703879b4ff090 ]--- [ 0.722761] BUG: sleeping function called from invalid context at /mnt/host/source/src/third_party/kernel/v4.4/kernel/locking/rwsem.c:21 [ 0.722761] in_atomic(): 0, irqs_disabled(): 1, pid: 1, name: init [ 0.722761] CPU: 1 PID: 1 Comm: init Tainted: G D 4.4.96 #31 [ 0.722761] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org 04/01/2014 [ 0.722761] 0000000000000086 dcb5d76098c89836 ffff8800bc28fa30 ffffffff83f34004 [ 0.722761] ffffffff84839dc2 0000000000000015 ffff8800bc28fa40 ffffffff83d57dc9 [ 0.722761] ffff8800bc28fa68 ffffffff83d57e6a ffffffff84a53640 0000000000000000 [ 0.722761] Call Trace: [ 0.722761] [] dump_stack+0x4d/0x63 [ 0.722761] [] ___might_sleep+0x13a/0x13c [ 0.722761] [] __might_sleep+0x9f/0xa6 [ 0.722761] [] down_read+0x20/0x31 [ 0.722761] [] __blocking_notifier_call_chain+0x35/0x63 [ 0.722761] [] blocking_notifier_call_chain+0x14/0x16 [ 0.800374] usb 1-1: new full-speed USB device number 2 using uhci_hcd [ 0.722761] [] profile_task_exit+0x1a/0x1c [ 0.802309] [] do_exit+0x39/0xe7f [ 0.802309] [] ? vprintk_default+0x1d/0x1f [ 0.802309] [] ? printk+0x57/0x73 [ 0.802309] [] oops_end+0x80/0x85 [ 0.802309] [] pgtable_bad+0x8a/0x95 [ 0.802309] [] __do_page_fault+0x8c/0x352 [ 0.802309] [] ? file_has_perm+0xc4/0xe5 [ 0.802309] [] do_page_fault+0xc/0xe [ 0.802309] [] page_fault+0x22/0x30 [ 0.802309] [] ? __clear_user+0x42/0x67 [ 0.802309] [] ? __clear_user+0x23/0x67 [ 0.802309] [] clear_user+0x2e/0x30 [ 0.802309] [] load_elf_binary+0xa7f/0x18f7 [ 0.802309] [] search_binary_handler+0x86/0x19c [ 0.802309] [] do_execveat_common.isra.26+0x909/0xf98 [ 0.802309] [] ? rest_init+0x87/0x87 [ 0.802309] [] do_execve+0x23/0x25 [ 0.802309] [] run_init_process+0x2b/0x2d [ 0.802309] [] kernel_init+0x6d/0xda [ 0.802309] [] ret_from_fork+0x3f/0x70 [ 0.802309] [] ? rest_init+0x87/0x87 [ 0.830559] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009 [ 0.830559] [ 0.831305] Kernel Offset: 0x2c00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 0.831305] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009 The crash part of this problem may be solved with the following patch (thanks to Hugh for the hint). There is still another problem, though - with this patch applied, the qemu session aborts with "VCPU Shutdown request", whatever that means. Cc: lepton Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kaiser.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index b6b0f3aa4f77..d8376b4ad9f0 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -413,7 +413,8 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) * get out to userspace running on the kernel CR3, * userspace will crash instead of running. */ - pgd.pgd |= _PAGE_NX; + if (__supported_pte_mask & _PAGE_NX) + pgd.pgd |= _PAGE_NX; } } else if (!pgd.pgd) { /* -- GitLab From 9f74755895f9b080f79384edb600a18433788adc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 5 Jan 2018 15:46:36 +0100 Subject: [PATCH 0784/1398] Linux 4.9.75 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 075e429732e7..acbc1b032db2 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 74 +SUBLEVEL = 75 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 17d3592068c2cd88e9c27ec7e5e0c3c8882c0008 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 19 Jul 2017 17:24:49 +0100 Subject: [PATCH 0785/1398] UPSTREAM: arm64: factor out entry stack manipulation In subsequent patches, we will detect stack overflow in our exception entry code, by verifying the SP after it has been decremented to make space for the exception regs. This verification code is small, and we can minimize its impact by placing it directly in the vectors. To avoid redundant modification of the SP, we also need to move the initial decrement of the SP into the vectors. As a preparatory step, this patch introduces kernel_ventry, which performs this decrement, and updates the entry code accordingly. Subsequent patches will fold SP verification into kernel_ventry. There should be no functional change as a result of this patch. Signed-off-by: Ard Biesheuvel [Mark: turn into prep patch, expand commit msg] Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Catalin Marinas Cc: James Morse (cherry picked from commit b11e5759bfac0c474d95ec4780b1566350e64cad) Signed-off-by: Todd Poynor Change-Id: I54428c1e60c9e0ed288bfda835e3b57d45f72999 Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 47 ++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c44a93387908..8cd2d830213f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -70,8 +70,13 @@ #define BAD_FIQ 2 #define BAD_ERROR 3 - .macro kernel_entry, el, regsize = 64 + .macro kernel_ventry label + .align 7 sub sp, sp, #S_FRAME_SIZE + b \label + .endm + + .macro kernel_entry, el, regsize = 64 .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 .endif @@ -316,31 +321,31 @@ tsk .req x28 // current thread_info .align 11 ENTRY(vectors) - ventry el1_sync_invalid // Synchronous EL1t - ventry el1_irq_invalid // IRQ EL1t - ventry el1_fiq_invalid // FIQ EL1t - ventry el1_error_invalid // Error EL1t + kernel_ventry el1_sync_invalid // Synchronous EL1t + kernel_ventry el1_irq_invalid // IRQ EL1t + kernel_ventry el1_fiq_invalid // FIQ EL1t + kernel_ventry el1_error_invalid // Error EL1t - ventry el1_sync // Synchronous EL1h - ventry el1_irq // IRQ EL1h - ventry el1_fiq_invalid // FIQ EL1h - ventry el1_error_invalid // Error EL1h + kernel_ventry el1_sync // Synchronous EL1h + kernel_ventry el1_irq // IRQ EL1h + kernel_ventry el1_fiq_invalid // FIQ EL1h + kernel_ventry el1_error_invalid // Error EL1h - ventry el0_sync // Synchronous 64-bit EL0 - ventry el0_irq // IRQ 64-bit EL0 - ventry el0_fiq_invalid // FIQ 64-bit EL0 - ventry el0_error_invalid // Error 64-bit EL0 + kernel_ventry el0_sync // Synchronous 64-bit EL0 + kernel_ventry el0_irq // IRQ 64-bit EL0 + kernel_ventry el0_fiq_invalid // FIQ 64-bit EL0 + kernel_ventry el0_error_invalid // Error 64-bit EL0 #ifdef CONFIG_COMPAT - ventry el0_sync_compat // Synchronous 32-bit EL0 - ventry el0_irq_compat // IRQ 32-bit EL0 - ventry el0_fiq_invalid_compat // FIQ 32-bit EL0 - ventry el0_error_invalid_compat // Error 32-bit EL0 + kernel_ventry el0_sync_compat // Synchronous 32-bit EL0 + kernel_ventry el0_irq_compat // IRQ 32-bit EL0 + kernel_ventry el0_fiq_invalid_compat // FIQ 32-bit EL0 + kernel_ventry el0_error_invalid_compat // Error 32-bit EL0 #else - ventry el0_sync_invalid // Synchronous 32-bit EL0 - ventry el0_irq_invalid // IRQ 32-bit EL0 - ventry el0_fiq_invalid // FIQ 32-bit EL0 - ventry el0_error_invalid // Error 32-bit EL0 + kernel_ventry el0_sync_invalid // Synchronous 32-bit EL0 + kernel_ventry el0_irq_invalid // IRQ 32-bit EL0 + kernel_ventry el0_fiq_invalid // FIQ 32-bit EL0 + kernel_ventry el0_error_invalid // Error 32-bit EL0 #endif END(vectors) -- GitLab From 19d3691ff6cb8465ed21f3d12dac253e4c4f0d6c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 12:56:18 +0100 Subject: [PATCH 0786/1398] UPSTREAM: arm64: mm: Use non-global mappings for kernel space In preparation for unmapping the kernel whilst running in userspace, make the kernel mappings non-global so we can avoid expensive TLB invalidation on kernel exit to userspace. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit e046eb0c9bf26d94be9e4592c00c7a78b0fa9bfd) Change-Id: Ic267c24fe8b3bc03b96933949418555de989529a [toddpoynor@google.com: fixup PAGE_NONE conflict] Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kernel-pgtable.h | 12 ++++++++++-- arch/arm64/include/asm/pgtable-prot.h | 21 +++++++++++++++------ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 7803343e5881..77a27af01371 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -78,8 +78,16 @@ /* * Initial memory map attributes. */ -#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define _SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define _SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define SWAPPER_PTE_FLAGS (_SWAPPER_PTE_FLAGS | PTE_NG) +#define SWAPPER_PMD_FLAGS (_SWAPPER_PMD_FLAGS | PMD_SECT_NG) +#else +#define SWAPPER_PTE_FLAGS _SWAPPER_PTE_FLAGS +#define SWAPPER_PMD_FLAGS _SWAPPER_PMD_FLAGS +#endif #if ARM64_SWAPPER_USES_SECTION_MAPS #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 2142c7726e76..84b5283d2e7f 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -34,8 +34,16 @@ #include -#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define PROT_DEFAULT (_PROT_DEFAULT | PTE_NG) +#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_SECT_NG) +#else +#define PROT_DEFAULT _PROT_DEFAULT +#define PROT_SECT_DEFAULT _PROT_SECT_DEFAULT +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ #define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) @@ -48,6 +56,7 @@ #define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) #define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) +#define _HYP_PAGE_DEFAULT (_PAGE_DEFAULT & ~PTE_NG) #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) @@ -55,15 +64,15 @@ #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) -#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) -#define PAGE_HYP_EXEC __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) -#define PAGE_HYP_RO __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) +#define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) +#define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) +#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) #define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -- GitLab From dee3b246017c98b2ef932d2732fb0da03c4bd9d8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 13:04:48 +0100 Subject: [PATCH 0787/1398] UPSTREAM: arm64: mm: Temporarily disable ARM64_SW_TTBR0_PAN We're about to rework the way ASIDs are allocated, switch_mm is implemented and low-level kernel entry/exit is handled, so keep the ARM64_SW_TTBR0_PAN code out of the way whilst we do the heavy lifting. It will be re-enabled in a subsequent patch. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit 376133b7edc20f237a42e4c72415cc9e8c0a9704) Signed-off-by: Todd Poynor Change-Id: Iac0d096cd8eda5bc2d3cd17d7d2492d1838ad8ed Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fbf07a6bd8a4..55578aeccff7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -802,6 +802,7 @@ endif config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" + depends on BROKEN # Temporary while switch_mm is reworked help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved -- GitLab From a72dd8a6766560ac64577a5465d2749a5dea0b90 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 13:19:09 +0100 Subject: [PATCH 0788/1398] UPSTREAM: arm64: mm: Move ASID from TTBR0 to TTBR1 In preparation for mapping kernelspace and userspace with different ASIDs, move the ASID to TTBR1 and update switch_mm to context-switch TTBR0 via an invalid mapping (the zero page). Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit 7655abb953860485940d4de74fb45a8192149bb6) [toddpoynor@google.com: add missing mrs inst from context] Change-Id: I9575ae2f0b3b5383c44f7ace0ac50588be739e45 Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/mmu_context.h | 7 +++++++ arch/arm64/include/asm/pgtable-hwdef.h | 1 + arch/arm64/include/asm/proc-fns.h | 6 ------ arch/arm64/mm/proc.S | 9 ++++++--- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 63e9982daca1..3c2ff5183d48 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -51,6 +51,13 @@ static inline void cpu_set_reserved_ttbr0(void) isb(); } +static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) +{ + BUG_ON(pgd == swapper_pg_dir); + cpu_set_reserved_ttbr0(); + cpu_do_switch_mm(virt_to_phys(pgd),mm); +} + /* * TCR.T0SZ value to use when the ID map is active. Usually equals * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index eb0c2bd90de9..8df4cb6ac6f7 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -272,6 +272,7 @@ #define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT) #define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT) +#define TCR_A1 (UL(1) << 22) #define TCR_ASID16 (UL(1) << 36) #define TCR_TBI0 (UL(1) << 37) #define TCR_HA (UL(1) << 39) diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index 14ad6e4e87d1..16cef2e8449e 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -35,12 +35,6 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr); #include -#define cpu_switch_mm(pgd,mm) \ -do { \ - BUG_ON(pgd == swapper_pg_dir); \ - cpu_do_switch_mm(virt_to_phys(pgd),mm); \ -} while (0) - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* __ASM_PROCFNS_H */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c2adb0cb952a..76ef30afbb22 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -132,9 +132,12 @@ ENDPROC(cpu_do_resume) * - pgd_phys - physical address of new TTB */ ENTRY(cpu_do_switch_mm) + mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id - bfi x0, x1, #48, #16 // set the ASID - msr ttbr0_el1, x0 // set TTBR0 + bfi x2, x1, #48, #16 // set the ASID + msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set) + isb + msr ttbr0_el1, x0 // now update TTBR0 isb post_ttbr0_update_workaround ret @@ -218,7 +221,7 @@ ENTRY(__cpu_setup) * both user and kernel. */ ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ - TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 + TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1 tcr_set_idmap_t0sz x10, x9 /* -- GitLab From 071a49fc82b450c9689b4acb469e7ee22a65d87c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 13:34:30 +0100 Subject: [PATCH 0789/1398] UPSTREAM: arm64: mm: Rename post_ttbr0_update_workaround The post_ttbr0_update_workaround hook applies to any change to TTBRx_EL1. Since we're using TTBR1 for the ASID, rename the hook to make it clearer as to what it's doing. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit 158d495899ce55db453f682a8ac8390d5a426578) [toddpoynor@google.com: fixup context conflict in comments] Change-Id: Ia0f82eee78ad442d1773245b99c7534f0ea066e4 Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/assembler.h | 4 ++-- arch/arm64/kernel/entry.S | 2 +- arch/arm64/mm/proc.S | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 7c4218ea6ff5..eb9fad288b04 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -430,9 +430,9 @@ alternative_endif .endm /* - * Errata workaround post TTBR0_EL1 update. + * Errata workaround post TTBRx_EL1 update. */ - .macro post_ttbr0_update_workaround + .macro post_ttbr_update_workaround #ifdef CONFIG_CAVIUM_ERRATUM_27456 alternative_if ARM64_WORKAROUND_CAVIUM_27456 ic iallu diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 8cd2d830213f..a6619ccda110 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -205,7 +205,7 @@ alternative_else_nop_endif * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache * corruption). */ - post_ttbr0_update_workaround + post_ttbr_update_workaround .endif 1: .if \el != 0 diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 76ef30afbb22..c1e868a036b5 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -139,7 +139,7 @@ ENTRY(cpu_do_switch_mm) isb msr ttbr0_el1, x0 // now update TTBR0 isb - post_ttbr0_update_workaround + post_ttbr_update_workaround ret ENDPROC(cpu_do_switch_mm) -- GitLab From 599c71f8be4a9c7093e8283dce7ff73eb057bb86 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 13:58:16 +0100 Subject: [PATCH 0790/1398] UPSTREAM: arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN With the ASID now installed in TTBR1, we can re-enable ARM64_SW_TTBR0_PAN by ensuring that we switch to a reserved ASID of zero when disabling user access and restore the active user ASID on the uaccess enable path. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit 27a921e75711d924617269e0ba4adb8bae9fd0d1) [toddpoynor@google.com: fixup context, move asm-uaccess.h changes to uaccess.h] Change-Id: Ie8e1706152b8d7c068c6b1f20241a10669a69ca1 Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 1 - arch/arm64/include/asm/uaccess.h | 46 +++++++++++++++++++++++--------- arch/arm64/kernel/entry.S | 4 +-- arch/arm64/lib/clear_user.S | 2 +- arch/arm64/lib/copy_from_user.S | 2 +- arch/arm64/lib/copy_in_user.S | 2 +- arch/arm64/lib/copy_to_user.S | 2 +- arch/arm64/mm/cache.S | 2 +- arch/arm64/xen/hypercall.S | 2 +- 9 files changed, 42 insertions(+), 21 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 55578aeccff7..fbf07a6bd8a4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -802,7 +802,6 @@ endif config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" - depends on BROKEN # Temporary while switch_mm is reworked help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 31b6940ec2f2..8e265b2c8ce7 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -132,15 +132,19 @@ static inline void __uaccess_ttbr0_disable(void) { unsigned long ttbr; + ttbr = read_sysreg(ttbr1_el1); /* reserved_ttbr0 placed at the end of swapper_pg_dir */ - ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE; - write_sysreg(ttbr, ttbr0_el1); + write_sysreg(ttbr + SWAPPER_DIR_SIZE, ttbr0_el1); + isb(); + /* Set reserved ASID */ + ttbr &= ~(0xffffUL << 48); + write_sysreg(ttbr, ttbr1_el1); isb(); } static inline void __uaccess_ttbr0_enable(void) { - unsigned long flags; + unsigned long flags, ttbr0, ttbr1; /* * Disable interrupts to avoid preemption between reading the 'ttbr0' @@ -148,7 +152,16 @@ static inline void __uaccess_ttbr0_enable(void) * roll-over and an update of 'ttbr0'. */ local_irq_save(flags); - write_sysreg(current_thread_info()->ttbr0, ttbr0_el1); + ttbr0 = current_thread_info()->ttbr0; + + /* Restore active ASID */ + ttbr1 = read_sysreg(ttbr1_el1); + ttbr1 |= ttbr0 & (0xffffUL << 48); + write_sysreg(ttbr1, ttbr1_el1); + isb(); + + /* Restore user page table */ + write_sysreg(ttbr0, ttbr0_el1); isb(); local_irq_restore(flags); } @@ -438,11 +451,20 @@ extern __must_check long strnlen_user(const char __user *str, long n); add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 isb + sub \tmp1, \tmp1, #SWAPPER_DIR_SIZE + bic \tmp1, \tmp1, #(0xffff << 48) + msr ttbr1_el1, \tmp1 // set reserved ASID + isb .endm - .macro __uaccess_ttbr0_enable, tmp1 + .macro __uaccess_ttbr0_enable, tmp1, tmp2 get_thread_info \tmp1 ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 + mrs \tmp2, ttbr1_el1 + extr \tmp2, \tmp2, \tmp1, #48 + ror \tmp2, \tmp2, #16 + msr ttbr1_el1, \tmp2 // set the active ASID + isb msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 isb .endm @@ -453,18 +475,18 @@ alternative_if_not ARM64_HAS_PAN alternative_else_nop_endif .endm - .macro uaccess_ttbr0_enable, tmp1, tmp2 + .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3 alternative_if_not ARM64_HAS_PAN - save_and_disable_irq \tmp2 // avoid preemption - __uaccess_ttbr0_enable \tmp1 - restore_irq \tmp2 + save_and_disable_irq \tmp3 // avoid preemption + __uaccess_ttbr0_enable \tmp1, \tmp2 + restore_irq \tmp3 alternative_else_nop_endif .endm #else .macro uaccess_ttbr0_disable, tmp1 .endm - .macro uaccess_ttbr0_enable, tmp1, tmp2 + .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3 .endm #endif @@ -478,8 +500,8 @@ alternative_if ARM64_ALT_PAN_NOT_UAO alternative_else_nop_endif .endm - .macro uaccess_enable_not_uao, tmp1, tmp2 - uaccess_ttbr0_enable \tmp1, \tmp2 + .macro uaccess_enable_not_uao, tmp1, tmp2, tmp3 + uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3 alternative_if ARM64_ALT_PAN_NOT_UAO SET_PSTATE_PAN(0) alternative_else_nop_endif diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a6619ccda110..b610b161cd62 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -132,7 +132,7 @@ alternative_if ARM64_HAS_PAN alternative_else_nop_endif .if \el != 0 - mrs x21, ttbr0_el1 + mrs x21, ttbr1_el1 tst x21, #0xffff << 48 // Check for the reserved ASID orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR b.eq 1f // TTBR0 access already disabled @@ -196,7 +196,7 @@ alternative_else_nop_endif tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set .endif - __uaccess_ttbr0_enable x0 + __uaccess_ttbr0_enable x0, x1 .if \el == 0 /* diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index d7150e30438a..dd65ca253eb4 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -30,7 +30,7 @@ * Alignment fixed up by hardware. */ ENTRY(__clear_user) - uaccess_enable_not_uao x2, x3 + uaccess_enable_not_uao x2, x3, x4 mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index cfe13396085b..7e7e687c17ac 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -64,7 +64,7 @@ end .req x5 ENTRY(__arch_copy_from_user) - uaccess_enable_not_uao x3, x4 + uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" uaccess_disable_not_uao x3 diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 718b1c4e2f85..074d52fcd75b 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -65,7 +65,7 @@ end .req x5 ENTRY(__copy_in_user) - uaccess_enable_not_uao x3, x4 + uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" uaccess_disable_not_uao x3 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index e99e31c9acac..67118444cde0 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -63,7 +63,7 @@ end .req x5 ENTRY(__arch_copy_to_user) - uaccess_enable_not_uao x3, x4 + uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" uaccess_disable_not_uao x3 diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index da9576932322..74404536d704 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -49,7 +49,7 @@ ENTRY(flush_icache_range) * - end - virtual end address of region */ ENTRY(__flush_cache_user_range) - uaccess_ttbr0_enable x2, x3 + uaccess_ttbr0_enable x2, x3, x4 dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x0, x3 diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index b41aff25426d..f5422520cb01 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -100,7 +100,7 @@ ENTRY(privcmd_call) * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation * is enabled (it implies that hardware UAO and PAN disabled). */ - uaccess_ttbr0_enable x6, x7 + uaccess_ttbr0_enable x6, x7, x8 hvc XEN_IMM /* -- GitLab From 5914b11611d5f6a185ceda0ae38b4cf1e00df0d4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 14:10:28 +0100 Subject: [PATCH 0791/1398] UPSTREAM: arm64: mm: Allocate ASIDs in pairs In preparation for separate kernel/user ASIDs, allocate them in pairs for each mm_struct. The bottom bit distinguishes the two: if it is set, then the ASID will map only userspace. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit 0c8ea531b7740754cf374ca8b7510655f569c5e3) [toddpoynor@google.com: fixup context] Change-Id: I9879f45cd51cdf13814e3cff18653a76c893f386 Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/mmu.h | 3 +++ arch/arm64/mm/context.c | 25 +++++++++++++++++-------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 8d9fce037b2f..edd9ff732528 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -16,6 +16,9 @@ #ifndef __ASM_MMU_H #define __ASM_MMU_H + +#define USER_ASID_FLAG (UL(1) << 48) + typedef struct { atomic64_t id; void *vdso; diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 4c63cb154859..05a885c14c83 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -39,7 +39,16 @@ static cpumask_t tlb_flush_pending; #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) #define ASID_FIRST_VERSION (1UL << asid_bits) -#define NUM_USER_ASIDS ASID_FIRST_VERSION + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1) +#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1) +#define idx2asid(idx) (((idx) << 1) & ~ASID_MASK) +#else +#define NUM_USER_ASIDS (ASID_FIRST_VERSION) +#define asid2idx(asid) ((asid) & ~ASID_MASK) +#define idx2asid(idx) asid2idx(idx) +#endif /* Get the ASIDBits supported by the current CPU */ static u32 get_cpu_asid_bits(void) @@ -104,7 +113,7 @@ static void flush_context(unsigned int cpu) */ if (asid == 0) asid = per_cpu(reserved_asids, i); - __set_bit(asid & ~ASID_MASK, asid_map); + __set_bit(asid2idx(asid), asid_map); per_cpu(reserved_asids, i) = asid; } @@ -159,16 +168,16 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) * We had a valid ASID in a previous life, so try to re-use * it if possible. */ - asid &= ~ASID_MASK; - if (!__test_and_set_bit(asid, asid_map)) + if (!__test_and_set_bit(asid2idx(asid), asid_map)) return newasid; } /* * Allocate a free ASID. If we can't find one, take a note of the - * currently active ASIDs and mark the TLBs as requiring flushes. - * We always count from ASID #1, as we use ASID #0 when setting a - * reserved TTBR0 for the init_mm. + * currently active ASIDs and mark the TLBs as requiring flushes. We + * always count from ASID #2 (index 1), as we use ASID #0 when setting + * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd + * pairs. */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); if (asid != NUM_USER_ASIDS) @@ -185,7 +194,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) set_asid: __set_bit(asid, asid_map); cur_idx = asid; - return asid | generation; + return idx2asid(asid) | generation; } void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) -- GitLab From d6ca455d0a006ba52890f2a2f3c3931e946b92e3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 13:58:08 +0000 Subject: [PATCH 0792/1398] UPSTREAM: arm64: mm: Add arm64_kernel_unmapped_at_el0 helper In order for code such as TLB invalidation to operate efficiently when the decision to map the kernel at EL0 is determined at runtime, this patch introduces a helper function, arm64_kernel_unmapped_at_el0, to determine whether or not the kernel is mapped whilst running in userspace. Currently, this just reports the value of CONFIG_UNMAP_KERNEL_AT_EL0, but will later be hooked up to a fake CPU capability using a static key. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit fc0e1299da548b32440051f58f08e0c1eb7edd0b) Change-Id: I2d34c2d7b88f82d848d7063efdae7c3cc4fe8fd6 Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/mmu.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index edd9ff732528..478e6bc6b4e8 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -19,6 +19,8 @@ #define USER_ASID_FLAG (UL(1) << 48) +#ifndef __ASSEMBLY__ + typedef struct { atomic64_t id; void *vdso; @@ -31,6 +33,11 @@ typedef struct { */ #define ASID(mm) ((mm)->context.id.counter & 0xffff) +static inline bool arm64_kernel_unmapped_at_el0(void) +{ + return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0); +} + extern void paging_init(void); extern void bootmem_init(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); @@ -40,4 +47,5 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, pgprot_t prot, bool allow_block_mappings); extern void *fixmap_remap_fdt(phys_addr_t dt_phys); +#endif /* !__ASSEMBLY__ */ #endif -- GitLab From 2053d3c3b488013670965d24166395897c83e5a7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 14:13:33 +0100 Subject: [PATCH 0793/1398] UPSTREAM: arm64: mm: Invalidate both kernel and user ASIDs when performing TLBI Since an mm has both a kernel and a user ASID, we need to ensure that broadcast TLB maintenance targets both address spaces so that things like CoW continue to work with the uaccess primitives in the kernel. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit 9b0de864b5bc298ea53005ad812f3386f81aee9c) Change-Id: I8b50e223fc7de6f11388b8dfac705fa618b7335a Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/tlbflush.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index deab52374119..ad6bd8b26ada 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -23,6 +23,7 @@ #include #include +#include /* * Raw TLBI operations. @@ -42,6 +43,11 @@ #define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0) +#define __tlbi_user(op, arg) do { \ + if (arm64_kernel_unmapped_at_el0()) \ + __tlbi(op, (arg) | USER_ASID_FLAG); \ +} while (0) + /* * TLB Management * ============== @@ -103,6 +109,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm) dsb(ishst); __tlbi(aside1is, asid); + __tlbi_user(aside1is, asid); dsb(ish); } @@ -113,6 +120,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, dsb(ishst); __tlbi(vale1is, addr); + __tlbi_user(vale1is, addr); dsb(ish); } @@ -139,10 +147,13 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { - if (last_level) + if (last_level) { __tlbi(vale1is, addr); - else + __tlbi_user(vale1is, addr); + } else { __tlbi(vae1is, addr); + __tlbi_user(vae1is, addr); + } } dsb(ish); } @@ -182,6 +193,7 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm, unsigned long addr = uaddr >> 12 | (ASID(mm) << 48); __tlbi(vae1is, addr); + __tlbi_user(vae1is, addr); dsb(ish); } -- GitLab From a329b068c97b34ef3045c0c6b0aa5ca9563b68ef Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:07:40 +0000 Subject: [PATCH 0794/1398] UPSTREAM: arm64: entry: Add exception trampoline page for exceptions from EL0 To allow unmapping of the kernel whilst running at EL0, we need to point the exception vectors at an entry trampoline that can map/unmap the kernel on entry/exit respectively. This patch adds the trampoline page, although it is not yet plugged into the vector table and is therefore unused. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit c7b9adaf85f818d747eeff5145eb4095ccd587fb) [toddpoynor@google.com: fixup context] Change-Id: I6c6b5eb93a69f26b97628726eccf91df739d03af Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 85 +++++++++++++++++++++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 17 +++++++ 2 files changed, 102 insertions(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b610b161cd62..82da81e21aca 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -865,6 +866,90 @@ __ni_sys_trace: .popsection // .entry.text +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +/* + * Exception vectors trampoline. + */ + .pushsection ".entry.tramp.text", "ax" + + .macro tramp_map_kernel, tmp + mrs \tmp, ttbr1_el1 + sub \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) + bic \tmp, \tmp, #USER_ASID_FLAG + msr ttbr1_el1, \tmp + .endm + + .macro tramp_unmap_kernel, tmp + mrs \tmp, ttbr1_el1 + add \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) + orr \tmp, \tmp, #USER_ASID_FLAG + msr ttbr1_el1, \tmp + /* + * We avoid running the post_ttbr_update_workaround here because the + * user and kernel ASIDs don't have conflicting mappings, so any + * "blessing" as described in: + * + * http://lkml.kernel.org/r/56BB848A.6060603@caviumnetworks.com + * + * will not hurt correctness. Whilst this may partially defeat the + * point of using split ASIDs in the first place, it avoids + * the hit of invalidating the entire I-cache on every return to + * userspace. + */ + .endm + + .macro tramp_ventry, regsize = 64 + .align 7 +1: + .if \regsize == 64 + msr tpidrro_el0, x30 // Restored in kernel_ventry + .endif + tramp_map_kernel x30 + ldr x30, =vectors + prfm plil1strm, [x30, #(1b - tramp_vectors)] + msr vbar_el1, x30 + add x30, x30, #(1b - tramp_vectors) + isb + br x30 + .endm + + .macro tramp_exit, regsize = 64 + adr x30, tramp_vectors + msr vbar_el1, x30 + tramp_unmap_kernel x30 + .if \regsize == 64 + mrs x30, far_el1 + .endif + eret + .endm + + .align 11 +ENTRY(tramp_vectors) + .space 0x400 + + tramp_ventry + tramp_ventry + tramp_ventry + tramp_ventry + + tramp_ventry 32 + tramp_ventry 32 + tramp_ventry 32 + tramp_ventry 32 +END(tramp_vectors) + +ENTRY(tramp_exit_native) + tramp_exit +END(tramp_exit_native) + +ENTRY(tramp_exit_compat) + tramp_exit 32 +END(tramp_exit_compat) + + .ltorg + .popsection // .entry.tramp.text +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ + /* * Special system call wrappers. */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index b8deffa9e1bf..66c853e4a0c6 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -56,6 +56,17 @@ jiffies = jiffies_64; #define HIBERNATE_TEXT #endif +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define TRAMP_TEXT \ + . = ALIGN(PAGE_SIZE); \ + VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \ + *(.entry.tramp.text) \ + . = ALIGN(PAGE_SIZE); \ + VMLINUX_SYMBOL(__entry_tramp_text_end) = .; +#else +#define TRAMP_TEXT +#endif + /* * The size of the PE/COFF section that covers the kernel image, which * runs from stext to _edata, must be a round multiple of the PE/COFF @@ -128,6 +139,7 @@ SECTIONS HYPERVISOR_TEXT IDMAP_TEXT HIBERNATE_TEXT + TRAMP_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); @@ -221,6 +233,11 @@ SECTIONS . += RESERVED_TTBR0_SIZE; #endif +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + tramp_pg_dir = .; + . += PAGE_SIZE; +#endif + _end = .; STABS_DEBUG -- GitLab From 5e54c4b6a795285d314fe0bbab42f06414b9c523 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:14:17 +0000 Subject: [PATCH 0795/1398] UPSTREAM: arm64: mm: Map entry trampoline into trampoline and kernel page tables The exception entry trampoline needs to be mapped at the same virtual address in both the trampoline page table (which maps nothing else) and also the kernel page table, so that we can swizzle TTBR1_EL1 on exceptions from and return to EL0. This patch maps the trampoline at a fixed virtual address in the fixmap area of the kernel virtual address space, which allows the kernel proper to be randomized with respect to the trampoline when KASLR is enabled. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit 51a0048beb449682d632d0af52a515adb9f9882e) [toddpoynor@google.com: fixup context, remove rodata_enabled check] Change-Id: I920ad6850e4e9b03edea4a84d8603302c28eef37 Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/fixmap.h | 5 +++++ arch/arm64/include/asm/pgtable.h | 1 + arch/arm64/kernel/asm-offsets.c | 6 +++++- arch/arm64/mm/mmu.c | 23 +++++++++++++++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index caf86be815ba..7b1d88c18143 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -51,6 +51,11 @@ enum fixed_addresses { FIX_EARLYCON_MEM_BASE, FIX_TEXT_POKE0, + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + FIX_ENTRY_TRAMP_TEXT, +#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, /* diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 1af9f3c5cdf6..7bcd8b96a4b3 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -692,6 +692,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 433f03ec8e9b..b6c9389c160a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -147,11 +148,14 @@ int main(void) DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id)); DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state)); - BLANK(); DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next)); DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val)); + BLANK(); +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + DEFINE(TRAMP_VALIAS, TRAMP_VALIAS); +#endif return 0; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 33ecaffbfc23..29945bc2e3f1 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -420,6 +420,29 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, vm_area_add_early(vma); } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static int __init map_entry_trampoline(void) +{ + extern char __entry_tramp_text_start[]; + + pgprot_t prot = PAGE_KERNEL_EXEC; + phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); + + /* The trampoline is always mapped and can therefore be global */ + pgprot_val(prot) &= ~PTE_NG; + + /* Map only the text into the trampoline page table */ + memset(tramp_pg_dir, 0, PGD_SIZE); + __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, + prot, pgd_pgtable_alloc, 0); + + /* ...as well as the kernel page table */ + __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); + return 0; +} +core_initcall(map_entry_trampoline); +#endif + /* * Create fine-grained mappings for the kernel. */ -- GitLab From 8fdbffb119158a8dcf0492e5873bba844744de7e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:20:21 +0000 Subject: [PATCH 0796/1398] UPSTREAM: arm64: entry: Explicitly pass exception level to kernel_ventry macro We will need to treat exceptions from EL0 differently in kernel_ventry, so rework the macro to take the exception level as an argument and construct the branch target using that. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit 5b1f7fe41909cde40decad9f0e8ee585777a0538) [toddpoynor@google.com: fixup context, error -> error_invalid] Change-Id: I5dfbce460f7617e33298a1ece09159572d55fb3a Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 44 +++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 82da81e21aca..ac474a70db85 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -71,10 +71,10 @@ #define BAD_FIQ 2 #define BAD_ERROR 3 - .macro kernel_ventry label + .macro kernel_ventry, el, label, regsize = 64 .align 7 sub sp, sp, #S_FRAME_SIZE - b \label + b el\()\el\()_\label .endm .macro kernel_entry, el, regsize = 64 @@ -322,31 +322,31 @@ tsk .req x28 // current thread_info .align 11 ENTRY(vectors) - kernel_ventry el1_sync_invalid // Synchronous EL1t - kernel_ventry el1_irq_invalid // IRQ EL1t - kernel_ventry el1_fiq_invalid // FIQ EL1t - kernel_ventry el1_error_invalid // Error EL1t + kernel_ventry 1, sync_invalid // Synchronous EL1t + kernel_ventry 1, irq_invalid // IRQ EL1t + kernel_ventry 1, fiq_invalid // FIQ EL1t + kernel_ventry 1, error_invalid // Error EL1t - kernel_ventry el1_sync // Synchronous EL1h - kernel_ventry el1_irq // IRQ EL1h - kernel_ventry el1_fiq_invalid // FIQ EL1h - kernel_ventry el1_error_invalid // Error EL1h + kernel_ventry 1, sync // Synchronous EL1h + kernel_ventry 1, irq // IRQ EL1h + kernel_ventry 1, fiq_invalid // FIQ EL1h + kernel_ventry 1, error_invalid // Error EL1h - kernel_ventry el0_sync // Synchronous 64-bit EL0 - kernel_ventry el0_irq // IRQ 64-bit EL0 - kernel_ventry el0_fiq_invalid // FIQ 64-bit EL0 - kernel_ventry el0_error_invalid // Error 64-bit EL0 + kernel_ventry 0, sync // Synchronous 64-bit EL0 + kernel_ventry 0, irq // IRQ 64-bit EL0 + kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0 + kernel_ventry 0, error_invalid // Error 64-bit EL0 #ifdef CONFIG_COMPAT - kernel_ventry el0_sync_compat // Synchronous 32-bit EL0 - kernel_ventry el0_irq_compat // IRQ 32-bit EL0 - kernel_ventry el0_fiq_invalid_compat // FIQ 32-bit EL0 - kernel_ventry el0_error_invalid_compat // Error 32-bit EL0 + kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0 + kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0 + kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0 + kernel_ventry 0, error_invalid_compat, 32 // Error 32-bit EL0 #else - kernel_ventry el0_sync_invalid // Synchronous 32-bit EL0 - kernel_ventry el0_irq_invalid // IRQ 32-bit EL0 - kernel_ventry el0_fiq_invalid // FIQ 32-bit EL0 - kernel_ventry el0_error_invalid // Error 32-bit EL0 + kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0 + kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0 + kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0 + kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0 #endif END(vectors) -- GitLab From c27a2258342344ac5048e6e60e48fb969fee1ed4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:24:29 +0000 Subject: [PATCH 0797/1398] UPSTREAM: arm64: entry: Hook up entry trampoline to exception vectors Hook up the entry trampoline to our exception vectors so that all exceptions from and returns to EL0 go via the trampoline, which swizzles the vector base register accordingly. Transitioning to and from the kernel clobbers x30, so we use tpidrro_el0 and far_el1 as scratch registers for native tasks. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit 4bf3286d29f3a88425d8d8cd53428cbb8f865f04) Change-Id: I92fe4484fdfcf09e772f5d7ddfa29bfa4eb065f7 Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ac474a70db85..a22c78ae57a9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -73,10 +73,26 @@ .macro kernel_ventry, el, label, regsize = 64 .align 7 +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + .if \el == 0 + .if \regsize == 64 + mrs x30, tpidrro_el0 + msr tpidrro_el0, xzr + .else + mov x30, xzr + .endif + .endif +#endif + sub sp, sp, #S_FRAME_SIZE b el\()\el\()_\label .endm + .macro tramp_alias, dst, sym + mov_q \dst, TRAMP_VALIAS + add \dst, \dst, #(\sym - .entry.tramp.text) + .endm + .macro kernel_entry, el, regsize = 64 .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 @@ -218,18 +234,20 @@ alternative_else_nop_endif .if \el == 0 ldr x23, [sp, #S_SP] // load return stack pointer msr sp_el0, x23 + tst x22, #PSR_MODE32_BIT // native task? + b.eq 3f + #ifdef CONFIG_ARM64_ERRATUM_845719 alternative_if ARM64_WORKAROUND_845719 - tbz x22, #4, 1f #ifdef CONFIG_PID_IN_CONTEXTIDR mrs x29, contextidr_el1 msr contextidr_el1, x29 #else msr contextidr_el1, xzr #endif -1: alternative_else_nop_endif #endif +3: .endif msr elr_el1, x21 // set up the return data @@ -251,7 +269,22 @@ alternative_else_nop_endif ldp x28, x29, [sp, #16 * 14] ldr lr, [sp, #S_LR] add sp, sp, #S_FRAME_SIZE // restore sp - eret // return to kernel + +#ifndef CONFIG_UNMAP_KERNEL_AT_EL0 + eret +#else + .if \el == 0 + bne 4f + msr far_el1, x30 + tramp_alias x30, tramp_exit_native + br x30 +4: + tramp_alias x30, tramp_exit_compat + br x30 + .else + eret + .endif +#endif .endm .macro irq_stack_entry -- GitLab From 04b77fe9ae8da2aab099ae51c8835c700f9d5970 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:29:19 +0000 Subject: [PATCH 0798/1398] UPSTREAM: arm64: erratum: Work around Falkor erratum #E1003 in trampoline code We rely on an atomic swizzling of TTBR1 when transitioning from the entry trampoline to the kernel proper on an exception. We can't rely on this atomicity in the face of Falkor erratum #E1003, so on affected cores we can issue a TLB invalidation to invalidate the walk cache prior to jumping into the kernel. There is still the possibility of a TLB conflict here due to conflicting walk cache entries prior to the invalidation, but this doesn't appear to be the case on these CPUs in practice. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git commit d1777e686ad10ba7c594304429c6045fb79255a1) [ghackmann@google.com: replace runtime alternative_if with a compile-time check for Code Aurora's out-of-tree CONFIG_ARCH_MSM8996. Kryo needs this workaround too, and 4.4 doesn't have any of the upstream Falkor errata infrastructure needed to detect this at boot time.] Signed-off-by: Greg Hackmann Signed-off-by: Todd Poynor Change-Id: Iaf244a364b22d386b54368f88b73e39d295de49f Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a22c78ae57a9..79ea90b33682 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -910,6 +910,16 @@ __ni_sys_trace: sub \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) bic \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp +#ifdef CONFIG_ARCH_MSM8996 + /* ASID already in \tmp[63:48] */ + movk \tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12) + movk \tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12) + /* 2MB boundary containing the vectors, so we nobble the walk cache */ + movk \tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12) + isb + tlbi vae1, \tmp + dsb nsh +#endif /* CONFIG_ARCH_MSM8996 */ .endm .macro tramp_unmap_kernel, tmp -- GitLab From da1016a5635158113253b3dc420c25d289193c1d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:33:28 +0000 Subject: [PATCH 0799/1398] UPSTREAM: arm64: tls: Avoid unconditional zeroing of tpidrro_el0 for native tasks When unmapping the kernel at EL0, we use tpidrro_el0 as a scratch register during exception entry from native tasks and subsequently zero it in the kernel_ventry macro. We can therefore avoid zeroing tpidrro_el0 in the context-switch path for native tasks using the entry trampoline. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit 18011eac28c7cb31c87b86b7d0e5b01894405c7f) [toddpoynor@google.com: fixup context] Change-Id: I4a0ded2e339b0c75521bc3b5a7fbbb3e60aa8774 Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/process.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 65ea76255831..7e365ffe1370 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -372,17 +372,17 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, static void tls_thread_switch(struct task_struct *next) { - unsigned long tpidr, tpidrro; + unsigned long tpidr; tpidr = read_sysreg(tpidr_el0); *task_user_tls(current) = tpidr; - tpidr = *task_user_tls(next); - tpidrro = is_compat_thread(task_thread_info(next)) ? - next->thread.tp_value : 0; + if (is_compat_thread(task_thread_info(next))) + write_sysreg(next->thread.tp_value, tpidrro_el0); + else if (!arm64_kernel_unmapped_at_el0()) + write_sysreg(0, tpidrro_el0); - write_sysreg(tpidr, tpidr_el0); - write_sysreg(tpidrro, tpidrro_el0); + write_sysreg(*task_user_tls(next), tpidr_el0); } /* Restore the UAO state depending on next's addr_limit */ -- GitLab From f79ff2d82888428c2b0f7dbdedce553313156fae Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:38:19 +0000 Subject: [PATCH 0800/1398] UPSTREAM: arm64: entry: Add fake CPU feature for unmapping the kernel at EL0 Allow explicit disabling of the entry trampoline on the kernel command line (kpti=off) by adding a fake CPU feature (ARM64_UNMAP_KERNEL_AT_EL0) that can be used to toggle the alternative sequences in our entry code and avoid use of the trampoline altogether if desired. This also allows us to make use of a static key in arm64_kernel_unmapped_at_el0(). Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit ea1e3de85e94d711f63437c04624aa0e8de5c8b3) [toddpoynor@google.com: fixup context, cpus_have_const_cap -> cpus_have_cap] Change-Id: I72936d608b0d4c0dd9725eced7674b95e4abcf2d Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpucaps.h | 4 +++- arch/arm64/include/asm/mmu.h | 3 ++- arch/arm64/kernel/cpufeature.c | 41 ++++++++++++++++++++++++++++++++ arch/arm64/kernel/entry.S | 9 +++---- 4 files changed, 51 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 87b446535185..4a4a98a4b1bd 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -35,6 +35,8 @@ #define ARM64_HYP_OFFSET_LOW 14 #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 -#define ARM64_NCAPS 16 +#define ARM64_UNMAP_KERNEL_AT_EL0 23 + +#define ARM64_NCAPS 24 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 478e6bc6b4e8..78b79dfc2905 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -35,7 +35,8 @@ typedef struct { static inline bool arm64_kernel_unmapped_at_el0(void) { - return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0); + return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && + cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0); } extern void paging_init(void); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0127e1bb5a7c..bfbd9e90fa04 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -748,6 +748,40 @@ static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode(); } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ + +static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, + int __unused) +{ + /* Forced on command line? */ + if (__kpti_forced) { + pr_info_once("kernel page table isolation forced %s by command line option\n", + __kpti_forced > 0 ? "ON" : "OFF"); + return __kpti_forced > 0; + } + + /* Useful for KASLR robustness */ + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) + return true; + + return false; +} + +static int __init parse_kpti(char *str) +{ + bool enabled; + int ret = strtobool(str, &enabled); + + if (ret) + return ret; + + __kpti_forced = enabled ? 1 : -1; + return 0; +} +__setup("kpti=", parse_kpti); +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -831,6 +865,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .def_scope = SCOPE_SYSTEM, .matches = hyp_offset_low, }, +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + { + .capability = ARM64_UNMAP_KERNEL_AT_EL0, + .def_scope = SCOPE_SYSTEM, + .matches = unmap_kernel_at_el0, + }, +#endif {}, }; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 79ea90b33682..6e9ce0fb147b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -74,6 +74,7 @@ .macro kernel_ventry, el, label, regsize = 64 .align 7 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +alternative_if ARM64_UNMAP_KERNEL_AT_EL0 .if \el == 0 .if \regsize == 64 mrs x30, tpidrro_el0 @@ -82,6 +83,7 @@ mov x30, xzr .endif .endif +alternative_else_nop_endif #endif sub sp, sp, #S_FRAME_SIZE @@ -270,10 +272,9 @@ alternative_else_nop_endif ldr lr, [sp, #S_LR] add sp, sp, #S_FRAME_SIZE // restore sp -#ifndef CONFIG_UNMAP_KERNEL_AT_EL0 - eret -#else .if \el == 0 +alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 bne 4f msr far_el1, x30 tramp_alias x30, tramp_exit_native @@ -281,10 +282,10 @@ alternative_else_nop_endif 4: tramp_alias x30, tramp_exit_compat br x30 +#endif .else eret .endif -#endif .endm .macro irq_stack_entry -- GitLab From 16f712b456b11f40a9b6931b585aa22e11df5057 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:41:01 +0000 Subject: [PATCH 0801/1398] UPSTREAM: arm64: Kconfig: Add CONFIG_UNMAP_KERNEL_AT_EL0 Add a Kconfig entry to control use of the entry trampoline, which allows us to unmap the kernel whilst running in userspace and improve the robustness of KASLR. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit 084eb77cd3a81134d02500977dc0ecc9277dc97d) Change-Id: I70a996e448fa17007a3b8564c794e53e3992d7bf Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fbf07a6bd8a4..161fced75e7d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -733,6 +733,19 @@ config FORCE_MAX_ZONEORDER However for 4K, we choose a higher default value, 11 as opposed to 10, giving us 4M allocations matching the default size used by generic code. +config UNMAP_KERNEL_AT_EL0 + bool "Unmap kernel when running in userspace (aka \"KAISER\")" + default y + help + Some attacks against KASLR make use of the timing difference between + a permission fault which could arise from a page table entry that is + present in the TLB, and a translation fault which always requires a + page table walk. This option defends against these attacks by unmapping + the kernel whilst running in userspace, therefore forcing translation + faults for all of kernel space. + + If unsure, say Y. + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT -- GitLab From d7013ede260486a55b254e1f95127a2b0f24cdc1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 1 Dec 2017 17:33:48 +0000 Subject: [PATCH 0802/1398] UPSTREAM: arm64: mm: Introduce TTBR_ASID_MASK for getting at the ASID in the TTBR There are now a handful of open-coded masks to extract the ASID from a TTBR value, so introduce a TTBR_ASID_MASK and use that instead. Suggested-by: Mark Rutland Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit b519538dfefc2f8478a1bcb458459c861d431784) [toddpoynor@google.com: fixup context, asm-uaccess.h changes to uaccess.h] Change-Id: Ia456c58c83f9bbd02177cefb8e3106dfffe357cc Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/mmu.h | 1 + arch/arm64/include/asm/uaccess.h | 7 ++++--- arch/arm64/kernel/entry.S | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 78b79dfc2905..d7750fca1259 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -18,6 +18,7 @@ #define USER_ASID_FLAG (UL(1) << 48) +#define TTBR_ASID_MASK (UL(0xffff) << 48) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 8e265b2c8ce7..c8af98bf9961 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -20,6 +20,7 @@ #include #include +#include #include #ifndef __ASSEMBLY__ @@ -137,7 +138,7 @@ static inline void __uaccess_ttbr0_disable(void) write_sysreg(ttbr + SWAPPER_DIR_SIZE, ttbr0_el1); isb(); /* Set reserved ASID */ - ttbr &= ~(0xffffUL << 48); + ttbr &= ~TTBR_ASID_MASK; write_sysreg(ttbr, ttbr1_el1); isb(); } @@ -156,7 +157,7 @@ static inline void __uaccess_ttbr0_enable(void) /* Restore active ASID */ ttbr1 = read_sysreg(ttbr1_el1); - ttbr1 |= ttbr0 & (0xffffUL << 48); + ttbr1 |= ttbr0 & TTBR_ASID_MASK; write_sysreg(ttbr1, ttbr1_el1); isb(); @@ -452,7 +453,7 @@ extern __must_check long strnlen_user(const char __user *str, long n); msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 isb sub \tmp1, \tmp1, #SWAPPER_DIR_SIZE - bic \tmp1, \tmp1, #(0xffff << 48) + bic \tmp1, \tmp1, #TTBR_ASID_MASK msr ttbr1_el1, \tmp1 // set reserved ASID isb .endm diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6e9ce0fb147b..c97454e542f4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -152,7 +152,7 @@ alternative_else_nop_endif .if \el != 0 mrs x21, ttbr1_el1 - tst x21, #0xffff << 48 // Check for the reserved ASID + tst x21, #TTBR_ASID_MASK // Check for the reserved ASID orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR b.eq 1f // TTBR0 access already disabled and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR -- GitLab From 06fe41f85237c9d3c915447b9f9d51f1e85c4891 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 6 Dec 2017 11:24:02 +0000 Subject: [PATCH 0803/1398] UPSTREAM: arm64: kaslr: Put kernel vectors address in separate data page The literal pool entry for identifying the vectors base is the only piece of information in the trampoline page that identifies the true location of the kernel. This patch moves it into a page-aligned region of the .rodata section and maps this adjacent to the trampoline text via an additional fixmap entry, which protects against any accidental leakage of the trampoline contents. Suggested-by: Ard Biesheuvel Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon (cherry picked from commit 6c27c4082f4f70b9f41df4d0adf51128b40351df) Change-Id: Id125331e7fa5645c801a26843fecca53f8773705 Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/fixmap.h | 1 + arch/arm64/kernel/entry.S | 14 ++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 5 ++++- arch/arm64/mm/mmu.c | 10 +++++++++- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 7b1d88c18143..d8e58051f32d 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -53,6 +53,7 @@ enum fixed_addresses { FIX_TEXT_POKE0, #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + FIX_ENTRY_TRAMP_DATA, FIX_ENTRY_TRAMP_TEXT, #define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c97454e542f4..fe22ca8ae750 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -949,7 +949,13 @@ __ni_sys_trace: msr tpidrro_el0, x30 // Restored in kernel_ventry .endif tramp_map_kernel x30 +#ifdef CONFIG_RANDOMIZE_BASE + adr x30, tramp_vectors + PAGE_SIZE +alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 + ldr x30, [x30] +#else ldr x30, =vectors +#endif prfm plil1strm, [x30, #(1b - tramp_vectors)] msr vbar_el1, x30 add x30, x30, #(1b - tramp_vectors) @@ -992,6 +998,14 @@ END(tramp_exit_compat) .ltorg .popsection // .entry.tramp.text +#ifdef CONFIG_RANDOMIZE_BASE + .pushsection ".rodata", "a" + .align PAGE_SHIFT + .globl __entry_tramp_data_start +__entry_tramp_data_start: + .quad vectors + .popsection // .rodata +#endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 66c853e4a0c6..34d3ed64fe8e 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -257,7 +257,10 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) <= SZ_4K, "Hibernate exit text too big or misaligned") #endif - +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, + "Entry trampoline text too big") +#endif /* * If padding is applied before .head.text, virt<->phys conversions will fail. */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 29945bc2e3f1..31d16d8257e2 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -436,8 +436,16 @@ static int __init map_entry_trampoline(void) __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, prot, pgd_pgtable_alloc, 0); - /* ...as well as the kernel page table */ + /* Map both the text and data into the kernel page table */ __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + extern char __entry_tramp_data_start[]; + + __set_fixmap(FIX_ENTRY_TRAMP_DATA, + __pa_symbol(__entry_tramp_data_start), + PAGE_KERNEL_RO); + } + return 0; } core_initcall(map_entry_trampoline); -- GitLab From 753593688bfab89b6cbf085f62fec44012a6820d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 16:15:59 +0000 Subject: [PATCH 0804/1398] arm64: use RET instruction for exiting the trampoline Speculation attacks against the entry trampoline can potentially resteer the speculative instruction stream through the indirect branch and into arbitrary gadgets within the kernel. This patch defends against these attacks by forcing a misprediction through the return stack: a dummy BL instruction loads an entry into the stack, so that the predicted program flow of the subsequent RET instruction is to a branch-to-self instruction which is finally resolved as a branch to the kernel vectors with speculation suppressed. Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index fe22ca8ae750..c1ff0999045e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -948,6 +948,9 @@ __ni_sys_trace: .if \regsize == 64 msr tpidrro_el0, x30 // Restored in kernel_ventry .endif + bl 2f + b . +2: tramp_map_kernel x30 #ifdef CONFIG_RANDOMIZE_BASE adr x30, tramp_vectors + PAGE_SIZE @@ -960,7 +963,7 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 msr vbar_el1, x30 add x30, x30, #(1b - tramp_vectors) isb - br x30 + ret .endm .macro tramp_exit, regsize = 64 -- GitLab From 5f5e5d4041e31dfe9605713248a7a66754d224c7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 16:19:39 +0000 Subject: [PATCH 0805/1398] arm64: Kconfig: Reword UNMAP_KERNEL_AT_EL0 kconfig entry Although CONFIG_UNMAP_KERNEL_AT_EL0 does make KASLR more robust, it's actually more useful as a mitigation against speculation attacks that can leak arbitrary kernel data to userspace through speculation. Reword the Kconfig help message to reflect this, and make the option depend on EXPERT so that it is on by default for the majority of users. Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 161fced75e7d..5702e7d0f5e0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -734,15 +734,14 @@ config FORCE_MAX_ZONEORDER 4M allocations matching the default size used by generic code. config UNMAP_KERNEL_AT_EL0 - bool "Unmap kernel when running in userspace (aka \"KAISER\")" + bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT default y help - Some attacks against KASLR make use of the timing difference between - a permission fault which could arise from a page table entry that is - present in the TLB, and a translation fault which always requires a - page table walk. This option defends against these attacks by unmapping - the kernel whilst running in userspace, therefore forcing translation - faults for all of kernel space. + Speculation attacks against some high-performance processors can + be used to bypass MMU permission checks and leak kernel data to + userspace. This can be defended against by unmapping the kernel + when running in userspace, mapping it back in on exception entry + via a trampoline page in the vector table. If unsure, say Y. -- GitLab From b54d99aa12b4d02a66bf976fb85b6f0ed9a5a485 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Mon, 8 Aug 2016 16:20:15 -0700 Subject: [PATCH 0806/1398] ANDROID: usb: f_fs: Prevent gadget unbind if it is already unbound Upon usb composition switch there is possibility of ep0 file release happening after gadget driver bind. In case of composition switch from adb to a non-adb composition gadget will never gets bound again resulting into failure of usb device enumeration. Fix this issue by checking FFS_FL_BOUND flag and avoid extra gadget driver unbind if it is already done as part of composition switch. Change-Id: I1638001ff4a94f08224b188aa42425f3d732fa2b Signed-off-by: Hemant Kumar --- drivers/usb/gadget/function/f_fs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 7b107e43b1c4..d90bf57ba30e 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3725,7 +3725,8 @@ static void ffs_closed(struct ffs_data *ffs) ci = opts->func_inst.group.cg_item.ci_parent->ci_parent; ffs_dev_unlock(); - unregister_gadget_item(ci); + if (test_bit(FFS_FL_BOUND, &ffs->flags)) + unregister_gadget_item(ci); return; done: ffs_dev_unlock(); -- GitLab From b328e524c85af8e72a34ae8fbef9bdc363d6f84a Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Mon, 8 Jan 2018 12:22:41 -0800 Subject: [PATCH 0807/1398] ANDROID: arm64: kaslr: fixup Falkor workaround for 4.9 android-4.9 port of upstream commit 6c27c4082f4f70b9f41df4d0adf51128b40351df "arm64: kaslr: Put kernel vectors address in separate data page" failed to fixup code that does not omcpile when CONFIG_RANDOMIZE_BASE=y. Port fixup for earlier kernels from ghackmann@google.com to android-4.9: - replace ARM64_WORKAROUND_QCOM_FALKOR_E1003 alternative with compile-time CONFIG_ARCH_MSM8996 check] Change-Id: Idc4bb255db830029093ea0d948a9e5066e2c13f8 Signed-off-by: Greg Hackmann Signed-off-by: Todd Poynor --- arch/arm64/kernel/entry.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c1ff0999045e..3448a6250ac2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -954,7 +954,9 @@ __ni_sys_trace: tramp_map_kernel x30 #ifdef CONFIG_RANDOMIZE_BASE adr x30, tramp_vectors + PAGE_SIZE -alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 +#ifndef CONFIG_ARCH_MSM8996 + isb +#endif ldr x30, [x30] #else ldr x30, =vectors -- GitLab From 14dbf60f44f6cd44aa851287e006a0489e7f42c0 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 8 Jan 2018 13:57:36 -0800 Subject: [PATCH 0808/1398] ANDROID: sdcardfs: Fix missing break on default_normal Signed-off-by: Daniel Rosenberg Bug: 64672411 Change-Id: I98796df95dc9846adb77a11f49a1a254fb1618b1 --- fs/sdcardfs/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 3d1023a7ff7c..d5239b21cb8a 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -127,6 +127,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, break; case Opt_default_normal: vfsopts->default_normal = true; + break; /* unknown option */ default: if (!silent) -- GitLab From 31f9a0a478a7427273bc3e2b4fff84b49d99b9fe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 1 Feb 2017 11:48:58 +0000 Subject: [PATCH 0809/1398] BACKPORT: arm64: Add CNTVCT_EL0 trap handler Since people seem to make a point in breaking the userspace visible counter, we have no choice but to trap the access. Add the required handler. Acked-by: Thomas Gleixner Acked-by: Mark Rutland Signed-off-by: Marc Zyngier (cherry picked from commit 6126ce0588eb5a0752d5c8b5796a7fca324fd887) Change-Id: I05700a533ea59e864e7a072605e3000e29526db3 Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/esr.h | 2 ++ arch/arm64/kernel/traps.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d14c478976d0..ad42e79a5d4d 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -175,6 +175,8 @@ #define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ ESR_ELx_SYS64_ISS_DIR_READ) +#define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index dc1c9fc2d9a2..b043f865d4c8 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -495,6 +495,15 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) regs->pc += 4; } +static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + + if (rt != 31) + regs->regs[rt] = arch_counter_get_cntvct(); + regs->pc += 4; +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -513,6 +522,12 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_CTR_READ, .handler = ctr_read_handler, }, + { + /* Trap read access to CNTVCT_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCT, + .handler = cntvct_read_handler, + }, {}, }; -- GitLab From e4f4f43c20b34121c9022e761d689a62eef054cf Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 24 Apr 2017 09:04:03 +0100 Subject: [PATCH 0810/1398] BACKPORT: arm64: Add CNTFRQ_EL0 trap handler We now trap accesses to CNTVCT_EL0 when the counter is broken enough to require the kernel to mediate the access. But it turns out that some existing userspace (such as OpenMPI) do probe for the counter frequency, leading to an UNDEF exception as CNTVCT_EL0 and CNTFRQ_EL0 share the same control bit. The fix is to handle the exception the same way we do for CNTVCT_EL0. Fixes: a86bd139f2ae ("arm64: arch_timer: Enable CNTVCT_EL0 trap if workaround is enabled") Reported-by: Hanjun Guo Tested-by: Hanjun Guo Reviewed-by: Hanjun Guo Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit 9842119a238bfb92cbab63258dabb54f0e7b111b) Change-Id: I40401111d1e2ba52a9512536ff20c68dcef26c54 Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/esr.h | 4 ++++ arch/arm64/kernel/traps.c | 15 +++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index ad42e79a5d4d..85997c0e5443 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -177,6 +177,10 @@ #define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \ ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b043f865d4c8..f4aad0176fa3 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -504,6 +504,15 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) regs->pc += 4; } +static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + + if (rt != 31) + regs->regs[rt] = read_sysreg(cntfrq_el0); + regs->pc += 4; +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -528,6 +537,12 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCT, .handler = cntvct_read_handler, }, + { + /* Trap read access to CNTFRQ_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, + .handler = cntfrq_read_handler, + }, {}, }; -- GitLab From e16a20b1b9121c93a9c21b272a81672aad4e100b Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Wed, 4 Oct 2017 09:31:34 -0700 Subject: [PATCH 0811/1398] arm64: issue isb when trapping CNTVCT_EL0 access Change-Id: I6005a6e944494257bfc2243fde2f7a09c3fd76c6 Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f4aad0176fa3..5fd787ad3777 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -499,6 +500,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) { int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + isb(); if (rt != 31) regs->regs[rt] = arch_counter_get_cntvct(); regs->pc += 4; -- GitLab From 8910fa508811788010712e1cc9dd3de499b3406b Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Tue, 19 Sep 2017 10:55:17 -0700 Subject: [PATCH 0812/1398] clocksource: arch_timer: make virtual counter access configurable Change-Id: Ibdb1fd768b748002b90bfc165612c12c8311f8a2 Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/Kconfig | 8 ++++++++ drivers/clocksource/arm_arch_timer.c | 5 ++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index e2c6e43cf8ca..0bed22ff57a8 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -305,6 +305,14 @@ config ARM_ARCH_TIMER_EVTSTREAM This must be disabled for hardware validation purposes to detect any hardware anomalies of missing events. +config ARM_ARCH_TIMER_VCT_ACCESS + bool "Support for ARM architected timer virtual counter access in userspace" + default !ARM64 + depends on ARM_ARCH_TIMER + help + This option enables support for reading the ARM architected timer's + virtual counter in userspace. + config FSL_ERRATUM_A008585 bool "Workaround for Freescale/NXP Erratum A-008585" default y diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a2503db7e533..e3bc592b1055 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -449,7 +449,10 @@ static void arch_counter_set_user_access(void) | ARCH_TIMER_USR_PCT_ACCESS_EN); /* Enable user access to the virtual counter */ - cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; + if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_VCT_ACCESS)) + cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; + else + cntkctl &= ~ARCH_TIMER_USR_VCT_ACCESS_EN; arch_timer_set_cntkctl(cntkctl); } -- GitLab From 790080ce0e3288c2d289d0771aa63814949c6da5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 4 Jan 2018 16:17:49 -0800 Subject: [PATCH 0813/1398] kernel/acct.c: fix the acct->needcheck check in check_free_space() commit 4d9570158b6260f449e317a5f9ed030c2504a615 upstream. As Tsukada explains, the time_is_before_jiffies(acct->needcheck) check is very wrong, we need time_is_after_jiffies() to make sys_acct() work. Ignoring the overflows, the code should "goto out" if needcheck > jiffies, while currently it checks "needcheck < jiffies" and thus in the likely case check_free_space() does nothing until jiffies overflow. In particular this means that sys_acct() is simply broken, acct_on() sets acct->needcheck = jiffies and expects that check_free_space() should set acct->active = 1 after the free-space check, but this won't happen if jiffies increments in between. This was broken by commit 32dc73086015 ("get rid of timer in kern/acct.c") in 2011, then another (correct) commit 795a2f22a8ea ("acct() should honour the limits from the very beginning") made the problem more visible. Link: http://lkml.kernel.org/r/20171213133940.GA6554@redhat.com Fixes: 32dc73086015 ("get rid of timer in kern/acct.c") Reported-by: TSUKADA Koutaro Suggested-by: TSUKADA Koutaro Signed-off-by: Oleg Nesterov Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/acct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/acct.c b/kernel/acct.c index 74963d192c5d..37f1dc696fbd 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -99,7 +99,7 @@ static int check_free_space(struct bsd_acct_struct *acct) { struct kstatfs sbuf; - if (time_is_before_jiffies(acct->needcheck)) + if (time_is_after_jiffies(acct->needcheck)) goto out; /* May block */ -- GitLab From f6db86f31b47a630134689fe1e425b78e6626c22 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 19 Dec 2017 19:09:07 +0100 Subject: [PATCH 0814/1398] crypto: n2 - cure use after free commit 203f45003a3d03eea8fa28d74cfc74c354416fdb upstream. queue_cache_init is first called for the Control Word Queue (n2_crypto_probe). At that time, queue_cache[0] is NULL and a new kmem_cache will be allocated. If the subsequent n2_register_algs call fails, the kmem_cache will be released in queue_cache_destroy, but queue_cache_init[0] is not set back to NULL. So when the Module Arithmetic Unit gets probed next (n2_mau_probe), queue_cache_init will not allocate a kmem_cache again, but leave it as its bogus value, causing a BUG() to trigger when queue_cache[0] is eventually passed to kmem_cache_zalloc: n2_crypto: Found N2CP at /virtual-devices@100/n2cp@7 n2_crypto: Registered NCS HVAPI version 2.0 called queue_cache_init n2_crypto: md5 alg registration failed n2cp f028687c: /virtual-devices@100/n2cp@7: Unable to register algorithms. called queue_cache_destroy n2cp: probe of f028687c failed with error -22 n2_crypto: Found NCP at /virtual-devices@100/ncp@6 n2_crypto: Registered NCS HVAPI version 2.0 called queue_cache_init kernel BUG at mm/slab.c:2993! Call Trace: [0000000000604488] kmem_cache_alloc+0x1a8/0x1e0 (inlined) kmem_cache_zalloc (inlined) new_queue (inlined) spu_queue_setup (inlined) handle_exec_unit [0000000010c61eb4] spu_mdesc_scan+0x1f4/0x460 [n2_crypto] [0000000010c62b80] n2_mau_probe+0x100/0x220 [n2_crypto] [000000000084b174] platform_drv_probe+0x34/0xc0 Signed-off-by: Jan Engelhardt Acked-by: David S. Miller Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/n2_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index c5aac25a5738..b365ad78ac27 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1620,6 +1620,7 @@ static int queue_cache_init(void) CWQ_ENTRY_SIZE, 0, NULL); if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) { kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); + queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL; return -ENOMEM; } return 0; @@ -1629,6 +1630,8 @@ static void queue_cache_destroy(void) { kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]); + queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL; + queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL; } static int spu_queue_register(struct spu_queue *p, unsigned long q_type) -- GitLab From 868f50b95dbef609e73b48acd89c8631dfa0215d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 11 Dec 2017 12:15:17 -0800 Subject: [PATCH 0815/1398] crypto: chacha20poly1305 - validate the digest size commit e57121d08c38dabec15cf3e1e2ad46721af30cae upstream. If the rfc7539 template was instantiated with a hash algorithm with digest size larger than 16 bytes (POLY1305_DIGEST_SIZE), then the digest overran the 'tag' buffer in 'struct chachapoly_req_ctx', corrupting the subsequent memory, including 'cryptlen'. This caused a crash during crypto_skcipher_decrypt(). Fix it by, when instantiating the template, requiring that the underlying hash algorithm has the digest size expected for Poly1305. Reproducer: #include #include #include int main() { int algfd, reqfd; struct sockaddr_alg addr = { .salg_type = "aead", .salg_name = "rfc7539(chacha20,sha256)", }; unsigned char buf[32] = { 0 }; algfd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(algfd, (void *)&addr, sizeof(addr)); setsockopt(algfd, SOL_ALG, ALG_SET_KEY, buf, sizeof(buf)); reqfd = accept(algfd, 0, 0); write(reqfd, buf, 16); read(reqfd, buf, 16); } Reported-by: syzbot Fixes: 71ebc4d1b27d ("crypto: chacha20poly1305 - Add a ChaCha20-Poly1305 AEAD construction, RFC7539") Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/chacha20poly1305.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index e899ef51dc8e..cb1c3a3287b0 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -610,6 +610,11 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, algt->mask)); if (IS_ERR(poly)) return PTR_ERR(poly); + poly_hash = __crypto_hash_alg_common(poly); + + err = -EINVAL; + if (poly_hash->digestsize != POLY1305_DIGEST_SIZE) + goto out_put_poly; err = -ENOMEM; inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); @@ -618,7 +623,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, ctx = aead_instance_ctx(inst); ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize; - poly_hash = __crypto_hash_alg_common(poly); err = crypto_init_ahash_spawn(&ctx->poly, poly_hash, aead_crypto_instance(inst)); if (err) -- GitLab From c195a4c0230d7a1dafb53cdbd5eed4dea4493504 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 20 Dec 2017 14:28:25 -0800 Subject: [PATCH 0816/1398] crypto: pcrypt - fix freeing pcrypt instances commit d76c68109f37cb85b243a1cf0f40313afd2bae68 upstream. pcrypt is using the old way of freeing instances, where the ->free() method specified in the 'struct crypto_template' is passed a pointer to the 'struct crypto_instance'. But the crypto_instance is being kfree()'d directly, which is incorrect because the memory was actually allocated as an aead_instance, which contains the crypto_instance at a nonzero offset. Thus, the wrong pointer was being kfree()'d. Fix it by switching to the new way to free aead_instance's where the ->free() method is specified in the aead_instance itself. Reported-by: syzbot Fixes: 0496f56065e0 ("crypto: pcrypt - Add support for new AEAD interface") Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/pcrypt.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index ee9cfb99fe25..f8ec3d4ba4a8 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -254,6 +254,14 @@ static void pcrypt_aead_exit_tfm(struct crypto_aead *tfm) crypto_free_aead(ctx->child); } +static void pcrypt_free(struct aead_instance *inst) +{ + struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst); + + crypto_drop_aead(&ctx->spawn); + kfree(inst); +} + static int pcrypt_init_instance(struct crypto_instance *inst, struct crypto_alg *alg) { @@ -319,6 +327,8 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.encrypt = pcrypt_aead_encrypt; inst->alg.decrypt = pcrypt_aead_decrypt; + inst->free = pcrypt_free; + err = aead_register_instance(tmpl, inst); if (err) goto out_drop_aead; @@ -349,14 +359,6 @@ static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb) return -EINVAL; } -static void pcrypt_free(struct crypto_instance *inst) -{ - struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst); - - crypto_drop_aead(&ctx->spawn); - kfree(inst); -} - static int pcrypt_cpumask_change_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -469,7 +471,6 @@ static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt) static struct crypto_template pcrypt_tmpl = { .name = "pcrypt", .create = pcrypt_create, - .free = pcrypt_free, .module = THIS_MODULE, }; -- GitLab From 34fa2eede095c84cbf7f636b0c21f8dacf1686aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Br=C3=BCns?= Date: Mon, 27 Nov 2017 20:05:34 +0100 Subject: [PATCH 0817/1398] sunxi-rsb: Include OF based modalias in device uevent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e2bf801ecd4e62222a46d1ba9e57e710171d29c1 upstream. Include the OF-based modalias in the uevent sent when registering devices on the sunxi RSB bus, so that user space has a chance to autoload the kernel module for the device. Fixes a regression caused by commit 3f241bfa60bd ("arm64: allwinner: a64: pine64: Use dcdc1 regulator for mmc0"). When the axp20x-rsb module for the AXP803 PMIC is built as a module, it is not loaded and the system ends up with an disfunctional MMC controller. Fixes: d787dcdb9c8f ("bus: sunxi-rsb: Add driver for Allwinner Reduced Serial Bus") Acked-by: Chen-Yu Tsai Signed-off-by: Stefan Brüns Signed-off-by: Maxime Ripard Signed-off-by: Greg Kroah-Hartman --- drivers/bus/sunxi-rsb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 795c9d9c96a6..2051d926e303 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -178,6 +178,7 @@ static struct bus_type sunxi_rsb_bus = { .match = sunxi_rsb_device_match, .probe = sunxi_rsb_device_probe, .remove = sunxi_rsb_device_remove, + .uevent = of_device_uevent_modalias, }; static void sunxi_rsb_dev_release(struct device *dev) -- GitLab From 2b9b2002e05d1aa7d8c930d09d4342a6abacd2dd Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 10:02:19 +0000 Subject: [PATCH 0818/1398] fscache: Fix the default for fscache_maybe_release_page() commit 98801506552593c9b8ac11021b0cdad12cab4f6b upstream. Fix the default for fscache_maybe_release_page() for when the cookie isn't valid or the page isn't cached. It mustn't return false as that indicates the page cannot yet be freed. The problem with the default is that if, say, there's no cache, but a network filesystem's pages are using up almost all the available memory, a system can OOM because the filesystem ->releasepage() op will not allow them to be released as fscache_maybe_release_page() incorrectly prevents it. This can be tested by writing a sequence of 512MiB files to an AFS mount. It does not affect NFS or CIFS because both of those wrap the call in a check of PG_fscache and it shouldn't bother Ceph as that only has PG_private set whilst writeback is in progress. This might be an issue for 9P, however. Note that the pages aren't entirely stuck. Removing a file or unmounting will clear things because that uses ->invalidatepage() instead. Fixes: 201a15428bd5 ("FS-Cache: Handle pages pending storage that get evicted under OOM conditions") Reported-by: Marc Dionne Signed-off-by: David Howells Reviewed-by: Jeff Layton Acked-by: Al Viro Tested-by: Marc Dionne Signed-off-by: Greg Kroah-Hartman --- include/linux/fscache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 115bb81912cc..94a8aae8f9e2 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -764,7 +764,7 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie, { if (fscache_cookie_valid(cookie) && PageFsCache(page)) return __fscache_maybe_release_page(cookie, page, gfp); - return false; + return true; } /** -- GitLab From 3a381abc5b703c0f10c2792e97ec805674e9197e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 Nov 2016 12:30:37 -0700 Subject: [PATCH 0819/1398] nbd: fix use-after-free of rq/bio in the xmit path commit 429a787be6793554ee02aacc7e1f11ebcecc4453 upstream. For writes, we can get a completion in while we're still iterating the request and bio chain. If that happens, we're reading freed memory and we can crash. Break out after the last segment and avoid having the iterator read freed memory. Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 7d506cb73e54..4d30da269060 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -272,6 +272,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd) int result, flags; struct nbd_request request; unsigned long size = blk_rq_bytes(req); + struct bio *bio; u32 type; if (req->cmd_type == REQ_TYPE_DRV_PRIV) @@ -305,16 +306,20 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd) return -EIO; } - if (type == NBD_CMD_WRITE) { - struct req_iterator iter; + if (type != NBD_CMD_WRITE) + return 0; + + flags = 0; + bio = req->bio; + while (bio) { + struct bio *next = bio->bi_next; + struct bvec_iter iter; struct bio_vec bvec; - /* - * we are really probing at internals to determine - * whether to set MSG_MORE or not... - */ - rq_for_each_segment(bvec, req, iter) { - flags = 0; - if (!rq_iter_last(bvec, iter)) + + bio_for_each_segment(bvec, bio, iter) { + bool is_last = !next && bio_iter_last(bvec, iter); + + if (is_last) flags = MSG_MORE; dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", cmd, bvec.bv_len); @@ -325,7 +330,16 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd) result); return -EIO; } + /* + * The completion might already have come in, + * so break for the last one instead of letting + * the iterator do it. This prevents use-after-free + * of the bio. + */ + if (is_last) + break; } + bio = next; } return 0; } -- GitLab From 79258d9834803518a80b0ed0603c790638f0478b Mon Sep 17 00:00:00 2001 From: Thiago Rafael Becker Date: Thu, 14 Dec 2017 15:33:12 -0800 Subject: [PATCH 0820/1398] kernel: make groups_sort calling a responsibility group_info allocators commit bdcf0a423ea1c40bbb40e7ee483b50fc8aa3d758 upstream. In testing, we found that nfsd threads may call set_groups in parallel for the same entry cached in auth.unix.gid, racing in the call of groups_sort, corrupting the groups for that entry and leading to permission denials for the client. This patch: - Make groups_sort globally visible. - Move the call to groups_sort to the modifiers of group_info - Remove the call to groups_sort from set_groups Link: http://lkml.kernel.org/r/20171211151420.18655-1-thiago.becker@gmail.com Signed-off-by: Thiago Rafael Becker Reviewed-by: Matthew Wilcox Reviewed-by: NeilBrown Acked-by: "J. Bruce Fields" Cc: Al Viro Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/compat_linux.c | 1 + fs/nfsd/auth.c | 3 +++ include/linux/cred.h | 1 + kernel/groups.c | 5 +++-- kernel/uid16.c | 1 + net/sunrpc/auth_gss/gss_rpc_xdr.c | 1 + net/sunrpc/auth_gss/svcauth_gss.c | 1 + net/sunrpc/svcauth_unix.c | 2 ++ 8 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 0f9cd90c11af..f06a9a0063f1 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis return retval; } + groups_sort(group_info); retval = set_current_groups(group_info); put_group_info(group_info); diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 62469c60be23..75f942ae5176 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -59,6 +59,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) gi->gid[i] = exp->ex_anon_gid; else gi->gid[i] = rqgi->gid[i]; + + /* Each thread allocates its own gi, no race */ + groups_sort(gi); } } else { gi = get_group_info(rqgi); diff --git a/include/linux/cred.h b/include/linux/cred.h index f0e70a1bb3ac..cf1a5d0c4eb4 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -82,6 +82,7 @@ extern int set_current_groups(struct group_info *); extern void set_groups(struct cred *, struct group_info *); extern int groups_search(const struct group_info *, kgid_t); extern bool may_setgroups(void); +extern void groups_sort(struct group_info *); /* * The security context of a task diff --git a/kernel/groups.c b/kernel/groups.c index 2fcadd66a8fd..94bde5210e3d 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -77,7 +77,7 @@ static int groups_from_user(struct group_info *group_info, } /* a simple Shell sort */ -static void groups_sort(struct group_info *group_info) +void groups_sort(struct group_info *group_info) { int base, max, stride; int gidsetsize = group_info->ngroups; @@ -103,6 +103,7 @@ static void groups_sort(struct group_info *group_info) stride /= 3; } } +EXPORT_SYMBOL(groups_sort); /* a simple bsearch */ int groups_search(const struct group_info *group_info, kgid_t grp) @@ -134,7 +135,6 @@ int groups_search(const struct group_info *group_info, kgid_t grp) void set_groups(struct cred *new, struct group_info *group_info) { put_group_info(new->group_info); - groups_sort(group_info); get_group_info(group_info); new->group_info = group_info; } @@ -218,6 +218,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) return retval; } + groups_sort(group_info); retval = set_current_groups(group_info); put_group_info(group_info); diff --git a/kernel/uid16.c b/kernel/uid16.c index cc40793464e3..dcffcce9d75e 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -190,6 +190,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) return retval; } + groups_sort(group_info); retval = set_current_groups(group_info); put_group_info(group_info); diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 25d9a9cf7b66..624c322af3ab 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr, goto out_free_groups; creds->cr_group_info->gid[i] = kgid; } + groups_sort(creds->cr_group_info); return 0; out_free_groups: diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 153082598522..6a08bc451247 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail *cd, goto out; rsci.cred.cr_group_info->gid[i] = kgid; } + groups_sort(rsci.cred.cr_group_info); /* mech name */ len = qword_get(&mesg, buf, mlen); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 64af4f034de6..738a243c68a2 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_detail *cd, ug.gi->gid[i] = kgid; } + groups_sort(ug.gi); ugp = unix_gid_lookup(cd, uid); if (ugp) { struct cache_head *ch; @@ -819,6 +820,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv)); cred->cr_group_info->gid[i] = kgid; } + groups_sort(cred->cr_group_info); if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { *authp = rpc_autherr_badverf; return SVC_DENIED; -- GitLab From 1453b3ac6cf8c62abb5a50680c6626d460c63044 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 17 Nov 2017 15:30:01 -0800 Subject: [PATCH 0821/1398] kernel/signal.c: protect the traced SIGNAL_UNKILLABLE tasks from SIGKILL commit 628c1bcba204052d19b686b5bac149a644cdb72e upstream. The comment in sig_ignored() says "Tracers may want to know about even ignored signals" but SIGKILL can not be reported to debugger and it is just wrong to return 0 in this case: SIGKILL should only kill the SIGNAL_UNKILLABLE task if it comes from the parent ns. Change sig_ignored() to ignore ->ptrace if sig == SIGKILL and rely on sig_task_ignored(). SISGTOP coming from within the namespace is not really right too but at least debugger can intercept it, and we can't drop it here because this will break "gdb -p 1": ptrace_attach() won't work. Perhaps we will add another ->ptrace check later, we will see. Link: http://lkml.kernel.org/r/20171103184206.GB21036@redhat.com Signed-off-by: Oleg Nesterov Tested-by: Kyle Huey Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index e48668c3c972..107ce6a4832d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -88,13 +88,15 @@ static int sig_ignored(struct task_struct *t, int sig, bool force) if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) return 0; - if (!sig_task_ignored(t, sig, force)) - return 0; - /* - * Tracers may want to know about even ignored signals. + * Tracers may want to know about even ignored signal unless it + * is SIGKILL which can't be reported anyway but can be ignored + * by SIGNAL_UNKILLABLE task. */ - return !t->ptrace; + if (t->ptrace && sig != SIGKILL) + return 0; + + return sig_task_ignored(t, sig, force); } /* -- GitLab From 794ac8ef9b06bd51186f6e54b763ecf4f94d7171 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 17 Nov 2017 15:30:04 -0800 Subject: [PATCH 0822/1398] kernel/signal.c: protect the SIGNAL_UNKILLABLE tasks from !sig_kernel_only() signals commit ac25385089f673560867eb5179228a44ade0cfc1 upstream. Change sig_task_ignored() to drop the SIG_DFL && !sig_kernel_only() signals even if force == T. This simplifies the next change and this matches the same check in get_signal() which will drop these signals anyway. Link: http://lkml.kernel.org/r/20171103184227.GC21036@redhat.com Signed-off-by: Oleg Nesterov Tested-by: Kyle Huey Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index 107ce6a4832d..23cd177eb911 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -72,7 +72,7 @@ static int sig_task_ignored(struct task_struct *t, int sig, bool force) handler = sig_handler(t, sig); if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && - handler == SIG_DFL && !force) + handler == SIG_DFL && !(force && sig_kernel_only(sig))) return 1; return sig_handler_ignored(handler, sig); -- GitLab From 4d53eb494950de4dc898a014600b56254a913a6d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 17 Nov 2017 15:30:08 -0800 Subject: [PATCH 0823/1398] kernel/signal.c: remove the no longer needed SIGNAL_UNKILLABLE check in complete_signal() commit 426915796ccaf9c2bd9bb06dc5702225957bc2e5 upstream. complete_signal() checks SIGNAL_UNKILLABLE before it starts to destroy the thread group, today this is wrong in many ways. If nothing else, fatal_signal_pending() should always imply that the whole thread group (except ->group_exit_task if it is not NULL) is killed, this check breaks the rule. After the previous changes we can rely on sig_task_ignored(); sig_fatal(sig) && SIGNAL_UNKILLABLE can only be true if we actually want to kill this task and sig == SIGKILL OR it is traced and debugger can intercept the signal. This should hopefully fix the problem reported by Dmitry. This test-case static int init(void *arg) { for (;;) pause(); } int main(void) { char stack[16 * 1024]; for (;;) { int pid = clone(init, stack + sizeof(stack)/2, CLONE_NEWPID | SIGCHLD, NULL); assert(pid > 0); assert(ptrace(PTRACE_ATTACH, pid, 0, 0) == 0); assert(waitpid(-1, NULL, WSTOPPED) == pid); assert(ptrace(PTRACE_DETACH, pid, 0, SIGSTOP) == 0); assert(syscall(__NR_tkill, pid, SIGKILL) == 0); assert(pid == wait(NULL)); } } triggers the WARN_ON_ONCE(!(task->jobctl & JOBCTL_STOP_PENDING)) in task_participate_group_stop(). do_signal_stop()->signal_group_exit() checks SIGNAL_GROUP_EXIT and return false, but task_set_jobctl_pending() checks fatal_signal_pending() and does not set JOBCTL_STOP_PENDING. And his should fix the minor security problem reported by Kyle, SECCOMP_RET_TRACE can miss fatal_signal_pending() the same way if the task is the root of a pid namespace. Link: http://lkml.kernel.org/r/20171103184246.GD21036@redhat.com Signed-off-by: Oleg Nesterov Reported-by: Dmitry Vyukov Reported-by: Kyle Huey Reviewed-by: Kees Cook Tested-by: Kyle Huey Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 23cd177eb911..7ebe236a5364 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -919,9 +919,9 @@ static void complete_signal(int sig, struct task_struct *p, int group) * then start taking the whole group down immediately. */ if (sig_fatal(p, sig) && - !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && + !(signal->flags & SIGNAL_GROUP_EXIT) && !sigismember(&t->real_blocked, sig) && - (sig == SIGKILL || !t->ptrace)) { + (sig == SIGKILL || !p->ptrace)) { /* * This signal will be fatal to the whole group. */ -- GitLab From 03975faee7cee814773ee98b7b499e71a4d157df Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 14 Dec 2017 11:03:01 +0000 Subject: [PATCH 0824/1398] iommu/arm-smmu-v3: Don't free page table ops twice commit 57d72e159b60456c8bb281736c02ddd3164037aa upstream. Kasan reports a double free when finalise_stage_fn fails: the io_pgtable ops are freed by arm_smmu_domain_finalise and then again by arm_smmu_domain_free. Prevent this by leaving pgtbl_ops empty on failure. Fixes: 48ec83bcbcf5 ("iommu/arm-smmu: Add initial driver support for ARM SMMUv3 devices") Reviewed-by: Robin Murphy Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm-smmu-v3.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index d3d975ae24b7..b313e09c7e47 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1547,13 +1547,15 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; domain->geometry.aperture_end = (1UL << ias) - 1; domain->geometry.force_aperture = true; - smmu_domain->pgtbl_ops = pgtbl_ops; ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); - if (ret < 0) + if (ret < 0) { free_io_pgtable_ops(pgtbl_ops); + return ret; + } - return ret; + smmu_domain->pgtbl_ops = pgtbl_ops; + return 0; } static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) -- GitLab From e6a897a684c2434d98828887a0afa344b7b2082b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jan 2018 12:33:14 +0000 Subject: [PATCH 0825/1398] iommu/arm-smmu-v3: Cope with duplicated Stream IDs commit 563b5cbe334e9503ab2b234e279d500fc4f76018 upstream. For PCI devices behind an aliasing PCIe-to-PCI/X bridge, the bridge alias to DevFn 0.0 on the subordinate bus may match the original RID of the device, resulting in the same SID being present in the device's fwspec twice. This causes trouble later in arm_smmu_write_strtab_ent() when we wind up visiting the STE a second time and find it already live. Avoid the issue by giving arm_smmu_install_ste_for_dev() the cleverness to skip over duplicates. It seems mildly counterintuitive compared to preventing the duplicates from existing in the first place, but since the DT and ACPI probe paths build their fwspecs differently, this is actually the cleanest and most self-contained way to deal with it. Fixes: 8f78515425da ("iommu/arm-smmu: Implement of_xlate() for SMMUv3") Reported-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Jayachandran C. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm-smmu-v3.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index b313e09c7e47..7f294f785ce6 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1582,7 +1582,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) { - int i; + int i, j; struct arm_smmu_master_data *master = fwspec->iommu_priv; struct arm_smmu_device *smmu = master->smmu; @@ -1590,6 +1590,13 @@ static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) u32 sid = fwspec->ids[i]; __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); + /* Bridged PCI devices may end up with duplicated IDs */ + for (j = 0; j < i; j++) + if (fwspec->ids[j] == sid) + break; + if (j < i) + continue; + arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste); } -- GitLab From cc1349fa9c22b5d330ac396240c8884d4f7b68ac Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 8 Dec 2017 08:26:58 -0800 Subject: [PATCH 0826/1398] ARC: uaccess: dont use "l" gcc inline asm constraint modifier commit 79435ac78d160e4c245544d457850a56f805ac0d upstream. This used to setup the LP_COUNT register automatically, but now has been removed. There was an earlier fix 3c7c7a2fc8811 which fixed instance in delay.h but somehow missed this one as gcc change had not made its way into production toolchains and was not pedantic as it is now ! Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/uaccess.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index 41faf17cd28d..0684fd2f42e8 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -673,6 +673,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) return 0; __asm__ __volatile__( + " mov lp_count, %5 \n" " lp 3f \n" "1: ldb.ab %3, [%2, 1] \n" " breq.d %3, 0, 3f \n" @@ -689,8 +690,8 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) " .word 1b, 4b \n" " .previous \n" : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) - : "g"(-EFAULT), "l"(count) - : "memory"); + : "g"(-EFAULT), "r"(count) + : "lp_count", "lp_start", "lp_end", "memory"); return res; } -- GitLab From 2b009d33f42797d6ba7ebd23729c34871e1ec7ec Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Sat, 25 Nov 2017 16:48:41 -0800 Subject: [PATCH 0827/1398] Input: elantech - add new icbody type 15 commit 10d900303f1c3a821eb0bef4e7b7ece16768fba4 upstream. The touchpad of Lenovo Thinkpad L480 reports it's version as 15. Signed-off-by: Aaron Ma Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elantech.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index cd834da5934a..59603a5728f7 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1609,7 +1609,7 @@ static int elantech_set_properties(struct elantech_data *etd) case 5: etd->hw_version = 3; break; - case 6 ... 14: + case 6 ... 15: etd->hw_version = 4; break; default: -- GitLab From dd43c465ba24d7f482bd50c0ceda50b1740b8dce Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 30 Nov 2017 16:46:40 -0600 Subject: [PATCH 0828/1398] x86/microcode/AMD: Add support for fam17h microcode loading commit f4e9b7af0cd58dd039a0fb2cd67d57cea4889abf upstream. The size for the Microcode Patch Block (MPB) for an AMD family 17h processor is 3200 bytes. Add a #define for fam17h so that it does not default to 2048 bytes and fail a microcode load/update. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20171130224640.15391.40247.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar Cc: Alice Ferrazzi Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/amd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 017bda12caae..b74bb29db6b9 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -592,6 +592,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, #define F14H_MPB_MAX_SIZE 1824 #define F15H_MPB_MAX_SIZE 4096 #define F16H_MPB_MAX_SIZE 3458 +#define F17H_MPB_MAX_SIZE 3200 switch (family) { case 0x14: @@ -603,6 +604,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, case 0x16: max_size = F16H_MPB_MAX_SIZE; break; + case 0x17: + max_size = F17H_MPB_MAX_SIZE; + break; default: max_size = F1XH_MPB_MAX_SIZE; break; -- GitLab From 14c06206b98f3d11ff250875f6046df8d825f13d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 2 Jan 2018 20:36:44 +0100 Subject: [PATCH 0829/1398] parisc: Fix alignment of pa_tlb_lock in assembly on 32-bit SMP kernel commit 88776c0e70be0290f8357019d844aae15edaa967 upstream. Qemu for PARISC reported on a 32bit SMP parisc kernel strange failures about "Not-handled unaligned insn 0x0e8011d6 and 0x0c2011c9." Those opcodes evaluate to the ldcw() assembly instruction which requires (on 32bit) an alignment of 16 bytes to ensure atomicity. As it turns out, qemu is correct and in our assembly code in entry.S and pacache.S we don't pay attention to the required alignment. This patch fixes the problem by aligning the lock offset in assembly code in the same manner as we do in our C-code. Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/ldcw.h | 2 ++ arch/parisc/kernel/entry.S | 13 +++++++++++-- arch/parisc/kernel/pacache.S | 9 +++++++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index 8be707e1b6c7..82dea145574e 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -11,6 +11,7 @@ for the semaphore. */ #define __PA_LDCW_ALIGNMENT 16 +#define __PA_LDCW_ALIGN_ORDER 4 #define __ldcw_align(a) ({ \ unsigned long __ret = (unsigned long) &(a)->lock[0]; \ __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ @@ -28,6 +29,7 @@ ldcd). */ #define __PA_LDCW_ALIGNMENT 4 +#define __PA_LDCW_ALIGN_ORDER 2 #define __ldcw_align(a) (&(a)->slock) #define __LDCW "ldcw,co" diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 4fcff2dcc9c3..e3d3e8e1d708 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,14 @@ #endif .import pa_tlb_lock,data + .macro load_pa_tlb_lock reg +#if __PA_LDCW_ALIGNMENT > 4 + load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg + depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg +#else + load32 PA(pa_tlb_lock), \reg +#endif + .endm /* space_to_prot macro creates a prot id from a space id */ @@ -457,7 +466,7 @@ .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP cmpib,COND(=),n 0,\spc,2f - load32 PA(pa_tlb_lock),\tmp + load_pa_tlb_lock \tmp 1: LDCW 0(\tmp),\tmp1 cmpib,COND(=) 0,\tmp1,1b nop @@ -480,7 +489,7 @@ /* Release pa_tlb_lock lock. */ .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP - load32 PA(pa_tlb_lock),\tmp + load_pa_tlb_lock \tmp tlb_unlock0 \spc,\tmp #endif .endm diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index adf7187f8951..2d40c4ff3f69 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -36,6 +36,7 @@ #include #include #include +#include #include .text @@ -333,8 +334,12 @@ ENDPROC_CFI(flush_data_cache_local) .macro tlb_lock la,flags,tmp #ifdef CONFIG_SMP - ldil L%pa_tlb_lock,%r1 - ldo R%pa_tlb_lock(%r1),\la +#if __PA_LDCW_ALIGNMENT > 4 + load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la + depi 0,31,__PA_LDCW_ALIGN_ORDER, \la +#else + load32 pa_tlb_lock, \la +#endif rsm PSW_SM_I,\flags 1: LDCW 0(\la),\tmp cmpib,<>,n 0,\tmp,3f -- GitLab From 91dfc41e753bb4e7d3948b02baca60ac40c4cbfd Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 5 Jan 2018 21:55:38 +0100 Subject: [PATCH 0830/1398] parisc: qemu idle sleep support commit 310d82784fb4d60c80569f5ca9f53a7f3bf1d477 upstream. Add qemu idle sleep support when running under qemu with SeaBIOS PDC firmware. Like the power architecture we use the "or" assembler instructions, which translate to nops on real hardware, to indicate that qemu shall idle sleep. Signed-off-by: Helge Deller Cc: Richard Henderson Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/process.c | 39 ++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 7593787ed4c3..c3a532abac03 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -180,6 +181,44 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r) return 1; } +/* + * Idle thread support + * + * Detect when running on QEMU with SeaBIOS PDC Firmware and let + * QEMU idle the host too. + */ + +int running_on_qemu __read_mostly; + +void __cpuidle arch_cpu_idle_dead(void) +{ + /* nop on real hardware, qemu will offline CPU. */ + asm volatile("or %%r31,%%r31,%%r31\n":::); +} + +void __cpuidle arch_cpu_idle(void) +{ + local_irq_enable(); + + /* nop on real hardware, qemu will idle sleep. */ + asm volatile("or %%r10,%%r10,%%r10\n":::); +} + +static int __init parisc_idle_init(void) +{ + const char *marker; + + /* check QEMU/SeaBIOS marker in PAGE0 */ + marker = (char *) &PAGE0->pad0; + running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0); + + if (!running_on_qemu) + cpu_idle_poll_ctrl(1); + + return 0; +} +arch_initcall(parisc_idle_init); + /* * Copy architecture-specific thread state */ -- GitLab From 47f3cea393ab5188d1a1409259e71099314dc530 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Jan 2018 22:19:04 +0100 Subject: [PATCH 0831/1398] x86/tlb: Drop the _GPL from the cpu_tlbstate export commit 1e5476815fd7f98b888e01a0f9522b63085f96c9 upstream. The recent changes for PTI touch cpu_tlbstate from various tlb_flush inlines. cpu_tlbstate is exported as GPL symbol, so this causes a regression when building out of tree drivers for certain graphics cards. Aside of that the export was wrong since it was introduced as it should have been EXPORT_PER_CPU_SYMBOL_GPL(). Use the correct PER_CPU export and drop the _GPL to restore the previous state which allows users to utilize the cards they payed for. As always I'm really thrilled to make this kind of change to support the #friends (or however the hot hashtag of today is spelled) from that closet sauce graphics corp. Fixes: 1e02ce4cccdc ("x86: Store a per-cpu shadow copy of CR4") Fixes: 6fd166aae78c ("x86/mm: Use/Fix PCID to optimize user/kernel switches") Reported-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Thomas Backlund Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 1e779bca4f3e..f92bdb9f4e46 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -768,7 +768,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { .state = 0, .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ }; -EXPORT_SYMBOL_GPL(cpu_tlbstate); +EXPORT_PER_CPU_SYMBOL(cpu_tlbstate); void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) { -- GitLab From beca4e2d994464f5a0f629c1ff91b98398725efa Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 4 Jan 2018 17:42:45 +0100 Subject: [PATCH 0832/1398] Map the vsyscall page with _PAGE_USER This needs to happen early in kaiser_pagetable_walk(), before the hierarchy is established so that _PAGE_USER permission can be really set. A proper fix would be to teach kaiser_pagetable_walk() to update those permissions but the vsyscall page is the only exception here so ... Signed-off-by: Borislav Petkov Acked-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vsyscall/vsyscall_64.c | 5 ++++ arch/x86/include/asm/vsyscall.h | 2 ++ arch/x86/mm/kaiser.c | 34 +++++++++++++++++++++++---- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 636c4b341f36..6bb7e92c6d50 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -66,6 +66,11 @@ static int __init vsyscall_setup(char *str) } early_param("vsyscall", vsyscall_setup); +bool vsyscall_enabled(void) +{ + return vsyscall_mode != NONE; +} + static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, const char *message) { diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index 6ba66ee79710..4865e10dbb55 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -12,12 +12,14 @@ extern void map_vsyscall(void); * Returns true if handled. */ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); +extern bool vsyscall_enabled(void); #else static inline void map_vsyscall(void) {} static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) { return false; } +static inline bool vsyscall_enabled(void) { return false; } #endif #endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index d8376b4ad9f0..8f8e5e03d083 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -19,6 +19,7 @@ #include #include #include +#include int kaiser_enabled __read_mostly = 1; EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */ @@ -110,12 +111,13 @@ static inline unsigned long get_pa_from_mapping(unsigned long vaddr) * * Returns a pointer to a PTE on success, or NULL on failure. */ -static pte_t *kaiser_pagetable_walk(unsigned long address) +static pte_t *kaiser_pagetable_walk(unsigned long address, bool user) { pmd_t *pmd; pud_t *pud; pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + unsigned long prot = _KERNPG_TABLE; if (pgd_none(*pgd)) { WARN_ONCE(1, "All shadow pgds should have been populated"); @@ -123,6 +125,17 @@ static pte_t *kaiser_pagetable_walk(unsigned long address) } BUILD_BUG_ON(pgd_large(*pgd) != 0); + if (user) { + /* + * The vsyscall page is the only page that will have + * _PAGE_USER set. Catch everything else. + */ + BUG_ON(address != VSYSCALL_ADDR); + + set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); + prot = _PAGE_TABLE; + } + pud = pud_offset(pgd, address); /* The shadow page tables do not use large mappings: */ if (pud_large(*pud)) { @@ -135,7 +148,7 @@ static pte_t *kaiser_pagetable_walk(unsigned long address) return NULL; spin_lock(&shadow_table_allocation_lock); if (pud_none(*pud)) { - set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); + set_pud(pud, __pud(prot | __pa(new_pmd_page))); __inc_zone_page_state(virt_to_page((void *) new_pmd_page), NR_KAISERTABLE); } else @@ -155,7 +168,7 @@ static pte_t *kaiser_pagetable_walk(unsigned long address) return NULL; spin_lock(&shadow_table_allocation_lock); if (pmd_none(*pmd)) { - set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); + set_pmd(pmd, __pmd(prot | __pa(new_pte_page))); __inc_zone_page_state(virt_to_page((void *) new_pte_page), NR_KAISERTABLE); } else @@ -191,7 +204,7 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size, ret = -EIO; break; } - pte = kaiser_pagetable_walk(address); + pte = kaiser_pagetable_walk(address, flags & _PAGE_USER); if (!pte) { ret = -ENOMEM; break; @@ -318,6 +331,19 @@ void __init kaiser_init(void) kaiser_init_all_pgds(); + /* + * Note that this sets _PAGE_USER and it needs to happen when the + * pagetable hierarchy gets created, i.e., early. Otherwise + * kaiser_pagetable_walk() will encounter initialized PTEs in the + * hierarchy and not set the proper permissions, leading to the + * pagefaults with page-protection violations when trying to read the + * vsyscall page. For example. + */ + if (vsyscall_enabled()) + kaiser_add_user_map_early((void *)VSYSCALL_ADDR, + PAGE_SIZE, + __PAGE_KERNEL_VSYSCALL); + for_each_possible_cpu(cpu) { void *percpu_vaddr = __per_cpu_user_mapped_start + per_cpu_offset(cpu); -- GitLab From 5e1f377fc8101a221ff762a4735cf076c336a4ab Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 18 Dec 2017 11:32:45 +0100 Subject: [PATCH 0833/1398] mtd: nand: pxa3xx: Fix READOOB implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fee4380f368e84ed216b62ccd2fbc4126f2bf40b upstream. In the current driver, OOB bytes are accessed in raw mode, and when a page access is done with NDCR_SPARE_EN set and NDCR_ECC_EN cleared, the driver must read the whole spare area (64 bytes in case of a 2k page, 16 bytes for a 512 page). The driver was only reading the free OOB bytes, which was leaving some unread data in the FIFO and was somehow leading to a timeout. We could patch the driver to read ->spare_size + ->ecc_size instead of just ->spare_size when READOOB is requested, but we'd better make in-band and OOB accesses consistent. Since the driver is always accessing in-band data in non-raw mode (with the ECC engine enabled), we should also access OOB data in this mode. That's particularly useful when using the BCH engine because in this mode the free OOB bytes are also ECC protected. Fixes: 43bcfd2bb24a ("mtd: nand: pxa3xx: Add driver-specific ECC BCH support") Reported-by: Sean Nyekjær Tested-by: Willy Tarreau Signed-off-by: Boris Brezillon Acked-by: Ezequiel Garcia Tested-by: Sean Nyekjaer Acked-by: Robert Jarzmik Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/pxa3xx_nand.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index b121bf4ed73a..3b8911cd3a19 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -950,6 +950,7 @@ static void prepare_start_command(struct pxa3xx_nand_info *info, int command) switch (command) { case NAND_CMD_READ0: + case NAND_CMD_READOOB: case NAND_CMD_PAGEPROG: info->use_ecc = 1; break; -- GitLab From 7bbc6ca4887794cc44b41412a35bdfbe0cbd1c50 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 10 Jan 2018 09:29:55 +0100 Subject: [PATCH 0834/1398] Linux 4.9.76 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index acbc1b032db2..2637f0ed0a07 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 75 +SUBLEVEL = 76 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 2e5f41268771224b7df22b01ee7fb72ecd7c5f21 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 May 2017 21:05:16 +0200 Subject: [PATCH 0835/1398] BACKPORT: tee: add ARM_SMCCC dependency For the moment, the tee subsystem only makes sense in combination with the op-tee driver that depends on ARM_SMCCC, so let's hide the subsystem from users that can't select that. Change-Id: I231f4366be51bf9b4bda827c2e61b2801a132097 Suggested-by: Linus Torvalds Signed-off-by: Arnd Bergmann (cherry picked from commit e84188852a7239d7a144af12f7e5dac8fa88600b) Signed-off-by: Victor Chong --- drivers/tee/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tee/Kconfig b/drivers/tee/Kconfig index 2330a4eb4e8b..a6df12d88f90 100644 --- a/drivers/tee/Kconfig +++ b/drivers/tee/Kconfig @@ -1,6 +1,7 @@ # Generic Trusted Execution Environment Configuration config TEE tristate "Trusted Execution Environment support" + depends on HAVE_ARM_SMCCC || COMPILE_TEST select DMA_SHARED_BUFFER select GENERIC_ALLOCATOR help -- GitLab From 19e81abf7ec57d12267349de643aeec6c005ace1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 12 Jul 2017 10:06:20 -0300 Subject: [PATCH 0836/1398] BACKPORT: tee.txt: standardize document format Each text file under Documentation follows a different format. Some doesn't even have titles! Change its representation to follow the adopted standard, using ReST markups for it to be parseable by Sphinx: - adjust identation of titles; - mark ascii artwork as a literal block; - adjust references. Change-Id: Ic11b6fbdb7240a65338e40529c161db8eb0277fb Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet (cherry picked from commit 4297739f2b5d4693d9b9f9e3dffeecf2ae9f8081) Signed-off-by: Victor Chong --- Documentation/tee.txt | 53 +++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/Documentation/tee.txt b/Documentation/tee.txt index 718599357596..56ea85ffebf2 100644 --- a/Documentation/tee.txt +++ b/Documentation/tee.txt @@ -1,4 +1,7 @@ +============= TEE subsystem +============= + This document describes the TEE subsystem in Linux. A TEE (Trusted Execution Environment) is a trusted OS running in some @@ -80,27 +83,27 @@ The GlobalPlatform TEE Client API [5] is implemented on top of the generic TEE API. Picture of the relationship between the different components in the -OP-TEE architecture. - - User space Kernel Secure world - ~~~~~~~~~~ ~~~~~~ ~~~~~~~~~~~~ - +--------+ +-------------+ - | Client | | Trusted | - +--------+ | Application | - /\ +-------------+ - || +----------+ /\ - || |tee- | || - || |supplicant| \/ - || +----------+ +-------------+ - \/ /\ | TEE Internal| - +-------+ || | API | - + TEE | || +--------+--------+ +-------------+ - | Client| || | TEE | OP-TEE | | OP-TEE | - | API | \/ | subsys | driver | | Trusted OS | - +-------+----------------+----+-------+----+-----------+-------------+ - | Generic TEE API | | OP-TEE MSG | - | IOCTL (TEE_IOC_*) | | SMCCC (OPTEE_SMC_CALL_*) | - +-----------------------------+ +------------------------------+ +OP-TEE architecture:: + + User space Kernel Secure world + ~~~~~~~~~~ ~~~~~~ ~~~~~~~~~~~~ + +--------+ +-------------+ + | Client | | Trusted | + +--------+ | Application | + /\ +-------------+ + || +----------+ /\ + || |tee- | || + || |supplicant| \/ + || +----------+ +-------------+ + \/ /\ | TEE Internal| + +-------+ || | API | + + TEE | || +--------+--------+ +-------------+ + | Client| || | TEE | OP-TEE | | OP-TEE | + | API | \/ | subsys | driver | | Trusted OS | + +-------+----------------+----+-------+----+-----------+-------------+ + | Generic TEE API | | OP-TEE MSG | + | IOCTL (TEE_IOC_*) | | SMCCC (OPTEE_SMC_CALL_*) | + +-----------------------------+ +------------------------------+ RPC (Remote Procedure Call) are requests from secure world to kernel driver or tee-supplicant. An RPC is identified by a special range of SMCCC return @@ -109,10 +112,16 @@ kernel are handled by the kernel driver. Other RPC messages will be forwarded to tee-supplicant without further involvement of the driver, except switching shared memory buffer representation. -References: +References +========== + [1] https://github.com/OP-TEE/optee_os + [2] http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html + [3] drivers/tee/optee/optee_smc.h + [4] drivers/tee/optee/optee_msg.h + [5] http://www.globalplatform.org/specificationsdevice.asp look for "TEE Client API Specification v1.0" and click download. -- GitLab From 14edc414ca990fccc447db30def2a5615949f43c Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Mon, 15 May 2017 11:09:28 +0200 Subject: [PATCH 0837/1398] BACKPORT: tee: optee: fix uninitialized symbol 'parg' Fixes the static checker warning in optee_release(). error: uninitialized symbol 'parg'. Change-Id: I7b183f3b128b3ec6f2154195901a20bd6cf0a838 Reported-by: Dan Carpenter Signed-off-by: Jens Wiklander (cherry picked from commit efb14036bd7f8914f721e1e82891d4ba617cc784) Signed-off-by: Victor Chong --- drivers/tee/optee/core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 58169e519422..857141e29e80 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -224,13 +224,14 @@ static void optee_release(struct tee_context *ctx) if (!IS_ERR(shm)) { arg = tee_shm_get_va(shm, 0); /* - * If va2pa fails for some reason, we can't call - * optee_close_session(), only free the memory. Secure OS - * will leak sessions and finally refuse more sessions, but - * we will at least let normal world reclaim its memory. + * If va2pa fails for some reason, we can't call into + * secure world, only free the memory. Secure OS will leak + * sessions and finally refuse more sessions, but we will + * at least let normal world reclaim its memory. */ if (!IS_ERR(arg)) - tee_shm_va2pa(shm, arg, &parg); + if (tee_shm_va2pa(shm, arg, &parg)) + arg = NULL; /* prevent usage of parg below */ } list_for_each_entry_safe(sess, sess_tmp, &ctxdata->sess_list, -- GitLab From e5b275ca255724c71369a07d8feaca4edee796f5 Mon Sep 17 00:00:00 2001 From: Jerome Forissier Date: Wed, 31 May 2017 13:21:05 +0200 Subject: [PATCH 0838/1398] BACKPORT: tee: add forward declaration for struct device tee_drv.h references struct device, but does not include device.h nor platform_device.h. Therefore, if tee_drv.h is included by some file that does not pull device.h nor platform_device.h beforehand, we have a compile warning. Fix this by adding a forward declaration. Change-Id: I91c83f323dda42907be3f1e0fa0e5585204a5e83 Signed-off-by: Jerome Forissier Signed-off-by: Jens Wiklander (cherry picked from commit 999616b8536cf3b9a1d0d74d5542ea009df482ff) Signed-off-by: Victor Chong --- include/linux/tee_drv.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 0f175b8f6456..cb889afe576b 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -28,6 +28,7 @@ #define TEE_SHM_MAPPED 0x1 /* Memory mapped by the kernel */ #define TEE_SHM_DMA_BUF 0x2 /* Memory with dma-buf handle */ +struct device; struct tee_device; struct tee_shm; struct tee_shm_pool; -- GitLab From 736d117a8873afc16c10f30da20010051dd0498c Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 1 Jul 2017 17:56:06 +0530 Subject: [PATCH 0839/1398] BACKPORT: tee: tee_shm: Constify dma_buf_ops structures. dma_buf_ops are not supposed to change at runtime. All functions working with dma_buf_ops provided by work with const dma_buf_ops. So mark the non-const structs as const. File size before: text data bss dec hex filename 2026 112 0 2138 85a drivers/tee/tee_shm.o File size After adding 'const': text data bss dec hex filename 2138 0 0 2138 85a drivers/tee/tee_shm.o Change-Id: I3f353e6c49c598d1f81b7e981b69542b9b87a807 Signed-off-by: Arvind Yadav Signed-off-by: Jens Wiklander (cherry picked from commit 53e3ca5cee24f5fafe4e9ff5fe4b230e1a1b85ed) Signed-off-by: Victor Chong --- drivers/tee/tee_shm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 0be1e3e93bee..37207ae2de7b 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -80,7 +80,7 @@ static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) size, vma->vm_page_prot); } -static struct dma_buf_ops tee_shm_dma_buf_ops = { +static const struct dma_buf_ops tee_shm_dma_buf_ops = { .map_dma_buf = tee_shm_op_map_dma_buf, .unmap_dma_buf = tee_shm_op_unmap_dma_buf, .release = tee_shm_op_release, -- GitLab From 7b22a54e30d19832ce86026f8054cb5bd85d21a0 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Thu, 29 Jun 2017 15:05:04 +0530 Subject: [PATCH 0840/1398] BACKPORT: tee: optee: add const to tee_driver_ops and tee_desc structures Add const to tee_desc structures as they are only passed as an argument to the function tee_device_alloc. This argument is of type const, so declare these structures as const too. Add const to tee_driver_ops structures as they are only stored in the ops field of a tee_desc structure. This field is of type const, so declare these structure types as const. Change-Id: I9ffb39b85321a45fff5dae71915c4d851f4cc7ac Signed-off-by: Bhumika Goyal Signed-off-by: Jens Wiklander (cherry picked from commit 96e72ddeec4546fda0e194298c2ee39e394a3ab7) Signed-off-by: Victor Chong --- drivers/tee/optee/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 857141e29e80..7952357df9c8 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -259,7 +259,7 @@ static void optee_release(struct tee_context *ctx) } } -static struct tee_driver_ops optee_ops = { +static const struct tee_driver_ops optee_ops = { .get_version = optee_get_version, .open = optee_open, .release = optee_release, @@ -269,13 +269,13 @@ static struct tee_driver_ops optee_ops = { .cancel_req = optee_cancel_req, }; -static struct tee_desc optee_desc = { +static const struct tee_desc optee_desc = { .name = DRIVER_NAME "-clnt", .ops = &optee_ops, .owner = THIS_MODULE, }; -static struct tee_driver_ops optee_supp_ops = { +static const struct tee_driver_ops optee_supp_ops = { .get_version = optee_get_version, .open = optee_open, .release = optee_release, @@ -283,7 +283,7 @@ static struct tee_driver_ops optee_supp_ops = { .supp_send = optee_supp_send, }; -static struct tee_desc optee_supp_desc = { +static const struct tee_desc optee_supp_desc = { .name = DRIVER_NAME "-supp", .ops = &optee_supp_ops, .owner = THIS_MODULE, -- GitLab From a75ed9217223f06afc569203fc8d76ea1968c7c8 Mon Sep 17 00:00:00 2001 From: tiger-yu99 Date: Sat, 6 May 2017 00:20:32 +0800 Subject: [PATCH 0841/1398] BACKPORT: tee: optee: interruptible RPC sleep Prior to this patch RPC sleep was uninterruptible since msleep() is uninterruptible. Change to use msleep_interruptible() instead. Change-Id: I34b7ff2fe1aaac1ca41ee595bd49dd451d99b9d7 Signed-off-by: Tiger Yu Reviewed-by: Joakim Bech Signed-off-by: Jerome Forissier Signed-off-by: Jens Wiklander (cherry picked from commit a9980e947ec97297e03d2332d6beff06f5131a98) Signed-off-by: Victor Chong --- drivers/tee/optee/rpc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c index 8814eca06021..9488ed1541bc 100644 --- a/drivers/tee/optee/rpc.c +++ b/drivers/tee/optee/rpc.c @@ -140,11 +140,8 @@ static void handle_rpc_func_cmd_wait(struct optee_msg_arg *arg) msec_to_wait = arg->params[0].u.value.a; - /* set task's state to interruptible sleep */ - set_current_state(TASK_INTERRUPTIBLE); - - /* take a nap */ - msleep(msec_to_wait); + /* Go to interruptible sleep */ + msleep_interruptible(msec_to_wait); arg->ret = TEEC_SUCCESS; return; -- GitLab From e42b1e39bb38b2ec803ca0c4ce6f7ca56e8c8ecc Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Thu, 16 Feb 2017 09:07:02 +0100 Subject: [PATCH 0842/1398] BACKPORT: tee: indicate privileged dev in gen_caps Mirrors the TEE_DESC_PRIVILEGED bit of struct tee_desc:flags into struct tee_ioctl_version_data:gen_caps as TEE_GEN_CAP_PRIVILEGED in tee_ioctl_version() Change-Id: Ic3961a07812c3f89a6f3006619d89945733a71a8 Reviewed-by: Jerome Forissier Signed-off-by: Jens Wiklander (cherry picked from commit 059cf566e123ca7eb7434285c6455d7afafb4e02) Signed-off-by: Victor Chong --- drivers/tee/tee_core.c | 5 +++++ include/uapi/linux/tee.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index 5c60bf4423e6..58a5009eacc3 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -90,8 +90,13 @@ static int tee_ioctl_version(struct tee_context *ctx, struct tee_ioctl_version_data vers; ctx->teedev->desc->ops->get_version(ctx->teedev, &vers); + + if (ctx->teedev->desc->flags & TEE_DESC_PRIVILEGED) + vers.gen_caps |= TEE_GEN_CAP_PRIVILEGED; + if (copy_to_user(uvers, &vers, sizeof(vers))) return -EFAULT; + return 0; } diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index 370d8845ab21..688782e90140 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -49,6 +49,7 @@ #define TEE_MAX_ARG_SIZE 1024 #define TEE_GEN_CAP_GP (1 << 0)/* GlobalPlatform compliant TEE */ +#define TEE_GEN_CAP_PRIVILEGED (1 << 1)/* Privileged device (for supplicant) */ /* * TEE Implementation ID -- GitLab From 4d046850199b32ddc9f8b6bb124bda06a14efe7d Mon Sep 17 00:00:00 2001 From: David Wang Date: Thu, 16 Feb 2017 16:43:44 +0800 Subject: [PATCH 0843/1398] BACKPORT: tee: optee: sync with new naming of interrupts In the latest changes of optee_os, the interrupts' names are changed to "native" and "foreign" interrupts. Change-Id: I5d1c2e05e9556b4805e24ac3aca1486dcdb5414b Signed-off-by: David Wang Signed-off-by: Jerome Forissier Signed-off-by: Jens Wiklander (cherry picked from commit 39e6519a3f135b143dee4d4fb5ac0438e75454e2) Signed-off-by: Victor Chong --- drivers/tee/optee/optee_smc.h | 12 ++++++------ drivers/tee/optee/rpc.c | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/tee/optee/optee_smc.h b/drivers/tee/optee/optee_smc.h index 13b7c98cdf25..069c8e1429de 100644 --- a/drivers/tee/optee/optee_smc.h +++ b/drivers/tee/optee/optee_smc.h @@ -298,7 +298,7 @@ struct optee_smc_disable_shm_cache_result { OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_ENABLE_SHM_CACHE) /* - * Resume from RPC (for example after processing an IRQ) + * Resume from RPC (for example after processing a foreign interrupt) * * Call register usage: * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC @@ -383,19 +383,19 @@ struct optee_smc_disable_shm_cache_result { OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_FREE) /* - * Deliver an IRQ in normal world. + * Deliver foreign interrupt to normal world. * * "Call" register usage: - * a0 OPTEE_SMC_RETURN_RPC_IRQ + * a0 OPTEE_SMC_RETURN_RPC_FOREIGN_INTR * a1-7 Resume information, must be preserved * * "Return" register usage: * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC. * a1-7 Preserved */ -#define OPTEE_SMC_RPC_FUNC_IRQ 4 -#define OPTEE_SMC_RETURN_RPC_IRQ \ - OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_IRQ) +#define OPTEE_SMC_RPC_FUNC_FOREIGN_INTR 4 +#define OPTEE_SMC_RETURN_RPC_FOREIGN_INTR \ + OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_FOREIGN_INTR) /* * Do an RPC request. The supplied struct optee_msg_arg tells which diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c index 9488ed1541bc..cef417f4f4d2 100644 --- a/drivers/tee/optee/rpc.c +++ b/drivers/tee/optee/rpc.c @@ -371,11 +371,11 @@ void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param) shm = reg_pair_to_ptr(param->a1, param->a2); tee_shm_free(shm); break; - case OPTEE_SMC_RPC_FUNC_IRQ: + case OPTEE_SMC_RPC_FUNC_FOREIGN_INTR: /* - * An IRQ was raised while secure world was executing, - * since all IRQs are handled in Linux a dummy RPC is - * performed to let Linux take the IRQ through the normal + * A foreign interrupt was raised while secure world was + * executing, since they are handled in Linux a dummy RPC is + * performed to let Linux take the interrupt through the normal * vector. */ break; -- GitLab From 8e170a589bd45981bf0c102ae0f1ab53b9cef2d6 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Mon, 9 Oct 2017 11:11:49 +0200 Subject: [PATCH 0844/1398] BACKPORT: optee: fix invalid of_node_put() in optee_driver_init() The first node supplied to of_find_matching_node() has its reference counter decreased as part of call to that function. In optee_driver_init() after calling of_find_matching_node() it's invalid to call of_node_put() on the supplied node again. So remove the invalid call to of_node_put(). Change-Id: I09d0dafa710a9ef7f3550e751b06b10197c9701a Reported-by: Alex Shi Signed-off-by: Jens Wiklander (cherry picked from commit f044113113dd95ba73916bde10e804d3cdfa2662) Signed-off-by: Victor Chong --- drivers/tee/optee/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 7952357df9c8..edb6e4e9ef3a 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -590,7 +590,6 @@ static int __init optee_driver_init(void) return -ENODEV; np = of_find_matching_node(fw_np, optee_match); - of_node_put(fw_np); if (!np) return -ENODEV; -- GitLab From f0f6293fd01add65e4bf3035dc5f96cf40dc9e55 Mon Sep 17 00:00:00 2001 From: Artem Borisov Date: Sat, 13 Jan 2018 18:03:40 +0300 Subject: [PATCH 0845/1398] ANDROID: uid_sys_stats: fix the comment It is not uid_cputime.c anymore. Change-Id: I7effc2a449c1f9cba9d86a7b122a9c05fc266405 Signed-off-by: Artem Borisov --- drivers/misc/uid_sys_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c index 7d69dd5c2b75..42a513c97239 100644 --- a/drivers/misc/uid_sys_stats.c +++ b/drivers/misc/uid_sys_stats.c @@ -1,4 +1,4 @@ -/* drivers/misc/uid_cputime.c +/* drivers/misc/uid_sys_stats.c * * Copyright (C) 2014 - 2015 Google, Inc. * -- GitLab From 8dec074e888aa70c456af00ef28b0b48cb3a0955 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 22 Jun 2017 12:14:40 -0700 Subject: [PATCH 0846/1398] fscrypt: updates on 4.15-rc4 Cherry-picked from origin/upstream-f2fs-stable-linux-4.9.y: 743205fbb952 fscrypt: move to generic async completion f1eb0c0b51a5 crypto: introduce crypto wait for async op e0af083add9b fscrypt: lock mutex before checking for bounce page pool 9e48a9fd98ba fscrypt: new helper function - fscrypt_prepare_setattr() ec822ff8b5e5 fscrypt: new helper function - fscrypt_prepare_lookup() 98fe83a195e7 fscrypt: new helper function - fscrypt_prepare_rename() f52187025917 fscrypt: new helper function - fscrypt_prepare_link() d61dffbd4f3e fscrypt: new helper function - fscrypt_file_open() 5190ed0766fe fscrypt: new helper function - fscrypt_require_key() 8814204af9c1 fscrypt: remove unneeded empty fscrypt_operations structs 8745aa36e439 fscrypt: remove ->is_encrypted() d750ec720f4d fscrypt: switch from ->is_encrypted() to IS_ENCRYPTED() 685285b0b3d9 fs, fscrypt: add an S_ENCRYPTED inode flag 1617929c3bea fscrypt: clean up include file mess a0471ef4ed35 fscrypt: fix dereference of NULL user_key_payload e77e7df06084 fscrypt: make ->dummy_context() return bool Change-Id: I23f36bfd059c0c576608221e7e1135535646cc5d Signed-off-by: Jaegeuk Kim --- crypto/api.c | 13 ++ fs/crypto/Makefile | 2 +- fs/crypto/crypto.c | 35 +--- fs/crypto/fname.c | 39 +---- fs/crypto/fscrypt_private.h | 13 +- fs/crypto/hooks.c | 112 ++++++++++++ fs/crypto/keyinfo.c | 23 +-- fs/crypto/policy.c | 6 +- fs/ext4/ext4.h | 8 +- fs/ext4/inode.c | 5 +- fs/ext4/super.c | 17 +- fs/f2fs/f2fs.h | 9 +- fs/f2fs/inode.c | 5 +- fs/f2fs/super.c | 7 +- include/linux/crypto.h | 40 +++++ include/linux/fs.h | 2 + include/linux/fscrypt.h | 290 ++++++++++++++++++++++++++++++++ include/linux/fscrypt_common.h | 138 --------------- include/linux/fscrypt_notsupp.h | 39 ++++- include/linux/fscrypt_supp.h | 17 +- 20 files changed, 556 insertions(+), 264 deletions(-) create mode 100644 fs/crypto/hooks.c create mode 100644 include/linux/fscrypt.h delete mode 100644 include/linux/fscrypt_common.h diff --git a/crypto/api.c b/crypto/api.c index bbc147cb5dec..e5c1abfd451f 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "internal.h" LIST_HEAD(crypto_alg_list); @@ -611,5 +612,17 @@ int crypto_has_alg(const char *name, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(crypto_has_alg); +void crypto_req_done(struct crypto_async_request *req, int err) +{ + struct crypto_wait *wait = req->data; + + if (err == -EINPROGRESS) + return; + + wait->err = err; + complete(&wait->completion); +} +EXPORT_SYMBOL_GPL(crypto_req_done); + MODULE_DESCRIPTION("Cryptographic core API"); MODULE_LICENSE("GPL"); diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile index 9f6607f17b53..cb496989a6b6 100644 --- a/fs/crypto/Makefile +++ b/fs/crypto/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o -fscrypto-y := crypto.o fname.o policy.o keyinfo.o +fscrypto-y := crypto.o fname.o hooks.o keyinfo.o policy.o fscrypto-$(CONFIG_BLOCK) += bio.o diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 151d4893382d..732a786cce9d 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -126,21 +126,6 @@ struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) } EXPORT_SYMBOL(fscrypt_get_ctx); -/** - * page_crypt_complete() - completion callback for page crypto - * @req: The asynchronous cipher request context - * @res: The result of the cipher operation - */ -static void page_crypt_complete(struct crypto_async_request *req, int res) -{ - struct fscrypt_completion_result *ecr = req->data; - - if (res == -EINPROGRESS) - return; - ecr->res = res; - complete(&ecr->completion); -} - int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, u64 lblk_num, struct page *src_page, struct page *dest_page, unsigned int len, @@ -151,7 +136,7 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, u8 padding[FS_IV_SIZE - sizeof(__le64)]; } iv; struct skcipher_request *req = NULL; - DECLARE_FS_COMPLETION_RESULT(ecr); + DECLARE_CRYPTO_WAIT(wait); struct scatterlist dst, src; struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; @@ -179,7 +164,7 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, skcipher_request_set_callback( req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - page_crypt_complete, &ecr); + crypto_req_done, &wait); sg_init_table(&dst, 1); sg_set_page(&dst, dest_page, len, offs); @@ -187,14 +172,9 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, sg_set_page(&src, src_page, len, offs); skcipher_request_set_crypt(req, &src, &dst, len, &iv); if (rw == FS_DECRYPT) - res = crypto_skcipher_decrypt(req); + res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait); else - res = crypto_skcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); - wait_for_completion(&ecr.completion); - res = ecr.res; - } + res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); skcipher_request_free(req); if (res) { printk_ratelimited(KERN_ERR @@ -340,7 +320,7 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) return -ECHILD; dir = dget_parent(dentry); - if (!d_inode(dir)->i_sb->s_cop->is_encrypted(d_inode(dir))) { + if (!IS_ENCRYPTED(d_inode(dir))) { dput(dir); return 0; } @@ -410,10 +390,7 @@ int fscrypt_initialize(unsigned int cop_flags) { int i, res = -ENOMEM; - /* - * No need to allocate a bounce page pool if there already is one or - * this FS won't use it. - */ + /* No need to allocate a bounce page pool if this FS won't use it. */ if (cop_flags & FS_CFLG_OWN_PAGES) return 0; diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index ad9f814fdead..6eb434363ff2 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -14,21 +14,6 @@ #include #include "fscrypt_private.h" -/** - * fname_crypt_complete() - completion callback for filename crypto - * @req: The asynchronous cipher request context - * @res: The result of the cipher operation - */ -static void fname_crypt_complete(struct crypto_async_request *req, int res) -{ - struct fscrypt_completion_result *ecr = req->data; - - if (res == -EINPROGRESS) - return; - ecr->res = res; - complete(&ecr->completion); -} - /** * fname_encrypt() - encrypt a filename * @@ -40,7 +25,7 @@ static int fname_encrypt(struct inode *inode, const struct qstr *iname, struct fscrypt_str *oname) { struct skcipher_request *req = NULL; - DECLARE_FS_COMPLETION_RESULT(ecr); + DECLARE_CRYPTO_WAIT(wait); struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; int res = 0; @@ -76,17 +61,12 @@ static int fname_encrypt(struct inode *inode, } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - fname_crypt_complete, &ecr); + crypto_req_done, &wait); sg_init_one(&sg, oname->name, cryptlen); skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv); /* Do the encryption */ - res = crypto_skcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - /* Request is being completed asynchronously; wait for it */ - wait_for_completion(&ecr.completion); - res = ecr.res; - } + res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); skcipher_request_free(req); if (res < 0) { printk_ratelimited(KERN_ERR @@ -110,7 +90,7 @@ static int fname_decrypt(struct inode *inode, struct fscrypt_str *oname) { struct skcipher_request *req = NULL; - DECLARE_FS_COMPLETION_RESULT(ecr); + DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; @@ -131,7 +111,7 @@ static int fname_decrypt(struct inode *inode, } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - fname_crypt_complete, &ecr); + crypto_req_done, &wait); /* Initialize IV */ memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); @@ -140,11 +120,7 @@ static int fname_decrypt(struct inode *inode, sg_init_one(&src_sg, iname->name, iname->len); sg_init_one(&dst_sg, oname->name, oname->len); skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); - res = crypto_skcipher_decrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } + res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait); skcipher_request_free(req); if (res < 0) { printk_ratelimited(KERN_ERR @@ -382,8 +358,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, memset(fname, 0, sizeof(struct fscrypt_name)); fname->usr_fname = iname; - if (!dir->i_sb->s_cop->is_encrypted(dir) || - fscrypt_is_dot_dotdot(iname)) { + if (!IS_ENCRYPTED(dir) || fscrypt_is_dot_dotdot(iname)) { fname->disk_name.name = (unsigned char *)iname->name; fname->disk_name.len = iname->len; return 0; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index a1d5021c31ef..4688a645cd63 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -11,7 +11,8 @@ #ifndef _FSCRYPT_PRIVATE_H #define _FSCRYPT_PRIVATE_H -#include +#define __FS_HAS_ENCRYPTION 1 +#include #include /* Encryption parameters */ @@ -69,16 +70,6 @@ typedef enum { #define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 #define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002 -struct fscrypt_completion_result { - struct completion completion; - int res; -}; - -#define DECLARE_FS_COMPLETION_RESULT(ecr) \ - struct fscrypt_completion_result ecr = { \ - COMPLETION_INITIALIZER_ONSTACK((ecr).completion), 0 } - - /* crypto.c */ extern int fscrypt_initialize(unsigned int cop_flags); extern struct workqueue_struct *fscrypt_read_workqueue; diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c new file mode 100644 index 000000000000..9f5fb2eb9cf7 --- /dev/null +++ b/fs/crypto/hooks.c @@ -0,0 +1,112 @@ +/* + * fs/crypto/hooks.c + * + * Encryption hooks for higher-level filesystem operations. + */ + +#include +#include "fscrypt_private.h" + +/** + * fscrypt_file_open - prepare to open a possibly-encrypted regular file + * @inode: the inode being opened + * @filp: the struct file being set up + * + * Currently, an encrypted regular file can only be opened if its encryption key + * is available; access to the raw encrypted contents is not supported. + * Therefore, we first set up the inode's encryption key (if not already done) + * and return an error if it's unavailable. + * + * We also verify that if the parent directory (from the path via which the file + * is being opened) is encrypted, then the inode being opened uses the same + * encryption policy. This is needed as part of the enforcement that all files + * in an encrypted directory tree use the same encryption policy, as a + * protection against certain types of offline attacks. Note that this check is + * needed even when opening an *unencrypted* file, since it's forbidden to have + * an unencrypted file in an encrypted directory. + * + * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code + */ +int fscrypt_file_open(struct inode *inode, struct file *filp) +{ + int err; + struct dentry *dir; + + err = fscrypt_require_key(inode); + if (err) + return err; + + dir = dget_parent(file_dentry(filp)); + if (IS_ENCRYPTED(d_inode(dir)) && + !fscrypt_has_permitted_context(d_inode(dir), inode)) { + pr_warn_ratelimited("fscrypt: inconsistent encryption contexts: %lu/%lu", + d_inode(dir)->i_ino, inode->i_ino); + err = -EPERM; + } + dput(dir); + return err; +} +EXPORT_SYMBOL_GPL(fscrypt_file_open); + +int __fscrypt_prepare_link(struct inode *inode, struct inode *dir) +{ + int err; + + err = fscrypt_require_key(dir); + if (err) + return err; + + if (!fscrypt_has_permitted_context(dir, inode)) + return -EPERM; + + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_prepare_link); + +int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + int err; + + err = fscrypt_require_key(old_dir); + if (err) + return err; + + err = fscrypt_require_key(new_dir); + if (err) + return err; + + if (old_dir != new_dir) { + if (IS_ENCRYPTED(new_dir) && + !fscrypt_has_permitted_context(new_dir, + d_inode(old_dentry))) + return -EPERM; + + if ((flags & RENAME_EXCHANGE) && + IS_ENCRYPTED(old_dir) && + !fscrypt_has_permitted_context(old_dir, + d_inode(new_dentry))) + return -EPERM; + } + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_prepare_rename); + +int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry) +{ + int err = fscrypt_get_encryption_info(dir); + + if (err) + return err; + + if (fscrypt_has_encryption_key(dir)) { + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY; + spin_unlock(&dentry->d_lock); + } + + d_set_d_op(dentry, &fscrypt_d_ops); + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 8e704d12a1cf..c891d2ea9d24 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -17,17 +17,6 @@ static struct crypto_shash *essiv_hash_tfm; -static void derive_crypt_complete(struct crypto_async_request *req, int rc) -{ - struct fscrypt_completion_result *ecr = req->data; - - if (rc == -EINPROGRESS) - return; - - ecr->res = rc; - complete(&ecr->completion); -} - /** * derive_key_aes() - Derive a key using AES-128-ECB * @deriving_key: Encryption key used for derivation. @@ -42,7 +31,7 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE], { int res = 0; struct skcipher_request *req = NULL; - DECLARE_FS_COMPLETION_RESULT(ecr); + DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0); @@ -59,7 +48,7 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE], } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - derive_crypt_complete, &ecr); + crypto_req_done, &wait); res = crypto_skcipher_setkey(tfm, deriving_key, FS_AES_128_ECB_KEY_SIZE); if (res < 0) @@ -69,11 +58,7 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE], sg_init_one(&dst_sg, derived_raw_key, source_key->size); skcipher_request_set_crypt(req, &src_sg, &dst_sg, source_key->size, NULL); - res = crypto_skcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } + res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); out: skcipher_request_free(req); crypto_free_skcipher(tfm); @@ -273,7 +258,7 @@ int fscrypt_get_encryption_info(struct inode *inode) res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); if (res < 0) { if (!fscrypt_dummy_context_enabled(inode) || - inode->i_sb->s_cop->is_encrypted(inode)) + IS_ENCRYPTED(inode)) return res; /* Fake up a context for an unencrypted directory */ memset(&ctx, 0, sizeof(ctx)); diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 9914d51dff86..2f2c53f2e136 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -109,7 +109,7 @@ int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) struct fscrypt_policy policy; int res; - if (!inode->i_sb->s_cop->is_encrypted(inode)) + if (!IS_ENCRYPTED(inode)) return -ENODATA; res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); @@ -166,11 +166,11 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) return 1; /* No restrictions if the parent directory is unencrypted */ - if (!cops->is_encrypted(parent)) + if (!IS_ENCRYPTED(parent)) return 1; /* Encrypted directories must not contain unencrypted files */ - if (!cops->is_encrypted(child)) + if (!IS_ENCRYPTED(child)) return 0; /* diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index be2d9ae0a749..c17539153099 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -32,17 +32,15 @@ #include #include #include -#ifdef CONFIG_EXT4_FS_ENCRYPTION -#include -#else -#include -#endif #include #include #ifdef __KERNEL__ #include #endif +#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_EXT4_FS_ENCRYPTION) +#include + /* * The fourth extended filesystem constants/structures */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d887e32e88a0..74c49b5f3784 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4443,8 +4443,11 @@ void ext4_set_inode_flags(struct inode *inode) new_fl |= S_DIRSYNC; if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode)) new_fl |= S_DAX; + if (flags & EXT4_ENCRYPT_FL) + new_fl |= S_ENCRYPTED; inode_set_flags(inode, new_fl, - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX| + S_ENCRYPTED); } /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6c8f2bdef79f..0440befe4a6a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1130,7 +1130,8 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); /* - * Update inode->i_flags - e.g. S_DAX may get disabled + * Update inode->i_flags - S_ENCRYPTED will be enabled, + * S_DAX may be disabled */ ext4_set_inode_flags(inode); } @@ -1151,7 +1152,10 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, ctx, len, 0); if (!res) { ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); - /* Update inode->i_flags - e.g. S_DAX may get disabled */ + /* + * Update inode->i_flags - S_ENCRYPTED will be enabled, + * S_DAX may be disabled + */ ext4_set_inode_flags(inode); res = ext4_mark_inode_dirty(handle, inode); if (res) @@ -1166,7 +1170,7 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, return res; } -static int ext4_dummy_context(struct inode *inode) +static bool ext4_dummy_context(struct inode *inode) { return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb)); } @@ -1182,14 +1186,9 @@ static const struct fscrypt_operations ext4_cryptops = { .get_context = ext4_get_context, .set_context = ext4_set_context, .dummy_context = ext4_dummy_context, - .is_encrypted = ext4_encrypted_inode, .empty_dir = ext4_empty_dir, .max_namelen = ext4_max_namelen, }; -#else -static const struct fscrypt_operations ext4_cryptops = { - .is_encrypted = ext4_encrypted_inode, -}; #endif #ifdef CONFIG_QUOTA @@ -3919,7 +3918,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ext4_sops; sb->s_export_op = &ext4_export_ops; sb->s_xattr = ext4_xattr_handlers; +#ifdef CONFIG_EXT4_FS_ENCRYPTION sb->s_cop = &ext4_cryptops; +#endif #ifdef CONFIG_QUOTA sb->dq_op = &ext4_quota_operations; if (ext4_has_feature_quota(sb)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 341dbe563224..d156dee60fc8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -24,13 +24,11 @@ #include #include #include -#ifdef CONFIG_F2FS_FS_ENCRYPTION -#include -#else -#include -#endif #include +#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_F2FS_FS_ENCRYPTION) +#include + #ifdef CONFIG_F2FS_CHECK_FS #define f2fs_bug_on(sbi, condition) BUG_ON(condition) #else @@ -3096,6 +3094,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) { #ifdef CONFIG_F2FS_FS_ENCRYPTION file_set_encrypt(inode); + inode->i_flags |= S_ENCRYPTED; #endif } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9684d53563f1..b4c4f2b25304 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -43,8 +43,11 @@ void f2fs_set_inode_flags(struct inode *inode) new_fl |= S_NOATIME; if (flags & FS_DIRSYNC_FL) new_fl |= S_DIRSYNC; + if (f2fs_encrypted_inode(inode)) + new_fl |= S_ENCRYPTED; inode_set_flags(inode, new_fl, - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC| + S_ENCRYPTED); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f95ffde1cff7..8f364c5870e5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1738,14 +1738,9 @@ static const struct fscrypt_operations f2fs_cryptops = { .key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, - .is_encrypted = f2fs_encrypted_inode, .empty_dir = f2fs_empty_dir, .max_namelen = f2fs_max_namelen, }; -#else -static const struct fscrypt_operations f2fs_cryptops = { - .is_encrypted = f2fs_encrypted_inode, -}; #endif static struct inode *f2fs_nfs_get_inode(struct super_block *sb, @@ -2474,7 +2469,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) #endif sb->s_op = &f2fs_sops; +#ifdef CONFIG_F2FS_FS_ENCRYPTION sb->s_cop = &f2fs_cryptops; +#endif sb->s_xattr = f2fs_xattr_handlers; sb->s_export_op = &f2fs_export_ops; sb->s_magic = F2FS_SUPER_MAGIC; diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 7cee5551625b..0a726257f74a 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -24,6 +24,7 @@ #include #include #include +#include /* * Autoloaded crypto modules should only use a prefixed name to avoid allowing @@ -464,6 +465,45 @@ struct crypto_alg { struct module *cra_module; } CRYPTO_MINALIGN_ATTR; +/* + * A helper struct for waiting for completion of async crypto ops + */ +struct crypto_wait { + struct completion completion; + int err; +}; + +/* + * Macro for declaring a crypto op async wait object on stack + */ +#define DECLARE_CRYPTO_WAIT(_wait) \ + struct crypto_wait _wait = { \ + COMPLETION_INITIALIZER_ONSTACK((_wait).completion), 0 } + +/* + * Async ops completion helper functioons + */ +void crypto_req_done(struct crypto_async_request *req, int err); + +static inline int crypto_wait_req(int err, struct crypto_wait *wait) +{ + switch (err) { + case -EINPROGRESS: + case -EBUSY: + wait_for_completion(&wait->completion); + reinit_completion(&wait->completion); + err = wait->err; + break; + }; + + return err; +} + +static inline void crypto_init_wait(struct crypto_wait *wait) +{ + init_completion(&wait->completion); +} + /* * Algorithm registration interface. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 18bd249d380b..cbcdcb209204 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1857,6 +1857,7 @@ struct super_operations { #else #define S_DAX 0 /* Make all the DAX code disappear */ #endif +#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -1895,6 +1896,7 @@ struct super_operations { #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) #define IS_DAX(inode) ((inode)->i_flags & S_DAX) +#define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED) #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h new file mode 100644 index 000000000000..8641e56b8f8a --- /dev/null +++ b/include/linux/fscrypt.h @@ -0,0 +1,290 @@ +/* + * fscrypt.h: declarations for per-file encryption + * + * Filesystems that implement per-file encryption include this header + * file with the __FS_HAS_ENCRYPTION set according to whether that filesystem + * is being built with encryption support or not. + * + * Copyright (C) 2015, Google, Inc. + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015. + */ +#ifndef _LINUX_FSCRYPT_H +#define _LINUX_FSCRYPT_H + +#include +#include +#include +#include +#include +#include +#include + +#define FS_CRYPTO_BLOCK_SIZE 16 + +struct fscrypt_info; + +struct fscrypt_ctx { + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + u8 flags; /* Flags */ +}; + +/** + * For encrypted symlinks, the ciphertext length is stored at the beginning + * of the string in little-endian format. + */ +struct fscrypt_symlink_data { + __le16 len; + char encrypted_path[1]; +} __packed; + +struct fscrypt_str { + unsigned char *name; + u32 len; +}; + +struct fscrypt_name { + const struct qstr *usr_fname; + struct fscrypt_str disk_name; + u32 hash; + u32 minor_hash; + struct fscrypt_str crypto_buf; +}; + +#define FSTR_INIT(n, l) { .name = n, .len = l } +#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) +#define fname_name(p) ((p)->disk_name.name) +#define fname_len(p) ((p)->disk_name.len) + +/* + * fscrypt superblock flags + */ +#define FS_CFLG_OWN_PAGES (1U << 1) + +/* + * crypto opertions for filesystems + */ +struct fscrypt_operations { + unsigned int flags; + const char *key_prefix; + int (*get_context)(struct inode *, void *, size_t); + int (*set_context)(struct inode *, const void *, size_t, void *); + bool (*dummy_context)(struct inode *); + bool (*empty_dir)(struct inode *); + unsigned (*max_namelen)(struct inode *); +}; + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + if (inode->i_sb->s_cop->dummy_context && + inode->i_sb->s_cop->dummy_context(inode)) + return true; + return false; +} + +static inline bool fscrypt_valid_enc_modes(u32 contents_mode, + u32 filenames_mode) +{ + if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && + filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) + return true; + + if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && + filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) + return true; + + return false; +} + +static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) +{ + if (str->len == 1 && str->name[0] == '.') + return true; + + if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + return true; + + return false; +} + +#if __FS_HAS_ENCRYPTION + +static inline struct page *fscrypt_control_page(struct page *page) +{ + return ((struct fscrypt_ctx *)page_private(page))->w.control_page; +} + +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return (inode->i_crypt_info != NULL); +} + +#include + +#else /* !__FS_HAS_ENCRYPTION */ + +static inline struct page *fscrypt_control_page(struct page *page) +{ + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +} + +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return 0; +} + +#include +#endif /* __FS_HAS_ENCRYPTION */ + +/** + * fscrypt_require_key - require an inode's encryption key + * @inode: the inode we need the key for + * + * If the inode is encrypted, set up its encryption key if not already done. + * Then require that the key be present and return -ENOKEY otherwise. + * + * No locks are needed, and the key will live as long as the struct inode --- so + * it won't go away from under you. + * + * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code + * if a problem occurred while setting up the encryption key. + */ +static inline int fscrypt_require_key(struct inode *inode) +{ + if (IS_ENCRYPTED(inode)) { + int err = fscrypt_get_encryption_info(inode); + + if (err) + return err; + if (!fscrypt_has_encryption_key(inode)) + return -ENOKEY; + } + return 0; +} + +/** + * fscrypt_prepare_link - prepare to link an inode into a possibly-encrypted directory + * @old_dentry: an existing dentry for the inode being linked + * @dir: the target directory + * @dentry: negative dentry for the target filename + * + * A new link can only be added to an encrypted directory if the directory's + * encryption key is available --- since otherwise we'd have no way to encrypt + * the filename. Therefore, we first set up the directory's encryption key (if + * not already done) and return an error if it's unavailable. + * + * We also verify that the link will not violate the constraint that all files + * in an encrypted directory tree use the same encryption policy. + * + * Return: 0 on success, -ENOKEY if the directory's encryption key is missing, + * -EPERM if the link would result in an inconsistent encryption policy, or + * another -errno code. + */ +static inline int fscrypt_prepare_link(struct dentry *old_dentry, + struct inode *dir, + struct dentry *dentry) +{ + if (IS_ENCRYPTED(dir)) + return __fscrypt_prepare_link(d_inode(old_dentry), dir); + return 0; +} + +/** + * fscrypt_prepare_rename - prepare for a rename between possibly-encrypted directories + * @old_dir: source directory + * @old_dentry: dentry for source file + * @new_dir: target directory + * @new_dentry: dentry for target location (may be negative unless exchanging) + * @flags: rename flags (we care at least about %RENAME_EXCHANGE) + * + * Prepare for ->rename() where the source and/or target directories may be + * encrypted. A new link can only be added to an encrypted directory if the + * directory's encryption key is available --- since otherwise we'd have no way + * to encrypt the filename. A rename to an existing name, on the other hand, + * *is* cryptographically possible without the key. However, we take the more + * conservative approach and just forbid all no-key renames. + * + * We also verify that the rename will not violate the constraint that all files + * in an encrypted directory tree use the same encryption policy. + * + * Return: 0 on success, -ENOKEY if an encryption key is missing, -EPERM if the + * rename would cause inconsistent encryption policies, or another -errno code. + */ +static inline int fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags) +{ + if (IS_ENCRYPTED(old_dir) || IS_ENCRYPTED(new_dir)) + return __fscrypt_prepare_rename(old_dir, old_dentry, + new_dir, new_dentry, flags); + return 0; +} + +/** + * fscrypt_prepare_lookup - prepare to lookup a name in a possibly-encrypted directory + * @dir: directory being searched + * @dentry: filename being looked up + * @flags: lookup flags + * + * Prepare for ->lookup() in a directory which may be encrypted. Lookups can be + * done with or without the directory's encryption key; without the key, + * filenames are presented in encrypted form. Therefore, we'll try to set up + * the directory's encryption key, but even without it the lookup can continue. + * + * To allow invalidating stale dentries if the directory's encryption key is + * added later, we also install a custom ->d_revalidate() method and use the + * DCACHE_ENCRYPTED_WITH_KEY flag to indicate whether a given dentry is a + * plaintext name (flag set) or a ciphertext name (flag cleared). + * + * Return: 0 on success, -errno if a problem occurred while setting up the + * encryption key + */ +static inline int fscrypt_prepare_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags) +{ + if (IS_ENCRYPTED(dir)) + return __fscrypt_prepare_lookup(dir, dentry); + return 0; +} + +/** + * fscrypt_prepare_setattr - prepare to change a possibly-encrypted inode's attributes + * @dentry: dentry through which the inode is being changed + * @attr: attributes to change + * + * Prepare for ->setattr() on a possibly-encrypted inode. On an encrypted file, + * most attribute changes are allowed even without the encryption key. However, + * without the encryption key we do have to forbid truncates. This is needed + * because the size being truncated to may not be a multiple of the filesystem + * block size, and in that case we'd have to decrypt the final block, zero the + * portion past i_size, and re-encrypt it. (We *could* allow truncating to a + * filesystem block boundary, but it's simpler to just forbid all truncates --- + * and we already forbid all other contents modifications without the key.) + * + * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code + * if a problem occurred while setting up the encryption key. + */ +static inline int fscrypt_prepare_setattr(struct dentry *dentry, + struct iattr *attr) +{ + if (attr->ia_valid & ATTR_SIZE) + return fscrypt_require_key(d_inode(dentry)); + return 0; +} + +#endif /* _LINUX_FSCRYPT_H */ diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h deleted file mode 100644 index 4022c61f7e9b..000000000000 --- a/include/linux/fscrypt_common.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * fscrypt_common.h: common declarations for per-file encryption - * - * Copyright (C) 2015, Google, Inc. - * - * Written by Michael Halcrow, 2015. - * Modified by Jaegeuk Kim, 2015. - */ - -#ifndef _LINUX_FSCRYPT_COMMON_H -#define _LINUX_FSCRYPT_COMMON_H - -#include -#include -#include -#include -#include -#include -#include - -#define FS_CRYPTO_BLOCK_SIZE 16 - -struct fscrypt_info; - -struct fscrypt_ctx { - union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; - struct { - struct bio *bio; - struct work_struct work; - } r; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ -}; - -/** - * For encrypted symlinks, the ciphertext length is stored at the beginning - * of the string in little-endian format. - */ -struct fscrypt_symlink_data { - __le16 len; - char encrypted_path[1]; -} __packed; - -struct fscrypt_str { - unsigned char *name; - u32 len; -}; - -struct fscrypt_name { - const struct qstr *usr_fname; - struct fscrypt_str disk_name; - u32 hash; - u32 minor_hash; - struct fscrypt_str crypto_buf; -}; - -#define FSTR_INIT(n, l) { .name = n, .len = l } -#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) -#define fname_name(p) ((p)->disk_name.name) -#define fname_len(p) ((p)->disk_name.len) - -/* - * fscrypt superblock flags - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - -/* - * crypto opertions for filesystems - */ -struct fscrypt_operations { - unsigned int flags; - const char *key_prefix; - int (*get_context)(struct inode *, void *, size_t); - int (*set_context)(struct inode *, const void *, size_t, void *); - int (*dummy_context)(struct inode *); - bool (*is_encrypted)(struct inode *); - bool (*empty_dir)(struct inode *); - unsigned (*max_namelen)(struct inode *); -}; - -static inline bool fscrypt_dummy_context_enabled(struct inode *inode) -{ - if (inode->i_sb->s_cop->dummy_context && - inode->i_sb->s_cop->dummy_context(inode)) - return true; - return false; -} - -static inline bool fscrypt_valid_enc_modes(u32 contents_mode, - u32 filenames_mode) -{ - if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && - filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) - return true; - - if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && - filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) - return true; - - return false; -} - -static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) -{ - if (str->len == 1 && str->name[0] == '.') - return true; - - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') - return true; - - return false; -} - -static inline struct page *fscrypt_control_page(struct page *page) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - return ((struct fscrypt_ctx *)page_private(page))->w.control_page; -#else - WARN_ON_ONCE(1); - return ERR_PTR(-EINVAL); -#endif -} - -static inline int fscrypt_has_encryption_key(const struct inode *inode) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - return (inode->i_crypt_info != NULL); -#else - return 0; -#endif -} - -#endif /* _LINUX_FSCRYPT_COMMON_H */ diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index ec406aed2f2f..c4c6bf2c390e 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -3,13 +3,16 @@ * * This stubs out the fscrypt functions for filesystems configured without * encryption support. + * + * Do not include this file directly. Use fscrypt.h instead! */ +#ifndef _LINUX_FSCRYPT_H +#error "Incorrect include of linux/fscrypt_notsupp.h!" +#endif #ifndef _LINUX_FSCRYPT_NOTSUPP_H #define _LINUX_FSCRYPT_NOTSUPP_H -#include - /* crypto.c */ static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) @@ -97,7 +100,7 @@ static inline int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { - if (dir->i_sb->s_cop->is_encrypted(dir)) + if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; memset(fname, 0, sizeof(struct fscrypt_name)); @@ -174,4 +177,34 @@ static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, return -EOPNOTSUPP; } +/* hooks.c */ + +static inline int fscrypt_file_open(struct inode *inode, struct file *filp) +{ + if (IS_ENCRYPTED(inode)) + return -EOPNOTSUPP; + return 0; +} + +static inline int __fscrypt_prepare_link(struct inode *inode, + struct inode *dir) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_prepare_lookup(struct inode *dir, + struct dentry *dentry) +{ + return -EOPNOTSUPP; +} + #endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 32e2fcf13b01..2db5e9706f60 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -1,14 +1,15 @@ /* * fscrypt_supp.h * - * This is included by filesystems configured with encryption support. + * Do not include this file directly. Use fscrypt.h instead! */ +#ifndef _LINUX_FSCRYPT_H +#error "Incorrect include of linux/fscrypt_supp.h!" +#endif #ifndef _LINUX_FSCRYPT_SUPP_H #define _LINUX_FSCRYPT_SUPP_H -#include - /* crypto.c */ extern struct kmem_cache *fscrypt_info_cachep; extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); @@ -142,4 +143,14 @@ extern void fscrypt_pullback_bio_page(struct page **, bool); extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, unsigned int); +/* hooks.c */ +extern int fscrypt_file_open(struct inode *inode, struct file *filp); +extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir); +extern int __fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags); +extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry); + #endif /* _LINUX_FSCRYPT_SUPP_H */ -- GitLab From b58aa24edb621469040b0a8a47f25d91f5ee2c58 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 6 Dec 2017 09:27:30 -0800 Subject: [PATCH 0847/1398] dm bufio: fix shrinker scans when (nr_to_scan < retain_target) commit fbc7c07ec23c040179384a1f16b62b6030eb6bdd upstream. When system is under memory pressure it is observed that dm bufio shrinker often reclaims only one buffer per scan. This change fixes the following two issues in dm bufio shrinker that cause this behavior: 1. ((nr_to_scan - freed) <= retain_target) condition is used to terminate slab scan process. This assumes that nr_to_scan is equal to the LRU size, which might not be correct because do_shrink_slab() in vmscan.c calculates nr_to_scan using multiple inputs. As a result when nr_to_scan is less than retain_target (64) the scan will terminate after the first iteration, effectively reclaiming one buffer per scan and making scans very inefficient. This hurts vmscan performance especially because mutex is acquired/released every time dm_bufio_shrink_scan() is called. New implementation uses ((LRU size - freed) <= retain_target) condition for scan termination. LRU size can be safely determined inside __scan() because this function is called after dm_bufio_lock(). 2. do_shrink_slab() uses value returned by dm_bufio_shrink_count() to determine number of freeable objects in the slab. However dm_bufio always retains retain_target buffers in its LRU and will terminate a scan when this mark is reached. Therefore returning the entire LRU size from dm_bufio_shrink_count() is misleading because that does not represent the number of freeable objects that slab will reclaim during a scan. Returning (LRU size - retain_target) better represents the number of freeable objects in the slab. This way do_shrink_slab() returns 0 when (LRU size < retain_target) and vmscan will not try to scan this shrinker avoiding scans that will not reclaim any memory. Test: tested using Android device running /system/extras/alloc-stress that generates memory pressure and causes intensive shrinker scans Signed-off-by: Suren Baghdasaryan Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-bufio.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 7643f72adb1c..3ec647e8b9c6 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1554,7 +1554,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, int l; struct dm_buffer *b, *tmp; unsigned long freed = 0; - unsigned long count = nr_to_scan; + unsigned long count = c->n_buffers[LIST_CLEAN] + + c->n_buffers[LIST_DIRTY]; unsigned long retain_target = get_retain_buffers(c); for (l = 0; l < LIST_SIZE; l++) { @@ -1591,6 +1592,7 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { struct dm_bufio_client *c; unsigned long count; + unsigned long retain_target; c = container_of(shrink, struct dm_bufio_client, shrinker); if (sc->gfp_mask & __GFP_FS) @@ -1599,8 +1601,9 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) return 0; count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; + retain_target = get_retain_buffers(c); dm_bufio_unlock(c); - return count; + return (count < retain_target) ? 0 : (count - retain_target); } /* -- GitLab From 234c8e60437d3733cdcae996a62064757f3f35ac Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Mon, 21 Nov 2016 16:58:40 +0200 Subject: [PATCH 0848/1398] mac80211: Add RX flag to indicate ICV stripped commit cef0acd4d7d4811d2d19cd0195031bf0dfe41249 upstream. Add a flag that indicates that the WEP ICV was stripped from an RX packet, allowing the device to not transfer that if it's already checked. Signed-off-by: David Spinadel Signed-off-by: Johannes Berg Cc: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- include/net/mac80211.h | 5 ++++- net/mac80211/wep.c | 3 ++- net/mac80211/wpa.c | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2c7d876e2a1a..8fd61bc50383 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1007,7 +1007,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_DECRYPTED: This frame was decrypted in hardware. * @RX_FLAG_MMIC_STRIPPED: the Michael MIC is stripped off this frame, * verification has been done by the hardware. - * @RX_FLAG_IV_STRIPPED: The IV/ICV are stripped from this frame. + * @RX_FLAG_IV_STRIPPED: The IV and ICV are stripped from this frame. * If this flag is set, the stack cannot do any replay detection * hence the driver or hardware will have to do that. * @RX_FLAG_PN_VALIDATED: Currently only valid for CCMP/GCMP frames, this @@ -1078,6 +1078,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_ALLOW_SAME_PN: Allow the same PN as same packet before. * This is used for AMSDU subframes which can have the same PN as * the first subframe. + * @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must + * be done in the hardware. */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1113,6 +1115,7 @@ enum mac80211_rx_flags { RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(31), RX_FLAG_MIC_STRIPPED = BIT_ULL(32), RX_FLAG_ALLOW_SAME_PN = BIT_ULL(33), + RX_FLAG_ICV_STRIPPED = BIT_ULL(34), }; #define RX_FLAG_STBC_SHIFT 26 diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index efa3f48f1ec5..73e8f347802e 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -293,7 +293,8 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ - if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN)) + if (!(status->flag & RX_FLAG_ICV_STRIPPED) && + pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN)) return RX_DROP_UNUSABLE; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 5c71d60f3a64..caa5986cb2e4 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -295,7 +295,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; /* Trim ICV */ - skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN); + if (!(status->flag & RX_FLAG_ICV_STRIPPED)) + skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN); /* Remove IV */ memmove(skb->data + IEEE80211_TKIP_IV_LEN, skb->data, hdrlen); -- GitLab From c5ab9ee144d8a3a3ec8de9b2c029e84ea221478f Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 27 Oct 2017 18:35:31 +0300 Subject: [PATCH 0849/1398] ath10k: rebuild crypto header in rx data frames commit 7eccb738fce57cbe53ed903ccf43f9ab257b15b3 upstream. Rx data frames notified through HTT_T2H_MSG_TYPE_RX_IND and HTT_T2H_MSG_TYPE_RX_FRAG_IND expect PN/TSC check to be done on host (mac80211) rather than firmware. Rebuild cipher header in every received data frames (that are notified through those HTT interfaces) from the rx_hdr_status tlv available in the rx descriptor of the first msdu. Skip setting RX_FLAG_IV_STRIPPED flag for the packets which requires mac80211 PN/TSC check support and set appropriate RX_FLAG for stripped crypto tail. Hw QCA988X, QCA9887, QCA99X0, QCA9984, QCA9888 and QCA4019 currently need the rebuilding of cipher header to perform PN/TSC check for replay attack. Please note that removing crypto tail for CCMP-256, GCMP and GCMP-256 ciphers in raw mode needs to be fixed. Since Rx with these ciphers in raw mode does not work in the current form even without this patch and removing crypto tail for these chipers needs clean up, raw mode related issues in CCMP-256, GCMP and GCMP-256 can be addressed in follow up patches. Tested-by: Manikanta Pubbisetty Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/htt_rx.c | 105 ++++++++++++++++++---- drivers/net/wireless/ath/ath10k/rx_desc.h | 3 + 2 files changed, 92 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 0b4c1562420f..ba1fe61e6ea6 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -548,6 +548,11 @@ static int ath10k_htt_rx_crypto_param_len(struct ath10k *ar, return IEEE80211_TKIP_IV_LEN; case HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2: return IEEE80211_CCMP_HDR_LEN; + case HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2: + return IEEE80211_CCMP_256_HDR_LEN; + case HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2: + case HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2: + return IEEE80211_GCMP_HDR_LEN; case HTT_RX_MPDU_ENCRYPT_WEP128: case HTT_RX_MPDU_ENCRYPT_WAPI: break; @@ -573,6 +578,11 @@ static int ath10k_htt_rx_crypto_tail_len(struct ath10k *ar, return IEEE80211_TKIP_ICV_LEN; case HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2: return IEEE80211_CCMP_MIC_LEN; + case HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2: + return IEEE80211_CCMP_256_MIC_LEN; + case HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2: + case HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2: + return IEEE80211_GCMP_MIC_LEN; case HTT_RX_MPDU_ENCRYPT_WEP128: case HTT_RX_MPDU_ENCRYPT_WAPI: break; @@ -1024,9 +1034,21 @@ static void ath10k_htt_rx_h_undecap_raw(struct ath10k *ar, hdr = (void *)msdu->data; /* Tail */ - if (status->flag & RX_FLAG_IV_STRIPPED) + if (status->flag & RX_FLAG_IV_STRIPPED) { skb_trim(msdu, msdu->len - ath10k_htt_rx_crypto_tail_len(ar, enctype)); + } else { + /* MIC */ + if ((status->flag & RX_FLAG_MIC_STRIPPED) && + enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2) + skb_trim(msdu, msdu->len - 8); + + /* ICV */ + if (status->flag & RX_FLAG_ICV_STRIPPED && + enctype != HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2) + skb_trim(msdu, msdu->len - + ath10k_htt_rx_crypto_tail_len(ar, enctype)); + } /* MMIC */ if ((status->flag & RX_FLAG_MMIC_STRIPPED) && @@ -1048,7 +1070,8 @@ static void ath10k_htt_rx_h_undecap_raw(struct ath10k *ar, static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, struct sk_buff *msdu, struct ieee80211_rx_status *status, - const u8 first_hdr[64]) + const u8 first_hdr[64], + enum htt_rx_mpdu_encrypt_type enctype) { struct ieee80211_hdr *hdr; struct htt_rx_desc *rxd; @@ -1056,6 +1079,7 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; int l3_pad_bytes; + int bytes_aligned = ar->hw_params.decap_align_bytes; /* Delivered decapped frame: * [nwifi 802.11 header] <-- replaced with 802.11 hdr @@ -1084,6 +1108,14 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, /* push original 802.11 header */ hdr = (struct ieee80211_hdr *)first_hdr; hdr_len = ieee80211_hdrlen(hdr->frame_control); + + if (!(status->flag & RX_FLAG_IV_STRIPPED)) { + memcpy(skb_push(msdu, + ath10k_htt_rx_crypto_param_len(ar, enctype)), + (void *)hdr + round_up(hdr_len, bytes_aligned), + ath10k_htt_rx_crypto_param_len(ar, enctype)); + } + memcpy(skb_push(msdu, hdr_len), hdr, hdr_len); /* original 802.11 header has a different DA and in @@ -1144,6 +1176,7 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, u8 sa[ETH_ALEN]; int l3_pad_bytes; struct htt_rx_desc *rxd; + int bytes_aligned = ar->hw_params.decap_align_bytes; /* Delivered decapped frame: * [eth header] <-- replaced with 802.11 hdr & rfc1042/llc @@ -1172,6 +1205,14 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, /* push original 802.11 header */ hdr = (struct ieee80211_hdr *)first_hdr; hdr_len = ieee80211_hdrlen(hdr->frame_control); + + if (!(status->flag & RX_FLAG_IV_STRIPPED)) { + memcpy(skb_push(msdu, + ath10k_htt_rx_crypto_param_len(ar, enctype)), + (void *)hdr + round_up(hdr_len, bytes_aligned), + ath10k_htt_rx_crypto_param_len(ar, enctype)); + } + memcpy(skb_push(msdu, hdr_len), hdr, hdr_len); /* original 802.11 header has a different DA and in @@ -1185,12 +1226,14 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, struct sk_buff *msdu, struct ieee80211_rx_status *status, - const u8 first_hdr[64]) + const u8 first_hdr[64], + enum htt_rx_mpdu_encrypt_type enctype) { struct ieee80211_hdr *hdr; size_t hdr_len; int l3_pad_bytes; struct htt_rx_desc *rxd; + int bytes_aligned = ar->hw_params.decap_align_bytes; /* Delivered decapped frame: * [amsdu header] <-- replaced with 802.11 hdr @@ -1206,6 +1249,14 @@ static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, hdr = (struct ieee80211_hdr *)first_hdr; hdr_len = ieee80211_hdrlen(hdr->frame_control); + + if (!(status->flag & RX_FLAG_IV_STRIPPED)) { + memcpy(skb_push(msdu, + ath10k_htt_rx_crypto_param_len(ar, enctype)), + (void *)hdr + round_up(hdr_len, bytes_aligned), + ath10k_htt_rx_crypto_param_len(ar, enctype)); + } + memcpy(skb_push(msdu, hdr_len), hdr, hdr_len); } @@ -1240,13 +1291,15 @@ static void ath10k_htt_rx_h_undecap(struct ath10k *ar, is_decrypted); break; case RX_MSDU_DECAP_NATIVE_WIFI: - ath10k_htt_rx_h_undecap_nwifi(ar, msdu, status, first_hdr); + ath10k_htt_rx_h_undecap_nwifi(ar, msdu, status, first_hdr, + enctype); break; case RX_MSDU_DECAP_ETHERNET2_DIX: ath10k_htt_rx_h_undecap_eth(ar, msdu, status, first_hdr, enctype); break; case RX_MSDU_DECAP_8023_SNAP_LLC: - ath10k_htt_rx_h_undecap_snap(ar, msdu, status, first_hdr); + ath10k_htt_rx_h_undecap_snap(ar, msdu, status, first_hdr, + enctype); break; } } @@ -1289,7 +1342,8 @@ static void ath10k_htt_rx_h_csum_offload(struct sk_buff *msdu) static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, struct sk_buff_head *amsdu, - struct ieee80211_rx_status *status) + struct ieee80211_rx_status *status, + bool fill_crypt_header) { struct sk_buff *first; struct sk_buff *last; @@ -1299,7 +1353,6 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, enum htt_rx_mpdu_encrypt_type enctype; u8 first_hdr[64]; u8 *qos; - size_t hdr_len; bool has_fcs_err; bool has_crypto_err; bool has_tkip_err; @@ -1324,15 +1377,17 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, * decapped header. It'll be used for undecapping of each MSDU. */ hdr = (void *)rxd->rx_hdr_status; - hdr_len = ieee80211_hdrlen(hdr->frame_control); - memcpy(first_hdr, hdr, hdr_len); + memcpy(first_hdr, hdr, RX_HTT_HDR_STATUS_LEN); /* Each A-MSDU subframe will use the original header as the base and be * reported as a separate MSDU so strip the A-MSDU bit from QoS Ctl. */ hdr = (void *)first_hdr; - qos = ieee80211_get_qos_ctl(hdr); - qos[0] &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT; + + if (ieee80211_is_data_qos(hdr->frame_control)) { + qos = ieee80211_get_qos_ctl(hdr); + qos[0] &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT; + } /* Some attention flags are valid only in the last MSDU. */ last = skb_peek_tail(amsdu); @@ -1379,9 +1434,14 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, status->flag |= RX_FLAG_DECRYPTED; if (likely(!is_mgmt)) - status->flag |= RX_FLAG_IV_STRIPPED | - RX_FLAG_MMIC_STRIPPED; -} + status->flag |= RX_FLAG_MMIC_STRIPPED; + + if (fill_crypt_header) + status->flag |= RX_FLAG_MIC_STRIPPED | + RX_FLAG_ICV_STRIPPED; + else + status->flag |= RX_FLAG_IV_STRIPPED; + } skb_queue_walk(amsdu, msdu) { ath10k_htt_rx_h_csum_offload(msdu); @@ -1397,6 +1457,9 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, if (is_mgmt) continue; + if (fill_crypt_header) + continue; + hdr = (void *)msdu->data; hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED); } @@ -1407,6 +1470,9 @@ static void ath10k_htt_rx_h_deliver(struct ath10k *ar, struct ieee80211_rx_status *status) { struct sk_buff *msdu; + struct sk_buff *first_subframe; + + first_subframe = skb_peek(amsdu); while ((msdu = __skb_dequeue(amsdu))) { /* Setup per-MSDU flags */ @@ -1415,6 +1481,13 @@ static void ath10k_htt_rx_h_deliver(struct ath10k *ar, else status->flag |= RX_FLAG_AMSDU_MORE; + if (msdu == first_subframe) { + first_subframe = NULL; + status->flag &= ~RX_FLAG_ALLOW_SAME_PN; + } else { + status->flag |= RX_FLAG_ALLOW_SAME_PN; + } + ath10k_process_rx(ar, status, msdu); } } @@ -1557,7 +1630,7 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) ath10k_htt_rx_h_ppdu(ar, &amsdu, rx_status, 0xffff); ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0); ath10k_htt_rx_h_filter(ar, &amsdu, rx_status); - ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status); + ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true); ath10k_htt_rx_h_deliver(ar, &amsdu, rx_status); return num_msdus; @@ -1892,7 +1965,7 @@ static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) num_msdus += skb_queue_len(&amsdu); ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id); ath10k_htt_rx_h_filter(ar, &amsdu, status); - ath10k_htt_rx_h_mpdu(ar, &amsdu, status); + ath10k_htt_rx_h_mpdu(ar, &amsdu, status, false); ath10k_htt_rx_h_deliver(ar, &amsdu, status); break; case -EAGAIN: diff --git a/drivers/net/wireless/ath/ath10k/rx_desc.h b/drivers/net/wireless/ath/ath10k/rx_desc.h index 034e7a54c5b2..e4878d0044bf 100644 --- a/drivers/net/wireless/ath/ath10k/rx_desc.h +++ b/drivers/net/wireless/ath/ath10k/rx_desc.h @@ -239,6 +239,9 @@ enum htt_rx_mpdu_encrypt_type { HTT_RX_MPDU_ENCRYPT_WAPI = 5, HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2 = 6, HTT_RX_MPDU_ENCRYPT_NONE = 7, + HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2 = 8, + HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2 = 9, + HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2 = 10, }; #define RX_MPDU_START_INFO0_PEER_IDX_MASK 0x000007ff -- GitLab From c781e3be97a1cbeef8c853101e8f266db556b0a3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 14 Dec 2017 17:40:50 -0800 Subject: [PATCH 0850/1398] KVM: Fix stack-out-of-bounds read in write_mmio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e39d200fa5bf5b94a0948db0dae44c1b73b84a56 upstream. Reported by syzkaller: BUG: KASAN: stack-out-of-bounds in write_mmio+0x11e/0x270 [kvm] Read of size 8 at addr ffff8803259df7f8 by task syz-executor/32298 CPU: 6 PID: 32298 Comm: syz-executor Tainted: G OE 4.15.0-rc2+ #18 Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY, BIOS FBKTC1AUS 02/16/2016 Call Trace: dump_stack+0xab/0xe1 print_address_description+0x6b/0x290 kasan_report+0x28a/0x370 write_mmio+0x11e/0x270 [kvm] emulator_read_write_onepage+0x311/0x600 [kvm] emulator_read_write+0xef/0x240 [kvm] emulator_fix_hypercall+0x105/0x150 [kvm] em_hypercall+0x2b/0x80 [kvm] x86_emulate_insn+0x2b1/0x1640 [kvm] x86_emulate_instruction+0x39a/0xb90 [kvm] handle_exception+0x1b4/0x4d0 [kvm_intel] vcpu_enter_guest+0x15a0/0x2640 [kvm] kvm_arch_vcpu_ioctl_run+0x549/0x7d0 [kvm] kvm_vcpu_ioctl+0x479/0x880 [kvm] do_vfs_ioctl+0x142/0x9a0 SyS_ioctl+0x74/0x80 entry_SYSCALL_64_fastpath+0x23/0x9a The path of patched vmmcall will patch 3 bytes opcode 0F 01 C1(vmcall) to the guest memory, however, write_mmio tracepoint always prints 8 bytes through *(u64 *)val since kvm splits the mmio access into 8 bytes. This leaks 5 bytes from the kernel stack (CVE-2017-17741). This patch fixes it by just accessing the bytes which we operate on. Before patch: syz-executor-5567 [007] .... 51370.561696: kvm_mmio: mmio write len 3 gpa 0x10 val 0x1ffff10077c1010f After patch: syz-executor-13416 [002] .... 51302.299573: kvm_mmio: mmio write len 3 gpa 0x10 val 0xc1010f Reported-by: Dmitry Vyukov Reviewed-by: Darren Kenny Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Marc Zyngier Cc: Christoffer Dall Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Cc: Mathieu Desnoyers Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmio.c | 6 +++--- arch/x86/kvm/x86.c | 8 ++++---- include/trace/events/kvm.h | 7 +++++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index b6e715fd3c90..dac7ceb1a677 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) } trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, - data); + &data); data = vcpu_data_host_to_guest(vcpu, data, len); vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); } @@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), len); - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data); kvm_mmio_write_buf(data_buf, len, data); ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, data_buf); } else { trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, - fault_ipa, 0); + fault_ipa, NULL); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, data_buf); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 73304b1a03cc..d3f80cccb9aa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4264,7 +4264,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) addr, n, v)) && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) break; - trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v); handled += n; addr += n; len -= n; @@ -4517,7 +4517,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) { if (vcpu->mmio_read_completed) { trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, - vcpu->mmio_fragments[0].gpa, *(u64 *)val); + vcpu->mmio_fragments[0].gpa, val); vcpu->mmio_read_completed = 0; return 1; } @@ -4539,14 +4539,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) { - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val); return vcpu_mmio_write(vcpu, gpa, bytes, val); } static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, void *val, int bytes) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL); return X86EMUL_IO_NEEDED; } diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 8ade3eb6c640..90fce4d6956a 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -208,7 +208,7 @@ TRACE_EVENT(kvm_ack_irq, { KVM_TRACE_MMIO_WRITE, "write" } TRACE_EVENT(kvm_mmio, - TP_PROTO(int type, int len, u64 gpa, u64 val), + TP_PROTO(int type, int len, u64 gpa, void *val), TP_ARGS(type, len, gpa, val), TP_STRUCT__entry( @@ -222,7 +222,10 @@ TRACE_EVENT(kvm_mmio, __entry->type = type; __entry->len = len; __entry->gpa = gpa; - __entry->val = val; + __entry->val = 0; + if (val) + memcpy(&__entry->val, val, + min_t(u32, sizeof(__entry->val), len)); ), TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", -- GitLab From 02f201f78fb9da5d140abc17cf9b3a196b1b42dd Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Wed, 13 Dec 2017 19:52:23 +0100 Subject: [PATCH 0851/1398] can: gs_usb: fix return value of the "set_bittiming" callback commit d5b42e6607661b198d8b26a0c30969605b1bf5c7 upstream. The "set_bittiming" callback treats a positive return value as error! For that reason "can_changelink()" will quit silently after setting the bittiming values without processing ctrlmode, restart-ms, etc. Signed-off-by: Wolfgang Grandegger Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/gs_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index eea9aea14b00..5d5012337d9e 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev) dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)", rc); - return rc; + return (rc > 0) ? 0 : rc; } static void gs_usb_xmit_callback(struct urb *urb) -- GitLab From 30191718645d24c5b213f1ab3ae531a585a13f21 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 3 Jan 2018 13:39:15 -0800 Subject: [PATCH 0852/1398] IB/srpt: Disable RDMA access by the initiator commit bec40c26041de61162f7be9d2ce548c756ce0f65 upstream. With the SRP protocol all RDMA operations are initiated by the target. Since no RDMA operations are initiated by the initiator, do not grant the initiator permission to submit RDMA reads or writes to the target. Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index b9748970df4a..29ab814693fc 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -992,8 +992,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp) return -ENOMEM; attr->qp_state = IB_QPS_INIT; - attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE; + attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; attr->port_num = ch->sport->port; attr->pkey_index = 0; -- GitLab From 14e1c579acba423e363d1494c3183f943b8a1e3d Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 27 Nov 2017 09:33:03 +0000 Subject: [PATCH 0853/1398] MIPS: Validate PR_SET_FP_MODE prctl(2) requests against the ABI of the task commit b67336eee3fcb8ecedc6c13e2bf88aacfa3151e2 upstream. Fix an API loophole introduced with commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS"), where the caller of prctl(2) is incorrectly allowed to make a change to CP0.Status.FR or CP0.Config5.FRE register bits even if CONFIG_MIPS_O32_FP64_SUPPORT has not been enabled, despite that an executable requesting the mode requested via ELF file annotation would not be allowed to run in the first place, or for n64 and n64 ABI tasks which do not have non-default modes defined at all. Add suitable checks to `mips_set_process_fp_mode' and bail out if an invalid mode change has been requested for the ABI in effect, even if the FPU hardware or emulation would otherwise allow it. Always succeed however without taking any further action if the mode requested is the same as one already in effect, regardless of whether any mode change, should it be requested, would actually be allowed for the task concerned. Signed-off-by: Maciej W. Rozycki Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17800/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index c558bce989cd..6e716a5f1173 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -683,6 +683,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) struct task_struct *t; int max_users; + /* If nothing to change, return right away, successfully. */ + if (value == mips_get_process_fp_mode(task)) + return 0; + + /* Only accept a mode change if 64-bit FP enabled for o32. */ + if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT)) + return -EOPNOTSUPP; + + /* And only for o32 tasks. */ + if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS)) + return -EOPNOTSUPP; + /* Check the value is valid */ if (value & ~known_bits) return -EOPNOTSUPP; -- GitLab From 8eb5655aacdd9fee39a690e29252a89326c85df6 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:51:35 +0000 Subject: [PATCH 0854/1398] MIPS: Factor out NT_PRFPREG regset access helpers commit a03fe72572c12e98f4173f8a535f32468e48b6ec upstream. In preparation to fix a commit 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") FCSR access regression factor out NT_PRFPREG regset access helpers for the non-MSA and the MSA variants respectively, to avoid having to deal with excessive indentation in the actual fix. No functional change, however use `target->thread.fpu.fpr[0]' rather than `target->thread.fpu.fpr[i]' for FGR holding type size determination as there's no `i' variable to refer to anymore, and for the factored out `i' variable declaration use `unsigned int' rather than `unsigned' as its type, following the common style. Signed-off-by: Maciej W. Rozycki Fixes: 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17925/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 108 +++++++++++++++++++++++++++++--------- 1 file changed, 83 insertions(+), 25 deletions(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 11890e6e4093..c6df58e01418 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -439,25 +439,36 @@ static int gpr64_set(struct task_struct *target, #endif /* CONFIG_64BIT */ -static int fpr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer, + * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots + * correspond 1:1 to buffer slots. + */ +static int fpr_get_fpa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + void **kbuf, void __user **ubuf) { - unsigned i; - int err; - u64 fpr_val; - - /* XXX fcr31 */ + return user_regset_copyout(pos, count, kbuf, ubuf, + &target->thread.fpu, + 0, sizeof(elf_fpregset_t)); +} - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer, + * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's + * general register slots are copied to buffer slots. + */ +static int fpr_get_msa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + void **kbuf, void __user **ubuf) +{ + unsigned int i; + u64 fpr_val; + int err; for (i = 0; i < NUM_FPU_REGS; i++) { fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); - err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + err = user_regset_copyout(pos, count, kbuf, ubuf, &fpr_val, i * sizeof(elf_fpreg_t), (i + 1) * sizeof(elf_fpreg_t)); if (err) @@ -467,27 +478,54 @@ static int fpr_get(struct task_struct *target, return 0; } -static int fpr_set(struct task_struct *target, +/* Copy the floating-point context to the supplied NT_PRFPREG buffer. */ +static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) + void *kbuf, void __user *ubuf) { - unsigned i; int err; - u64 fpr_val; /* XXX fcr31 */ - init_fp_ctx(target); + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf); + else + err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf); + + return err; +} - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context, + * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP + * context's general register slots. + */ +static int fpr_set_fpa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + return user_regset_copyin(pos, count, kbuf, ubuf, + &target->thread.fpu, + 0, sizeof(elf_fpregset_t)); +} + +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context, + * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64 + * bits only of FP context's general register slots. + */ +static int fpr_set_msa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + unsigned int i; + u64 fpr_val; + int err; BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); - for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) { - err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + for (i = 0; i < NUM_FPU_REGS && *count >= sizeof(elf_fpreg_t); i++) { + err = user_regset_copyin(pos, count, kbuf, ubuf, &fpr_val, i * sizeof(elf_fpreg_t), (i + 1) * sizeof(elf_fpreg_t)); if (err) @@ -498,6 +536,26 @@ static int fpr_set(struct task_struct *target, return 0; } +/* Copy the supplied NT_PRFPREG buffer to the floating-point context. */ +static int fpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int err; + + /* XXX fcr31 */ + + init_fp_ctx(target); + + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf); + else + err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf); + + return err; +} + enum mips_regset { REGSET_GPR, REGSET_FPR, -- GitLab From 5b593a81fddd566aaf5f7b5f4d96ccb84b0c6148 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:52:15 +0000 Subject: [PATCH 0855/1398] MIPS: Guard against any partial write attempt with PTRACE_SETREGSET commit dc24d0edf33c3e15099688b6bbdf7bdc24bf6e91 upstream. Complement commit d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") and ensure that no partial register write attempt is made with PTRACE_SETREGSET, as we do not preinitialize any temporaries used to hold incoming register data and consequently random data could be written. It is the responsibility of the caller, such as `ptrace_regset', to arrange for writes to span whole registers only, so here we only assert that it has indeed happened. Signed-off-by: Maciej W. Rozycki Fixes: 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17926/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index c6df58e01418..16a7809c73be 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -536,7 +536,15 @@ static int fpr_set_msa(struct task_struct *target, return 0; } -/* Copy the supplied NT_PRFPREG buffer to the floating-point context. */ +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context. + * + * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0', + * which is supposed to have been guaranteed by the kernel before + * calling us, e.g. in `ptrace_regset'. We enforce that requirement, + * so that we can safely avoid preinitializing temporaries for + * partial register writes. + */ static int fpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, @@ -544,6 +552,8 @@ static int fpr_set(struct task_struct *target, { int err; + BUG_ON(count % sizeof(elf_fpreg_t)); + /* XXX fcr31 */ init_fp_ctx(target); -- GitLab From f616180a8720b499ada54c830b7e028ed8a8e1c7 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:53:14 +0000 Subject: [PATCH 0856/1398] MIPS: Consistently handle buffer counter with PTRACE_SETREGSET commit 80b3ffce0196ea50068885d085ff981e4b8396f4 upstream. Update commit d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") bug and consistently consume all data supplied to `fpr_set_msa' with the ptrace(2) PTRACE_SETREGSET request, such that a zero data buffer counter is returned where insufficient data has been given to fill a whole number of FP general registers. In reality this is not going to happen, as the caller is supposed to only supply data covering a whole number of registers and it is verified in `ptrace_regset' and again asserted in `fpr_set', however structuring code such that the presence of trailing partial FP general register data causes `fpr_set_msa' to return with a non-zero data buffer counter makes it appear that this trailing data will be used if there are subsequent writes made to FP registers, which is going to be the case with the FCSR once the missing write to that register has been fixed. Fixes: d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17927/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 16a7809c73be..73ce5e5a989c 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -524,7 +524,7 @@ static int fpr_set_msa(struct task_struct *target, int err; BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); - for (i = 0; i < NUM_FPU_REGS && *count >= sizeof(elf_fpreg_t); i++) { + for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) { err = user_regset_copyin(pos, count, kbuf, ubuf, &fpr_val, i * sizeof(elf_fpreg_t), (i + 1) * sizeof(elf_fpreg_t)); -- GitLab From cfc5c63a38ca0ae5f11d48589c4f9e348e9428aa Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:54:33 +0000 Subject: [PATCH 0857/1398] MIPS: Fix an FCSR access API regression with NT_PRFPREG and MSA commit be07a6a1188372b6d19a3307ec33211fc9c9439d upstream. Fix a commit 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") public API regression, then activated by commit 1db1af84d6df ("MIPS: Basic MSA context switching support"), that caused the FCSR register not to be read or written for CONFIG_CPU_HAS_MSA kernel configurations (regardless of actual presence or absence of the MSA feature in a given processor) with ptrace(2) PTRACE_GETREGSET and PTRACE_SETREGSET requests nor recorded in core dumps. This is because with !CONFIG_CPU_HAS_MSA configurations the whole of `elf_fpregset_t' array is bulk-copied as it is, which includes the FCSR in one half of the last, 33rd slot, whereas with CONFIG_CPU_HAS_MSA configurations array elements are copied individually, and then only the leading 32 FGR slots while the remaining slot is ignored. Correct the code then such that only FGR slots are copied in the respective !MSA and MSA helpers an then the FCSR slot is handled separately in common code. Use `ptrace_setfcr31' to update the FCSR too, so that the read-only mask is respected. Retrieving a correct value of FCSR is important in debugging not only for the human to be able to get the right interpretation of the situation, but for correct operation of GDB as well. This is because the condition code bits in FSCR are used by GDB to determine the location to place a breakpoint at when single-stepping through an FPU branch instruction. If such a breakpoint is placed incorrectly (i.e. with the condition reversed), then it will be missed, likely causing the debuggee to run away from the control of GDB and consequently breaking the process of investigation. Fortunately GDB continues using the older PTRACE_GETFPREGS ptrace(2) request which is unaffected, so the regression only really hits with post-mortem debug sessions using a core dump file, in which case execution, and consequently single-stepping through branches is not possible. Of course core files created by buggy kernels out there will have the value of FCSR recorded clobbered, but such core files cannot be corrected and the person using them simply will have to be aware that the value of FCSR retrieved is not reliable. Which also means we can likely get away without defining a replacement API which would ensure a correct value of FSCR to be retrieved, or none at all. This is based on previous work by Alex Smith, extensively rewritten. Signed-off-by: Alex Smith Signed-off-by: James Hogan Signed-off-by: Maciej W. Rozycki Fixes: 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") Cc: Paul Burton Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17928/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 47 ++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 73ce5e5a989c..800342a16b12 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -442,7 +442,7 @@ static int gpr64_set(struct task_struct *target, /* * Copy the floating-point context to the supplied NT_PRFPREG buffer, * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots - * correspond 1:1 to buffer slots. + * correspond 1:1 to buffer slots. Only general registers are copied. */ static int fpr_get_fpa(struct task_struct *target, unsigned int *pos, unsigned int *count, @@ -450,13 +450,14 @@ static int fpr_get_fpa(struct task_struct *target, { return user_regset_copyout(pos, count, kbuf, ubuf, &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t)); } /* * Copy the floating-point context to the supplied NT_PRFPREG buffer, * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's - * general register slots are copied to buffer slots. + * general register slots are copied to buffer slots. Only general + * registers are copied. */ static int fpr_get_msa(struct task_struct *target, unsigned int *pos, unsigned int *count, @@ -478,20 +479,29 @@ static int fpr_get_msa(struct task_struct *target, return 0; } -/* Copy the floating-point context to the supplied NT_PRFPREG buffer. */ +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer. + * Choose the appropriate helper for general registers, and then copy + * the FCSR register separately. + */ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); int err; - /* XXX fcr31 */ - if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf); else err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf); + if (err) + return err; + + err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fcr31, + fcr31_pos, fcr31_pos + sizeof(u32)); return err; } @@ -499,7 +509,7 @@ static int fpr_get(struct task_struct *target, /* * Copy the supplied NT_PRFPREG buffer to the floating-point context, * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP - * context's general register slots. + * context's general register slots. Only general registers are copied. */ static int fpr_set_fpa(struct task_struct *target, unsigned int *pos, unsigned int *count, @@ -507,13 +517,14 @@ static int fpr_set_fpa(struct task_struct *target, { return user_regset_copyin(pos, count, kbuf, ubuf, &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t)); } /* * Copy the supplied NT_PRFPREG buffer to the floating-point context, * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64 - * bits only of FP context's general register slots. + * bits only of FP context's general register slots. Only general + * registers are copied. */ static int fpr_set_msa(struct task_struct *target, unsigned int *pos, unsigned int *count, @@ -538,6 +549,8 @@ static int fpr_set_msa(struct task_struct *target, /* * Copy the supplied NT_PRFPREG buffer to the floating-point context. + * Choose the appropriate helper for general registers, and then copy + * the FCSR register separately. * * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0', * which is supposed to have been guaranteed by the kernel before @@ -550,18 +563,30 @@ static int fpr_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); + u32 fcr31; int err; BUG_ON(count % sizeof(elf_fpreg_t)); - /* XXX fcr31 */ - init_fp_ctx(target); if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf); else err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf); + if (err) + return err; + + if (count > 0) { + err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fcr31, + fcr31_pos, fcr31_pos + sizeof(u32)); + if (err) + return err; + + ptrace_setfcr31(target, fcr31); + } return err; } -- GitLab From 1f4cff1c364ba4d1b197af88fca63b8736a9d601 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:55:40 +0000 Subject: [PATCH 0858/1398] MIPS: Also verify sizeof `elf_fpreg_t' with PTRACE_SETREGSET commit 006501e039eec411842bb3150c41358867d320c2 upstream. Complement commit d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") and like with the PTRACE_GETREGSET ptrace(2) request also apply a BUILD_BUG_ON check for the size of the `elf_fpreg_t' type in the PTRACE_SETREGSET request handler. Signed-off-by: Maciej W. Rozycki Fixes: d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17929/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 800342a16b12..99bc1ce2f40a 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -467,6 +467,7 @@ static int fpr_get_msa(struct task_struct *target, u64 fpr_val; int err; + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); for (i = 0; i < NUM_FPU_REGS; i++) { fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); err = user_regset_copyout(pos, count, kbuf, ubuf, -- GitLab From 78c00f597ba8211007420afdac5f99e56f9c0e2f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:56:54 +0000 Subject: [PATCH 0859/1398] MIPS: Disallow outsized PTRACE_SETREGSET NT_PRFPREG regset accesses commit c8c5a3a24d395b14447a9a89d61586a913840a3b upstream. Complement commit c23b3d1a5311 ("MIPS: ptrace: Change GP regset to use correct core dump register layout") and also reject outsized PTRACE_SETREGSET requests to the NT_PRFPREG regset, like with the NT_PRSTATUS regset. Signed-off-by: Maciej W. Rozycki Fixes: c23b3d1a5311 ("MIPS: ptrace: Change GP regset to use correct core dump register layout") Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17930/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 99bc1ce2f40a..0c8ae2cc6380 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -570,6 +570,9 @@ static int fpr_set(struct task_struct *target, BUG_ON(count % sizeof(elf_fpreg_t)); + if (pos + count > sizeof(elf_fpregset_t)) + return -EIO; + init_fp_ctx(target); if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) -- GitLab From 491c0ca3dbd5f6f9a1c3aa67b8c3b3c9610c489e Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 3 Jan 2018 14:31:38 -0800 Subject: [PATCH 0860/1398] kvm: vmx: Scrub hardware GPRs at VM-exit commit 0cb5b30698fdc8f6b4646012e3acb4ddce430788 upstream. Guest GPR values are live in the hardware GPRs at VM-exit. Do not leave any guest values in hardware GPRs after the guest GPR values are saved to the vcpu_vmx structure. This is a partial mitigation for CVE 2017-5715 and CVE 2017-5753. Specifically, it defeats the Project Zero PoC for CVE 2017-5715. Suggested-by: Eric Northup Signed-off-by: Jim Mattson Reviewed-by: Eric Northup Reviewed-by: Benjamin Serebrin Reviewed-by: Andrew Honig [Paolo: Add AMD bits, Signed-off-by: Tom Lendacky ] Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 19 +++++++++++++++++++ arch/x86/kvm/vmx.c | 14 +++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8148d8ca7930..8d96f9ce1926 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4868,6 +4868,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%[svm]) \n\t" "mov %%r14, %c[r14](%[svm]) \n\t" "mov %%r15, %c[r15](%[svm]) \n\t" +#endif + /* + * Clear host registers marked as clobbered to prevent + * speculative use. + */ + "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t" + "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t" + "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t" + "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t" + "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t" +#ifdef CONFIG_X86_64 + "xor %%r8, %%r8 \n\t" + "xor %%r9, %%r9 \n\t" + "xor %%r10, %%r10 \n\t" + "xor %%r11, %%r11 \n\t" + "xor %%r12, %%r12 \n\t" + "xor %%r13, %%r13 \n\t" + "xor %%r14, %%r14 \n\t" + "xor %%r15, %%r15 \n\t" #endif "pop %%" _ASM_BP : diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 263e56059fd5..d8582902a783 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8948,6 +8948,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) /* Save guest registers, load host registers, keep flags */ "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" "pop %0 \n\t" + "setbe %c[fail](%0)\n\t" "mov %%" _ASM_AX ", %c[rax](%0) \n\t" "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" __ASM_SIZE(pop) " %c[rcx](%0) \n\t" @@ -8964,12 +8965,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%0) \n\t" "mov %%r14, %c[r14](%0) \n\t" "mov %%r15, %c[r15](%0) \n\t" + "xor %%r8d, %%r8d \n\t" + "xor %%r9d, %%r9d \n\t" + "xor %%r10d, %%r10d \n\t" + "xor %%r11d, %%r11d \n\t" + "xor %%r12d, %%r12d \n\t" + "xor %%r13d, %%r13d \n\t" + "xor %%r14d, %%r14d \n\t" + "xor %%r15d, %%r15d \n\t" #endif "mov %%cr2, %%" _ASM_AX " \n\t" "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" + "xor %%eax, %%eax \n\t" + "xor %%ebx, %%ebx \n\t" + "xor %%esi, %%esi \n\t" + "xor %%edi, %%edi \n\t" "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" - "setbe %c[fail](%0) \n\t" ".pushsection .rodata \n\t" ".global vmx_return \n\t" "vmx_return: " _ASM_PTR " 2b \n\t" -- GitLab From 64ab063b7193dd8f41f54751d9612c5b00735395 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 3 Jan 2018 12:49:29 +0100 Subject: [PATCH 0861/1398] platform/x86: wmi: Call acpi_wmi_init() later commit 98b8e4e5c17bf87c1b18ed929472051dab39878c upstream. Calling acpi_wmi_init() at the subsys_initcall() level causes ordering issues to appear on some systems and they are difficult to reproduce, because there is no guaranteed ordering between subsys_initcall() calls, so they may occur in different orders on different systems. In particular, commit 86d9f48534e8 (mm/slab: fix kmemcg cache creation delayed issue) exposed one of these issues where genl_init() and acpi_wmi_init() are both called at the same initcall level, but the former must run before the latter so as to avoid a NULL pointer dereference. For this reason, move the acpi_wmi_init() invocation to the initcall_sync level which should still be early enough for things to work correctly in the WMI land. Link: https://marc.info/?t=151274596700002&r=1&w=2 Reported-by: Jonathan McDowell Reported-by: Joonsoo Kim Tested-by: Jonathan McDowell Signed-off-by: Rafael J. Wysocki Signed-off-by: Darren Hart (VMware) Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index ceeb8c188ef3..00d82e8443bd 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -848,5 +848,5 @@ static void __exit acpi_wmi_exit(void) pr_info("Mapper unloaded\n"); } -subsys_initcall(acpi_wmi_init); +subsys_initcall_sync(acpi_wmi_init); module_exit(acpi_wmi_exit); -- GitLab From 0199927a8e51126ff420efc2155ad196a853678b Mon Sep 17 00:00:00 2001 From: Vikas C Sajjan Date: Thu, 16 Nov 2017 21:43:44 +0530 Subject: [PATCH 0862/1398] x86/acpi: Handle SCI interrupts above legacy space gracefully commit 252714155f04c5d16989cb3aadb85fd1b5772f99 upstream. Platforms which support only IOAPIC mode, pass the SCI information above the legacy space (0-15) via the FADT mechanism and not via MADT. In such cases mp_override_legacy_irq() which is invoked from acpi_sci_ioapic_setup() to register SCI interrupts fails for interrupts greater equal 16, since it is meant to handle only the legacy space and emits error "Invalid bus_irq %u for legacy override". Add a new function to handle SCI interrupts >= 16 and invoke it conditionally in acpi_sci_ioapic_setup(). The code duplication due to this new function will be cleaned up in a separate patch. Co-developed-by: Sunil V L Signed-off-by: Vikas C Sajjan Signed-off-by: Sunil V L Signed-off-by: Thomas Gleixner Tested-by: Abdul Lateef Attar Acked-by: Rafael J. Wysocki Cc: linux-pm@vger.kernel.org Cc: kkamagui@gmail.com Cc: linux-acpi@vger.kernel.org Link: https://lkml.kernel.org/r/1510848825-21965-2-git-send-email-vikas.cha.sajjan@hpe.com Cc: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/acpi/boot.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 11cc600f4df0..928259488bc3 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -422,6 +422,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, return 0; } +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, + u8 trigger, u32 gsi) +{ + struct mpc_intsrc mp_irq; + int ioapic, pin; + + /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */ + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) { + pr_warn("Failed to find ioapic for gsi : %u\n", gsi); + return ioapic; + } + + pin = mp_find_ioapic_pin(ioapic, gsi); + + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger << 2) | polarity; + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.srcbusirq = bus_irq; + mp_irq.dstapic = mpc_ioapic_id(ioapic); + mp_irq.dstirq = pin; + + mp_save_irq(&mp_irq); + + return 0; +} + static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { @@ -466,7 +494,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; - mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + if (bus_irq < NR_IRQS_LEGACY) + mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + else + mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi); + acpi_penalize_sci_irq(bus_irq, trigger, polarity); /* -- GitLab From 83da0245eda22dd99a44f2f127cca86d06479edd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 1 Jan 2018 09:50:50 +0100 Subject: [PATCH 0863/1398] ALSA: pcm: Remove incorrect snd_BUG_ON() usages commit fe08f34d066f4404934a509b6806db1a4f700c86 upstream. syzkaller triggered kernel warnings through PCM OSS emulation at closing a stream: WARNING: CPU: 0 PID: 3502 at sound/core/pcm_lib.c:1635 snd_pcm_hw_param_first+0x289/0x690 sound/core/pcm_lib.c:1635 Call Trace: .... snd_pcm_hw_param_near.constprop.27+0x78d/0x9a0 sound/core/oss/pcm_oss.c:457 snd_pcm_oss_change_params+0x17d3/0x3720 sound/core/oss/pcm_oss.c:969 snd_pcm_oss_make_ready+0xaa/0x130 sound/core/oss/pcm_oss.c:1128 snd_pcm_oss_sync+0x257/0x830 sound/core/oss/pcm_oss.c:1638 snd_pcm_oss_release+0x20b/0x280 sound/core/oss/pcm_oss.c:2431 __fput+0x327/0x7e0 fs/file_table.c:210 .... This happens while it tries to open and set up the aloop device concurrently. The warning above (invoked from snd_BUG_ON() macro) is to detect the unexpected logical error where snd_pcm_hw_refine() call shouldn't fail. The theory is true for the case where the hw_params config rules are static. But for an aloop device, the hw_params rule condition does vary dynamically depending on the connected target; when another device is opened and changes the parameters, the device connected in another side is also affected, and it caused the error from snd_pcm_hw_refine(). That is, the simplest "solution" for this is to remove the incorrect assumption of static rules, and treat such an error as a normal error path. As there are a couple of other places using snd_BUG_ON() incorrectly, this patch removes these spurious snd_BUG_ON() calls. Reported-by: syzbot+6f11c7e2a1b91d466432@syzkaller.appspotmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_oss.c | 1 - sound/core/pcm_lib.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index ebc9fdfe64df..2167363c18b3 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -466,7 +466,6 @@ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm, v = snd_pcm_hw_param_last(pcm, params, var, dir); else v = snd_pcm_hw_param_first(pcm, params, var, dir); - snd_BUG_ON(v < 0); return v; } diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index c80d80e312e3..e685e779a4b8 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1664,7 +1664,7 @@ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm, return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); - if (snd_BUG_ON(err < 0)) + if (err < 0) return err; } return snd_pcm_hw_param_value(params, var, dir); @@ -1711,7 +1711,7 @@ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm, return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); - if (snd_BUG_ON(err < 0)) + if (err < 0) return err; } return snd_pcm_hw_param_value(params, var, dir); -- GitLab From 8e81425e80c90667464540694967337e318e545b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 Jan 2018 16:39:27 +0100 Subject: [PATCH 0864/1398] ALSA: pcm: Add missing error checks in OSS emulation plugin builder commit 6708913750344a900f2e73bfe4a4d6dbbce4fe8d upstream. In the OSS emulation plugin builder where the frame size is parsed in the plugin chain, some places miss the possible errors returned from the plugin src_ or dst_frames callback. This patch papers over such places. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_plugin.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 727ac44d39f4..a84a1d3d23e5 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -591,18 +591,26 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st snd_pcm_sframes_t frames = size; plugin = snd_pcm_plug_first(plug); - while (plugin && frames > 0) { + while (plugin) { + if (frames <= 0) + return frames; if ((next = plugin->next) != NULL) { snd_pcm_sframes_t frames1 = frames; - if (plugin->dst_frames) + if (plugin->dst_frames) { frames1 = plugin->dst_frames(plugin, frames); + if (frames1 <= 0) + return frames1; + } if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) { return err; } if (err != frames1) { frames = err; - if (plugin->src_frames) + if (plugin->src_frames) { frames = plugin->src_frames(plugin, frames1); + if (frames <= 0) + return frames; + } } } else dst_channels = NULL; -- GitLab From 3a00564cb49f17e5ca2a1d6e34c7ea7b1113f49e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 8 Jan 2018 13:58:31 +0100 Subject: [PATCH 0865/1398] ALSA: pcm: Abort properly at pending signal in OSS read/write loops commit 29159a4ed7044c52e3e2cf1a9fb55cec4745c60b upstream. The loops for read and write in PCM OSS emulation have no proper check of pending signals, and they keep processing even after user tries to break. This results in a very long delay, often seen as RCU stall when a huge unprocessed bytes remain queued. The bug could be easily triggered by syzkaller. As a simple workaround, this patch adds the proper check of pending signals and aborts the loop appropriately. Reported-by: syzbot+993cb4cfcbbff3947c21@syzkaller.appspotmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_oss.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 2167363c18b3..f82f4455cd77 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1416,6 +1416,10 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha tmp != runtime->oss.period_bytes) break; } + if (signal_pending(current)) { + tmp = -ERESTARTSYS; + goto err; + } } mutex_unlock(&runtime->oss.params_lock); return xfer; @@ -1501,6 +1505,10 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use bytes -= tmp; xfer += tmp; } + if (signal_pending(current)) { + tmp = -ERESTARTSYS; + goto err; + } } mutex_unlock(&runtime->oss.params_lock); return xfer; -- GitLab From bee3f2d5c02aed452542fae2bb980f555dd887ad Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 8 Jan 2018 14:03:53 +0100 Subject: [PATCH 0866/1398] ALSA: pcm: Allow aborting mutex lock at OSS read/write loops commit 900498a34a3ac9c611e9b425094c8106bdd7dc1c upstream. PCM OSS read/write loops keep taking the mutex lock for the whole read/write, and this might take very long when the exceptionally high amount of data is given. Also, since it invokes with mutex_lock(), the concurrent read/write becomes unbreakable. This patch tries to address these issues by replacing mutex_lock() with mutex_lock_interruptible(), and also splits / re-takes the lock at each read/write period chunk, so that it can switch the context more finely if requested. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_oss.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index f82f4455cd77..3321348fd86b 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1369,8 +1369,11 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha if ((tmp = snd_pcm_oss_make_ready(substream)) < 0) return tmp; - mutex_lock(&runtime->oss.params_lock); while (bytes > 0) { + if (mutex_lock_interruptible(&runtime->oss.params_lock)) { + tmp = -ERESTARTSYS; + break; + } if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { tmp = bytes; if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes) @@ -1414,18 +1417,18 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha xfer += tmp; if ((substream->f_flags & O_NONBLOCK) != 0 && tmp != runtime->oss.period_bytes) - break; + tmp = -EAGAIN; } + err: + mutex_unlock(&runtime->oss.params_lock); + if (tmp < 0) + break; if (signal_pending(current)) { tmp = -ERESTARTSYS; - goto err; + break; } + tmp = 0; } - mutex_unlock(&runtime->oss.params_lock); - return xfer; - - err: - mutex_unlock(&runtime->oss.params_lock); return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp; } @@ -1473,8 +1476,11 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use if ((tmp = snd_pcm_oss_make_ready(substream)) < 0) return tmp; - mutex_lock(&runtime->oss.params_lock); while (bytes > 0) { + if (mutex_lock_interruptible(&runtime->oss.params_lock)) { + tmp = -ERESTARTSYS; + break; + } if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { if (runtime->oss.buffer_used == 0) { tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1); @@ -1505,16 +1511,16 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use bytes -= tmp; xfer += tmp; } + err: + mutex_unlock(&runtime->oss.params_lock); + if (tmp < 0) + break; if (signal_pending(current)) { tmp = -ERESTARTSYS; - goto err; + break; } + tmp = 0; } - mutex_unlock(&runtime->oss.params_lock); - return xfer; - - err: - mutex_unlock(&runtime->oss.params_lock); return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp; } -- GitLab From 01046dd834ac5ecf3c57dc34b60de127e7ac2d29 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 5 Jan 2018 16:09:47 +0100 Subject: [PATCH 0867/1398] ALSA: aloop: Release cable upon open error path commit 9685347aa0a5c2869058ca6ab79fd8e93084a67f upstream. The aloop runtime object and its assignment in the cable are left even when opening a substream fails. This doesn't mean any memory leak, but it still keeps the invalid pointer that may be referred by the another side of the cable spontaneously, which is a potential Oops cause. Clean up the cable assignment and the empty cable upon the error path properly. Fixes: 597603d615d2 ("ALSA: introduce the snd-aloop module for the PCM loopback") Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/drivers/aloop.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index 54f348a4fb78..2adc88d6d507 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -658,12 +658,31 @@ static int rule_channels(struct snd_pcm_hw_params *params, return snd_interval_refine(hw_param_interval(params, rule->var), &t); } +static void free_cable(struct snd_pcm_substream *substream) +{ + struct loopback *loopback = substream->private_data; + int dev = get_cable_index(substream); + struct loopback_cable *cable; + + cable = loopback->cables[substream->number][dev]; + if (!cable) + return; + if (cable->streams[!substream->stream]) { + /* other stream is still alive */ + cable->streams[substream->stream] = NULL; + } else { + /* free the cable */ + loopback->cables[substream->number][dev] = NULL; + kfree(cable); + } +} + static int loopback_open(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct loopback *loopback = substream->private_data; struct loopback_pcm *dpcm; - struct loopback_cable *cable; + struct loopback_cable *cable = NULL; int err = 0; int dev = get_cable_index(substream); @@ -682,7 +701,6 @@ static int loopback_open(struct snd_pcm_substream *substream) if (!cable) { cable = kzalloc(sizeof(*cable), GFP_KERNEL); if (!cable) { - kfree(dpcm); err = -ENOMEM; goto unlock; } @@ -724,6 +742,10 @@ static int loopback_open(struct snd_pcm_substream *substream) else runtime->hw = cable->hw; unlock: + if (err < 0) { + free_cable(substream); + kfree(dpcm); + } mutex_unlock(&loopback->cable_lock); return err; } @@ -732,20 +754,10 @@ static int loopback_close(struct snd_pcm_substream *substream) { struct loopback *loopback = substream->private_data; struct loopback_pcm *dpcm = substream->runtime->private_data; - struct loopback_cable *cable; - int dev = get_cable_index(substream); loopback_timer_stop(dpcm); mutex_lock(&loopback->cable_lock); - cable = loopback->cables[substream->number][dev]; - if (cable->streams[!substream->stream]) { - /* other stream is still alive */ - cable->streams[substream->stream] = NULL; - } else { - /* free the cable */ - loopback->cables[substream->number][dev] = NULL; - kfree(cable); - } + free_cable(substream); mutex_unlock(&loopback->cable_lock); return 0; } -- GitLab From 5af666d0ddb7dcb28acc822816fa11d8c55fe860 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 5 Jan 2018 16:15:33 +0100 Subject: [PATCH 0868/1398] ALSA: aloop: Fix inconsistent format due to incomplete rule commit b088b53e20c7d09b5ab84c5688e609f478e5c417 upstream. The extra hw constraint rule for the formats the aloop driver introduced has a slight flaw, where it doesn't return a positive value when the mask got changed. It came from the fact that it's basically a copy&paste from snd_hw_constraint_mask64(). The original code is supposed to be a single-shot and it modifies the mask bits only once and never after, while what we need for aloop is the dynamic hw rule that limits the mask bits. This difference results in the inconsistent state, as the hw_refine doesn't apply the dependencies fully. The worse and surprisingly result is that it causes a crash in OSS emulation when multiple full-duplex reads/writes are performed concurrently (I leave why it triggers Oops to readers as a homework). For fixing this, replace a few open-codes with the standard snd_mask_*() macros. Reported-by: syzbot+3902b5220e8ca27889ca@syzkaller.appspotmail.com Fixes: b1c73fc8e697 ("ALSA: snd-aloop: Fix hw_params restrictions and checking") Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/drivers/aloop.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index 2adc88d6d507..59e4a88757b1 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -622,14 +623,12 @@ static int rule_format(struct snd_pcm_hw_params *params, { struct snd_pcm_hardware *hw = rule->private; - struct snd_mask *maskp = hw_param_mask(params, rule->var); + struct snd_mask m; - maskp->bits[0] &= (u_int32_t)hw->formats; - maskp->bits[1] &= (u_int32_t)(hw->formats >> 32); - memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */ - if (! maskp->bits[0] && ! maskp->bits[1]) - return -EINVAL; - return 0; + snd_mask_none(&m); + m.bits[0] = (u_int32_t)hw->formats; + m.bits[1] = (u_int32_t)(hw->formats >> 32); + return snd_mask_refine(hw_param_mask(params, rule->var), &m); } static int rule_rate(struct snd_pcm_hw_params *params, -- GitLab From 43ff00f873773afc676d362e3c1d941f16d569af Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 Jan 2018 17:38:54 +0100 Subject: [PATCH 0869/1398] ALSA: aloop: Fix racy hw constraints adjustment commit 898dfe4687f460ba337a01c11549f87269a13fa2 upstream. The aloop driver tries to update the hw constraints of the connected target on the cable of the opened PCM substream. This is done by adding the extra hw constraints rules referring to the substream runtime->hw fields, while the other substream may update the runtime hw of another side on the fly. This is, however, racy and may result in the inconsistent values when both PCM streams perform the prepare concurrently. One of the reason is that it overwrites the other's runtime->hw field; which is not only racy but also broken when it's called before the open of another side finishes. And, since the reference to runtime->hw isn't protected, the concurrent write may give the partial value update and become inconsistent. This patch is an attempt to fix and clean up: - The prepare doesn't change the runtime->hw of other side any longer, but only update the cable->hw that is referred commonly. - The extra rules refer to the loopback_pcm object instead of the runtime->hw. The actual hw is deduced from cable->hw. - The extra rules take the cable_lock to protect against the race. Fixes: b1c73fc8e697 ("ALSA: snd-aloop: Fix hw_params restrictions and checking") Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/drivers/aloop.c | 51 ++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index 59e4a88757b1..cbd20cb8ca11 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -306,19 +306,6 @@ static int loopback_trigger(struct snd_pcm_substream *substream, int cmd) return 0; } -static void params_change_substream(struct loopback_pcm *dpcm, - struct snd_pcm_runtime *runtime) -{ - struct snd_pcm_runtime *dst_runtime; - - if (dpcm == NULL || dpcm->substream == NULL) - return; - dst_runtime = dpcm->substream->runtime; - if (dst_runtime == NULL) - return; - dst_runtime->hw = dpcm->cable->hw; -} - static void params_change(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; @@ -330,10 +317,6 @@ static void params_change(struct snd_pcm_substream *substream) cable->hw.rate_max = runtime->rate; cable->hw.channels_min = runtime->channels; cable->hw.channels_max = runtime->channels; - params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK], - runtime); - params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE], - runtime); } static int loopback_prepare(struct snd_pcm_substream *substream) @@ -621,24 +604,29 @@ static unsigned int get_cable_index(struct snd_pcm_substream *substream) static int rule_format(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { - - struct snd_pcm_hardware *hw = rule->private; + struct loopback_pcm *dpcm = rule->private; + struct loopback_cable *cable = dpcm->cable; struct snd_mask m; snd_mask_none(&m); - m.bits[0] = (u_int32_t)hw->formats; - m.bits[1] = (u_int32_t)(hw->formats >> 32); + mutex_lock(&dpcm->loopback->cable_lock); + m.bits[0] = (u_int32_t)cable->hw.formats; + m.bits[1] = (u_int32_t)(cable->hw.formats >> 32); + mutex_unlock(&dpcm->loopback->cable_lock); return snd_mask_refine(hw_param_mask(params, rule->var), &m); } static int rule_rate(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { - struct snd_pcm_hardware *hw = rule->private; + struct loopback_pcm *dpcm = rule->private; + struct loopback_cable *cable = dpcm->cable; struct snd_interval t; - t.min = hw->rate_min; - t.max = hw->rate_max; + mutex_lock(&dpcm->loopback->cable_lock); + t.min = cable->hw.rate_min; + t.max = cable->hw.rate_max; + mutex_unlock(&dpcm->loopback->cable_lock); t.openmin = t.openmax = 0; t.integer = 0; return snd_interval_refine(hw_param_interval(params, rule->var), &t); @@ -647,11 +635,14 @@ static int rule_rate(struct snd_pcm_hw_params *params, static int rule_channels(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { - struct snd_pcm_hardware *hw = rule->private; + struct loopback_pcm *dpcm = rule->private; + struct loopback_cable *cable = dpcm->cable; struct snd_interval t; - t.min = hw->channels_min; - t.max = hw->channels_max; + mutex_lock(&dpcm->loopback->cable_lock); + t.min = cable->hw.channels_min; + t.max = cable->hw.channels_max; + mutex_unlock(&dpcm->loopback->cable_lock); t.openmin = t.openmax = 0; t.integer = 0; return snd_interval_refine(hw_param_interval(params, rule->var), &t); @@ -717,19 +708,19 @@ static int loopback_open(struct snd_pcm_substream *substream) /* are cached -> they do not reflect the actual state */ err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, - rule_format, &runtime->hw, + rule_format, dpcm, SNDRV_PCM_HW_PARAM_FORMAT, -1); if (err < 0) goto unlock; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, - rule_rate, &runtime->hw, + rule_rate, dpcm, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) goto unlock; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, - rule_channels, &runtime->hw, + rule_channels, dpcm, SNDRV_PCM_HW_PARAM_CHANNELS, -1); if (err < 0) goto unlock; -- GitLab From 5c1b80f674e96708ca9bc7169ca07399a1a2e247 Mon Sep 17 00:00:00 2001 From: Vikas C Sajjan Date: Thu, 16 Nov 2017 21:43:45 +0530 Subject: [PATCH 0870/1398] x86/acpi: Reduce code duplication in mp_override_legacy_irq() commit 4ee2ec1b122599f7b10c849fa7915cebb37b7edb upstream. The new function mp_register_ioapic_irq() is a subset of the code in mp_override_legacy_irq(). Replace the code duplication by invoking mp_register_ioapic_irq() from mp_override_legacy_irq(). Signed-off-by: Vikas C Sajjan Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Cc: linux-pm@vger.kernel.org Cc: kkamagui@gmail.com Cc: linux-acpi@vger.kernel.org Link: https://lkml.kernel.org/r/1510848825-21965-3-git-send-email-vikas.cha.sajjan@hpe.com Cc: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/acpi/boot.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 928259488bc3..0a1e8a67cc99 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -335,13 +335,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e #ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, + u8 trigger, u32 gsi); + static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { - int ioapic; - int pin; - struct mpc_intsrc mp_irq; - /* * Check bus_irq boundary. */ @@ -350,14 +349,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, return; } - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = mp_find_ioapic_pin(ioapic, gsi); - /* * TBD: This check is for faulty timer entries, where the override * erroneously sets the trigger to level, resulting in a HUGE @@ -366,16 +357,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, if ((bus_irq == 0) && (trigger == 3)) trigger = 1; - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = (trigger << 2) | polarity; - mp_irq.srcbus = MP_ISA_BUS; - mp_irq.srcbusirq = bus_irq; /* IRQ */ - mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */ - mp_irq.dstirq = pin; /* INTIN# */ - - mp_save_irq(&mp_irq); - + if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0) + return; /* * Reset default identity mapping if gsi is also an legacy IRQ, * otherwise there will be more than one entry with the same GSI -- GitLab From 1ecdfc1ee99d296a0bd69836a343f6e601190a75 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Mon, 27 Feb 2017 14:26:53 -0800 Subject: [PATCH 0871/1398] zswap: don't param_set_charp while holding spinlock commit fd5bb66cd934987e49557455b6497fc006521940 upstream. Change the zpool/compressor param callback function to release the zswap_pools_lock spinlock before calling param_set_charp, since that function may sleep when it calls kmalloc with GFP_KERNEL. While this problem has existed for a while, I wasn't able to trigger it using a tight loop changing either/both the zpool and compressor params; I think it's very unlikely to be an issue on the stable kernels, especially since most zswap users will change the compressor and/or zpool from sysfs only one time each boot - or zero times, if they add the params to the kernel boot. Fixes: c99b42c3529e ("zswap: use charp for zswap param strings") Link: http://lkml.kernel.org/r/20170126155821.4545-1-ddstreet@ieee.org Signed-off-by: Dan Streetman Reported-by: Sergey Senozhatsky Cc: Michal Hocko Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Vlastimil Babka Signed-off-by: Greg Kroah-Hartman --- mm/zswap.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index dbef27822a98..ded051e3433d 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -752,18 +752,22 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp, pool = zswap_pool_find_get(type, compressor); if (pool) { zswap_pool_debug("using existing", pool); + WARN_ON(pool == zswap_pool_current()); list_del_rcu(&pool->list); - } else { - spin_unlock(&zswap_pools_lock); - pool = zswap_pool_create(type, compressor); - spin_lock(&zswap_pools_lock); } + spin_unlock(&zswap_pools_lock); + + if (!pool) + pool = zswap_pool_create(type, compressor); + if (pool) ret = param_set_charp(s, kp); else ret = -EINVAL; + spin_lock(&zswap_pools_lock); + if (!ret) { put_pool = zswap_pool_current(); list_add_rcu(&pool->list, &zswap_pools); -- GitLab From 542bcc549379e43c1de75a510ea4eb8b9badd918 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Apr 2017 09:59:24 -0700 Subject: [PATCH 0872/1398] lan78xx: use skb_cow_head() to deal with cloned skbs commit d4ca73591916b760478d2b04334d5dcadc028e9c upstream. We need to ensure there is enough headroom to push extra header, but we also need to check if we are allowed to change headers. skb_cow_head() is the proper helper to deal with this. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Eric Dumazet Cc: James Hughes Cc: Woojung Huh Signed-off-by: David S. Miller Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index f33460cec79f..9c257ffedb15 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2419,14 +2419,9 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev, { u32 tx_cmd_a, tx_cmd_b; - if (skb_headroom(skb) < TX_OVERHEAD) { - struct sk_buff *skb2; - - skb2 = skb_copy_expand(skb, TX_OVERHEAD, 0, flags); + if (skb_cow_head(skb, TX_OVERHEAD)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } if (lan78xx_linearize(skb) < 0) -- GitLab From ab4fd7a2ddc5d558b616cf09ff6fb5de1cafb7e8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Apr 2017 09:59:23 -0700 Subject: [PATCH 0873/1398] sr9700: use skb_cow_head() to deal with cloned skbs commit d532c1082f68176363ed766d09bf187616e282fe upstream. We need to ensure there is enough headroom to push extra header, but we also need to check if we are allowed to change headers. skb_cow_head() is the proper helper to deal with this. Fixes: c9b37458e956 ("USB2NET : SR9700 : One chip USB 1.1 USB2NET SR9700Device Driver Support") Signed-off-by: Eric Dumazet Cc: James Hughes Signed-off-by: David S. Miller Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/sr9700.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 4a1e9c489f1f..aadfe1d1c37e 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -456,14 +456,9 @@ static struct sk_buff *sr9700_tx_fixup(struct usbnet *dev, struct sk_buff *skb, len = skb->len; - if (skb_headroom(skb) < SR_TX_OVERHEAD) { - struct sk_buff *skb2; - - skb2 = skb_copy_expand(skb, SR_TX_OVERHEAD, 0, flags); + if (skb_cow_head(skb, SR_TX_OVERHEAD)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } __skb_push(skb, SR_TX_OVERHEAD); -- GitLab From 7c5015409befbaa7521cea61b4759e1dfb686cd3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Apr 2017 09:59:21 -0700 Subject: [PATCH 0874/1398] smsc75xx: use skb_cow_head() to deal with cloned skbs commit b7c6d2675899cfff0180412c63fc9cbd5bacdb4d upstream. We need to ensure there is enough headroom to push extra header, but we also need to check if we are allowed to change headers. skb_cow_head() is the proper helper to deal with this. Fixes: d0cad871703b ("smsc75xx: SMSC LAN75xx USB gigabit ethernet adapter driver") Signed-off-by: Eric Dumazet Cc: James Hughes Signed-off-by: David S. Miller Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/smsc75xx.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 9af9799935db..4cb9b11a545a 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -2205,13 +2205,9 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev, { u32 tx_cmd_a, tx_cmd_b; - if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) { - struct sk_buff *skb2 = - skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags); + if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN) | TX_CMD_A_FCS; -- GitLab From 135f98084eacc6c359b35899efb50363a5d3269a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Apr 2017 09:59:22 -0700 Subject: [PATCH 0875/1398] cx82310_eth: use skb_cow_head() to deal with cloned skbs commit a9e840a2081ed28c2b7caa6a9a0041c950b3c37d upstream. We need to ensure there is enough headroom to push extra header, but we also need to check if we are allowed to change headers. skb_cow_head() is the proper helper to deal with this. Fixes: cc28a20e77b2 ("introduce cx82310_eth: Conexant CX82310-based ADSL router USB ethernet driver") Signed-off-by: Eric Dumazet Cc: James Hughes Signed-off-by: David S. Miller Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cx82310_eth.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c index e221bfcee76b..947bea81d924 100644 --- a/drivers/net/usb/cx82310_eth.c +++ b/drivers/net/usb/cx82310_eth.c @@ -293,12 +293,9 @@ static struct sk_buff *cx82310_tx_fixup(struct usbnet *dev, struct sk_buff *skb, { int len = skb->len; - if (skb_headroom(skb) < 2) { - struct sk_buff *skb2 = skb_copy_expand(skb, 2, 0, flags); + if (skb_cow_head(skb, 2)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } skb_push(skb, 2); -- GitLab From 66bb6c2c4445d39f4086637d425da77dcc3364ae Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 11 Jan 2018 17:01:36 +0000 Subject: [PATCH 0876/1398] xhci: Fix ring leak in failure path of xhci_alloc_virt_device() This is a stable-only fix for the backport of commit 5d9b70f7d52e ("xhci: Don't add a virt_dev to the devs array before it's fully allocated"). In branches that predate commit c5628a2af83a ("xhci: remove endpoint ring cache") there is an additional failure path in xhci_alloc_virt_device() where ring cache allocation fails, in which case we need to free the ring allocated for endpoint 0. Signed-off-by: Ben Hutchings Cc: Mathias Nyman --- drivers/usb/host/xhci-mem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 82eea55a7b5c..3b7d69ca83be 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1086,7 +1086,8 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, return 1; fail: - + if (dev->eps[0].ring) + xhci_ring_free(xhci, dev->eps[0].ring); if (dev->in_ctx) xhci_free_container_ctx(xhci, dev->in_ctx); if (dev->out_ctx) -- GitLab From fe71f34fbf83d0de41e5725cc4988e238d452d11 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 9 Jan 2018 13:40:41 -0800 Subject: [PATCH 0877/1398] 8021q: fix a memory leak for VLAN 0 device [ Upstream commit 78bbb15f2239bc8e663aa20bbe1987c91a0b75f6 ] A vlan device with vid 0 is allow to creat by not able to be fully cleaned up by unregister_vlan_dev() which checks for vlan_id!=0. Also, VLAN 0 is probably not a valid number and it is kinda "reserved" for HW accelerating devices, but it is probably too late to reject it from creation even if makes sense. Instead, just remove the check in unregister_vlan_dev(). Reported-by: Dmitry Vyukov Fixes: ad1afb003939 ("vlan_dev: VLAN 0 should be treated as "no vlan tag" (802.1p packet)") Cc: Vlad Yasevich Cc: Ben Hutchings Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/8021q/vlan.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 4a47074d1d7f..c8ea3cf9db85 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) vlan_gvrp_uninit_applicant(real_dev); } - /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing if - * VLAN is not 0 (leave it there for 802.1p). - */ - if (vlan_id) - vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); -- GitLab From ca5681b723d39e85cbb2f05523a08ef2ee3a8086 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Mon, 25 Dec 2017 10:43:49 +0800 Subject: [PATCH 0878/1398] ip6_tunnel: disable dst caching if tunnel is dual-stack [ Upstream commit 23263ec86a5f44312d2899323872468752324107 ] When an ip6_tunnel is in mode 'any', where the transport layer protocol can be either 4 or 41, dst_cache must be disabled. This is because xfrm policies might apply to only one of the two protocols. Caching dst would cause xfrm policies for one protocol incorrectly used for the other. Signed-off-by: Eli Cooper Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 11d22d642488..131e6aa954bc 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1080,10 +1080,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); } - } else if (!(t->parms.flags & - (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { - /* enable the cache only only if the routing decision does - * not depend on the current inner header value + } else if (t->parms.proto != 0 && !(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | + IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only if neither the outer protocol nor the + * routing decision depends on the current inner header value */ use_cache = true; } -- GitLab From 61196a67cac4faa27d5e8f765b468bc25822f84d Mon Sep 17 00:00:00 2001 From: Andrii Vladyka Date: Thu, 4 Jan 2018 13:09:17 +0200 Subject: [PATCH 0879/1398] net: core: fix module type in sock_diag_bind [ Upstream commit b8fd0823e0770c2d5fdbd865bccf0d5e058e5287 ] Use AF_INET6 instead of AF_INET in IPv6-related code path Signed-off-by: Andrii Vladyka Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock_diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 6b10573cc9fa..d1d9faf3046b 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -295,7 +295,7 @@ static int sock_diag_bind(struct net *net, int group) case SKNLGRP_INET6_UDP_DESTROY: if (!sock_diag_handlers[AF_INET6]) request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET); + NETLINK_SOCK_DIAG, AF_INET6); break; } return 0; -- GitLab From cebb382931c4fb341162eae80b696650260e4a2b Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Tue, 2 Jan 2018 19:44:34 +0000 Subject: [PATCH 0880/1398] RDS: Heap OOB write in rds_message_alloc_sgs() [ Upstream commit c095508770aebf1b9218e77026e48345d719b17c ] When args->nr_local is 0, nr_pages gets also 0 due some size calculation via rds_rm_size(), which is later used to allocate pages for DMA, this bug produces a heap Out-Of-Bound write access to a specific memory region. Signed-off-by: Mohamed Ghannam Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index de8496e60735..4880f9a2d356 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -524,6 +524,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args) local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; + if (args->nr_local == 0) + return -EINVAL; + /* figure out the number of pages in the vector */ for (i = 0; i < args->nr_local; i++) { if (copy_from_user(&vec, &local_vec[i], -- GitLab From ce31b6ac1111096ae9bb0b45f4ba564a909bb366 Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Wed, 3 Jan 2018 21:06:06 +0000 Subject: [PATCH 0881/1398] RDS: null pointer dereference in rds_atomic_free_op [ Upstream commit 7d11f77f84b27cef452cee332f4e469503084737 ] set rm->atomic.op_active to 0 when rds_pin_pages() fails or the user supplied address is invalid, this prevents a NULL pointer usage in rds_atomic_free_op() Signed-off-by: Mohamed Ghannam Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/rdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4880f9a2d356..f6027f41cd34 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -876,6 +876,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, err: if (page) put_page(page); + rm->atomic.op_active = 0; kfree(rm->atomic.op_notifier); return ret; -- GitLab From eb2f80e099d47a7b6e0d3acc87b59feaa0fa7364 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 3 Jan 2018 20:09:49 +0300 Subject: [PATCH 0882/1398] sh_eth: fix TSU resource handling [ Upstream commit dfe8266b8dd10e12a731c985b725fcf7f0e537f0 ] When switching the driver to the managed device API, I managed to break the case of a dual Ether devices sharing a single TSU: the 2nd Ether port wouldn't probe. Iwamatsu-san has tried to fix this but his patch was buggy and he then dropped the ball... The solution is to limit calling devm_request_mem_region() to the first of the two ports sharing the same TSU, so devm_ioremap_resource() can't be used anymore for the TSU resource... Fixes: d5e07e69218f ("sh_eth: use managed device API") Reported-by: Nobuhiro Iwamatsu Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/sh_eth.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 2140dedab712..dc39958537c7 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3087,10 +3087,29 @@ static int sh_eth_drv_probe(struct platform_device *pdev) /* ioremap the TSU registers */ if (mdp->cd->tsu) { struct resource *rtsu; + rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1); - mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu); - if (IS_ERR(mdp->tsu_addr)) { - ret = PTR_ERR(mdp->tsu_addr); + if (!rtsu) { + dev_err(&pdev->dev, "no TSU resource\n"); + ret = -ENODEV; + goto out_release; + } + /* We can only request the TSU region for the first port + * of the two sharing this TSU for the probe to succeed... + */ + if (devno % 2 == 0 && + !devm_request_mem_region(&pdev->dev, rtsu->start, + resource_size(rtsu), + dev_name(&pdev->dev))) { + dev_err(&pdev->dev, "can't request TSU resource.\n"); + ret = -EBUSY; + goto out_release; + } + mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start, + resource_size(rtsu)); + if (!mdp->tsu_addr) { + dev_err(&pdev->dev, "TSU region ioremap() failed.\n"); + ret = -ENOMEM; goto out_release; } mdp->port = devno % 2; -- GitLab From 7f4226ffcba0afe7a352e6fed7f095eb9730584e Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 4 Jan 2018 21:06:49 +0300 Subject: [PATCH 0883/1398] sh_eth: fix SH7757 GEther initialization [ Upstream commit 5133550296d43236439494aa955bfb765a89f615 ] Renesas SH7757 has 2 Fast and 2 Gigabit Ether controllers, while the 'sh_eth' driver can only reset and initialize TSU of the first controller pair. Shimoda-san tried to solve that adding the 'needs_init' member to the 'struct sh_eth_plat_data', however the platform code still never sets this flag. I think that we can infer this information from the 'devno' variable (set to 'platform_device::id') and reset/init the Ether controller pair only for an even 'devno'; therefore 'sh_eth_plat_data::needs_init' can be removed... Fixes: 150647fb2c31 ("net: sh_eth: change the condition of initialization") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/sh_eth.c | 4 ++-- include/linux/sh_eth.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index dc39958537c7..b6816ae00b7a 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3116,8 +3116,8 @@ static int sh_eth_drv_probe(struct platform_device *pdev) ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER; } - /* initialize first or needed device */ - if (!devno || pd->needs_init) { + /* Need to init only the first port of the two sharing a TSU */ + if (devno % 2 == 0) { if (mdp->cd->chip_reset) mdp->cd->chip_reset(ndev); diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index f2e27e078362..01b3778ba6da 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -16,7 +16,6 @@ struct sh_eth_plat_data { unsigned char mac_addr[ETH_ALEN]; unsigned no_ether_link:1; unsigned ether_link_active_low:1; - unsigned needs_init:1; }; #endif -- GitLab From 6f237183c7cad875e5ddc7c32641094117b036c9 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 3 Jan 2018 16:46:29 +0100 Subject: [PATCH 0884/1398] net: stmmac: enable EEE in MII, GMII or RGMII only [ Upstream commit 879626e3a52630316d817cbda7cec9a5446d1d82 ] Note in the databook - Section 4.4 - EEE : " The EEE feature is not supported when the MAC is configured to use the TBI, RTBI, SMII, RMII or SGMII single PHY interface. Even if the MAC supports multiple PHY interfaces, you should activate the EEE mode only when the MAC is operating with GMII, MII, or RGMII interface." Applying this restriction solves a stability issue observed on Amlogic gxl platforms operating with RMII interface and the internal PHY. Fixes: 83bf79b6bb64 ("stmmac: disable at run-time the EEE if not supported") Signed-off-by: Jerome Brunet Tested-by: Arnaud Patard Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++++ include/linux/phy.h | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index adf61a7b1b01..98bbb91336e4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -280,8 +280,14 @@ static void stmmac_eee_ctrl_timer(unsigned long arg) bool stmmac_eee_init(struct stmmac_priv *priv) { unsigned long flags; + int interface = priv->plat->interface; bool ret = false; + if ((interface != PHY_INTERFACE_MODE_MII) && + (interface != PHY_INTERFACE_MODE_GMII) && + !phy_interface_mode_is_rgmii(interface)) + goto out; + /* Using PCS we cannot dial with the phy registers at this stage * so we do not support extra feature like EEE. */ diff --git a/include/linux/phy.h b/include/linux/phy.h index a04d69ab7c34..867110c9d707 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -683,6 +683,17 @@ static inline bool phy_is_internal(struct phy_device *phydev) return phydev->is_internal; } +/** + * phy_interface_mode_is_rgmii - Convenience function for testing if a + * PHY interface mode is RGMII (all variants) + * @mode: the phy_interface_t enum + */ +static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode) +{ + return mode >= PHY_INTERFACE_MODE_RGMII && + mode <= PHY_INTERFACE_MODE_RGMII_TXID; +}; + /** * phy_interface_is_rgmii - Convenience function for testing if a PHY interface * is RGMII (all variants) -- GitLab From dde00c92245d4f002a7c9adbea7639c74dc656e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Jan 2018 03:45:49 -0800 Subject: [PATCH 0885/1398] ipv6: fix possible mem leaks in ipv6_make_skb() [ Upstream commit 862c03ee1deb7e19e0f9931682e0294ecd1fcaf9 ] ip6_setup_cork() might return an error, while memory allocations have been done and must be rolled back. Fixes: 6422398c2ab0 ("ipv6: introduce ipv6_make_skb") Signed-off-by: Eric Dumazet Cc: Vlad Yasevich Reported-by: Mike Maloney Acked-by: Mike Maloney Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 506efba33a89..388584b8ff31 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1800,9 +1800,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk, cork.base.opt = NULL; v6_cork.opt = NULL; err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); - if (err) + if (err) { + ip6_cork_release(&cork, &v6_cork); return ERR_PTR(err); - + } if (ipc6->dontfrag < 0) ipc6->dontfrag = inet6_sk(sk)->dontfrag; -- GitLab From 16d5b481d098c9b9ceba5b4c897a0df9c97b34cf Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 29 Dec 2017 10:02:52 -0800 Subject: [PATCH 0886/1398] ethtool: do not print warning for applications using legacy API [ Upstream commit 71891e2dab6b55a870f8f7735e44a2963860b5c6 ] In kernel log ths message appears on every boot: "warning: `NetworkChangeNo' uses legacy ethtool link settings API, link modes are only partially reported" When ethtool link settings API changed, it started complaining about usages of old API. Ironically, the original patch was from google but the application using the legacy API is chrome. Linux ABI is fixed as much as possible. The kernel must not break it and should not complain about applications using legacy API's. This patch just removes the warning since using legacy API's in Linux is perfectly acceptable. Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API") Signed-off-by: Stephen Hemminger Signed-off-by: David Decotigny Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/ethtool.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index e9989b835a66..7913771ec474 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -742,15 +742,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev, return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); } -static void -warn_incomplete_ethtool_legacy_settings_conversion(const char *details) -{ - char name[sizeof(current->comm)]; - - pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n", - get_task_comm(name, current), details); -} - /* Query device for its ethtool_cmd settings. * * Backward compatibility note: for compatibility with legacy ethtool, @@ -777,10 +768,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) &link_ksettings); if (err < 0) return err; - if (!convert_link_ksettings_to_legacy_settings(&cmd, - &link_ksettings)) - warn_incomplete_ethtool_legacy_settings_conversion( - "link modes are only partially reported"); + convert_link_ksettings_to_legacy_settings(&cmd, + &link_ksettings); /* send a sensible cmd tag back to user */ cmd.cmd = ETHTOOL_GSET; -- GitLab From e2b825e8de16803750bc5031db95bb20b037f426 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 25 Dec 2017 08:57:35 +0100 Subject: [PATCH 0887/1398] mlxsw: spectrum_router: Fix NULL pointer deref [ Upstream commit 8764a8267b128405cf383157d5e9a4a3735d2409 ] When we remove the neighbour associated with a nexthop we should always refuse to write the nexthop to the adjacency table. Regardless if it is already present in the table or not. Otherwise, we risk dereferencing the NULL pointer that was set instead of the neighbour. Fixes: a7ff87acd995 ("mlxsw: spectrum_router: Implement next-hop routing") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9e31a3390154..8aa91ddff287 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1328,9 +1328,9 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, bool removing) { - if (!removing && !nh->should_offload) + if (!removing) nh->should_offload = 1; - else if (removing && nh->offloaded) + else nh->should_offload = 0; nh->update = 1; } -- GitLab From b28394cbb4022db49e242e85e25110572b146b68 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 26 Dec 2017 07:48:51 +0200 Subject: [PATCH 0888/1398] net/sched: Fix update of lastuse in act modules implementing stats_update [ Upstream commit 3bb23421a504f01551b7cb9dff0e41dbf16656b0 ] We need to update lastuse to to the most updated value between what is already set and the new value. If HW matching fails, i.e. because of an issue, the stats are not updated but it could be that software did match and updated lastuse. Fixes: 5712bf9c5c30 ("net/sched: act_mirred: Use passed lastuse argument") Fixes: 9fea47d93bcc ("net/sched: act_gact: Update statistics when offloaded to hardware") Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_gact.c | 2 +- net/sched/act_mirred.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e0aa30f83c6c..9617b42aaf20 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -161,7 +161,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, if (action == TC_ACT_SHOT) this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6b07fba5770b..fc3650b06192 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -211,7 +211,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, struct tcf_t *tm = &m->tcf_tm; _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, -- GitLab From 3752d2fb9a6d3356ce2386e94a69ff40296272ce Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Dec 2017 14:30:19 -0600 Subject: [PATCH 0889/1398] crypto: algapi - fix NULL dereference in crypto_remove_spawns() commit 9a00674213a3f00394f4e3221b88f2d21fc05789 upstream. syzkaller triggered a NULL pointer dereference in crypto_remove_spawns() via a program that repeatedly and concurrently requests AEADs "authenc(cmac(des3_ede-asm),pcbc-aes-aesni)" and hashes "cmac(des3_ede)" through AF_ALG, where the hashes are requested as "untested" (CRYPTO_ALG_TESTED is set in ->salg_mask but clear in ->salg_feat; this causes the template to be instantiated for every request). Although AF_ALG users really shouldn't be able to request an "untested" algorithm, the NULL pointer dereference is actually caused by a longstanding race condition where crypto_remove_spawns() can encounter an instance which has had spawn(s) "grabbed" but hasn't yet been registered, resulting in ->cra_users still being NULL. We probably should properly initialize ->cra_users earlier, but that would require updating many templates individually. For now just fix the bug in a simple way that can easily be backported: make crypto_remove_spawns() treat a NULL ->cra_users list as empty. Reported-by: syzbot Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/algapi.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/crypto/algapi.c b/crypto/algapi.c index 1fad2a6b3bbb..5c098ffa7d3d 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, spawn->alg = NULL; spawns = &inst->alg.cra_users; + + /* + * We may encounter an unregistered instance here, since + * an instance's spawns are set up prior to the instance + * being registered. An unregistered instance will have + * NULL ->cra_users.next, since ->cra_users isn't + * properly initialized until registration. But an + * unregistered instance cannot have any users, so treat + * it the same as ->cra_users being empty. + */ + if (spawns->next == NULL) + break; } } while ((spawns = crypto_more_spawns(alg, &stack, &top, &secondary_spawns))); -- GitLab From 553a8b8c8d87e0b10a04c4568f2c1c412e1fc3de Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 21 Dec 2017 15:35:11 +0100 Subject: [PATCH 0890/1398] rbd: set max_segments to USHRT_MAX commit 21acdf45f4958135940f0b4767185cf911d4b010 upstream. Commit d3834fefcfe5 ("rbd: bump queue_max_segments") bumped max_segments (unsigned short) to max_hw_sectors (unsigned int). max_hw_sectors is set to the number of 512-byte sectors in an object and overflows unsigned short for 32M (largest possible) objects, making the block layer resort to handing us single segment (i.e. single page or even smaller) bios in that case. Fixes: d3834fefcfe5 ("rbd: bump queue_max_segments") Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 24f4b544d270..e32badd26c8a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4511,7 +4511,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) segment_size = rbd_obj_bytes(&rbd_dev->header); blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); q->limits.max_sectors = queue_max_hw_sectors(q); - blk_queue_max_segments(q, segment_size / SECTOR_SIZE); + blk_queue_max_segments(q, USHRT_MAX); blk_queue_max_segment_size(q, segment_size); blk_queue_io_min(q, segment_size); blk_queue_io_opt(q, segment_size); -- GitLab From 431fd501aa3f1f6a6e44a2022f7be18d8dd60b32 Mon Sep 17 00:00:00 2001 From: Jia Zhang Date: Mon, 1 Jan 2018 10:04:47 +0800 Subject: [PATCH 0891/1398] x86/microcode/intel: Extend BDW late-loading with a revision check commit b94b7373317164402ff7728d10f7023127a02b60 upstream. Instead of blacklisting all model 79 CPUs when attempting a late microcode loading, limit that only to CPUs with microcode revisions < 0x0b000021 because only on those late loading may cause a system hang. For such processors either: a) a BIOS update which might contain a newer microcode revision or b) the early microcode loading method should be considered. Processors with revisions 0x0b000021 or higher will not experience such hangs. For more details, see erratum BDF90 in document #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family Specification Update) from September 2017. [ bp: Heavily massage commit message and pr_* statements. ] Fixes: 723f2828a98c ("x86/microcode/intel: Disable late loading on model 79") Signed-off-by: Jia Zhang Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Acked-by: Tony Luck Cc: x86-ml Link: http://lkml.kernel.org/r/1514772287-92959-1-git-send-email-qianyue.zj@alibaba-inc.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/intel.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 13dbcc0f9d03..ac3e636ad586 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -1051,8 +1051,17 @@ static bool is_blacklisted(unsigned int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); - if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) { - pr_err_once("late loading on model 79 is disabled.\n"); + /* + * Late loading on model 79 with microcode revision less than 0x0b000021 + * may result in a system hang. This behavior is documented in item + * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family). + */ + if (c->x86 == 6 && + c->x86_model == INTEL_FAM6_BROADWELL_X && + c->x86_mask == 0x01 && + c->microcode < 0x0b000021) { + pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); + pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); return true; } -- GitLab From 012df71d2980d9ddf8039da09b2d5741189da8f1 Mon Sep 17 00:00:00 2001 From: Andrew Honig Date: Wed, 10 Jan 2018 10:12:03 -0800 Subject: [PATCH 0892/1398] KVM: x86: Add memory barrier on vmcs field lookup commit 75f139aaf896d6fdeec2e468ddfa4b2fe469bf40 upstream. This adds a memory barrier when performing a lookup into the vmcs_field_to_offset_table. This is related to CVE-2017-5753. Signed-off-by: Andrew Honig Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d8582902a783..ab6605425497 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -857,8 +857,16 @@ static inline short vmcs_field_to_offset(unsigned long field) { BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || - vmcs_field_to_offset_table[field] == 0) + if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) + return -ENOENT; + + /* + * FIXME: Mitigation for CVE-2017-5753. To be replaced with a + * generic mechanism. + */ + asm("lfence"); + + if (vmcs_field_to_offset_table[field] == 0) return -ENOENT; return vmcs_field_to_offset_table[field]; -- GitLab From 08a7525811043b1d4e085fa028ee6e9fe89bd7cf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jan 2018 12:40:04 +0300 Subject: [PATCH 0893/1398] drm/vmwgfx: Potential off by one in vmw_view_add() commit 0d9cac0ca0429830c40fe1a4e50e60f6221fd7b6 upstream. The vmw_view_cmd_to_type() function returns vmw_view_max (3) on error. It's one element beyond the end of the vmw_view_cotables[] table. My read on this is that it's possible to hit this failure. header->id comes from vmw_cmd_check() and it's a user controlled number between 1040 and 1225 so we can hit that error. But I don't have the hardware to test this code. Fixes: d80efd5cb3de ("drm/vmwgfx: Initial DX support") Signed-off-by: Dan Carpenter Reviewed-by: Thomas Hellstrom Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index fefb9d995d2c..81f5a552e32f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2729,6 +2729,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv, } view_type = vmw_view_cmd_to_type(header->id); + if (view_type == vmw_view_max) + return -EINVAL; cmd = container_of(header, typeof(*cmd), header); ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, user_surface_converter, -- GitLab From ec61bafb2abd42a148d41143570732052a577fc0 Mon Sep 17 00:00:00 2001 From: Lepton Wu Date: Fri, 12 Jan 2018 13:42:56 -0800 Subject: [PATCH 0894/1398] kaiser: Set _PAGE_NX only if supported This finally resolve crash if loaded under qemu + haxm. Haitao Shan pointed out that the reason of that crash is that NX bit get set for page tables. It seems we missed checking if _PAGE_NX is supported in kaiser_add_user_map Link: https://www.spinics.net/lists/kernel/msg2689835.html Reviewed-by: Guenter Roeck Signed-off-by: Lepton Wu Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kaiser.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index 8f8e5e03d083..a8ade08a9bf5 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -197,6 +197,8 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size, * requires that not to be #defined to 0): so mask it off here. */ flags &= ~_PAGE_GLOBAL; + if (!(__supported_pte_mask & _PAGE_NX)) + flags &= ~_PAGE_NX; for (; address < end_addr; address += PAGE_SIZE) { target_address = get_pa_from_mapping(address); -- GitLab From 748e1b6281f5128c5ef8f9296761a232558c20d8 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 27 Oct 2017 12:32:59 -0700 Subject: [PATCH 0895/1398] iscsi-target: Make TASK_REASSIGN use proper se_cmd->cmd_kref commit ae072726f6109bb1c94841d6fb3a82dde298ea85 upstream. Since commit 59b6986dbf fixed a potential NULL pointer dereference by allocating a se_tmr_req for ISCSI_TM_FUNC_TASK_REASSIGN, the se_tmr_req is currently leaked by iscsit_free_cmd() because no iscsi_cmd->se_cmd.se_tfo was associated. To address this, treat ISCSI_TM_FUNC_TASK_REASSIGN like any other TMR and call transport_init_se_cmd() + target_get_sess_cmd() to setup iscsi_cmd->se_cmd.se_tfo with se_cmd->cmd_kref of 2. This will ensure normal release operation once se_cmd->cmd_kref reaches zero and target_release_cmd_kref() is invoked, se_tmr_req will be released via existing target_free_cmd_mem() and core_tmr_release_req() code. Reported-by: Donald White Cc: Donald White Cc: Mike Christie Cc: Hannes Reinecke Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 72e926d9868f..04d2b6e25503 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1940,7 +1940,6 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct iscsi_tmr_req *tmr_req; struct iscsi_tm *hdr; int out_of_order_cmdsn = 0, ret; - bool sess_ref = false; u8 function, tcm_function = TMR_UNKNOWN; hdr = (struct iscsi_tm *) buf; @@ -1982,18 +1981,17 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, buf); } + transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops, + conn->sess->se_sess, 0, DMA_NONE, + TCM_SIMPLE_TAG, cmd->sense_buffer + 2); + + target_get_sess_cmd(&cmd->se_cmd, true); + /* * TASK_REASSIGN for ERL=2 / connection stays inside of * LIO-Target $FABRIC_MOD */ if (function != ISCSI_TM_FUNC_TASK_REASSIGN) { - transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops, - conn->sess->se_sess, 0, DMA_NONE, - TCM_SIMPLE_TAG, cmd->sense_buffer + 2); - - target_get_sess_cmd(&cmd->se_cmd, true); - sess_ref = true; - switch (function) { case ISCSI_TM_FUNC_ABORT_TASK: tcm_function = TMR_ABORT_TASK; @@ -2132,12 +2130,8 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * For connection recovery, this is also the default action for * TMR TASK_REASSIGN. */ - if (sess_ref) { - pr_debug("Handle TMR, using sess_ref=true check\n"); - target_put_sess_cmd(&cmd->se_cmd); - } - iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + target_put_sess_cmd(&cmd->se_cmd); return 0; } EXPORT_SYMBOL(iscsit_handle_task_mgt_cmd); -- GitLab From 60c7a9cd5050378a7bfe385dc5feef9d406f22e1 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 27 Oct 2017 22:19:26 -0800 Subject: [PATCH 0896/1398] target: Avoid early CMD_T_PRE_EXECUTE failures during ABORT_TASK commit 1c21a48055a67ceb693e9c2587824a8de60a217c upstream. This patch fixes bug where early se_cmd exceptions that occur before backend execution can result in use-after-free if/when a subsequent ABORT_TASK occurs for the same tag. Since an early se_cmd exception will have had se_cmd added to se_session->sess_cmd_list via target_get_sess_cmd(), it will not have CMD_T_COMPLETE set by the usual target_complete_cmd() backend completion path. This causes a subsequent ABORT_TASK + __target_check_io_state() to signal ABORT_TASK should proceed. As core_tmr_abort_task() executes, it will bring the outstanding se_cmd->cmd_kref count down to zero releasing se_cmd, after se_cmd has already been queued with error status into fabric driver response path code. To address this bug, introduce a CMD_T_PRE_EXECUTE bit that is set at target_get_sess_cmd() time, and cleared immediately before backend driver dispatch in target_execute_cmd() once CMD_T_ACTIVE is set. Then, check CMD_T_PRE_EXECUTE within __target_check_io_state() to determine when an early exception has occured, and avoid aborting this se_cmd since it will have already been queued into fabric driver response path code. Reported-by: Donald White Cc: Donald White Cc: Mike Christie Cc: Hannes Reinecke Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_tmr.c | 9 +++++++++ drivers/target/target_core_transport.c | 2 ++ include/target/target_core_base.h | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 27dd1e12f246..14bb2db5273c 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -133,6 +133,15 @@ static bool __target_check_io_state(struct se_cmd *se_cmd, spin_unlock(&se_cmd->t_state_lock); return false; } + if (se_cmd->transport_state & CMD_T_PRE_EXECUTE) { + if (se_cmd->scsi_status) { + pr_debug("Attempted to abort io tag: %llu early failure" + " status: 0x%02x\n", se_cmd->tag, + se_cmd->scsi_status); + spin_unlock(&se_cmd->t_state_lock); + return false; + } + } if (sess->sess_tearing_down || se_cmd->cmd_wait_set) { pr_debug("Attempted to abort io tag: %llu already shutdown," " skipping\n", se_cmd->tag); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 4c0782cb1e94..6f3eccf986c7 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1939,6 +1939,7 @@ void target_execute_cmd(struct se_cmd *cmd) } cmd->t_state = TRANSPORT_PROCESSING; + cmd->transport_state &= ~CMD_T_PRE_EXECUTE; cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); @@ -2592,6 +2593,7 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref) ret = -ESHUTDOWN; goto out; } + se_cmd->transport_state |= CMD_T_PRE_EXECUTE; list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list); out: spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index eb3b23b6ec54..30f99ce4c6ce 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -493,6 +493,7 @@ struct se_cmd { #define CMD_T_BUSY (1 << 9) #define CMD_T_TAS (1 << 10) #define CMD_T_FABRIC_STOP (1 << 11) +#define CMD_T_PRE_EXECUTE (1 << 12) spinlock_t t_state_lock; struct kref cmd_kref; struct completion t_transport_stop_comp; -- GitLab From 28035366afe93f7bdb833a7867caccf4b7eda166 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Mar 2017 18:26:39 -0700 Subject: [PATCH 0897/1398] bpf: move fixup_bpf_calls() function commit e245c5c6a5656e4d61aa7bb08e9694fd6e5b2b9d upstream. no functional change. move fixup_bpf_calls() to verifier.c it's being refactored in the next patch Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Cc: Jiri Slaby [backported to 4.9 - gregkh] Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/syscall.c | 54 ------------------------------------------- kernel/bpf/verifier.c | 54 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 72ea91df71c9..91a2d3752007 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -565,57 +565,6 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl) list_add(&tl->list_node, &bpf_prog_types); } -/* fixup insn->imm field of bpf_call instructions: - * if (insn->imm == BPF_FUNC_map_lookup_elem) - * insn->imm = bpf_map_lookup_elem - __bpf_call_base; - * else if (insn->imm == BPF_FUNC_map_update_elem) - * insn->imm = bpf_map_update_elem - __bpf_call_base; - * else ... - * - * this function is called after eBPF program passed verification - */ -static void fixup_bpf_calls(struct bpf_prog *prog) -{ - const struct bpf_func_proto *fn; - int i; - - for (i = 0; i < prog->len; i++) { - struct bpf_insn *insn = &prog->insnsi[i]; - - if (insn->code == (BPF_JMP | BPF_CALL)) { - /* we reach here when program has bpf_call instructions - * and it passed bpf_check(), means that - * ops->get_func_proto must have been supplied, check it - */ - BUG_ON(!prog->aux->ops->get_func_proto); - - if (insn->imm == BPF_FUNC_get_route_realm) - prog->dst_needed = 1; - if (insn->imm == BPF_FUNC_get_prandom_u32) - bpf_user_rnd_init_once(); - if (insn->imm == BPF_FUNC_tail_call) { - /* mark bpf_tail_call as different opcode - * to avoid conditional branch in - * interpeter for every normal call - * and to prevent accidental JITing by - * JIT compiler that doesn't support - * bpf_tail_call yet - */ - insn->imm = 0; - insn->code |= BPF_X; - continue; - } - - fn = prog->aux->ops->get_func_proto(insn->imm); - /* all functions that have prototype and verifier allowed - * programs to call them, must be real in-kernel functions - */ - BUG_ON(!fn->func); - insn->imm = fn->func - __bpf_call_base; - } - } -} - /* drop refcnt on maps used by eBPF program and free auxilary data */ static void free_used_maps(struct bpf_prog_aux *aux) { @@ -808,9 +757,6 @@ static int bpf_prog_load(union bpf_attr *attr) if (err < 0) goto free_used_maps; - /* fixup BPF_CALL->imm field */ - fixup_bpf_calls(prog); - /* eBPF program is ready to be JITed */ prog = bpf_prog_select_runtime(prog, &err); if (err < 0) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d7eeebfafe8d..0d7a8e95bbfa 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3362,6 +3362,57 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return 0; } +/* fixup insn->imm field of bpf_call instructions: + * if (insn->imm == BPF_FUNC_map_lookup_elem) + * insn->imm = bpf_map_lookup_elem - __bpf_call_base; + * else if (insn->imm == BPF_FUNC_map_update_elem) + * insn->imm = bpf_map_update_elem - __bpf_call_base; + * else ... + * + * this function is called after eBPF program passed verification + */ +static void fixup_bpf_calls(struct bpf_prog *prog) +{ + const struct bpf_func_proto *fn; + int i; + + for (i = 0; i < prog->len; i++) { + struct bpf_insn *insn = &prog->insnsi[i]; + + if (insn->code == (BPF_JMP | BPF_CALL)) { + /* we reach here when program has bpf_call instructions + * and it passed bpf_check(), means that + * ops->get_func_proto must have been supplied, check it + */ + BUG_ON(!prog->aux->ops->get_func_proto); + + if (insn->imm == BPF_FUNC_get_route_realm) + prog->dst_needed = 1; + if (insn->imm == BPF_FUNC_get_prandom_u32) + bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_tail_call) { + /* mark bpf_tail_call as different opcode + * to avoid conditional branch in + * interpeter for every normal call + * and to prevent accidental JITing by + * JIT compiler that doesn't support + * bpf_tail_call yet + */ + insn->imm = 0; + insn->code |= BPF_X; + continue; + } + + fn = prog->aux->ops->get_func_proto(insn->imm); + /* all functions that have prototype and verifier allowed + * programs to call them, must be real in-kernel functions + */ + BUG_ON(!fn->func); + insn->imm = fn->func - __bpf_call_base; + } + } +} + static void free_states(struct bpf_verifier_env *env) { struct bpf_verifier_state_list *sl, *sln; @@ -3463,6 +3514,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) /* program is valid, convert *(u32*)(ctx + off) accesses */ ret = convert_ctx_accesses(env); + if (ret == 0) + fixup_bpf_calls(env->prog); + if (log_level && log_len >= log_size - 1) { BUG_ON(log_len >= log_size); /* verifier log exceeded user supplied buffer */ -- GitLab From f55093dccd3ac90f003698fae7ecd75cf2862179 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Mar 2017 18:26:40 -0700 Subject: [PATCH 0898/1398] bpf: refactor fixup_bpf_calls() commit 79741b3bdec01a8628368fbcfccc7d189ed606cb upstream. reduce indent and make it iterate over instructions similar to convert_ctx_accesses(). Also convert hard BUG_ON into soft verifier error. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Cc: Jiri Slaby [Backported to 4.9.y - gregkh] Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 73 ++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0d7a8e95bbfa..27370410c04d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3362,55 +3362,50 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return 0; } -/* fixup insn->imm field of bpf_call instructions: - * if (insn->imm == BPF_FUNC_map_lookup_elem) - * insn->imm = bpf_map_lookup_elem - __bpf_call_base; - * else if (insn->imm == BPF_FUNC_map_update_elem) - * insn->imm = bpf_map_update_elem - __bpf_call_base; - * else ... +/* fixup insn->imm field of bpf_call instructions * * this function is called after eBPF program passed verification */ -static void fixup_bpf_calls(struct bpf_prog *prog) +static int fixup_bpf_calls(struct bpf_verifier_env *env) { + struct bpf_prog *prog = env->prog; + struct bpf_insn *insn = prog->insnsi; const struct bpf_func_proto *fn; + const int insn_cnt = prog->len; int i; - for (i = 0; i < prog->len; i++) { - struct bpf_insn *insn = &prog->insnsi[i]; + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code != (BPF_JMP | BPF_CALL)) + continue; - if (insn->code == (BPF_JMP | BPF_CALL)) { - /* we reach here when program has bpf_call instructions - * and it passed bpf_check(), means that - * ops->get_func_proto must have been supplied, check it - */ - BUG_ON(!prog->aux->ops->get_func_proto); - - if (insn->imm == BPF_FUNC_get_route_realm) - prog->dst_needed = 1; - if (insn->imm == BPF_FUNC_get_prandom_u32) - bpf_user_rnd_init_once(); - if (insn->imm == BPF_FUNC_tail_call) { - /* mark bpf_tail_call as different opcode - * to avoid conditional branch in - * interpeter for every normal call - * and to prevent accidental JITing by - * JIT compiler that doesn't support - * bpf_tail_call yet - */ - insn->imm = 0; - insn->code |= BPF_X; - continue; - } + if (insn->imm == BPF_FUNC_get_route_realm) + prog->dst_needed = 1; + if (insn->imm == BPF_FUNC_get_prandom_u32) + bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_tail_call) { + /* mark bpf_tail_call as different opcode to avoid + * conditional branch in the interpeter for every normal + * call and to prevent accidental JITing by JIT compiler + * that doesn't support bpf_tail_call yet + */ + insn->imm = 0; + insn->code |= BPF_X; + continue; + } - fn = prog->aux->ops->get_func_proto(insn->imm); - /* all functions that have prototype and verifier allowed - * programs to call them, must be real in-kernel functions - */ - BUG_ON(!fn->func); - insn->imm = fn->func - __bpf_call_base; + fn = prog->aux->ops->get_func_proto(insn->imm); + /* all functions that have prototype and verifier allowed + * programs to call them, must be real in-kernel functions + */ + if (!fn->func) { + verbose("kernel subsystem misconfigured func %d\n", + insn->imm); + return -EFAULT; } + insn->imm = fn->func - __bpf_call_base; } + + return 0; } static void free_states(struct bpf_verifier_env *env) @@ -3515,7 +3510,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) ret = convert_ctx_accesses(env); if (ret == 0) - fixup_bpf_calls(env->prog); + ret = fixup_bpf_calls(env); if (log_level && log_len >= log_size - 1) { BUG_ON(log_len >= log_size); -- GitLab From a9bfac14cde2b481eeb0e64fbe15305df66ab32e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 7 Jan 2018 17:33:02 -0800 Subject: [PATCH 0899/1398] bpf: prevent out-of-bounds speculation commit b2157399cc9898260d6031c5bfe45fe137c1fbe7 upstream. Under speculation, CPUs may mis-predict branches in bounds checks. Thus, memory accesses under a bounds check may be speculated even if the bounds check fails, providing a primitive for building a side channel. To avoid leaking kernel data round up array-based maps and mask the index after bounds check, so speculated load with out of bounds index will load either valid value from the array or zero from the padded area. Unconditionally mask index for all array types even when max_entries are not rounded to power of 2 for root user. When map is created by unpriv user generate a sequence of bpf insns that includes AND operation to make sure that JITed code includes the same 'index & index_mask' operation. If prog_array map is created by unpriv user replace bpf_tail_call(ctx, map, index); with if (index >= max_entries) { index &= map->index_mask; bpf_tail_call(ctx, map, index); } (along with roundup to power 2) to prevent out-of-bounds speculation. There is secondary redundant 'if (index >= max_entries)' in the interpreter and in all JITs, but they can be optimized later if necessary. Other array-like maps (cpumap, devmap, sockmap, perf_event_array, cgroup_array) cannot be used by unpriv, so no changes there. That fixes bpf side of "Variant 1: bounds check bypass (CVE-2017-5753)" on all architectures with and without JIT. v2->v3: Daniel noticed that attack potentially can be crafted via syscall commands without loading the program, so add masking to those paths as well. Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: Daniel Borkmann Cc: Jiri Slaby [ Backported to 4.9 - gregkh ] Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf.h | 2 ++ include/linux/bpf_verifier.h | 5 ++++- kernel/bpf/arraymap.c | 31 ++++++++++++++++++-------- kernel/bpf/verifier.c | 42 +++++++++++++++++++++++++++++++++--- 4 files changed, 67 insertions(+), 13 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 97498be2ca2e..75ffd3b2149e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -43,6 +43,7 @@ struct bpf_map { u32 max_entries; u32 map_flags; u32 pages; + bool unpriv_array; struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; @@ -189,6 +190,7 @@ struct bpf_prog_aux { struct bpf_array { struct bpf_map map; u32 elem_size; + u32 index_mask; /* 'ownership' of prog_array is claimed by the first program that * is going to use this map or by the first program which FD is stored * in the map to make sure that all callers and callees have the same diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 4c4e9358c146..070fc49e39e2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -67,7 +67,10 @@ struct bpf_verifier_state_list { }; struct bpf_insn_aux_data { - enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + union { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + }; bool seen; /* this insn was processed by the verifier */ }; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index f3721e150d94..bc57ead11b0f 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -46,9 +46,10 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) static struct bpf_map *array_map_alloc(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; + u32 elem_size, index_mask, max_entries; + bool unpriv = !capable(CAP_SYS_ADMIN); struct bpf_array *array; u64 array_size; - u32 elem_size; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || @@ -63,11 +64,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) elem_size = round_up(attr->value_size, 8); + max_entries = attr->max_entries; + index_mask = roundup_pow_of_two(max_entries) - 1; + + if (unpriv) + /* round up array size to nearest power of 2, + * since cpu will speculate within index_mask limits + */ + max_entries = index_mask + 1; + array_size = sizeof(*array); if (percpu) - array_size += (u64) attr->max_entries * sizeof(void *); + array_size += (u64) max_entries * sizeof(void *); else - array_size += (u64) attr->max_entries * elem_size; + array_size += (u64) max_entries * elem_size; /* make sure there is no u32 overflow later in round_up() */ if (array_size >= U32_MAX - PAGE_SIZE) @@ -77,6 +87,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array = bpf_map_area_alloc(array_size); if (!array) return ERR_PTR(-ENOMEM); + array->index_mask = index_mask; + array->map.unpriv_array = unpriv; /* copy mandatory map attributes */ array->map.map_type = attr->map_type; @@ -110,7 +122,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) if (unlikely(index >= array->map.max_entries)) return NULL; - return array->value + array->elem_size * index; + return array->value + array->elem_size * (index & array->index_mask); } /* Called from eBPF program */ @@ -122,7 +134,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) if (unlikely(index >= array->map.max_entries)) return NULL; - return this_cpu_ptr(array->pptrs[index]); + return this_cpu_ptr(array->pptrs[index & array->index_mask]); } int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) @@ -142,7 +154,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) */ size = round_up(map->value_size, 8); rcu_read_lock(); - pptr = array->pptrs[index]; + pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); off += size; @@ -190,10 +202,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, return -EEXIST; if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) - memcpy(this_cpu_ptr(array->pptrs[index]), + memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), value, map->value_size); else - memcpy(array->value + array->elem_size * index, + memcpy(array->value + + array->elem_size * (index & array->index_mask), value, map->value_size); return 0; } @@ -227,7 +240,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, */ size = round_up(map->value_size, 8); rcu_read_lock(); - pptr = array->pptrs[index]; + pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); off += size; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 27370410c04d..19c44cf59bb2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1187,7 +1187,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) } } -static int check_call(struct bpf_verifier_env *env, int func_id) +static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) { struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; @@ -1238,6 +1238,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id) err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); if (err) return err; + if (func_id == BPF_FUNC_tail_call) { + if (meta.map_ptr == NULL) { + verbose("verifier bug\n"); + return -EINVAL; + } + env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr; + } err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); if (err) return err; @@ -3019,7 +3026,7 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - err = check_call(env, insn->imm); + err = check_call(env, insn->imm, insn_idx); if (err) return err; @@ -3372,7 +3379,11 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) struct bpf_insn *insn = prog->insnsi; const struct bpf_func_proto *fn; const int insn_cnt = prog->len; - int i; + struct bpf_insn insn_buf[16]; + struct bpf_prog *new_prog; + struct bpf_map *map_ptr; + int i, cnt, delta = 0; + for (i = 0; i < insn_cnt; i++, insn++) { if (insn->code != (BPF_JMP | BPF_CALL)) @@ -3390,6 +3401,31 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) */ insn->imm = 0; insn->code |= BPF_X; + + /* instead of changing every JIT dealing with tail_call + * emit two extra insns: + * if (index >= max_entries) goto out; + * index &= array->index_mask; + * to avoid out-of-bounds cpu speculation + */ + map_ptr = env->insn_aux_data[i + delta].map_ptr; + if (!map_ptr->unpriv_array) + continue; + insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, + map_ptr->max_entries, 2); + insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, + container_of(map_ptr, + struct bpf_array, + map)->index_mask); + insn_buf[2] = *insn; + cnt = 3; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; continue; } -- GitLab From 820ef2a0e54c4bed27758e393d09157d0d48c94c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 10 Jan 2018 23:25:05 +0100 Subject: [PATCH 0900/1398] bpf, array: fix overflow in max_entries and undefined behavior in index_mask commit bbeb6e4323dad9b5e0ee9f60c223dd532e2403b1 upstream. syzkaller tried to alloc a map with 0xfffffffd entries out of a userns, and thus unprivileged. With the recently added logic in b2157399cc98 ("bpf: prevent out-of-bounds speculation") we round this up to the next power of two value for max_entries for unprivileged such that we can apply proper masking into potentially zeroed out map slots. However, this will generate an index_mask of 0xffffffff, and therefore a + 1 will let this overflow into new max_entries of 0. This will pass allocation, etc, and later on map access we still enforce on the original attr->max_entries value which was 0xfffffffd, therefore triggering GPF all over the place. Thus bail out on overflow in such case. Moreover, on 32 bit archs roundup_pow_of_two() can also not be used, since fls_long(max_entries - 1) can result in 32 and 1UL << 32 in 32 bit space is undefined. Therefore, do this by hand in a 64 bit variable. This fixes all the issues triggered by syzkaller's reproducers. Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") Reported-by: syzbot+b0efb8e572d01bce1ae0@syzkaller.appspotmail.com Reported-by: syzbot+6c15e9744f75f2364773@syzkaller.appspotmail.com Reported-by: syzbot+d2f5524fb46fd3b312ee@syzkaller.appspotmail.com Reported-by: syzbot+61d23c95395cc90dbc2b@syzkaller.appspotmail.com Reported-by: syzbot+0d363c942452cca68c01@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/arraymap.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index bc57ead11b0f..9a1e6ed7babc 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -49,7 +49,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) u32 elem_size, index_mask, max_entries; bool unpriv = !capable(CAP_SYS_ADMIN); struct bpf_array *array; - u64 array_size; + u64 array_size, mask64; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || @@ -65,13 +65,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) elem_size = round_up(attr->value_size, 8); max_entries = attr->max_entries; - index_mask = roundup_pow_of_two(max_entries) - 1; - if (unpriv) + /* On 32 bit archs roundup_pow_of_two() with max_entries that has + * upper most bit set in u32 space is undefined behavior due to + * resulting 1U << 32, so do it manually here in u64 space. + */ + mask64 = fls_long(max_entries - 1); + mask64 = 1ULL << mask64; + mask64 -= 1; + + index_mask = mask64; + if (unpriv) { /* round up array size to nearest power of 2, * since cpu will speculate within index_mask limits */ max_entries = index_mask + 1; + /* Check for overflows. */ + if (max_entries < attr->max_entries) + return ERR_PTR(-E2BIG); + } array_size = sizeof(*array); if (percpu) -- GitLab From 4abe275c2deeab918601ce6f520cf514b8738607 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Elio=20Petten=C3=B2?= Date: Fri, 29 Dec 2017 09:54:25 +0000 Subject: [PATCH 0901/1398] USB: serial: cp210x: add IDs for LifeScan OneTouch Verio IQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4307413256ac1e09b8f53e8715af3df9e49beec3 upstream. Add IDs for the OneTouch Verio IQ that comes with an embedded USB-to-serial converter. Signed-off-by: Diego Elio Pettenò Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 11ee55e080e5..c6d3d1c2d406 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -121,6 +121,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */ { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */ { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */ + { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */ { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ -- GitLab From 11632d079e9e36a5bbfca8e581dba4db305eff10 Mon Sep 17 00:00:00 2001 From: Christian Holl Date: Wed, 3 Jan 2018 19:53:02 +0100 Subject: [PATCH 0902/1398] USB: serial: cp210x: add new device ID ELV ALC 8xxx commit d14ac576d10f865970bb1324d337e5e24d79aaf4 upstream. This adds the ELV ALC 8xxx Battery Charging device to the list of USB IDs of drivers/usb/serial/cp210x.c Signed-off-by: Christian Holl Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index c6d3d1c2d406..3178d8afb3e6 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -172,6 +172,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ + { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */ { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */ { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ -- GitLab From 9f6ca0ea7a7a21963e73d69be42026e5c5954bc4 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 11 Jan 2018 14:47:40 +0100 Subject: [PATCH 0903/1398] usb: misc: usb3503: make sure reset is low for at least 100us commit b8626f1dc29d3eee444bfaa92146ec7b291ef41c upstream. When using a GPIO which is high by default, and initialize the driver in USB Hub mode, initialization fails with: [ 111.757794] usb3503 0-0008: SP_ILOCK failed (-5) The reason seems to be that the chip is not properly reset. Probe does initialize reset low, however some lines later the code already set it back high, which is not long enouth. Make sure reset is asserted for at least 100us by inserting a delay after initializing the reset pin during probe. Signed-off-by: Stefan Agner Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usb3503.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c index 8e7737d7ac0a..03be5d574f23 100644 --- a/drivers/usb/misc/usb3503.c +++ b/drivers/usb/misc/usb3503.c @@ -292,6 +292,8 @@ static int usb3503_probe(struct usb3503 *hub) if (gpio_is_valid(hub->gpio_reset)) { err = devm_gpio_request_one(dev, hub->gpio_reset, GPIOF_OUT_INIT_LOW, "usb3503 reset"); + /* Datasheet defines a hardware reset to be at least 100us */ + usleep_range(100, 10000); if (err) { dev_err(dev, "unable to request GPIO %d as reset pin (%d)\n", -- GitLab From 435db24bb91f7dd16940d8ddedc3b5ca75547fb5 Mon Sep 17 00:00:00 2001 From: Pete Zaitcev Date: Mon, 8 Jan 2018 15:46:41 -0600 Subject: [PATCH 0904/1398] USB: fix usbmon BUG trigger commit 46eb14a6e1585d99c1b9f58d0e7389082a5f466b upstream. Automated tests triggered this by opening usbmon and accessing the mmap while simultaneously resizing the buffers. This bug was with us since 2006, because typically applications only size the buffers once and thus avoid racing. Reported by Kirill A. Shutemov. Reported-by: Signed-off-by: Pete Zaitcev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mon/mon_bin.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index 1a874a1f3890..80b37d214beb 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -1002,7 +1002,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg break; case MON_IOCQ_RING_SIZE: + mutex_lock(&rp->fetch_lock); ret = rp->b_size; + mutex_unlock(&rp->fetch_lock); break; case MON_IOCT_RING_SIZE: @@ -1229,12 +1231,16 @@ static int mon_bin_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) unsigned long offset, chunk_idx; struct page *pageptr; + mutex_lock(&rp->fetch_lock); offset = vmf->pgoff << PAGE_SHIFT; - if (offset >= rp->b_size) + if (offset >= rp->b_size) { + mutex_unlock(&rp->fetch_lock); return VM_FAULT_SIGBUS; + } chunk_idx = offset / CHUNK_SIZE; pageptr = rp->b_vec[chunk_idx].pg; get_page(pageptr); + mutex_unlock(&rp->fetch_lock); vmf->page = pageptr; return 0; } -- GitLab From 6851ec74bfe44e8aa568fd00603759c4a2918752 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 22 Dec 2017 17:00:06 -0700 Subject: [PATCH 0905/1398] usbip: remove kernel addresses from usb device and urb debug msgs commit e1346fd87c71a1f61de1fe476ec8df1425ac931c upstream. usbip_dump_usb_device() and usbip_dump_urb() print kernel addresses. Remove kernel addresses from usb device and urb debug msgs and improve the message content. Instead of printing parent device and bus addresses, print parent device and bus names. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/usbip_common.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index e24b24fa0f16..2a5d3180777d 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -105,7 +105,7 @@ static void usbip_dump_usb_device(struct usb_device *udev) dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)", udev->devnum, udev->devpath, usb_speed_string(udev->speed)); - pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport); + pr_debug("tt hub ttport %d\n", udev->ttport); dev_dbg(dev, " "); for (i = 0; i < 16; i++) @@ -138,12 +138,8 @@ static void usbip_dump_usb_device(struct usb_device *udev) } pr_debug("\n"); - dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus); - - dev_dbg(dev, - "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n", - &udev->descriptor, udev->config, - udev->actconfig, udev->rawdescriptors); + dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev), + udev->bus->bus_name); dev_dbg(dev, "have_langid %d, string_langid %d\n", udev->have_langid, udev->string_langid); @@ -251,9 +247,6 @@ void usbip_dump_urb(struct urb *urb) dev = &urb->dev->dev; - dev_dbg(dev, " urb :%p\n", urb); - dev_dbg(dev, " dev :%p\n", urb->dev); - usbip_dump_usb_device(urb->dev); dev_dbg(dev, " pipe :%08x ", urb->pipe); @@ -262,11 +255,9 @@ void usbip_dump_urb(struct urb *urb) dev_dbg(dev, " status :%d\n", urb->status); dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags); - dev_dbg(dev, " transfer_buffer :%p\n", urb->transfer_buffer); dev_dbg(dev, " transfer_buffer_length:%d\n", urb->transfer_buffer_length); dev_dbg(dev, " actual_length :%d\n", urb->actual_length); - dev_dbg(dev, " setup_packet :%p\n", urb->setup_packet); if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL) usbip_dump_usb_ctrlrequest( @@ -276,8 +267,6 @@ void usbip_dump_urb(struct urb *urb) dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets); dev_dbg(dev, " interval :%d\n", urb->interval); dev_dbg(dev, " error_count :%d\n", urb->error_count); - dev_dbg(dev, " context :%p\n", urb->context); - dev_dbg(dev, " complete :%p\n", urb->complete); } EXPORT_SYMBOL_GPL(usbip_dump_urb); -- GitLab From 86c8d58fc7538d0f44367f49d24568e925049c0d Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 22 Dec 2017 19:23:46 -0700 Subject: [PATCH 0906/1398] usbip: fix vudc_rx: harden CMD_SUBMIT path to handle malicious input commit b78d830f0049ef1966dc1e0ebd1ec2a594e2cf25 upstream. Harden CMD_SUBMIT path to handle malicious input that could trigger large memory allocations. Add checks to validate transfer_buffer_length and number_of_packets to protect against bad input requesting for unbounded memory allocations. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_rx.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c index e429b59f6f8a..d020e72b3122 100644 --- a/drivers/usb/usbip/vudc_rx.c +++ b/drivers/usb/usbip/vudc_rx.c @@ -132,6 +132,25 @@ static int v_recv_cmd_submit(struct vudc *udc, urb_p->new = 1; urb_p->seqnum = pdu->base.seqnum; + if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) { + /* validate packet size and number of packets */ + unsigned int maxp, packets, bytes; + + maxp = usb_endpoint_maxp(urb_p->ep->desc); + maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc); + bytes = pdu->u.cmd_submit.transfer_buffer_length; + packets = DIV_ROUND_UP(bytes, maxp); + + if (pdu->u.cmd_submit.number_of_packets < 0 || + pdu->u.cmd_submit.number_of_packets > packets) { + dev_err(&udc->gadget.dev, + "CMD_SUBMIT: isoc invalid num packets %d\n", + pdu->u.cmd_submit.number_of_packets); + ret = -EMSGSIZE; + goto free_urbp; + } + } + ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type); if (ret) { usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC); -- GitLab From 8ab8c6e6607a14ebd54319535b17e384d2872df9 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 22 Dec 2017 19:23:47 -0700 Subject: [PATCH 0907/1398] usbip: vudc_tx: fix v_send_ret_submit() vulnerability to null xfer buffer commit 5fd77a3a0e408c23ab4002a57db980e46bc16e72 upstream. v_send_ret_submit() handles urb with a null transfer_buffer, when it replays a packet with potential malicious data that could contain a null buffer. Add a check for the condition when actual_length > 0 and transfer_buffer is null. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_tx.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/usbip/vudc_tx.c b/drivers/usb/usbip/vudc_tx.c index 234661782fa0..3ab4c86486a7 100644 --- a/drivers/usb/usbip/vudc_tx.c +++ b/drivers/usb/usbip/vudc_tx.c @@ -97,6 +97,13 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p) memset(&pdu_header, 0, sizeof(pdu_header)); memset(&msg, 0, sizeof(msg)); + if (urb->actual_length > 0 && !urb->transfer_buffer) { + dev_err(&udc->gadget.dev, + "urb: actual_length %d transfer_buffer null\n", + urb->actual_length); + return -1; + } + if (urb_p->type == USB_ENDPOINT_XFER_ISOC) iovnum = 2 + urb->number_of_packets; else @@ -112,8 +119,8 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p) /* 1. setup usbip_header */ setup_ret_submit_pdu(&pdu_header, urb_p); - usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n", - pdu_header.base.seqnum, urb); + usbip_dbg_stub_tx("setup txdata seqnum: %d\n", + pdu_header.base.seqnum); usbip_header_correct_endian(&pdu_header, 1); iov[iovnum].iov_base = &pdu_header; -- GitLab From c51d23dffc2e9ca05d611c86c440f9055541c62d Mon Sep 17 00:00:00 2001 From: Viktor Slavkovic Date: Mon, 8 Jan 2018 10:43:03 -0800 Subject: [PATCH 0908/1398] staging: android: ashmem: fix a race condition in ASHMEM_SET_SIZE ioctl commit 443064cb0b1fb4569fe0a71209da7625129fb760 upstream. A lock-unlock is missing in ASHMEM_SET_SIZE ioctl which can result in a race condition when mmap is called. After the !asma->file check, before setting asma->size, asma->file can be set in mmap. That would result in having different asma->size than the mapped memory size. Combined with ASHMEM_UNPIN ioctl and shrinker invocation, this can result in memory corruption. Signed-off-by: Viktor Slavkovic Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 2b770cb0c488..558a66b459fa 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -774,10 +774,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; case ASHMEM_SET_SIZE: ret = -EINVAL; + mutex_lock(&ashmem_mutex); if (!asma->file) { ret = 0; asma->size = (size_t)arg; } + mutex_unlock(&ashmem_mutex); break; case ASHMEM_GET_SIZE: ret = asma->size; -- GitLab From 6aebc2670ebfdda0762a6b471fbf8ca18dcf44f2 Mon Sep 17 00:00:00 2001 From: Ben Seri Date: Fri, 8 Dec 2017 15:14:47 +0100 Subject: [PATCH 0909/1398] Bluetooth: Prevent stack info leak from the EFS element. commit 06e7e776ca4d36547e503279aeff996cbb292c16 upstream. In the function l2cap_parse_conf_rsp and in the function l2cap_parse_conf_req the following variable is declared without initialization: struct l2cap_conf_efs efs; In addition, when parsing input configuration parameters in both of these functions, the switch case for handling EFS elements may skip the memcpy call that will write to the efs variable: ... case L2CAP_CONF_EFS: if (olen == sizeof(efs)) memcpy(&efs, (void *)val, olen); ... The olen in the above if is attacker controlled, and regardless of that if, in both of these functions the efs variable would eventually be added to the outgoing configuration request that is being built: l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), (unsigned long) &efs); So by sending a configuration request, or response, that contains an L2CAP_CONF_EFS element, but with an element length that is not sizeof(efs) - the memcpy to the uninitialized efs variable can be avoided, and the uninitialized variable would be returned to the attacker (16 bytes). This issue has been assigned CVE-2017-1000410 Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: Ben Seri Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ffd09c1675d4..2bbca23a9d05 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3353,9 +3353,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data break; case L2CAP_CONF_EFS: - remote_efs = 1; - if (olen == sizeof(efs)) + if (olen == sizeof(efs)) { + remote_efs = 1; memcpy(&efs, (void *) val, olen); + } break; case L2CAP_CONF_EWS: @@ -3574,16 +3575,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, break; case L2CAP_CONF_EFS: - if (olen == sizeof(efs)) + if (olen == sizeof(efs)) { memcpy(&efs, (void *)val, olen); - if (chan->local_stype != L2CAP_SERV_NOTRAFIC && - efs.stype != L2CAP_SERV_NOTRAFIC && - efs.stype != chan->local_stype) - return -ECONNREFUSED; + if (chan->local_stype != L2CAP_SERV_NOTRAFIC && + efs.stype != L2CAP_SERV_NOTRAFIC && + efs.stype != chan->local_stype) + return -ECONNREFUSED; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), + (unsigned long) &efs, endptr - ptr); + } break; case L2CAP_CONF_FCS: -- GitLab From 3ba5d3a2cf40c4ebdc1f702af3b5dea405a6a11e Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 6 Jan 2018 00:56:44 +0800 Subject: [PATCH 0910/1398] uas: ignore UAS for Norelsys NS1068(X) chips commit 928afc85270753657b5543e052cc270c279a3fe9 upstream. The UAS mode of Norelsys NS1068(X) is reported to fail to work on several platforms with the following error message: xhci-hcd xhci-hcd.0.auto: ERROR Transfer event for unknown stream ring slot 1 ep 8 xhci-hcd xhci-hcd.0.auto: @00000000bf04a400 00000000 00000000 1b000000 01098001 And when trying to mount a partition on the disk the disk will disconnect from the USB controller, then after re-connecting the device will be offlined and not working at all. Falling back to USB mass storage can solve this problem, so ignore UAS function of this chip. Signed-off-by: Icenowy Zheng Acked-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 9f356f7cf7d5..719ec68ae309 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -156,6 +156,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_ATA_1X), +/* Reported-by: Icenowy Zheng */ +UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999, + "Norelsys", + "NS1068X", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + /* Reported-by: Takeo Nakayama */ UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999, "JMicron", -- GitLab From d598347989aa8873cebfd7d7b4d29d7aba21b878 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 11 Dec 2017 16:26:40 +0900 Subject: [PATCH 0911/1398] e1000e: Fix e1000_check_for_copper_link_ich8lan return value. commit 4110e02eb45ea447ec6f5459c9934de0a273fb91 upstream. e1000e_check_for_copper_link() and e1000_check_for_copper_link_ich8lan() are the two functions that may be assigned to mac.ops.check_for_link when phy.media_type == e1000_media_type_copper. Commit 19110cfbb34d ("e1000e: Separate signaling for link check/link up") changed the meaning of the return value of check_for_link for copper media but only adjusted the first function. This patch adjusts the second function likewise. Reported-by: Christian Hesse Reported-by: Gabriel C Link: https://bugzilla.kernel.org/show_bug.cgi?id=198047 Fixes: 19110cfbb34d ("e1000e: Separate signaling for link check/link up") Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Tested-by: Christian Hesse Signed-off-by: Jeff Kirsher Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index f3aaca743ea3..8a48656a376b 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1364,6 +1364,9 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. + * + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link + * up). **/ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) { @@ -1379,7 +1382,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 0; + return 1; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1611,10 +1614,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) + if (ret_val) { e_dbg("Error configuring flow control\n"); + return ret_val; + } - return ret_val; + return 1; } static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) -- GitLab From 4e6c2af2ba93ee8709695835920fc57148e4b397 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 5 Jan 2018 09:44:36 -0800 Subject: [PATCH 0912/1398] x86/Documentation: Add PTI description commit 01c9b17bf673b05bb401b76ec763e9730ccf1376 upstream. Add some details about how PTI works, what some of the downsides are, and how to debug it when things go wrong. Also document the kernel parameter: 'pti/nopti'. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Randy Dunlap Reviewed-by: Kees Cook Cc: Moritz Lipp Cc: Daniel Gruss Cc: Michael Schwarz Cc: Richard Fellner Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Hugh Dickins Cc: Andi Lutomirsky Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180105174436.1BC6FA2B@viggo.jf.intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 21 ++-- Documentation/x86/pti.txt | 186 ++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+), 7 deletions(-) create mode 100644 Documentation/x86/pti.txt diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5d2676d043de..2b1d782eda6f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2763,8 +2763,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nojitter [IA-64] Disables jitter checking for ITC timers. - nopti [X86-64] Disable KAISER isolation of kernel from user. - no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page @@ -3327,11 +3325,20 @@ bytes respectively. Such letter suffixes can also be entirely omitted. pt. [PARIDE] See Documentation/blockdev/paride.txt. - pti= [X86_64] - Control KAISER user/kernel address space isolation: - on - enable - off - disable - auto - default setting + pti= [X86_64] Control Page Table Isolation of user and + kernel address spaces. Disabling this feature + removes hardening, but improves performance of + system calls and interrupts. + + on - unconditionally enable + off - unconditionally disable + auto - kernel detects whether your CPU model is + vulnerable to issues that PTI mitigates + + Not specifying this option is equivalent to pti=auto. + + nopti [X86_64] + Equivalent to pti=off pty.legacy_count= [KNL] Number of legacy pty's. Overwrites compiled-in diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt new file mode 100644 index 000000000000..d11eff61fc9a --- /dev/null +++ b/Documentation/x86/pti.txt @@ -0,0 +1,186 @@ +Overview +======== + +Page Table Isolation (pti, previously known as KAISER[1]) is a +countermeasure against attacks on the shared user/kernel address +space such as the "Meltdown" approach[2]. + +To mitigate this class of attacks, we create an independent set of +page tables for use only when running userspace applications. When +the kernel is entered via syscalls, interrupts or exceptions, the +page tables are switched to the full "kernel" copy. When the system +switches back to user mode, the user copy is used again. + +The userspace page tables contain only a minimal amount of kernel +data: only what is needed to enter/exit the kernel such as the +entry/exit functions themselves and the interrupt descriptor table +(IDT). There are a few strictly unnecessary things that get mapped +such as the first C function when entering an interrupt (see +comments in pti.c). + +This approach helps to ensure that side-channel attacks leveraging +the paging structures do not function when PTI is enabled. It can be +enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time. +Once enabled at compile-time, it can be disabled at boot with the +'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt). + +Page Table Management +===================== + +When PTI is enabled, the kernel manages two sets of page tables. +The first set is very similar to the single set which is present in +kernels without PTI. This includes a complete mapping of userspace +that the kernel can use for things like copy_to_user(). + +Although _complete_, the user portion of the kernel page tables is +crippled by setting the NX bit in the top level. This ensures +that any missed kernel->user CR3 switch will immediately crash +userspace upon executing its first instruction. + +The userspace page tables map only the kernel data needed to enter +and exit the kernel. This data is entirely contained in the 'struct +cpu_entry_area' structure which is placed in the fixmap which gives +each CPU's copy of the area a compile-time-fixed virtual address. + +For new userspace mappings, the kernel makes the entries in its +page tables like normal. The only difference is when the kernel +makes entries in the top (PGD) level. In addition to setting the +entry in the main kernel PGD, a copy of the entry is made in the +userspace page tables' PGD. + +This sharing at the PGD level also inherently shares all the lower +layers of the page tables. This leaves a single, shared set of +userspace page tables to manage. One PTE to lock, one set of +accessed bits, dirty bits, etc... + +Overhead +======== + +Protection against side-channel attacks is important. But, +this protection comes at a cost: + +1. Increased Memory Use + a. Each process now needs an order-1 PGD instead of order-0. + (Consumes an additional 4k per process). + b. The 'cpu_entry_area' structure must be 2MB in size and 2MB + aligned so that it can be mapped by setting a single PMD + entry. This consumes nearly 2MB of RAM once the kernel + is decompressed, but no space in the kernel image itself. + +2. Runtime Cost + a. CR3 manipulation to switch between the page table copies + must be done at interrupt, syscall, and exception entry + and exit (it can be skipped when the kernel is interrupted, + though.) Moves to CR3 are on the order of a hundred + cycles, and are required at every entry and exit. + b. A "trampoline" must be used for SYSCALL entry. This + trampoline depends on a smaller set of resources than the + non-PTI SYSCALL entry code, so requires mapping fewer + things into the userspace page tables. The downside is + that stacks must be switched at entry time. + d. Global pages are disabled for all kernel structures not + mapped into both kernel and userspace page tables. This + feature of the MMU allows different processes to share TLB + entries mapping the kernel. Losing the feature means more + TLB misses after a context switch. The actual loss of + performance is very small, however, never exceeding 1%. + d. Process Context IDentifiers (PCID) is a CPU feature that + allows us to skip flushing the entire TLB when switching page + tables by setting a special bit in CR3 when the page tables + are changed. This makes switching the page tables (at context + switch, or kernel entry/exit) cheaper. But, on systems with + PCID support, the context switch code must flush both the user + and kernel entries out of the TLB. The user PCID TLB flush is + deferred until the exit to userspace, minimizing the cost. + See intel.com/sdm for the gory PCID/INVPCID details. + e. The userspace page tables must be populated for each new + process. Even without PTI, the shared kernel mappings + are created by copying top-level (PGD) entries into each + new process. But, with PTI, there are now *two* kernel + mappings: one in the kernel page tables that maps everything + and one for the entry/exit structures. At fork(), we need to + copy both. + f. In addition to the fork()-time copying, there must also + be an update to the userspace PGD any time a set_pgd() is done + on a PGD used to map userspace. This ensures that the kernel + and userspace copies always map the same userspace + memory. + g. On systems without PCID support, each CR3 write flushes + the entire TLB. That means that each syscall, interrupt + or exception flushes the TLB. + h. INVPCID is a TLB-flushing instruction which allows flushing + of TLB entries for non-current PCIDs. Some systems support + PCIDs, but do not support INVPCID. On these systems, addresses + can only be flushed from the TLB for the current PCID. When + flushing a kernel address, we need to flush all PCIDs, so a + single kernel address flush will require a TLB-flushing CR3 + write upon the next use of every PCID. + +Possible Future Work +==================== +1. We can be more careful about not actually writing to CR3 + unless its value is actually changed. +2. Allow PTI to be enabled/disabled at runtime in addition to the + boot-time switching. + +Testing +======== + +To test stability of PTI, the following test procedure is recommended, +ideally doing all of these in parallel: + +1. Set CONFIG_DEBUG_ENTRY=y +2. Run several copies of all of the tools/testing/selftests/x86/ tests + (excluding MPX and protection_keys) in a loop on multiple CPUs for + several minutes. These tests frequently uncover corner cases in the + kernel entry code. In general, old kernels might cause these tests + themselves to crash, but they should never crash the kernel. +3. Run the 'perf' tool in a mode (top or record) that generates many + frequent performance monitoring non-maskable interrupts (see "NMI" + in /proc/interrupts). This exercises the NMI entry/exit code which + is known to trigger bugs in code paths that did not expect to be + interrupted, including nested NMIs. Using "-c" boosts the rate of + NMIs, and using two -c with separate counters encourages nested NMIs + and less deterministic behavior. + + while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done + +4. Launch a KVM virtual machine. +5. Run 32-bit binaries on systems supporting the SYSCALL instruction. + This has been a lightly-tested code path and needs extra scrutiny. + +Debugging +========= + +Bugs in PTI cause a few different signatures of crashes +that are worth noting here. + + * Failures of the selftests/x86 code. Usually a bug in one of the + more obscure corners of entry_64.S + * Crashes in early boot, especially around CPU bringup. Bugs + in the trampoline code or mappings cause these. + * Crashes at the first interrupt. Caused by bugs in entry_64.S, + like screwing up a page table switch. Also caused by + incorrectly mapping the IRQ handler entry code. + * Crashes at the first NMI. The NMI code is separate from main + interrupt handlers and can have bugs that do not affect + normal interrupts. Also caused by incorrectly mapping NMI + code. NMIs that interrupt the entry code must be very + careful and can be the cause of crashes that show up when + running perf. + * Kernel crashes at the first exit to userspace. entry_64.S + bugs, or failing to map some of the exit code. + * Crashes at first interrupt that interrupts userspace. The paths + in entry_64.S that return to userspace are sometimes separate + from the ones that return to the kernel. + * Double faults: overflowing the kernel stack because of page + faults upon page faults. Caused by touching non-pti-mapped + data in the entry code, or forgetting to switch to kernel + CR3 before calling into C functions which are not pti-mapped. + * Userspace segfaults early in boot, sometimes manifesting + as mount(8) failing to mount the rootfs. These have + tended to be TLB invalidation issues. Usually invalidating + the wrong PCID, or otherwise missing an invalidation. + +1. https://gruss.cc/files/kaiser.pdf +2. https://meltdownattack.com/meltdown.pdf -- GitLab From ef463981018e29a7d880556181335a1853c3abf5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 18 Jan 2017 11:15:38 -0800 Subject: [PATCH 0913/1398] x86/cpu: Factor out application of forced CPU caps commit 8bf1ebca215c262e48c15a4a15f175991776f57f upstream. There are multiple call sites that apply forced CPU caps. Factor them into a helper. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Matthew Whitehead Cc: Oleg Nesterov Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/623ff7555488122143e4417de09b18be2085ad06.1484705016.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 918e44772b04..4c652255bd06 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -706,6 +706,16 @@ void cpu_detect(struct cpuinfo_x86 *c) } } +static void apply_forced_caps(struct cpuinfo_x86 *c) +{ + int i; + + for (i = 0; i < NCAPINTS; i++) { + c->x86_capability[i] &= ~cpu_caps_cleared[i]; + c->x86_capability[i] |= cpu_caps_set[i]; + } +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -1086,10 +1096,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); /* Clear/Set all flags overridden by options, after probe */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } + apply_forced_caps(c); #ifdef CONFIG_X86_64 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); @@ -1151,10 +1158,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) * Clear/Set all flags overridden by options, need do it * before following smp all cpus cap AND. */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } + apply_forced_caps(c); /* * On SMP, boot_cpu_data holds the common feature set between -- GitLab From c2cacde516a4cf675f63d84aad7cf1e14a093f4c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:32 +0100 Subject: [PATCH 0914/1398] x86/cpufeatures: Make CPU bugs sticky commit 6cbd2171e89b13377261d15e64384df60ecb530e upstream. There is currently no way to force CPU bug bits like CPU feature bits. That makes it impossible to set a bug bit once at boot and have it stick for all upcoming CPUs. Extend the force set/clear arrays to handle bug bits as well. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.992156574@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeature.h | 2 ++ arch/x86/include/asm/processor.h | 4 ++-- arch/x86/kernel/cpu/common.c | 6 +++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 1d2b69fc0ceb..9ea67a04ff4f 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -135,6 +135,8 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) + #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) /* * Static testing of CPU features. Used the same as boot_cpu_has(). diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8cb52ee3ade6..e40b19ca486e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -156,8 +156,8 @@ extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; extern struct tss_struct doublefault_tss; -extern __u32 cpu_caps_cleared[NCAPINTS]; -extern __u32 cpu_caps_set[NCAPINTS]; +extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; #ifdef CONFIG_SMP DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4c652255bd06..ba9b6014a426 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -480,8 +480,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } -__u32 cpu_caps_cleared[NCAPINTS]; -__u32 cpu_caps_set[NCAPINTS]; +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; void load_percpu_segment(int cpu) { @@ -710,7 +710,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) { int i; - for (i = 0; i < NCAPINTS; i++) { + for (i = 0; i < NCAPINTS + NBUGINTS; i++) { c->x86_capability[i] &= ~cpu_caps_cleared[i]; c->x86_capability[i] |= cpu_caps_set[i]; } -- GitLab From d88f601b9ac9d2b819bb9d947b272d1cf1c36665 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:33 +0100 Subject: [PATCH 0915/1398] x86/cpufeatures: Add X86_BUG_CPU_INSECURE commit a89f040fa34ec9cd682aed98b8f04e3c47d998bd upstream. Many x86 CPUs leak information to user space due to missing isolation of user space and kernel space page tables. There are many well documented ways to exploit that. The upcoming software migitation of isolating the user and kernel space page tables needs a misfeature flag so code can be made runtime conditional. Add the BUG bits which indicates that the CPU is affected and add a feature bit which indicates that the software migitation is enabled. Assume for now that _ALL_ x86 CPUs are affected by this. Exceptions can be made later. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/common.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 454a37adb823..57bd52cd2a2c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -316,5 +316,6 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ba9b6014a426..8c81adc0b926 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -882,6 +882,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) } setup_force_cpu_cap(X86_FEATURE_ALWAYS); + + /* Assume for now that ALL x86 CPUs are insecure */ + setup_force_cpu_bug(X86_BUG_CPU_INSECURE); + fpu__init_system(c); } -- GitLab From 43fe95308d276bdfd133f5951cc25565e39982ec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 5 Jan 2018 15:27:34 +0100 Subject: [PATCH 0916/1398] x86/pti: Rename BUG_CPU_INSECURE to BUG_CPU_MELTDOWN commit de791821c295cc61419a06fe5562288417d1bc58 upstream. Use the name associated with the particular attack which needs page table isolation for mitigation. Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Alan Cox Cc: Jiri Koshina Cc: Linus Torvalds Cc: Tim Chen Cc: Andi Lutomirski Cc: Andi Kleen Cc: Peter Zijlstra Cc: Paul Turner Cc: Tom Lendacky Cc: Greg KH Cc: Dave Hansen Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801051525300.1724@nanos Signed-off-by: Razvan Ghitulete Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 57bd52cd2a2c..985dfd7b3c31 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -316,6 +316,6 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ -#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8c81adc0b926..5ab4fd7f343f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -884,7 +884,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); /* Assume for now that ALL x86 CPUs are insecure */ - setup_force_cpu_bug(X86_BUG_CPU_INSECURE); + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); fpu__init_system(c); } -- GitLab From 26323fb4d717e11a69484c6df02eeef90dba7ef2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 6 Jan 2018 11:49:23 +0000 Subject: [PATCH 0917/1398] x86/cpufeatures: Add X86_BUG_SPECTRE_V[12] commit 99c6fa2511d8a683e61468be91b83f85452115fa upstream. Add the bug bits for spectre v1/2 and force them unconditionally for all cpus. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1515239374-23361-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Razvan Ghitulete Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/common.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 985dfd7b3c31..f364c8919eb8 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -317,5 +317,7 @@ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5ab4fd7f343f..8339b4363fb9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -886,6 +886,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) /* Assume for now that ALL x86 CPUs are insecure */ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + fpu__init_system(c); } -- GitLab From 56eff367e071981bed4a75428993ad896baa69eb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 24 Oct 2016 19:38:43 +0200 Subject: [PATCH 0918/1398] x86/cpu: Merge bugs.c and bugs_64.c commit 62a67e123e058a67db58bc6a14354dd037bafd0a upstream. Should be easier when following boot paths. It probably is a left over from the x86 unification eons ago. No functionality change. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161024173844.23038-3-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Razvan Ghitulete Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/Makefile | 4 +--- arch/x86/kernel/cpu/bugs.c | 26 ++++++++++++++++++++++---- arch/x86/kernel/cpu/bugs_64.c | 33 --------------------------------- 3 files changed, 23 insertions(+), 40 deletions(-) delete mode 100644 arch/x86/kernel/cpu/bugs_64.c diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4a8697f7d4ef..33b63670bf09 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -20,13 +20,11 @@ obj-y := intel_cacheinfo.o scattered.o topology.o obj-y += common.o obj-y += rdrand.o obj-y += match.o +obj-y += bugs.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_X86_32) += bugs.o -obj-$(CONFIG_X86_64) += bugs_64.o - obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0b6124315441..5d82f2ca4acf 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include void __init check_bugs(void) { @@ -28,11 +30,13 @@ void __init check_bugs(void) #endif identify_boot_cpu(); -#ifndef CONFIG_SMP - pr_info("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif + if (!IS_ENABLED(CONFIG_SMP)) { + pr_info("CPU: "); + print_cpu_info(&boot_cpu_data); + } + +#ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. * @@ -48,4 +52,18 @@ void __init check_bugs(void) alternative_instructions(); fpu__init_check_bugs(); +#else /* CONFIG_X86_64 */ + alternative_instructions(); + + /* + * Make sure the first 2MB area is not mapped by huge pages + * There are typically fixed size MTRRs in there and overlapping + * MTRRs into large pages causes slow downs. + * + * Right now we don't do that with gbpages because there seems + * very little benefit for that case. + */ + if (!direct_gbpages) + set_memory_4k((unsigned long)__va(0), 1); +#endif } diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c deleted file mode 100644 index a972ac4c7e7d..000000000000 --- a/arch/x86/kernel/cpu/bugs_64.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * Copyright (C) 2000 SuSE - */ - -#include -#include -#include -#include -#include -#include -#include - -void __init check_bugs(void) -{ - identify_boot_cpu(); -#if !defined(CONFIG_SMP) - pr_info("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - alternative_instructions(); - - /* - * Make sure the first 2MB area is not mapped by huge pages - * There are typically fixed size MTRRs in there and overlapping - * MTRRs into large pages causes slow downs. - * - * Right now we don't do that with gbpages because there seems - * very little benefit for that case. - */ - if (!direct_gbpages) - set_memory_4k((unsigned long)__va(0), 1); -} -- GitLab From 11ec2df9c02071a7c0a63a1febb53e76cdee56ac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 7 Jan 2018 22:48:00 +0100 Subject: [PATCH 0919/1398] sysfs/cpu: Add vulnerability folder commit 87590ce6e373d1a5401f6539f0c59ef92dd924a9 upstream. As the meltdown/spectre problem affects several CPU architectures, it makes sense to have common way to express whether a system is affected by a particular vulnerability or not. If affected the way to express the mitigation should be common as well. Create /sys/devices/system/cpu/vulnerabilities folder and files for meltdown, spectre_v1 and spectre_v2. Allow architectures to override the show function. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Will Deacon Cc: Dave Hansen Cc: Linus Torvalds Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180107214913.096657732@linutronix.de Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 16 +++++++ drivers/base/Kconfig | 3 ++ drivers/base/cpu.c | 48 +++++++++++++++++++ include/linux/cpu.h | 7 +++ 4 files changed, 74 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 498741737055..8b30a48b4c5e 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -350,3 +350,19 @@ Contact: Linux ARM Kernel Mailing list Description: AArch64 CPU registers 'identification' directory exposes the CPU ID registers for identifying model and revision of the CPU. + +What: /sys/devices/system/cpu/vulnerabilities + /sys/devices/system/cpu/vulnerabilities/meltdown + /sys/devices/system/cpu/vulnerabilities/spectre_v1 + /sys/devices/system/cpu/vulnerabilities/spectre_v2 +Date: Januar 2018 +Contact: Linux kernel mailing list +Description: Information about CPU vulnerabilities + + The files are named after the code names of CPU + vulnerabilities. The output of those files reflects the + state of the CPUs in the system. Possible output values: + + "Not affected" CPU is not affected by the vulnerability + "Vulnerable" CPU is affected and no mitigation in effect + "Mitigation: $M" CPU is affetcted and mitigation $M is in effect diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index d02e7c0f5bfd..0651010bba21 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -235,6 +235,9 @@ config GENERIC_CPU_DEVICES config GENERIC_CPU_AUTOPROBE bool +config GENERIC_CPU_VULNERABILITIES + bool + config SOC_BUS bool diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 4c28e1a09786..56b6c8508a89 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -499,10 +499,58 @@ static void __init cpu_dev_register_generic(void) #endif } +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES + +ssize_t __weak cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); +static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); +static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); + +static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, + &dev_attr_spectre_v1.attr, + &dev_attr_spectre_v2.attr, + NULL +}; + +static const struct attribute_group cpu_root_vulnerabilities_group = { + .name = "vulnerabilities", + .attrs = cpu_root_vulnerabilities_attrs, +}; + +static void __init cpu_register_vulnerabilities(void) +{ + if (sysfs_create_group(&cpu_subsys.dev_root->kobj, + &cpu_root_vulnerabilities_group)) + pr_err("Unable to register CPU vulnerabilities\n"); +} + +#else +static inline void cpu_register_vulnerabilities(void) { } +#endif + void __init cpu_dev_init(void) { if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups)) panic("Failed to register CPU subsystem"); cpu_dev_register_generic(); + cpu_register_vulnerabilities(); } diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e571128ad99a..2f475ad89a0d 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -44,6 +44,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr); extern int cpu_add_dev_attr_group(struct attribute_group *attrs); extern void cpu_remove_dev_attr_group(struct attribute_group *attrs); +extern ssize_t cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf); + extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, -- GitLab From 45a98824bd79b1cf969beadb6288438b66082f17 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 7 Jan 2018 22:48:01 +0100 Subject: [PATCH 0920/1398] x86/cpu: Implement CPU vulnerabilites sysfs functions commit 61dc0f555b5c761cdafb0ba5bd41ecf22d68a4c4 upstream. Implement the CPU vulnerabilty show functions for meltdown, spectre_v1 and spectre_v2. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Will Deacon Cc: Dave Hansen Cc: Linus Torvalds Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180107214913.177414879@linutronix.de Signed-off-by: Razvan Ghitulete Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 1 + arch/x86/kernel/cpu/bugs.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index da8156fd3d58..447edeb0cfcb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -64,6 +64,7 @@ config X86 select GENERIC_CLOCKEVENTS_MIN_ADJUST select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5d82f2ca4acf..cd46f9039119 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include #include @@ -67,3 +68,31 @@ void __init check_bugs(void) set_memory_4k((unsigned long)__va(0), 1); #endif } + +#ifdef CONFIG_SYSFS +ssize_t cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + return sprintf(buf, "Not affected\n"); + if (boot_cpu_has(X86_FEATURE_KAISER)) + return sprintf(buf, "Mitigation: PTI\n"); + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) + return sprintf(buf, "Not affected\n"); + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return sprintf(buf, "Not affected\n"); + return sprintf(buf, "Vulnerable\n"); +} +#endif -- GitLab From abcc3e5f0079b850dc4e343f53de1476ac6f5e5c Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 8 Jan 2018 16:09:21 -0600 Subject: [PATCH 0921/1398] x86/cpu/AMD: Make LFENCE a serializing instruction commit e4d0e84e490790798691aaa0f2e598637f1867ec upstream. To aid in speculation control, make LFENCE a serializing instruction since it has less overhead than MFENCE. This is done by setting bit 1 of MSR 0xc0011029 (DE_CFG). Some families that support LFENCE do not have this MSR. For these families, the LFENCE instruction is already serializing. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Reviewed-by: Borislav Petkov Cc: Peter Zijlstra Cc: Tim Chen Cc: Dave Hansen Cc: Borislav Petkov Cc: Dan Williams Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: David Woodhouse Cc: Paul Turner Link: https://lkml.kernel.org/r/20180108220921.12580.71694.stgit@tlendack-t1.amdoffice.net Signed-off-by: Razvan Ghitulete Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/kernel/cpu/amd.c | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b601ddac5719..29a554e5dff6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -330,6 +330,8 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c +#define MSR_F10H_DECFG 0xc0011029 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2b4cf04239b6..8b5b19d38182 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -782,6 +782,16 @@ static void init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K8); if (cpu_has(c, X86_FEATURE_XMM2)) { + /* + * A serializing LFENCE has less overhead than MFENCE, so + * use it for execution serialization. On families which + * don't have that MSR, LFENCE is already serializing. + * msr_set_bit() uses the safe accessors, too, even if the MSR + * is not present. + */ + msr_set_bit(MSR_F10H_DECFG, + MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } -- GitLab From 9c5e750c8e84bea3cacd652a8b20fbdd92998ecc Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 8 Jan 2018 16:09:32 -0600 Subject: [PATCH 0922/1398] x86/cpu/AMD: Use LFENCE_RDTSC in preference to MFENCE_RDTSC commit 9c6a73c75864ad9fa49e5fa6513e4c4071c0e29f upstream. With LFENCE now a serializing instruction, use LFENCE_RDTSC in preference to MFENCE_RDTSC. However, since the kernel could be running under a hypervisor that does not support writing that MSR, read the MSR back and verify that the bit has been set successfully. If the MSR can be read and the bit is set, then set the LFENCE_RDTSC feature, otherwise set the MFENCE_RDTSC feature. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Reviewed-by: Borislav Petkov Cc: Peter Zijlstra Cc: Tim Chen Cc: Dave Hansen Cc: Borislav Petkov Cc: Dan Williams Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: David Woodhouse Cc: Paul Turner Link: https://lkml.kernel.org/r/20180108220932.12580.52458.stgit@tlendack-t1.amdoffice.net Signed-off-by: Razvan Ghitulete Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/amd.c | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 29a554e5dff6..b11c4c072df8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -332,6 +332,7 @@ #define MSR_FAM10H_NODE_ID 0xc001100c #define MSR_F10H_DECFG 0xc0011029 #define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8b5b19d38182..1b89f0c4251e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -782,6 +782,9 @@ static void init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K8); if (cpu_has(c, X86_FEATURE_XMM2)) { + unsigned long long val; + int ret; + /* * A serializing LFENCE has less overhead than MFENCE, so * use it for execution serialization. On families which @@ -792,8 +795,19 @@ static void init_amd(struct cpuinfo_x86 *c) msr_set_bit(MSR_F10H_DECFG, MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + /* + * Verify that the MSR write was successful (could be running + * under a hypervisor) and only then assume that LFENCE is + * serializing. + */ + ret = rdmsrl_safe(MSR_F10H_DECFG, &val); + if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) { + /* A serializing LFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + } else { + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } } /* -- GitLab From 5ddd318a4715f4806aba256f33db1f0f3ab043db Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 9 Jan 2018 15:02:51 +0000 Subject: [PATCH 0923/1398] sysfs/cpu: Fix typos in vulnerability documentation commit 9ecccfaa7cb5249bd31bdceb93fcf5bedb8a24d8 upstream. Fixes: 87590ce6e ("sysfs/cpu: Add vulnerability folder") Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-system-cpu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 8b30a48b4c5e..dfd56ec7a850 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -355,7 +355,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 -Date: Januar 2018 +Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities @@ -365,4 +365,4 @@ Description: Information about CPU vulnerabilities "Not affected" CPU is not affected by the vulnerability "Vulnerable" CPU is affected and no mitigation in effect - "Mitigation: $M" CPU is affetcted and mitigation $M is in effect + "Mitigation: $M" CPU is affected and mitigation $M is in effect -- GitLab From 194dc04770f5d29b707832a5a71c30fffe2af582 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 10 Jan 2018 12:28:16 +0100 Subject: [PATCH 0924/1398] x86/alternatives: Fix optimize_nops() checking commit 612e8e9350fd19cae6900cf36ea0c6892d1a0dca upstream. The alternatives code checks only the first byte whether it is a NOP, but with NOPs in front of the payload and having actual instructions after it breaks the "optimized' test. Make sure to scan all bytes before deciding to optimize the NOPs in there. Reported-by: David Woodhouse Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Tom Lendacky Cc: Andi Kleen Cc: Tim Chen Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Dave Hansen Cc: Andi Kleen Cc: Andrew Lutomirski Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/20180110112815.mgciyf5acwacphkq@pd.tnic Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5cb272a7a5a3..10d5a3d6affc 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -340,9 +340,12 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) { unsigned long flags; + int i; - if (instr[0] != 0x90) - return; + for (i = 0; i < a->padlen; i++) { + if (instr[i] != 0x90) + return; + } local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); -- GitLab From 91b7e5cdc80a3b684154cf0983f22d22ec9b29e5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 4 Jan 2018 14:37:05 +0000 Subject: [PATCH 0925/1398] x86/alternatives: Add missing '\n' at end of ALTERNATIVE inline asm commit b9e705ef7cfaf22db0daab91ad3cd33b0fa32eb9 upstream. Where an ALTERNATIVE is used in the middle of an inline asm block, this would otherwise lead to the following instruction being appended directly to the trailing ".popsection", and a failed compile. Fixes: 9cebed423c84 ("x86, alternative: Use .pushsection/.popsection") Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: ak@linux.intel.com Cc: Tim Chen Cc: Peter Zijlstra Cc: Paul Turner Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180104143710.8961-8-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/alternative.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index d4aea31eec03..deca9b9c7923 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -139,7 +139,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ - ".popsection" + ".popsection\n" #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ OLDINSTR_2(oldinstr, 1, 2) \ @@ -150,7 +150,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ - ".popsection" + ".popsection\n" /* * Alternative instructions for different CPU types or capabilities. -- GitLab From 00bcb5ada638d884818aad7d46f90501bac761e9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 17 Sep 2017 09:03:50 -0700 Subject: [PATCH 0926/1398] x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID) earlier commit b8b7abaed7a49b350f8ba659ddc264b04931d581 upstream. Otherwise we might have the PCID feature bit set during cpu_init(). This is just for robustness. I haven't seen any actual bugs here. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: cba4671af755 ("x86/mm: Disable PCID on 32-bit kernels") Link: http://lkml.kernel.org/r/b16dae9d6b0db5d9801ddbebbfd83384097c61f3.1505663533.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 8 -------- arch/x86/kernel/cpu/common.c | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cd46f9039119..cb6b4f9d0b7a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -22,14 +22,6 @@ void __init check_bugs(void) { -#ifdef CONFIG_X86_32 - /* - * Regardless of whether PCID is enumerated, the SDM says - * that it can't be enabled in 32-bit mode. - */ - setup_clear_cpu_cap(X86_FEATURE_PCID); -#endif - identify_boot_cpu(); if (!IS_ENABLED(CONFIG_SMP)) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8339b4363fb9..7b9ae04ddf5d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -890,6 +890,14 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V2); fpu__init_system(c); + +#ifdef CONFIG_X86_32 + /* + * Regardless of whether PCID is enumerated, the SDM says + * that it can't be enabled in 32-bit mode. + */ + setup_clear_cpu_cap(X86_FEATURE_PCID); +#endif } void __init early_cpu_init(void) -- GitLab From 35aee626fa6311c3942fedd50c50f6748922e89f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 1 Mar 2017 12:04:44 -0600 Subject: [PATCH 0927/1398] objtool, modules: Discard objtool annotation sections for modules commit e390f9a9689a42f477a6073e2e7df530a4c1b740 upstream. The '__unreachable' and '__func_stack_frame_non_standard' sections are only used at compile time. They're discarded for vmlinux but they should also be discarded for modules. Since this is a recurring pattern, prefix the section names with ".discard.". It's a nice convention and vmlinux.lds.h already discards such sections. Also remove the 'a' (allocatable) flag from the __unreachable section since it doesn't make sense for a discarded section. Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Jessica Yu Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") Link: http://lkml.kernel.org/r/20170301180444.lhd53c5tibc4ns77@treble Signed-off-by: Ingo Molnar [dwmw2: Remove the unreachable part in backporting since it's not here yet] Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/frame.h | 2 +- scripts/mod/modpost.c | 1 + scripts/module-common.lds | 5 ++++- tools/objtool/builtin-check.c | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/linux/frame.h b/include/linux/frame.h index e6baaba3f1ae..d772c61c31da 100644 --- a/include/linux/frame.h +++ b/include/linux/frame.h @@ -11,7 +11,7 @@ * For more information, see tools/objtool/Documentation/stack-validation.txt. */ #define STACK_FRAME_NON_STANDARD(func) \ - static void __used __section(__func_stack_frame_non_standard) \ + static void __used __section(.discard.func_stack_frame_non_standard) \ *__func_stack_frame_non_standard_##func = func #else /* !CONFIG_STACK_VALIDATION */ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index bd8349759095..845eb9b800f3 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -838,6 +838,7 @@ static const char *const section_white_list[] = ".cmem*", /* EZchip */ ".fmt_slot*", /* EZchip */ ".gnu.lto*", + ".discard.*", NULL }; diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 53234e85192a..9b6e246a45d0 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -4,7 +4,10 @@ * combine them automatically. */ SECTIONS { - /DISCARD/ : { *(.discard) } + /DISCARD/ : { + *(.discard) + *(.discard.*) + } __ksymtab 0 : { *(SORT(___ksymtab+*)) } __ksymtab_gpl 0 : { *(SORT(___ksymtab_gpl+*)) } diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index b8dadb050d2b..f8b6cf5fec87 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -1229,7 +1229,7 @@ int cmd_check(int argc, const char **argv) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); - file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard"); + file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); file.rodata = find_section_by_name(file.elf, ".rodata"); file.ignore_unreachables = false; file.c_file = find_section_by_name(file.elf, ".comment"); -- GitLab From 3adb52ab29760624cd59ea0579317a24c4827e9f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 11 Jan 2018 21:46:23 +0000 Subject: [PATCH 0928/1398] objtool: Detect jumps to retpoline thunks commit 39b735332cb8b33a27c28592d969e4016c86c3ea upstream. A direct jump to a retpoline thunk is really an indirect jump in disguise. Change the objtool instruction type accordingly. Objtool needs to know where indirect branches are so it can detect switch statement jump tables. This fixes a bunch of warnings with CONFIG_RETPOLINE like: arch/x86/events/intel/uncore_nhmex.o: warning: objtool: nhmex_rbox_msr_enable_event()+0x44: sibling call from callable instruction with modified stack frame kernel/signal.o: warning: objtool: copy_siginfo_to_user()+0x91: sibling call from callable instruction with modified stack frame ... Signed-off-by: Josh Poimboeuf Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-2-git-send-email-dwmw@amazon.co.uk [dwmw2: Applies to tools/objtool/builtin-check.c not check.c] Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- tools/objtool/builtin-check.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index f8b6cf5fec87..293a23e27968 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -382,6 +382,13 @@ static int add_jump_destinations(struct objtool_file *file) } else if (rela->sym->sec->idx) { dest_sec = rela->sym->sec; dest_off = rela->sym->sym.st_value + rela->addend + 4; + } else if (strstr(rela->sym->name, "_indirect_thunk_")) { + /* + * Retpoline jumps are really dynamic jumps in + * disguise, so convert them accordingly. + */ + insn->type = INSN_JUMP_DYNAMIC; + continue; } else { /* sibling call */ insn->jump_dest = 0; -- GitLab From 4d8bd3e2f6b1e45d8e080030f2554476e7c18da7 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 11 Jan 2018 21:46:24 +0000 Subject: [PATCH 0929/1398] objtool: Allow alternatives to be ignored commit 258c76059cece01bebae098e81bacb1af2edad17 upstream. Getting objtool to understand retpolines is going to be a bit of a challenge. For now, take advantage of the fact that retpolines are patched in with alternatives. Just read the original (sane) non-alternative instruction, and ignore the patched-in retpoline. This allows objtool to understand the control flow *around* the retpoline, even if it can't yet follow what's inside. This means the ORC unwinder will fail to unwind from inside a retpoline, but will work fine otherwise. Signed-off-by: Josh Poimboeuf Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-3-git-send-email-dwmw@amazon.co.uk [dwmw2: Applies to tools/objtool/builtin-check.c not check.[ch]] Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- tools/objtool/builtin-check.c | 64 +++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 293a23e27968..f789621cbdba 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -51,7 +51,7 @@ struct instruction { unsigned int len, state; unsigned char type; unsigned long immediate; - bool alt_group, visited; + bool alt_group, visited, ignore_alts; struct symbol *call_dest; struct instruction *jump_dest; struct list_head alts; @@ -352,6 +352,40 @@ static void add_ignores(struct objtool_file *file) } } +/* + * FIXME: For now, just ignore any alternatives which add retpolines. This is + * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. + * But it at least allows objtool to understand the control flow *around* the + * retpoline. + */ +static int add_nospec_ignores(struct objtool_file *file) +{ + struct section *sec; + struct rela *rela; + struct instruction *insn; + + sec = find_section_by_name(file->elf, ".rela.discard.nospec"); + if (!sec) + return 0; + + list_for_each_entry(rela, &sec->rela_list, list) { + if (rela->sym->type != STT_SECTION) { + WARN("unexpected relocation symbol type in %s", sec->name); + return -1; + } + + insn = find_insn(file, rela->sym->sec, rela->addend); + if (!insn) { + WARN("bad .discard.nospec entry"); + return -1; + } + + insn->ignore_alts = true; + } + + return 0; +} + /* * Find the destination instructions for all jumps. */ @@ -435,11 +469,18 @@ static int add_call_destinations(struct objtool_file *file) dest_off = insn->offset + insn->len + insn->immediate; insn->call_dest = find_symbol_by_offset(insn->sec, dest_off); + /* + * FIXME: Thanks to retpolines, it's now considered + * normal for a function to call within itself. So + * disable this warning for now. + */ +#if 0 if (!insn->call_dest) { WARN_FUNC("can't find call dest symbol at offset 0x%lx", insn->sec, insn->offset, dest_off); return -1; } +#endif } else if (rela->sym->type == STT_SECTION) { insn->call_dest = find_symbol_by_offset(rela->sym->sec, rela->addend+4); @@ -601,12 +642,6 @@ static int add_special_section_alts(struct objtool_file *file) return ret; list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { - alt = malloc(sizeof(*alt)); - if (!alt) { - WARN("malloc failed"); - ret = -1; - goto out; - } orig_insn = find_insn(file, special_alt->orig_sec, special_alt->orig_off); @@ -617,6 +652,10 @@ static int add_special_section_alts(struct objtool_file *file) goto out; } + /* Ignore retpoline alternatives. */ + if (orig_insn->ignore_alts) + continue; + new_insn = NULL; if (!special_alt->group || special_alt->new_len) { new_insn = find_insn(file, special_alt->new_sec, @@ -642,6 +681,13 @@ static int add_special_section_alts(struct objtool_file *file) goto out; } + alt = malloc(sizeof(*alt)); + if (!alt) { + WARN("malloc failed"); + ret = -1; + goto out; + } + alt->insn = new_insn; list_add_tail(&alt->list, &orig_insn->alts); @@ -861,6 +907,10 @@ static int decode_sections(struct objtool_file *file) add_ignores(file); + ret = add_nospec_ignores(file); + if (ret) + return ret; + ret = add_jump_destinations(file); if (ret) return ret; -- GitLab From 4bf050da57d970ea19bb8643e4c278ffb7262228 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 29 Sep 2017 17:15:36 +0300 Subject: [PATCH 0930/1398] x86/asm: Use register variable to get stack pointer value commit 196bd485ee4f03ce4c690bfcf38138abfcd0a4bc upstream. Currently we use current_stack_pointer() function to get the value of the stack pointer register. Since commit: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") ... we have a stack register variable declared. It can be used instead of current_stack_pointer() function which allows to optimize away some excessive "mov %rsp, %" instructions: -mov %rsp,%rdx -sub %rdx,%rax -cmp $0x3fff,%rax -ja ffffffff810722fd +sub %rsp,%rax +cmp $0x3fff,%rax +ja ffffffff810722fa Remove current_stack_pointer(), rename __asm_call_sp to current_stack_pointer and use it instead of the removed function. Signed-off-by: Andrey Ryabinin Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170929141537.29167-1-aryabinin@virtuozzo.com Signed-off-by: Ingo Molnar [dwmw2: We want ASM_CALL_CONSTRAINT for retpoline] Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm.h | 11 +++++++++++ arch/x86/include/asm/thread_info.h | 11 ----------- arch/x86/kernel/irq_32.c | 6 +++--- arch/x86/kernel/traps.c | 2 +- arch/x86/mm/tlb.c | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 7acb51c49fec..00523524edbf 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -125,4 +125,15 @@ /* For C file, we already have NOKPROBE_SYMBOL macro */ #endif +#ifndef __ASSEMBLY__ +/* + * This output constraint should be used for any inline asm which has a "call" + * instruction. Otherwise the asm may be inserted before the frame pointer + * gets set up by the containing function. If you forget to do this, objtool + * may print a "call without frame pointer save/setup" warning. + */ +register unsigned long current_stack_pointer asm(_ASM_SP); +#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) +#endif + #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ad6f5eb07a95..bdf9c4c91572 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -152,17 +152,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -static inline unsigned long current_stack_pointer(void) -{ - unsigned long sp; -#ifdef CONFIG_X86_64 - asm("mov %%rsp,%0" : "=g" (sp)); -#else - asm("mov %%esp,%0" : "=g" (sp)); -#endif - return sp; -} - /* * Walks up the stack frames to make sure that the specified object is * entirely contained by a single stack frame. diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 1f38d9a4d9de..d4eb450144fd 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -64,7 +64,7 @@ static void call_on_stack(void *func, void *stack) static inline void *current_stack(void) { - return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); + return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); } static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) @@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) /* Save the next esp at the bottom of the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer(); + *prev_esp = current_stack_pointer; if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -139,7 +139,7 @@ void do_softirq_own_stack(void) /* Push the previous esp onto the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer(); + *prev_esp = current_stack_pointer; call_on_stack(__do_softirq, isp); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index bd4e3d4d3625..322f433fbc76 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -153,7 +153,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) * from double_fault. */ BUG_ON((unsigned long)(current_top_of_stack() - - current_stack_pointer()) >= THREAD_SIZE); + current_stack_pointer) >= THREAD_SIZE); preempt_enable_no_resched(); } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 41205de487e7..578973ade71b 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -110,7 +110,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * mapped in the new pgd, we'll double-fault. Forcibly * map it. */ - unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); + unsigned int stack_pgd_index = pgd_index(current_stack_pointer); pgd_t *pgd = next->pgd + stack_pgd_index; -- GitLab From 2bb5de42f254bf5addedf17c9c25c68d65639b55 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:25 +0000 Subject: [PATCH 0931/1398] x86/retpoline: Add initial retpoline support commit 76b043848fd22dbf7f8bf3a1452f8c70d557b860 upstream. Enable the use of -mindirect-branch=thunk-extern in newer GCC, and provide the corresponding thunks. Provide assembler macros for invoking the thunks in the same way that GCC does, from native and inline assembler. This adds X86_FEATURE_RETPOLINE and sets it by default on all CPUs. In some circumstances, IBRS microcode features may be used instead, and the retpoline can be disabled. On AMD CPUs if lfence is serialising, the retpoline can be dramatically simplified to a simple "lfence; jmp *\reg". A future patch, after it has been verified that lfence really is serialising in all circumstances, can enable this by setting the X86_FEATURE_RETPOLINE_AMD feature bit in addition to X86_FEATURE_RETPOLINE. Do not align the retpoline in the altinstr section, because there is no guarantee that it stays aligned when it's copied over the oldinstr during alternative patching. [ Andi Kleen: Rename the macros, add CONFIG_RETPOLINE option, export thunks] [ tglx: Put actual function CALL/JMP in front of the macros, convert to symbolic labels ] [ dwmw2: Convert back to numeric labels, merge objtool fixes ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-4-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 13 +++ arch/x86/Makefile | 10 ++ arch/x86/include/asm/asm-prototypes.h | 25 +++++ arch/x86/include/asm/cpufeatures.h | 3 + arch/x86/include/asm/nospec-branch.h | 128 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/common.c | 4 + arch/x86/lib/Makefile | 1 + arch/x86/lib/retpoline.S | 48 ++++++++++ 8 files changed, 232 insertions(+) create mode 100644 arch/x86/include/asm/nospec-branch.h create mode 100644 arch/x86/lib/retpoline.S diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 447edeb0cfcb..0ca4d12ce95c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -408,6 +408,19 @@ config GOLDFISH def_bool y depends on X86_GOLDFISH +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + default y + ---help--- + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + + Without compiler support, at least indirect branches in assembler + code are eliminated. Since this includes the syscall entry path, + it is not entirely pointless. + if X86_32 config X86_EXTENDED_PLATFORM bool "Support for extended (non-PC) x86 platforms" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2d449337a360..1e1a7334db0f 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -182,6 +182,16 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) +# Avoid indirect branches in kernel to deal with Spectre +ifdef CONFIG_RETPOLINE + RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) + ifneq ($(RETPOLINE_CFLAGS),) + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE + else + $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.) + endif +endif + archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 44b8762fa0c7..b15aa4083dfd 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -10,7 +10,32 @@ #include #include #include +#include #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); #endif + +#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_X86_32 +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); +#else +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); +INDIRECT_THUNK(8) +INDIRECT_THUNK(9) +INDIRECT_THUNK(10) +INDIRECT_THUNK(11) +INDIRECT_THUNK(12) +INDIRECT_THUNK(13) +INDIRECT_THUNK(14) +INDIRECT_THUNK(15) +#endif +INDIRECT_THUNK(ax) +INDIRECT_THUNK(bx) +INDIRECT_THUNK(cx) +INDIRECT_THUNK(dx) +INDIRECT_THUNK(si) +INDIRECT_THUNK(di) +INDIRECT_THUNK(bp) +INDIRECT_THUNK(sp) +#endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f364c8919eb8..4467568a531b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -194,6 +194,9 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h new file mode 100644 index 000000000000..e20e92ef2ca8 --- /dev/null +++ b/arch/x86/include/asm/nospec-branch.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __NOSPEC_BRANCH_H__ +#define __NOSPEC_BRANCH_H__ + +#include +#include +#include + +#ifdef __ASSEMBLY__ + +/* + * This should be used immediately before a retpoline alternative. It tells + * objtool where the retpolines are so that it can make sense of the control + * flow by just reading the original instruction(s) and ignoring the + * alternatives. + */ +.macro ANNOTATE_NOSPEC_ALTERNATIVE + .Lannotate_\@: + .pushsection .discard.nospec + .long .Lannotate_\@ - . + .popsection +.endm + +/* + * These are the bare retpoline primitives for indirect jmp and call. + * Do not use these directly; they only exist to make the ALTERNATIVE + * invocation below less ugly. + */ +.macro RETPOLINE_JMP reg:req + call .Ldo_rop_\@ +.Lspec_trap_\@: + pause + jmp .Lspec_trap_\@ +.Ldo_rop_\@: + mov \reg, (%_ASM_SP) + ret +.endm + +/* + * This is a wrapper around RETPOLINE_JMP so the called function in reg + * returns to the instruction after the macro. + */ +.macro RETPOLINE_CALL reg:req + jmp .Ldo_call_\@ +.Ldo_retpoline_jmp_\@: + RETPOLINE_JMP \reg +.Ldo_call_\@: + call .Ldo_retpoline_jmp_\@ +.endm + +/* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 + * attack. + */ +.macro JMP_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE_2 __stringify(jmp *\reg), \ + __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD +#else + jmp *\reg +#endif +.endm + +.macro CALL_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE_2 __stringify(call *\reg), \ + __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ + __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD +#else + call *\reg +#endif +.endm + +#else /* __ASSEMBLY__ */ + +#define ANNOTATE_NOSPEC_ALTERNATIVE \ + "999:\n\t" \ + ".pushsection .discard.nospec\n\t" \ + ".long 999b - .\n\t" \ + ".popsection\n\t" + +#if defined(CONFIG_X86_64) && defined(RETPOLINE) + +/* + * Since the inline asm uses the %V modifier which is only in newer GCC, + * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. + */ +# define CALL_NOSPEC \ + ANNOTATE_NOSPEC_ALTERNATIVE \ + ALTERNATIVE( \ + "call *%[thunk_target]\n", \ + "call __x86_indirect_thunk_%V[thunk_target]\n", \ + X86_FEATURE_RETPOLINE) +# define THUNK_TARGET(addr) [thunk_target] "r" (addr) + +#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) +/* + * For i386 we use the original ret-equivalent retpoline, because + * otherwise we'll run out of registers. We don't care about CET + * here, anyway. + */ +# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ + " jmp 904f;\n" \ + " .align 16\n" \ + "901: call 903f;\n" \ + "902: pause;\n" \ + " jmp 902b;\n" \ + " .align 16\n" \ + "903: addl $4, %%esp;\n" \ + " pushl %[thunk_target];\n" \ + " ret;\n" \ + " .align 16\n" \ + "904: call 901b;\n", \ + X86_FEATURE_RETPOLINE) + +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#else /* No retpoline */ +# define CALL_NOSPEC "call *%[thunk_target]\n" +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7b9ae04ddf5d..6e885cc68152 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -889,6 +889,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); +#ifdef CONFIG_RETPOLINE + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); +#endif + fpu__init_system(c); #ifdef CONFIG_X86_32 diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 34a74131a12c..6bf1898ddf49 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -25,6 +25,7 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +lib-$(CONFIG_RETPOLINE) += retpoline.o obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S new file mode 100644 index 000000000000..cb45c6cb465f --- /dev/null +++ b/arch/x86/lib/retpoline.S @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include + +.macro THUNK reg + .section .text.__x86.indirect_thunk.\reg + +ENTRY(__x86_indirect_thunk_\reg) + CFI_STARTPROC + JMP_NOSPEC %\reg + CFI_ENDPROC +ENDPROC(__x86_indirect_thunk_\reg) +.endm + +/* + * Despite being an assembler file we can't just use .irp here + * because __KSYM_DEPS__ only uses the C preprocessor and would + * only see one instance of "__x86_indirect_thunk_\reg" rather + * than one per register with the correct names. So we do it + * the simple and nasty way... + */ +#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) +#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) + +GENERATE_THUNK(_ASM_AX) +GENERATE_THUNK(_ASM_BX) +GENERATE_THUNK(_ASM_CX) +GENERATE_THUNK(_ASM_DX) +GENERATE_THUNK(_ASM_SI) +GENERATE_THUNK(_ASM_DI) +GENERATE_THUNK(_ASM_BP) +GENERATE_THUNK(_ASM_SP) +#ifdef CONFIG_64BIT +GENERATE_THUNK(r8) +GENERATE_THUNK(r9) +GENERATE_THUNK(r10) +GENERATE_THUNK(r11) +GENERATE_THUNK(r12) +GENERATE_THUNK(r13) +GENERATE_THUNK(r14) +GENERATE_THUNK(r15) +#endif -- GitLab From 8f96937ee30484aebf687f33e65e8be7ecace13a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:26 +0000 Subject: [PATCH 0932/1398] x86/spectre: Add boot time option to select Spectre v2 mitigation commit da285121560e769cc31797bba6422eea71d473e0 upstream. Add a spectre_v2= option to select the mitigation used for the indirect branch speculation vulnerability. Currently, the only option available is retpoline, in its various forms. This will be expanded to cover the new IBRS/IBPB microcode features. The RETPOLINE_AMD feature relies on a serializing LFENCE for speculation control. For AMD hardware, only set RETPOLINE_AMD if LFENCE is a serializing instruction, which is indicated by the LFENCE_RDTSC feature. [ tglx: Folded back the LFENCE/AMD fixes and reworked it so IBRS integration becomes simple ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-5-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 28 +++++ arch/x86/include/asm/nospec-branch.h | 10 ++ arch/x86/kernel/cpu/bugs.c | 158 ++++++++++++++++++++++++++- arch/x86/kernel/cpu/common.c | 4 - 4 files changed, 195 insertions(+), 5 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2b1d782eda6f..4c2667aa4634 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2691,6 +2691,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. + nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2 + (indirect branch prediction) vulnerability. System may + allow data leaks with this option, which is equivalent + to spectre_v2=off. + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. @@ -3944,6 +3949,29 @@ bytes respectively. Such letter suffixes can also be entirely omitted. sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/laptops/sonypi.txt + spectre_v2= [X86] Control mitigation of Spectre variant 2 + (indirect branch speculation) vulnerability. + + on - unconditionally enable + off - unconditionally disable + auto - kernel detects whether your CPU model is + vulnerable + + Selecting 'on' will, and 'auto' may, choose a + mitigation method at run time according to the + CPU, the available microcode, the setting of the + CONFIG_RETPOLINE configuration option, and the + compiler with which the kernel was built. + + Specific mitigations can also be selected manually: + + retpoline - replace indirect branches + retpoline,generic - google's original retpoline + retpoline,amd - AMD-specific minimal thunk + + Not specifying this option is equivalent to + spectre_v2=auto. + spia_io_base= [HW,MTD] spia_fio_base= spia_pedr= diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index e20e92ef2ca8..ea034fa6e261 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -124,5 +124,15 @@ # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif +/* The Spectre V2 mitigation variants */ +enum spectre_v2_mitigation { + SPECTRE_V2_NONE, + SPECTRE_V2_RETPOLINE_MINIMAL, + SPECTRE_V2_RETPOLINE_MINIMAL_AMD, + SPECTRE_V2_RETPOLINE_GENERIC, + SPECTRE_V2_RETPOLINE_AMD, + SPECTRE_V2_IBRS, +}; + #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cb6b4f9d0b7a..49d25ddf0e9f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -10,6 +10,9 @@ #include #include #include + +#include +#include #include #include #include @@ -20,6 +23,8 @@ #include #include +static void __init spectre_v2_select_mitigation(void); + void __init check_bugs(void) { identify_boot_cpu(); @@ -29,6 +34,9 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); } + /* Select the proper spectre mitigation before patching alternatives */ + spectre_v2_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -61,6 +69,153 @@ void __init check_bugs(void) #endif } +/* The kernel command line selection */ +enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_NONE, + SPECTRE_V2_CMD_AUTO, + SPECTRE_V2_CMD_FORCE, + SPECTRE_V2_CMD_RETPOLINE, + SPECTRE_V2_CMD_RETPOLINE_GENERIC, + SPECTRE_V2_CMD_RETPOLINE_AMD, +}; + +static const char *spectre_v2_strings[] = { + [SPECTRE_V2_NONE] = "Vulnerable", + [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", + [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", +}; + +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt + +static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; + +static void __init spec2_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s\n", reason); +} + +static void __init spec2_print_if_secure(const char *reason) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s\n", reason); +} + +static inline bool retp_compiler(void) +{ + return __is_defined(RETPOLINE); +} + +static inline bool match_option(const char *arg, int arglen, const char *opt) +{ + int len = strlen(opt); + + return len == arglen && !strncmp(arg, opt, len); +} + +static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) +{ + char arg[20]; + int ret; + + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, + sizeof(arg)); + if (ret > 0) { + if (match_option(arg, ret, "off")) { + goto disable; + } else if (match_option(arg, ret, "on")) { + spec2_print_if_secure("force enabled on command line."); + return SPECTRE_V2_CMD_FORCE; + } else if (match_option(arg, ret, "retpoline")) { + spec2_print_if_insecure("retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE; + } else if (match_option(arg, ret, "retpoline,amd")) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); + return SPECTRE_V2_CMD_AUTO; + } + spec2_print_if_insecure("AMD retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE_AMD; + } else if (match_option(arg, ret, "retpoline,generic")) { + spec2_print_if_insecure("generic retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE_GENERIC; + } else if (match_option(arg, ret, "auto")) { + return SPECTRE_V2_CMD_AUTO; + } + } + + if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_AUTO; +disable: + spec2_print_if_insecure("disabled on command line."); + return SPECTRE_V2_CMD_NONE; +} + +static void __init spectre_v2_select_mitigation(void) +{ + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); + enum spectre_v2_mitigation mode = SPECTRE_V2_NONE; + + /* + * If the CPU is not affected and the command line mode is NONE or AUTO + * then nothing to do. + */ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) && + (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO)) + return; + + switch (cmd) { + case SPECTRE_V2_CMD_NONE: + return; + + case SPECTRE_V2_CMD_FORCE: + /* FALLTRHU */ + case SPECTRE_V2_CMD_AUTO: + goto retpoline_auto; + + case SPECTRE_V2_CMD_RETPOLINE_AMD: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_amd; + break; + case SPECTRE_V2_CMD_RETPOLINE_GENERIC: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_generic; + break; + case SPECTRE_V2_CMD_RETPOLINE: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_auto; + break; + } + pr_err("kernel not compiled with retpoline; no mitigation available!"); + return; + +retpoline_auto: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + retpoline_amd: + if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("LFENCE not serializing. Switching to generic retpoline\n"); + goto retpoline_generic; + } + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : + SPECTRE_V2_RETPOLINE_MINIMAL_AMD; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } else { + retpoline_generic: + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : + SPECTRE_V2_RETPOLINE_MINIMAL; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } + + spectre_v2_enabled = mode; + pr_info("%s\n", spectre_v2_strings[mode]); +} + +#undef pr_fmt + #ifdef CONFIG_SYSFS ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) @@ -85,6 +240,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Vulnerable\n"); + + return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); } #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6e885cc68152..7b9ae04ddf5d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -889,10 +889,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); -#ifdef CONFIG_RETPOLINE - setup_force_cpu_cap(X86_FEATURE_RETPOLINE); -#endif - fpu__init_system(c); #ifdef CONFIG_X86_32 -- GitLab From 2adc2f74449f7d65ee20675aa2987c3a7446bfab Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:27 +0000 Subject: [PATCH 0933/1398] x86/retpoline/crypto: Convert crypto assembler indirect jumps commit 9697fa39efd3fc3692f2949d4045f393ec58450b upstream. Convert all indirect jumps in crypto assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-6-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/aesni-intel_asm.S | 5 +++-- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 3 ++- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 3 ++- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 383a6f84a060..fa8801b35e51 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -32,6 +32,7 @@ #include #include #include +#include /* * The following macros are used to move an (un)aligned 16 byte value to/from @@ -2734,7 +2735,7 @@ ENTRY(aesni_xts_crypt8) pxor INC, STATE4 movdqu IV, 0x30(OUTP) - call *%r11 + CALL_NOSPEC %r11 movdqu 0x00(OUTP), INC pxor INC, STATE1 @@ -2779,7 +2780,7 @@ ENTRY(aesni_xts_crypt8) _aesni_gf128mul_x_ble() movups IV, (IVP) - call *%r11 + CALL_NOSPEC %r11 movdqu 0x40(OUTP), INC pxor INC, STATE1 diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index aa9e8bd163f6..77ff4de2224d 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -17,6 +17,7 @@ #include #include +#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -1224,7 +1225,7 @@ camellia_xts_crypt_16way: vpxor 14 * 16(%rax), %xmm15, %xmm14; vpxor 15 * 16(%rax), %xmm15, %xmm15; - call *%r9; + CALL_NOSPEC %r9; addq $(16 * 16), %rsp; diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 16186c18656d..7384342fbb41 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -12,6 +12,7 @@ #include #include +#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -1337,7 +1338,7 @@ camellia_xts_crypt_32way: vpxor 14 * 32(%rax), %ymm15, %ymm14; vpxor 15 * 32(%rax), %ymm15, %ymm15; - call *%r9; + CALL_NOSPEC %r9; addq $(16 * 32), %rsp; diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index dc05f010ca9b..174fd4146043 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -45,6 +45,7 @@ #include #include +#include ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction @@ -172,7 +173,7 @@ continue_block: movzxw (bufp, %rax, 2), len lea crc_array(%rip), bufp lea (bufp, len, 1), bufp - jmp *bufp + JMP_NOSPEC bufp ################################################################ ## 2a) PROCESS FULL BLOCKS: -- GitLab From 8b1bacc3218c75707175782ed70c2aa916d366bb Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:28 +0000 Subject: [PATCH 0934/1398] x86/retpoline/entry: Convert entry assembler indirect jumps commit 2641f08bb7fc63a636a2b18173221d7040a3512e upstream. Convert indirect jumps in core 32/64bit entry assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Don't use CALL_NOSPEC in entry_SYSCALL_64_fastpath because the return address after the 'call' instruction must be *precisely* at the .Lentry_SYSCALL_64_after_fastpath label for stub_ptregs_64 to work, and the use of alternatives will mess that up unless we play horrid games to prepend with NOPs and make the variants the same length. It's not worth it; in the case where we ALTERNATIVE out the retpoline, the first instruction at __x86.indirect_thunk.rax is going to be a bare jmp *%rax anyway. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Acked-by: Arjan van de Ven Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-7-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 5 +++-- arch/x86/entry/entry_64.S | 10 ++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index edba8606b99a..7b95f35ba818 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -45,6 +45,7 @@ #include #include #include +#include .section .entry.text, "ax" @@ -260,7 +261,7 @@ ENTRY(ret_from_fork) /* kernel thread */ 1: movl %edi, %eax - call *%ebx + CALL_NOSPEC %ebx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() @@ -1062,7 +1063,7 @@ error_code: movl %ecx, %es TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer - call *%edi + CALL_NOSPEC %edi jmp ret_from_exception END(page_fault) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index af4e58132d91..b9c901ce6582 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -37,6 +37,7 @@ #include #include #include +#include #include /* Avoid __ASSEMBLER__'ifying just for this. */ @@ -208,7 +209,12 @@ entry_SYSCALL_64_fastpath: * It might end up jumping to the slow path. If it jumps, RAX * and all argument registers are clobbered. */ +#ifdef CONFIG_RETPOLINE + movq sys_call_table(, %rax, 8), %rax + call __x86_indirect_thunk_rax +#else call *sys_call_table(, %rax, 8) +#endif .Lentry_SYSCALL_64_after_fastpath_call: movq %rax, RAX(%rsp) @@ -380,7 +386,7 @@ ENTRY(stub_ptregs_64) jmp entry_SYSCALL64_slow_path 1: - jmp *%rax /* Called from C */ + JMP_NOSPEC %rax /* Called from C */ END(stub_ptregs_64) .macro ptregs_stub func @@ -457,7 +463,7 @@ ENTRY(ret_from_fork) 1: /* kernel thread */ movq %r12, %rdi - call *%rbx + CALL_NOSPEC %rbx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() -- GitLab From 83d7658362cc65e3d8c71b6002b107dc1f5bc55a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:29 +0000 Subject: [PATCH 0935/1398] x86/retpoline/ftrace: Convert ftrace assembler indirect jumps commit 9351803bd803cdbeb9b5a7850b7b6f464806e3db upstream. Convert all indirect jumps in ftrace assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-8-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 5 +++-- arch/x86/kernel/mcount_64.S | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 7b95f35ba818..bdc9aeaf2e45 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -985,7 +985,8 @@ trace: movl 0x4(%ebp), %edx subl $MCOUNT_INSN_SIZE, %eax - call *ftrace_trace_function + movl ftrace_trace_function, %ecx + CALL_NOSPEC %ecx popl %edx popl %ecx @@ -1021,7 +1022,7 @@ return_to_handler: movl %eax, %ecx popl %edx popl %eax - jmp *%ecx + JMP_NOSPEC %ecx #endif #ifdef CONFIG_TRACING diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 7b0d3da52fb4..287ec3bc141f 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -8,7 +8,7 @@ #include #include #include - +#include .code64 .section .entry.text, "ax" @@ -290,8 +290,9 @@ trace: * ip and parent ip are used and the list function is called when * function tracing is enabled. */ - call *ftrace_trace_function + movq ftrace_trace_function, %r8 + CALL_NOSPEC %r8 restore_mcount_regs jmp fgraph_trace @@ -334,5 +335,5 @@ GLOBAL(return_to_handler) movq 8(%rsp), %rdx movq (%rsp), %rax addq $24, %rsp - jmp *%rdi + JMP_NOSPEC %rdi #endif -- GitLab From 9e37da4c3de1b6cca215a8515491de75360022ba Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:30 +0000 Subject: [PATCH 0936/1398] x86/retpoline/hyperv: Convert assembler indirect jumps commit e70e5892b28c18f517f29ab6e83bd57705104b31 upstream. Convert all indirect jumps in hyperv inline asm code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-9-git-send-email-dwmw@amazon.co.uk [ backport to 4.9, hopefully correct, not tested... - gregkh ] Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index e0a8216ecf2b..13c32eb40738 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "hyperv_vmbus.h" /* The one and only */ @@ -103,9 +104,10 @@ u64 hv_do_hypercall(u64 control, void *input, void *output) return (u64)ULLONG_MAX; __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8"); - __asm__ __volatile__("call *%3" : "=a" (hv_status) : + __asm__ __volatile__(CALL_NOSPEC : + "=a" (hv_status) : "c" (control), "d" (input_address), - "m" (hypercall_page)); + THUNK_TARGET(hypercall_page)); return hv_status; @@ -123,11 +125,12 @@ u64 hv_do_hypercall(u64 control, void *input, void *output) if (!hypercall_page) return (u64)ULLONG_MAX; - __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi), + __asm__ __volatile__ (CALL_NOSPEC : "=d"(hv_status_hi), "=a"(hv_status_lo) : "d" (control_hi), "a" (control_lo), "b" (input_address_hi), "c" (input_address_lo), "D"(output_address_hi), - "S"(output_address_lo), "m" (hypercall_page)); + "S"(output_address_lo), + THUNK_TARGET(hypercall_page)); return hv_status_lo | ((u64)hv_status_hi << 32); #endif /* !x86_64 */ -- GitLab From 87a1fe36250d65e0bffb0199ebd144ea80f87cd7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:31 +0000 Subject: [PATCH 0937/1398] x86/retpoline/xen: Convert Xen hypercall indirect jumps commit ea08816d5b185ab3d09e95e393f265af54560350 upstream. Convert indirect call in Xen hypercall to use non-speculative sequence, when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Reviewed-by: Juergen Gross Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-10-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/xen/hypercall.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 8b678af866f7..ccdc23d89b60 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -216,9 +217,9 @@ privcmd_call(unsigned call, __HYPERCALL_5ARG(a1, a2, a3, a4, a5); stac(); - asm volatile("call *%[call]" + asm volatile(CALL_NOSPEC : __HYPERCALL_5PARAM - : [call] "a" (&hypercall_page[call]) + : [thunk_target] "a" (&hypercall_page[call]) : __HYPERCALL_CLOBBER5); clac(); -- GitLab From a590960ae6ea94a8b35b5abd52ba680dba62db66 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:32 +0000 Subject: [PATCH 0938/1398] x86/retpoline/checksum32: Convert assembler indirect jumps commit 5096732f6f695001fa2d6f1335a2680b37912c69 upstream. Convert all indirect jumps in 32bit checksum assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-11-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/checksum_32.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 4d34bb548b41..46e71a74e612 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -29,7 +29,8 @@ #include #include #include - +#include + /* * computes a partial checksum, e.g. for TCP/UDP fragments */ @@ -156,7 +157,7 @@ ENTRY(csum_partial) negl %ebx lea 45f(%ebx,%ebx,2), %ebx testl %esi, %esi - jmp *%ebx + JMP_NOSPEC %ebx # Handle 2-byte-aligned regions 20: addw (%esi), %ax @@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic) andl $-32,%edx lea 3f(%ebx,%ebx), %ebx testl %esi, %esi - jmp *%ebx + JMP_NOSPEC %ebx 1: addl $64,%esi addl $64,%edi SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) -- GitLab From 276e300447109c92a4f807d4ed7da01c5f590568 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 11 Jan 2018 21:46:33 +0000 Subject: [PATCH 0939/1398] x86/retpoline/irq32: Convert assembler indirect jumps commit 7614e913db1f40fff819b36216484dc3808995d4 upstream. Convert all indirect jumps in 32bit irq inline asm code to use non speculative sequences. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-12-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/irq_32.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index d4eb450144fd..2763573ee1d2 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -19,6 +19,7 @@ #include #include +#include #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -54,11 +55,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack); static void call_on_stack(void *func, void *stack) { asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" + CALL_NOSPEC "movl %%ebx,%%esp \n" : "=b" (stack) : "0" (stack), - "D"(func) + [thunk_target] "D"(func) : "memory", "cc", "edx", "ecx", "eax"); } @@ -94,11 +95,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) call_on_stack(print_stack_overflow, isp); asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" + CALL_NOSPEC "movl %%ebx,%%esp \n" : "=a" (arg1), "=b" (isp) : "0" (desc), "1" (isp), - "D" (desc->handle_irq) + [thunk_target] "D" (desc->handle_irq) : "memory", "cc", "ecx"); return 1; } -- GitLab From c1ddd99a029636e234a800f28790a60d6ac0318f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 12 Jan 2018 11:11:27 +0000 Subject: [PATCH 0940/1398] x86/retpoline: Fill return stack buffer on vmexit commit 117cc7a908c83697b0b737d15ae1eb5943afe35b upstream. In accordance with the Intel and AMD documentation, we need to overwrite all entries in the RSB on exiting a guest, to prevent malicious branch target predictions from affecting the host kernel. This is needed both for retpoline and for IBRS. [ak: numbers again for the RSB stuffing labels] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Tested-by: Peter Zijlstra (Intel) Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515755487-8524-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 78 +++++++++++++++++++++++++++- arch/x86/kvm/svm.c | 4 ++ arch/x86/kvm/vmx.c | 4 ++ 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index ea034fa6e261..402a11c803c3 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -7,6 +7,48 @@ #include #include +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * We define a CPP macro such that it can be used from both .S files and + * inline assembly. It's possible to do a .macro and then include that + * from C via asm(".include ") but let's not go there. + */ + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +/* + * Google experimented with loop-unrolling and this turned out to be + * the optimal version — two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr, sp) \ + mov $(nr/2), reg; \ +771: \ + call 772f; \ +773: /* speculation trap */ \ + pause; \ + jmp 773b; \ +772: \ + call 774f; \ +775: /* speculation trap */ \ + pause; \ + jmp 775b; \ +774: \ + dec reg; \ + jnz 771b; \ + add $(BITS_PER_LONG/8) * nr, sp; + #ifdef __ASSEMBLY__ /* @@ -74,6 +116,20 @@ #else call *\reg #endif +.endm + + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. + */ +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ + \ftr +.Lskip_rsb_\@: +#endif .endm #else /* __ASSEMBLY__ */ @@ -119,7 +175,7 @@ X86_FEATURE_RETPOLINE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) -#else /* No retpoline */ +#else /* No retpoline for C / inline asm */ # define CALL_NOSPEC "call *%[thunk_target]\n" # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif @@ -134,5 +190,25 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +/* + * On VMEXIT we must ensure that no RSB predictions learned in the guest + * can be followed in the host, by overwriting the RSB completely. Both + * retpoline and IBRS mitigations for Spectre v2 need this; only on future + * CPUs with IBRS_ATT *might* it be avoided. + */ +static inline void vmexit_fill_RSB(void) +{ +#ifdef CONFIG_RETPOLINE + unsigned long loops = RSB_CLEAR_LOOPS / 2; + + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE("jmp 910f", + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), + X86_FEATURE_RETPOLINE) + "910:" + : "=&r" (loops), ASM_CALL_CONSTRAINT + : "r" (loops) : "memory" ); +#endif +} #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8d96f9ce1926..24af898fb3a6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "trace.h" @@ -4917,6 +4918,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, svm->host.gs_base); #else diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ab6605425497..3ca6d15994e4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "trace.h" #include "pmu.h" @@ -9026,6 +9027,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ if (debugctlmsr) update_debugctlmsr(debugctlmsr); -- GitLab From c05d544d53437de40cdc2d1fa5297dfda558bcdb Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 11 Jan 2018 17:16:51 -0800 Subject: [PATCH 0941/1398] selftests/x86: Add test_vsyscall commit 352909b49ba0d74929b96af6dfbefc854ab6ebb5 upstream. This tests that the vsyscall entries do what they're expected to do. It also confirms that attempts to read the vsyscall page behave as expected. If changes are made to the vsyscall code or its memory map handling, running this test in all three of vsyscall=none, vsyscall=emulate, and vsyscall=native are helpful. (Because it's easy, this also compares the vsyscall results to their vDSO equivalents.) Note to KAISER backporters: please test this under all three vsyscall modes. Also, in the emulate and native modes, make sure that test_vsyscall_64 agrees with the command line or config option as to which mode you're in. It's quite easy to mess up the kernel such that native mode accidentally emulates or vice versa. Greg, etc: please backport this to all your Meltdown-patched kernels. It'll help make sure the patches didn't regress vsyscalls. CSigned-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/2b9c5a174c1d60fd7774461d518aa75598b1d8fd.1515719552.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/test_vsyscall.c | 500 ++++++++++++++++++++ 2 files changed, 501 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/test_vsyscall.c diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 6300c1a41ff6..4af37bfe4aea 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -6,7 +6,7 @@ include ../lib.mk TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \ - protection_keys + protection_keys test_vsyscall TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c new file mode 100644 index 000000000000..6e0bd52ad53d --- /dev/null +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -0,0 +1,500 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __x86_64__ +# define VSYS(x) (x) +#else +# define VSYS(x) 0 +#endif + +#ifndef SYS_getcpu +# ifdef __x86_64__ +# define SYS_getcpu 309 +# else +# define SYS_getcpu 318 +# endif +#endif + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +/* vsyscalls and vDSO */ +bool should_read_vsyscall = false; + +typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); +gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000); +gtod_t vdso_gtod; + +typedef int (*vgettime_t)(clockid_t, struct timespec *); +vgettime_t vdso_gettime; + +typedef long (*time_func_t)(time_t *t); +time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400); +time_func_t vdso_time; + +typedef long (*getcpu_t)(unsigned *, unsigned *, void *); +getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); +getcpu_t vdso_getcpu; + +static void init_vdso(void) +{ + void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) { + printf("[WARN]\tfailed to find vDSO\n"); + return; + } + + vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday"); + if (!vdso_gtod) + printf("[WARN]\tfailed to find gettimeofday in vDSO\n"); + + vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); + if (!vdso_gettime) + printf("[WARN]\tfailed to find clock_gettime in vDSO\n"); + + vdso_time = (time_func_t)dlsym(vdso, "__vdso_time"); + if (!vdso_time) + printf("[WARN]\tfailed to find time in vDSO\n"); + + vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); + if (!vdso_getcpu) { + /* getcpu() was never wired up in the 32-bit vDSO. */ + printf("[%s]\tfailed to find getcpu in vDSO\n", + sizeof(long) == 8 ? "WARN" : "NOTE"); + } +} + +static int init_vsys(void) +{ +#ifdef __x86_64__ + int nerrs = 0; + FILE *maps; + char line[128]; + bool found = false; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) { + printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n"); + should_read_vsyscall = true; + return 0; + } + + while (fgets(line, sizeof(line), maps)) { + char r, x; + void *start, *end; + char name[128]; + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", + &start, &end, &r, &x, name) != 5) + continue; + + if (strcmp(name, "[vsyscall]")) + continue; + + printf("\tvsyscall map: %s", line); + + if (start != (void *)0xffffffffff600000 || + end != (void *)0xffffffffff601000) { + printf("[FAIL]\taddress range is nonsense\n"); + nerrs++; + } + + printf("\tvsyscall permissions are %c-%c\n", r, x); + should_read_vsyscall = (r == 'r'); + if (x != 'x') { + vgtod = NULL; + vtime = NULL; + vgetcpu = NULL; + } + + found = true; + break; + } + + fclose(maps); + + if (!found) { + printf("\tno vsyscall map in /proc/self/maps\n"); + should_read_vsyscall = false; + vgtod = NULL; + vtime = NULL; + vgetcpu = NULL; + } + + return nerrs; +#else + return 0; +#endif +} + +/* syscalls */ +static inline long sys_gtod(struct timeval *tv, struct timezone *tz) +{ + return syscall(SYS_gettimeofday, tv, tz); +} + +static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) +{ + return syscall(SYS_clock_gettime, id, ts); +} + +static inline long sys_time(time_t *t) +{ + return syscall(SYS_time, t); +} + +static inline long sys_getcpu(unsigned * cpu, unsigned * node, + void* cache) +{ + return syscall(SYS_getcpu, cpu, node, cache); +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +static double tv_diff(const struct timeval *a, const struct timeval *b) +{ + return (double)(a->tv_sec - b->tv_sec) + + (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6; +} + +static int check_gtod(const struct timeval *tv_sys1, + const struct timeval *tv_sys2, + const struct timezone *tz_sys, + const char *which, + const struct timeval *tv_other, + const struct timezone *tz_other) +{ + int nerrs = 0; + double d1, d2; + + if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) { + printf("[FAIL] %s tz mismatch\n", which); + nerrs++; + } + + d1 = tv_diff(tv_other, tv_sys1); + d2 = tv_diff(tv_sys2, tv_other); + printf("\t%s time offsets: %lf %lf\n", which, d1, d2); + + if (d1 < 0 || d2 < 0) { + printf("[FAIL]\t%s time was inconsistent with the syscall\n", which); + nerrs++; + } else { + printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which); + } + + return nerrs; +} + +static int test_gtod(void) +{ + struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys; + struct timezone tz_sys, tz_vdso, tz_vsys; + long ret_vdso = -1; + long ret_vsys = -1; + int nerrs = 0; + + printf("[RUN]\ttest gettimeofday()\n"); + + if (sys_gtod(&tv_sys1, &tz_sys) != 0) + err(1, "syscall gettimeofday"); + if (vdso_gtod) + ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso); + if (vgtod) + ret_vsys = vgtod(&tv_vsys, &tz_vsys); + if (sys_gtod(&tv_sys2, &tz_sys) != 0) + err(1, "syscall gettimeofday"); + + if (vdso_gtod) { + if (ret_vdso == 0) { + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso); + } else { + printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso); + nerrs++; + } + } + + if (vgtod) { + if (ret_vsys == 0) { + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys); + } else { + printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys); + nerrs++; + } + } + + return nerrs; +} + +static int test_time(void) { + int nerrs = 0; + + printf("[RUN]\ttest time()\n"); + long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0; + long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1; + t_sys1 = sys_time(&t2_sys1); + if (vdso_time) + t_vdso = vdso_time(&t2_vdso); + if (vtime) + t_vsys = vtime(&t2_vsys); + t_sys2 = sys_time(&t2_sys2); + if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) { + printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2); + nerrs++; + return nerrs; + } + + if (vdso_time) { + if (t_vdso < 0 || t_vdso != t2_vdso) { + printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso); + nerrs++; + } else if (t_vdso < t_sys1 || t_vdso > t_sys2) { + printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2); + nerrs++; + } else { + printf("[OK]\tvDSO time() is okay\n"); + } + } + + if (vtime) { + if (t_vsys < 0 || t_vsys != t2_vsys) { + printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys); + nerrs++; + } else if (t_vsys < t_sys1 || t_vsys > t_sys2) { + printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2); + nerrs++; + } else { + printf("[OK]\tvsyscall time() is okay\n"); + } + } + + return nerrs; +} + +static int test_getcpu(int cpu) +{ + int nerrs = 0; + long ret_sys, ret_vdso = -1, ret_vsys = -1; + + printf("[RUN]\tgetcpu() on CPU %d\n", cpu); + + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { + printf("[SKIP]\tfailed to force CPU %d\n", cpu); + return nerrs; + } + + unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys; + unsigned node = 0; + bool have_node = false; + ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); + if (vdso_getcpu) + ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); + if (vgetcpu) + ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); + + if (ret_sys == 0) { + if (cpu_sys != cpu) { + printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu); + nerrs++; + } + + have_node = true; + node = node_sys; + } + + if (vdso_getcpu) { + if (ret_vdso) { + printf("[FAIL]\tvDSO getcpu() failed\n"); + nerrs++; + } else { + if (!have_node) { + have_node = true; + node = node_vdso; + } + + if (cpu_vdso != cpu) { + printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu); + nerrs++; + } else { + printf("[OK]\tvDSO reported correct CPU\n"); + } + + if (node_vdso != node) { + printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node); + nerrs++; + } else { + printf("[OK]\tvDSO reported correct node\n"); + } + } + } + + if (vgetcpu) { + if (ret_vsys) { + printf("[FAIL]\tvsyscall getcpu() failed\n"); + nerrs++; + } else { + if (!have_node) { + have_node = true; + node = node_vsys; + } + + if (cpu_vsys != cpu) { + printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu); + nerrs++; + } else { + printf("[OK]\tvsyscall reported correct CPU\n"); + } + + if (node_vsys != node) { + printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node); + nerrs++; + } else { + printf("[OK]\tvsyscall reported correct node\n"); + } + } + } + + return nerrs; +} + +static int test_vsys_r(void) +{ +#ifdef __x86_64__ + printf("[RUN]\tChecking read access to the vsyscall page\n"); + bool can_read; + if (sigsetjmp(jmpbuf, 1) == 0) { + *(volatile int *)0xffffffffff600000; + can_read = true; + } else { + can_read = false; + } + + if (can_read && !should_read_vsyscall) { + printf("[FAIL]\tWe have read access, but we shouldn't\n"); + return 1; + } else if (!can_read && should_read_vsyscall) { + printf("[FAIL]\tWe don't have read access, but we should\n"); + return 1; + } else { + printf("[OK]\tgot expected result\n"); + } +#endif + + return 0; +} + + +#ifdef __x86_64__ +#define X86_EFLAGS_TF (1UL << 8) +static volatile sig_atomic_t num_vsyscall_traps; + +static unsigned long get_eflags(void) +{ + unsigned long eflags; + asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags)); + return eflags; +} + +static void set_eflags(unsigned long eflags) +{ + asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags"); +} + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP]; + + if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0) + num_vsyscall_traps++; +} + +static int test_native_vsyscall(void) +{ + time_t tmp; + bool is_native; + + if (!vtime) + return 0; + + printf("[RUN]\tchecking for native vsyscall\n"); + sethandler(SIGTRAP, sigtrap, 0); + set_eflags(get_eflags() | X86_EFLAGS_TF); + vtime(&tmp); + set_eflags(get_eflags() & ~X86_EFLAGS_TF); + + /* + * If vsyscalls are emulated, we expect a single trap in the + * vsyscall page -- the call instruction will trap with RIP + * pointing to the entry point before emulation takes over. + * In native mode, we expect two traps, since whatever code + * the vsyscall page contains will be more than just a ret + * instruction. + */ + is_native = (num_vsyscall_traps > 1); + + printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n", + (is_native ? "native" : "emulated"), + (int)num_vsyscall_traps); + + return 0; +} +#endif + +int main(int argc, char **argv) +{ + int nerrs = 0; + + init_vdso(); + nerrs += init_vsys(); + + nerrs += test_gtod(); + nerrs += test_time(); + nerrs += test_getcpu(0); + nerrs += test_getcpu(1); + + sethandler(SIGSEGV, sigsegv, 0); + nerrs += test_vsys_r(); + +#ifdef __x86_64__ + nerrs += test_native_vsyscall(); +#endif + + return nerrs ? 1 : 0; +} -- GitLab From 44f1eae7fe6597381ce550027fdfa2e578a96ad8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Jan 2018 22:13:29 +0100 Subject: [PATCH 0942/1398] x86/retpoline: Remove compile time warning commit b8b9ce4b5aec8de9e23cabb0a26b78641f9ab1d6 upstream. Remove the compile time warning when CONFIG_RETPOLINE=y and the compiler does not have retpoline support. Linus rationale for this is: It's wrong because it will just make people turn off RETPOLINE, and the asm updates - and return stack clearing - that are independent of the compiler are likely the most important parts because they are likely the ones easiest to target. And it's annoying because most people won't be able to do anything about it. The number of people building their own compiler? Very small. So if their distro hasn't got a compiler yet (and pretty much nobody does), the warning is just annoying crap. It is already properly reported as part of the sysfs interface. The compile-time warning only encourages bad things. Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support") Requested-by: Linus Torvalds Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: Peter Zijlstra (Intel) Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/CA+55aFzWgquv4i6Mab6bASqYXg3ErV3XDFEYf=GEcCDQg5uAtw@mail.gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1e1a7334db0f..cd22cb8ebd42 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -187,8 +187,6 @@ ifdef CONFIG_RETPOLINE RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) ifneq ($(RETPOLINE_CFLAGS),) KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE - else - $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.) endif endif -- GitLab From 92e8f204947484f642b77836c0ce9f710d83f836 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 15 Jan 2018 11:00:54 -0600 Subject: [PATCH 0943/1398] objtool: Fix retpoline support for pre-ORC objtool Objtool 1.0 (pre-ORC) produces the following warning when it encounters a retpoline: arch/x86/crypto/camellia-aesni-avx2-asm_64.o: warning: objtool: .altinstr_replacement+0xf: return instruction outside of a callable function That warning is meant to catch GCC bugs and missing ENTRY/ENDPROC annotations, neither of which are applicable to alternatives. Silence the warning for alternative instructions, just like objtool 2.0 already does. Reported-by: David Woodhouse Signed-off-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- tools/objtool/builtin-check.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index f789621cbdba..a688a857a7ae 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -1230,6 +1230,14 @@ static int validate_uncallable_instructions(struct objtool_file *file) for_each_insn(file, insn) { if (!insn->visited && insn->type == INSN_RETURN) { + + /* + * Don't warn about call instructions in unvisited + * retpoline alternatives. + */ + if (!strcmp(insn->sec->name, ".altinstr_replacement")) + continue; + WARN_FUNC("return instruction outside of a callable function", insn->sec, insn->offset); warnings++; -- GitLab From 1b92c48a2eeb8e4c77a27c93052ba95dc6e1cdda Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Mon, 15 Jan 2018 11:44:14 -0500 Subject: [PATCH 0944/1398] x86/pti/efi: broken conversion from efi to kernel page table The page table order must be increased for EFI table in order to avoid a bug where NMI tries to change the page table to kernel page table, while efi page table is active. For more disccussion about this bug, see this thread: http://lkml.iu.edu/hypermail/linux/kernel/1801.1/00951.html Signed-off-by: Pavel Tatashin Reviewed-by: Steven Sistare Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgalloc.h | 11 +++++++++++ arch/x86/mm/pgtable.c | 7 ------- arch/x86/platform/efi/efi_64.c | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index b6d425999f99..1178a51b77f3 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -27,6 +27,17 @@ static inline void paravirt_release_pud(unsigned long pfn) {} */ extern gfp_t __userpte_alloc_gfp; +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * Instead of one PGD, we acquire two PGDs. Being order-1, it is + * both 8k in size and 8k-aligned. That lets us just flip bit 12 + * in a pointer to swap between the two 4k halves. + */ +#define PGD_ALLOCATION_ORDER 1 +#else +#define PGD_ALLOCATION_ORDER 0 +#endif + /* * Allocate and free page tables. */ diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5aaec8effc5f..209b9465e97a 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -345,13 +345,6 @@ static inline void _pgd_free(pgd_t *pgd) } #else -/* - * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is - * both 8k in size and 8k-aligned. That lets us just flip bit 12 - * in a pointer to swap between the two 4k halves. - */ -#define PGD_ALLOCATION_ORDER kaiser_enabled - static inline pgd_t *_pgd_alloc(void) { return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 2f25a363068c..dcb2d9d185a2 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -142,7 +142,7 @@ int __init efi_alloc_page_tables(void) return 0; gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; - efi_pgd = (pgd_t *)__get_free_page(gfp_mask); + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); if (!efi_pgd) return -ENOMEM; -- GitLab From b8cf9ff79d63a3ac5567bd7cbb9445bfed193f4d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 17 Jan 2018 09:39:00 +0100 Subject: [PATCH 0945/1398] Linux 4.9.77 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2637f0ed0a07..aba553531d6a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 76 +SUBLEVEL = 77 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 95bfec64e9323ecce2da4ca312e50aba56a1514f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 Jan 2018 18:19:39 +0000 Subject: [PATCH 0946/1398] FROMLIST: arm64: Move post_ttbr_update_workaround to C code We will soon need to invoke a CPU-specific function pointer after changing page tables, so move post_ttbr_update_workaround out into C code to make this possible. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon (cherry picked from git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git commit 400a169447ad2268b023637a118fba27246bcc19) Change-Id: I4e6edb3dcb6aabe9c17e4698619a093e76495b36 Signed-off-by: Greg Hackmann --- arch/arm64/include/asm/assembler.h | 13 ------------- arch/arm64/kernel/entry.S | 2 +- arch/arm64/mm/context.c | 9 +++++++++ arch/arm64/mm/proc.S | 3 +-- 4 files changed, 11 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index eb9fad288b04..e008b76cff0b 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -429,17 +429,4 @@ alternative_endif mrs \rd, sp_el0 .endm -/* - * Errata workaround post TTBRx_EL1 update. - */ - .macro post_ttbr_update_workaround -#ifdef CONFIG_CAVIUM_ERRATUM_27456 -alternative_if ARM64_WORKAROUND_CAVIUM_27456 - ic iallu - dsb nsh - isb -alternative_else_nop_endif -#endif - .endm - #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3448a6250ac2..dafa13766f91 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -224,7 +224,7 @@ alternative_else_nop_endif * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache * corruption). */ - post_ttbr_update_workaround + bl post_ttbr_update_workaround .endif 1: .if \el != 0 diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 05a885c14c83..d45dea9f14fc 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -238,6 +238,15 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) cpu_switch_mm(mm->pgd, mm); } +/* Errata workaround post TTBRx_EL1 update. */ +asmlinkage void post_ttbr_update_workaround(void) +{ + asm(ALTERNATIVE("nop; nop; nop", + "ic iallu; dsb nsh; isb", + ARM64_WORKAROUND_CAVIUM_27456, + CONFIG_CAVIUM_ERRATUM_27456)); +} + static int asids_init(void) { asid_bits = get_cpu_asid_bits(); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c1e868a036b5..9a2d800c33da 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -139,8 +139,7 @@ ENTRY(cpu_do_switch_mm) isb msr ttbr0_el1, x0 // now update TTBR0 isb - post_ttbr_update_workaround - ret + b post_ttbr_update_workaround // Back to C code... ENDPROC(cpu_do_switch_mm) .pushsection ".idmap.text", "ax" -- GitLab From 87883134eb7130d6677ef5449e8c36f42a729c78 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 10 Jan 2018 13:18:30 +0000 Subject: [PATCH 0947/1398] FROMLIST: arm64: kpti: Fix the interaction between ASID switching and software PAN With ARM64_SW_TTBR0_PAN enabled, the exception entry code checks the active ASID to decide whether user access was enabled (non-zero ASID) when the exception was taken. On return from exception, if user access was previously disabled, it re-instates TTBR0_EL1 from the per-thread saved value (updated in switch_mm() or efi_set_pgd()). Commit 7655abb95386 ("arm64: mm: Move ASID from TTBR0 to TTBR1") makes a TTBR0_EL1 + ASID switching non-atomic. Subsequently, commit 27a921e75711 ("arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN") changes the __uaccess_ttbr0_disable() function and asm macro to first write the reserved TTBR0_EL1 followed by the ASID=0 update in TTBR1_EL1. If an exception occurs between these two, the exception return code will re-instate a valid TTBR0_EL1. Similar scenario can happen in cpu_switch_mm() between setting the reserved TTBR0_EL1 and the ASID update in cpu_do_switch_mm(). This patch reverts the entry.S check for ASID == 0 to TTBR0_EL1 and disables the interrupts around the TTBR0_EL1 and ASID switching code in __uaccess_ttbr0_disable(). It also ensures that, when returning from the EFI runtime services, efi_set_pgd() doesn't leave a non-zero ASID in TTBR1_EL1 by using uaccess_ttbr0_{enable,disable}. The accesses to current_thread_info()->ttbr0 are updated to use READ_ONCE/WRITE_ONCE. As a safety measure, __uaccess_ttbr0_enable() always masks out any existing non-zero ASID TTBR1_EL1 before writing in the new ASID. Fixes: 27a921e75711 ("arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN") Acked-by: Will Deacon Reported-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Reviewed-by: James Morse Tested-by: James Morse Co-developed-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git commit 6b88a32c7af68895134872cdec3b6bfdb532d94e) Change-Id: Idc1ff3224449f6c24ae5ad9c5eee8bfa0784cb23 [ghackmann@google.com: - adjust context - apply asm-uaccess.h changes to uaccess.h] Signed-off-by: Greg Hackmann --- arch/arm64/include/asm/efi.h | 12 +++++++----- arch/arm64/include/asm/mmu_context.h | 6 ++++-- arch/arm64/include/asm/uaccess.h | 21 +++++++++++++-------- arch/arm64/kernel/entry.S | 2 +- arch/arm64/lib/clear_user.S | 2 +- arch/arm64/lib/copy_from_user.S | 2 +- arch/arm64/lib/copy_in_user.S | 2 +- arch/arm64/lib/copy_to_user.S | 2 +- arch/arm64/mm/cache.S | 2 +- arch/arm64/mm/proc.S | 3 +++ arch/arm64/xen/hypercall.S | 2 +- 11 files changed, 34 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 7278ce729cd6..6c4f9e8febf3 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -85,12 +85,14 @@ static inline void efi_set_pgd(struct mm_struct *mm) if (mm != current->active_mm) { /* * Update the current thread's saved ttbr0 since it is - * restored as part of a return from exception. Set - * the hardware TTBR0_EL1 using cpu_switch_mm() - * directly to enable potential errata workarounds. + * restored as part of a return from exception. Enable + * access to the valid TTBR0_EL1 and invoke the errata + * workaround directly since there is no return from + * exception when invoking the EFI run-time services. */ update_saved_ttbr0(current, mm); - cpu_switch_mm(mm->pgd, mm); + uaccess_ttbr0_enable(); + post_ttbr_update_workaround(); } else { /* * Defer the switch to the current thread's TTBR0_EL1 @@ -98,7 +100,7 @@ static inline void efi_set_pgd(struct mm_struct *mm) * thread's saved ttbr0 corresponding to its active_mm * (if different from init_mm). */ - cpu_set_reserved_ttbr0(); + uaccess_ttbr0_disable(); if (current->active_mm != &init_mm) update_saved_ttbr0(current, current->active_mm); } diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 3c2ff5183d48..6ad61e7d9725 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -176,9 +176,10 @@ static inline void update_saved_ttbr0(struct task_struct *tsk, struct mm_struct *mm) { if (system_uses_ttbr0_pan()) { + u64 ttbr; BUG_ON(mm->pgd == swapper_pg_dir); - task_thread_info(tsk)->ttbr0 = - virt_to_phys(mm->pgd) | ASID(mm) << 48; + ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48; + WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr); } } #else @@ -226,5 +227,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, #define activate_mm(prev,next) switch_mm(prev, next, current) void verify_cpu_asid_bits(void); +void post_ttbr_update_workaround(void); #endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index c8af98bf9961..95cbadeee3d5 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -131,16 +131,18 @@ static inline void set_fs(mm_segment_t fs) #ifdef CONFIG_ARM64_SW_TTBR0_PAN static inline void __uaccess_ttbr0_disable(void) { - unsigned long ttbr; + unsigned long flags, ttbr; + local_irq_save(flags); ttbr = read_sysreg(ttbr1_el1); + ttbr &= ~TTBR_ASID_MASK; /* reserved_ttbr0 placed at the end of swapper_pg_dir */ write_sysreg(ttbr + SWAPPER_DIR_SIZE, ttbr0_el1); isb(); /* Set reserved ASID */ - ttbr &= ~TTBR_ASID_MASK; write_sysreg(ttbr, ttbr1_el1); isb(); + local_irq_restore(flags); } static inline void __uaccess_ttbr0_enable(void) @@ -153,10 +155,11 @@ static inline void __uaccess_ttbr0_enable(void) * roll-over and an update of 'ttbr0'. */ local_irq_save(flags); - ttbr0 = current_thread_info()->ttbr0; + ttbr0 = READ_ONCE(current_thread_info()->ttbr0); /* Restore active ASID */ ttbr1 = read_sysreg(ttbr1_el1); + ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */ ttbr1 |= ttbr0 & TTBR_ASID_MASK; write_sysreg(ttbr1, ttbr1_el1); isb(); @@ -449,11 +452,11 @@ extern __must_check long strnlen_user(const char __user *str, long n); #ifdef CONFIG_ARM64_SW_TTBR0_PAN .macro __uaccess_ttbr0_disable, tmp1 mrs \tmp1, ttbr1_el1 // swapper_pg_dir + bic \tmp1, \tmp1, #TTBR_ASID_MASK add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 isb sub \tmp1, \tmp1, #SWAPPER_DIR_SIZE - bic \tmp1, \tmp1, #TTBR_ASID_MASK msr ttbr1_el1, \tmp1 // set reserved ASID isb .endm @@ -470,9 +473,11 @@ extern __must_check long strnlen_user(const char __user *str, long n); isb .endm - .macro uaccess_ttbr0_disable, tmp1 + .macro uaccess_ttbr0_disable, tmp1, tmp2 alternative_if_not ARM64_HAS_PAN + save_and_disable_irq \tmp2 // avoid preemption __uaccess_ttbr0_disable \tmp1 + restore_irq \tmp2 alternative_else_nop_endif .endm @@ -484,7 +489,7 @@ alternative_if_not ARM64_HAS_PAN alternative_else_nop_endif .endm #else - .macro uaccess_ttbr0_disable, tmp1 + .macro uaccess_ttbr0_disable, tmp1, tmp2 .endm .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3 @@ -494,8 +499,8 @@ alternative_else_nop_endif /* * These macros are no-ops when UAO is present. */ - .macro uaccess_disable_not_uao, tmp1 - uaccess_ttbr0_disable \tmp1 + .macro uaccess_disable_not_uao, tmp1, tmp2 + uaccess_ttbr0_disable \tmp1, \tmp2 alternative_if ARM64_ALT_PAN_NOT_UAO SET_PSTATE_PAN(1) alternative_else_nop_endif diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index dafa13766f91..32de48466ed0 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -151,7 +151,7 @@ alternative_if ARM64_HAS_PAN alternative_else_nop_endif .if \el != 0 - mrs x21, ttbr1_el1 + mrs x21, ttbr0_el1 tst x21, #TTBR_ASID_MASK // Check for the reserved ASID orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR b.eq 1f // TTBR0 access already disabled diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index dd65ca253eb4..07c7ad97ee28 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -50,7 +50,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 b.mi 5f uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 - uaccess_disable_not_uao x2 + uaccess_disable_not_uao x2, x3 ret ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 7e7e687c17ac..c7a7d9689e8f 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -67,7 +67,7 @@ ENTRY(__arch_copy_from_user) uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3 + uaccess_disable_not_uao x3, x4 mov x0, #0 // Nothing to copy ret ENDPROC(__arch_copy_from_user) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 074d52fcd75b..e8bfaf19f778 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -68,7 +68,7 @@ ENTRY(__copy_in_user) uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3 + uaccess_disable_not_uao x3, x4 mov x0, #0 ret ENDPROC(__copy_in_user) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 67118444cde0..f6cfcc0441de 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -66,7 +66,7 @@ ENTRY(__arch_copy_to_user) uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3 + uaccess_disable_not_uao x3, x4 mov x0, #0 ret ENDPROC(__arch_copy_to_user) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 74404536d704..82ecb6c5f015 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -72,7 +72,7 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU isb mov x0, #0 1: - uaccess_ttbr0_disable x1 + uaccess_ttbr0_disable x1, x2 ret 9: mov x0, #-EFAULT diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 9a2d800c33da..89e3d95e13bd 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -134,6 +134,9 @@ ENDPROC(cpu_do_resume) ENTRY(cpu_do_switch_mm) mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + bfi x0, x1, #48, #16 // set the ASID field in TTBR0 +#endif bfi x2, x1, #48, #16 // set the ASID msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set) isb diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index f5422520cb01..69711f24b743 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -106,6 +106,6 @@ ENTRY(privcmd_call) /* * Disable userspace access from kernel once the hyp call completed. */ - uaccess_ttbr0_disable x6 + uaccess_ttbr0_disable x6, x7 ret ENDPROC(privcmd_call); -- GitLab From 767826629337c9cb9cd0a07469270af4914aa2a2 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Tue, 20 Jun 2017 19:06:40 -0700 Subject: [PATCH 0948/1398] UPSTREAM: tcp: Add a tcp_filter hook before handle ack packet Currently in both ipv4 and ipv6 code path, the ack packet received when sk at TCP_NEW_SYN_RECV state is not filtered by socket filter or cgroup filter since it is handled from tcp_child_process and never reaches the tcp_filter inside tcp_v4_rcv or tcp_v6_rcv. Adding a tcp_filter hooks here can make sure all the ingress tcp packet can be correctly filtered. Signed-off-by: Chenbo Feng Signed-off-by: David S. Miller (cherry picked from commit 8fac365f63c866a00015fa13932d8ffc584518b8) Bug: 30950746 Change-Id: I9b687b5acab56aa3589f65925da64d47dc55492e --- net/ipv4/tcp_ipv4.c | 2 ++ net/ipv6/tcp_ipv6.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c19a4d26ec3a..e2af3e123ede 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1685,6 +1685,8 @@ int tcp_v4_rcv(struct sk_buff *skb) } if (nsk == sk) { reqsk_put(req); + } else if (tcp_filter(sk, skb)) { + goto discard_and_relse; } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); goto discard_and_relse; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 71b3f9681451..5ead08f43874 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1450,6 +1450,8 @@ static int tcp_v6_rcv(struct sk_buff *skb) if (nsk == sk) { reqsk_put(req); tcp_v6_restore_cb(skb); + } else if (tcp_filter(sk, skb)) { + goto discard_and_relse; } else if (tcp_child_process(sk, nsk, skb)) { tcp_v6_send_reset(nsk, skb); goto discard_and_relse; -- GitLab From 2c5c4ccd96a7bad68943141b9806d00659c8d2f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Aug 2017 17:44:43 -0700 Subject: [PATCH 0949/1398] UPSTREAM: tcp: fix possible deadlock in TCP stack vs BPF filter Filtering the ACK packet was not put at the right place. At this place, we already allocated a child and put it into accept queue. We absolutely need to call tcp_child_process() to release its spinlock, or we will deadlock at accept() or close() time. Found by syzkaller team (Thanks a lot !) Fixes: 8fac365f63c8 ("tcp: Add a tcp_filter hook before handle ack packet") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Chenbo Feng Signed-off-by: David S. Miller (cherry picked from commit d624d276d1ddacbcb12ad96832ce0c7b82cd25db) Bug: 64616244 Bug: 30950746 Change-Id: I9b0aa14252c9698fb7179ea7da3ada18b465f442 --- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e2af3e123ede..89811cd3e609 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1678,6 +1678,8 @@ int tcp_v4_rcv(struct sk_buff *skb) */ sock_hold(sk); refcounted = true; + if (tcp_filter(sk, skb)) + goto discard_and_relse; nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); @@ -1685,8 +1687,6 @@ int tcp_v4_rcv(struct sk_buff *skb) } if (nsk == sk) { reqsk_put(req); - } else if (tcp_filter(sk, skb)) { - goto discard_and_relse; } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); goto discard_and_relse; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5ead08f43874..1e90f1f4e301 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1442,6 +1442,8 @@ static int tcp_v6_rcv(struct sk_buff *skb) } sock_hold(sk); refcounted = true; + if (tcp_filter(sk, skb)) + goto discard_and_relse; nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); @@ -1450,8 +1452,6 @@ static int tcp_v6_rcv(struct sk_buff *skb) if (nsk == sk) { reqsk_put(req); tcp_v6_restore_cb(skb); - } else if (tcp_filter(sk, skb)) { - goto discard_and_relse; } else if (tcp_child_process(sk, nsk, skb)) { tcp_v6_send_reset(nsk, skb); goto discard_and_relse; -- GitLab From 9c3804bc9b62d936f1d48b0cc9a0144564ddcbcc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Sep 2017 12:44:47 -0700 Subject: [PATCH 0950/1398] UPSTREAM: tcp: fix a request socket leak While the cited commit fixed a possible deadlock, it added a leak of the request socket, since reqsk_put() must be called if the BPF filter decided the ACK packet must be dropped. Fixes: d624d276d1dd ("tcp: fix possible deadlock in TCP stack vs BPF filter") Signed-off-by: Eric Dumazet Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller (cherry picked from commit 1f3b359f1004bd34b7b0bad70b93e3c7af92a37b) Bug: 30950746 Change-Id: I59a1f0c70925ee4bd93ac277d785759958bd2b9e --- net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv6/tcp_ipv6.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 89811cd3e609..710d1af93efa 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1678,9 +1678,9 @@ int tcp_v4_rcv(struct sk_buff *skb) */ sock_hold(sk); refcounted = true; - if (tcp_filter(sk, skb)) - goto discard_and_relse; - nsk = tcp_check_req(sk, skb, req, false); + nsk = NULL; + if (!tcp_filter(sk, skb)) + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); goto discard_and_relse; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1e90f1f4e301..82b7a8e0d798 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1442,9 +1442,9 @@ static int tcp_v6_rcv(struct sk_buff *skb) } sock_hold(sk); refcounted = true; - if (tcp_filter(sk, skb)) - goto discard_and_relse; - nsk = tcp_check_req(sk, skb, req, false); + nsk = NULL; + if (!tcp_filter(sk, skb)) + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); goto discard_and_relse; -- GitLab From e12a9c4458ff887217ce44e4adb5681d317475aa Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 18 Jan 2018 16:17:16 -0800 Subject: [PATCH 0951/1398] ANDROID: sdcardfs: Move default_normal to superblock Moving default_normal from mount info to superblock info as it doesn't need to change between mount points. Signed-off-by: Daniel Rosenberg Bug: 72158116 Change-Id: I16c6a0577c601b4f7566269f7e189fcf697afd4e --- fs/sdcardfs/inode.c | 7 ++++--- fs/sdcardfs/main.c | 4 ++-- fs/sdcardfs/sdcardfs.h | 10 ++++++---- fs/sdcardfs/super.c | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 4a971e2ba406..e3a023aeb1e6 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -645,7 +645,7 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma */ copy_attrs(&tmp, inode); tmp.i_uid = make_kuid(&init_user_ns, top->d_uid); - tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, top)); + tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, inode->i_sb, top)); tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(inode), top); data_put(top); @@ -724,7 +724,7 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct */ copy_attrs(&tmp, inode); tmp.i_uid = make_kuid(&init_user_ns, top->d_uid); - tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, top)); + tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, dentry->d_sb, top)); tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(inode), top); tmp.i_size = i_size_read(inode); @@ -826,6 +826,7 @@ static int sdcardfs_fillattr(struct vfsmount *mnt, { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); struct sdcardfs_inode_data *top = top_data_get(info); + struct super_block *sb = inode->i_sb; if (!top) return -EINVAL; @@ -835,7 +836,7 @@ static int sdcardfs_fillattr(struct vfsmount *mnt, stat->mode = (inode->i_mode & S_IFMT) | get_mode(mnt, info, top); stat->nlink = inode->i_nlink; stat->uid = make_kuid(&init_user_ns, top->d_uid); - stat->gid = make_kgid(&init_user_ns, get_gid(mnt, top)); + stat->gid = make_kgid(&init_user_ns, get_gid(mnt, sb, top)); stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); stat->atime = inode->i_atime; diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index d5239b21cb8a..ac27bb301c5e 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -70,7 +70,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, opts->reserved_mb = 0; /* by default, gid derivation is off */ opts->gid_derivation = false; - vfsopts->default_normal = false; + opts->default_normal = false; *debug = 0; @@ -126,7 +126,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, opts->gid_derivation = true; break; case Opt_default_normal: - vfsopts->default_normal = true; + opts->default_normal = true; break; /* unknown option */ default: diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 365b302149c5..3da9fe94b772 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -220,13 +220,13 @@ struct sdcardfs_mount_options { userid_t fs_user_id; bool multiuser; bool gid_derivation; + bool default_normal; unsigned int reserved_mb; }; struct sdcardfs_vfsmount_options { gid_t gid; mode_t mask; - bool default_normal; }; extern int parse_options_remount(struct super_block *sb, char *options, int silent, @@ -414,11 +414,13 @@ static inline void set_top(struct sdcardfs_inode_info *info, } static inline int get_gid(struct vfsmount *mnt, + struct super_block *sb, struct sdcardfs_inode_data *data) { - struct sdcardfs_vfsmount_options *opts = mnt->data; + struct sdcardfs_vfsmount_options *vfsopts = mnt->data; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(sb); - if (opts->gid == AID_SDCARD_RW && !opts->default_normal) + if (vfsopts->gid == AID_SDCARD_RW && !sbi->options.default_normal) /* As an optimization, certain trusted system components only run * as owner but operate across all users. Since we're now handing * out the sdcard_rw GID only to trusted apps, we're okay relaxing @@ -427,7 +429,7 @@ static inline int get_gid(struct vfsmount *mnt, */ return AID_SDCARD_RW; else - return multiuser_get_uid(data->userid, opts->gid); + return multiuser_get_uid(data->userid, vfsopts->gid); } static inline int get_mode(struct vfsmount *mnt, diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index a28b40f5adc8..87d6f836592e 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -304,7 +304,7 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, seq_printf(m, ",userid=%u", opts->fs_user_id); if (opts->gid_derivation) seq_puts(m, ",derive_gid"); - if (vfsopts->default_normal) + if (opts->default_normal) seq_puts(m, ",default_normal"); if (opts->reserved_mb != 0) seq_printf(m, ",reserved=%uMB", opts->reserved_mb); -- GitLab From c9ca9d9d9b7968d5490415c87a17db4335f846c3 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Mon, 18 Dec 2017 09:28:39 -0700 Subject: [PATCH 0952/1398] libnvdimm, btt: Fix an incompatibility in the log layout commit 24e3a7fb60a9187e5df90e5fa655ffc94b9c4f77 upstream. Due to a spec misinterpretation, the Linux implementation of the BTT log area had different padding scheme from other implementations, such as UEFI and NVML. This fixes the padding scheme, and defaults to it for new BTT layouts. We attempt to detect the padding scheme in use when probing for an existing BTT. If we detect the older/incompatible scheme, we continue using it. Reported-by: Juston Li Cc: Dan Williams Cc: Fixes: 5212e11fde4d ("nd_btt: atomic sector updates") Signed-off-by: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/btt.c | 203 +++++++++++++++++++++++++++++++++++-------- drivers/nvdimm/btt.h | 45 +++++++++- 2 files changed, 212 insertions(+), 36 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 94733f73d37f..7121453ec047 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -183,13 +183,13 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, return ret; } -static int btt_log_read_pair(struct arena_info *arena, u32 lane, - struct log_entry *ent) +static int btt_log_group_read(struct arena_info *arena, u32 lane, + struct log_group *log) { - WARN_ON(!ent); + WARN_ON(!log); return arena_read_bytes(arena, - arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, - 2 * LOG_ENT_SIZE); + arena->logoff + (lane * LOG_GRP_SIZE), log, + LOG_GRP_SIZE); } static struct dentry *debugfs_root; @@ -229,6 +229,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); debugfs_create_x32("flags", S_IRUGO, d, &a->flags); + debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]); + debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]); } static void btt_debugfs_init(struct btt *btt) @@ -247,6 +249,11 @@ static void btt_debugfs_init(struct btt *btt) } } +static u32 log_seq(struct log_group *log, int log_idx) +{ + return le32_to_cpu(log->ent[log_idx].seq); +} + /* * This function accepts two log entries, and uses the * sequence number to find the 'older' entry. @@ -256,8 +263,10 @@ static void btt_debugfs_init(struct btt *btt) * * TODO The logic feels a bit kludge-y. make it better.. */ -static int btt_log_get_old(struct log_entry *ent) +static int btt_log_get_old(struct arena_info *a, struct log_group *log) { + int idx0 = a->log_index[0]; + int idx1 = a->log_index[1]; int old; /* @@ -265,23 +274,23 @@ static int btt_log_get_old(struct log_entry *ent) * the next time, the following logic works out to put this * (next) entry into [1] */ - if (ent[0].seq == 0) { - ent[0].seq = cpu_to_le32(1); + if (log_seq(log, idx0) == 0) { + log->ent[idx0].seq = cpu_to_le32(1); return 0; } - if (ent[0].seq == ent[1].seq) + if (log_seq(log, idx0) == log_seq(log, idx1)) return -EINVAL; - if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) + if (log_seq(log, idx0) + log_seq(log, idx1) > 5) return -EINVAL; - if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { - if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) + if (log_seq(log, idx0) < log_seq(log, idx1)) { + if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1) old = 0; else old = 1; } else { - if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) + if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1) old = 1; else old = 0; @@ -306,17 +315,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane, { int ret; int old_ent, ret_ent; - struct log_entry log[2]; + struct log_group log; - ret = btt_log_read_pair(arena, lane, log); + ret = btt_log_group_read(arena, lane, &log); if (ret) return -EIO; - old_ent = btt_log_get_old(log); + old_ent = btt_log_get_old(arena, &log); if (old_ent < 0 || old_ent > 1) { dev_info(to_dev(arena), "log corruption (%d): lane %d seq [%d, %d]\n", - old_ent, lane, log[0].seq, log[1].seq); + old_ent, lane, log.ent[arena->log_index[0]].seq, + log.ent[arena->log_index[1]].seq); /* TODO set error state? */ return -EIO; } @@ -324,7 +334,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane, ret_ent = (old_flag ? old_ent : (1 - old_ent)); if (ent != NULL) - memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); + memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE); return ret_ent; } @@ -338,17 +348,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane, u32 sub, struct log_entry *ent) { int ret; - /* - * Ignore the padding in log_entry for calculating log_half. - * The entry is 'committed' when we write the sequence number, - * and we want to ensure that that is the last thing written. - * We don't bother writing the padding as that would be extra - * media wear and write amplification - */ - unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2; - u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE); + u32 group_slot = arena->log_index[sub]; + unsigned int log_half = LOG_ENT_SIZE / 2; void *src = ent; + u64 ns_off; + ns_off = arena->logoff + (lane * LOG_GRP_SIZE) + + (group_slot * LOG_ENT_SIZE); /* split the 16B write into atomic, durable halves */ ret = arena_write_bytes(arena, ns_off, src, log_half); if (ret) @@ -419,16 +425,16 @@ static int btt_log_init(struct arena_info *arena) { int ret; u32 i; - struct log_entry log, zerolog; + struct log_entry ent, zerolog; memset(&zerolog, 0, sizeof(zerolog)); for (i = 0; i < arena->nfree; i++) { - log.lba = cpu_to_le32(i); - log.old_map = cpu_to_le32(arena->external_nlba + i); - log.new_map = cpu_to_le32(arena->external_nlba + i); - log.seq = cpu_to_le32(LOG_SEQ_INIT); - ret = __btt_log_write(arena, i, 0, &log); + ent.lba = cpu_to_le32(i); + ent.old_map = cpu_to_le32(arena->external_nlba + i); + ent.new_map = cpu_to_le32(arena->external_nlba + i); + ent.seq = cpu_to_le32(LOG_SEQ_INIT); + ret = __btt_log_write(arena, i, 0, &ent); if (ret) return ret; ret = __btt_log_write(arena, i, 1, &zerolog); @@ -490,6 +496,123 @@ static int btt_freelist_init(struct arena_info *arena) return 0; } +static bool ent_is_padding(struct log_entry *ent) +{ + return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0) + && (ent->seq == 0); +} + +/* + * Detecting valid log indices: We read a log group (see the comments in btt.h + * for a description of a 'log_group' and its 'slots'), and iterate over its + * four slots. We expect that a padding slot will be all-zeroes, and use this + * to detect a padding slot vs. an actual entry. + * + * If a log_group is in the initial state, i.e. hasn't been used since the + * creation of this BTT layout, it will have three of the four slots with + * zeroes. We skip over these log_groups for the detection of log_index. If + * all log_groups are in the initial state (i.e. the BTT has never been + * written to), it is safe to assume the 'new format' of log entries in slots + * (0, 1). + */ +static int log_set_indices(struct arena_info *arena) +{ + bool idx_set = false, initial_state = true; + int ret, log_index[2] = {-1, -1}; + u32 i, j, next_idx = 0; + struct log_group log; + u32 pad_count = 0; + + for (i = 0; i < arena->nfree; i++) { + ret = btt_log_group_read(arena, i, &log); + if (ret < 0) + return ret; + + for (j = 0; j < 4; j++) { + if (!idx_set) { + if (ent_is_padding(&log.ent[j])) { + pad_count++; + continue; + } else { + /* Skip if index has been recorded */ + if ((next_idx == 1) && + (j == log_index[0])) + continue; + /* valid entry, record index */ + log_index[next_idx] = j; + next_idx++; + } + if (next_idx == 2) { + /* two valid entries found */ + idx_set = true; + } else if (next_idx > 2) { + /* too many valid indices */ + return -ENXIO; + } + } else { + /* + * once the indices have been set, just verify + * that all subsequent log groups are either in + * their initial state or follow the same + * indices. + */ + if (j == log_index[0]) { + /* entry must be 'valid' */ + if (ent_is_padding(&log.ent[j])) + return -ENXIO; + } else if (j == log_index[1]) { + ; + /* + * log_index[1] can be padding if the + * lane never got used and it is still + * in the initial state (three 'padding' + * entries) + */ + } else { + /* entry must be invalid (padding) */ + if (!ent_is_padding(&log.ent[j])) + return -ENXIO; + } + } + } + /* + * If any of the log_groups have more than one valid, + * non-padding entry, then the we are no longer in the + * initial_state + */ + if (pad_count < 3) + initial_state = false; + pad_count = 0; + } + + if (!initial_state && !idx_set) + return -ENXIO; + + /* + * If all the entries in the log were in the initial state, + * assume new padding scheme + */ + if (initial_state) + log_index[1] = 1; + + /* + * Only allow the known permutations of log/padding indices, + * i.e. (0, 1), and (0, 2) + */ + if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2))) + ; /* known index possibilities */ + else { + dev_err(to_dev(arena), "Found an unknown padding scheme\n"); + return -ENXIO; + } + + arena->log_index[0] = log_index[0]; + arena->log_index[1] = log_index[1]; + dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]); + dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]); + return 0; +} + static int btt_rtt_init(struct arena_info *arena) { arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); @@ -545,8 +668,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, available -= 2 * BTT_PG_SIZE; /* The log takes a fixed amount of space based on nfree */ - logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), - BTT_PG_SIZE); + logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE); available -= logsize; /* Calculate optimal split between map and data area */ @@ -563,6 +685,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, arena->mapoff = arena->dataoff + datasize; arena->logoff = arena->mapoff + mapsize; arena->info2off = arena->logoff + logsize; + + /* Default log indices are (0,1) */ + arena->log_index[0] = 0; + arena->log_index[1] = 1; return arena; } @@ -653,6 +779,13 @@ static int discover_arenas(struct btt *btt) arena->external_lba_start = cur_nlba; parse_arena_meta(arena, super, cur_off); + ret = log_set_indices(arena); + if (ret) { + dev_err(to_dev(arena), + "Unable to deduce log/padding indices\n"); + goto out; + } + ret = btt_freelist_init(arena); if (ret) goto out; diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index b2f8651e5395..0f80b6b3d4a3 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -26,6 +26,7 @@ #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) #define MAP_ENT_NORMAL 0xC0000000 +#define LOG_GRP_SIZE sizeof(struct log_group) #define LOG_ENT_SIZE sizeof(struct log_entry) #define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ #define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ @@ -44,12 +45,52 @@ enum btt_init_state { INIT_READY }; +/* + * A log group represents one log 'lane', and consists of four log entries. + * Two of the four entries are valid entries, and the remaining two are + * padding. Due to an old bug in the padding location, we need to perform a + * test to determine the padding scheme being used, and use that scheme + * thereafter. + * + * In kernels prior to 4.15, 'log group' would have actual log entries at + * indices (0, 2) and padding at indices (1, 3), where as the correct/updated + * format has log entries at indices (0, 1) and padding at indices (2, 3). + * + * Old (pre 4.15) format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------+-----------------+ + * + * New format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | lba/old/new/seq | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | pad | pad | + * +-----------------+-----------------+ + * + * We detect during start-up which format is in use, and set + * arena->log_index[(0, 1)] with the detected format. + */ + struct log_entry { __le32 lba; __le32 old_map; __le32 new_map; __le32 seq; - __le64 padding[2]; +}; + +struct log_group { + struct log_entry ent[4]; }; struct btt_sb { @@ -117,6 +158,7 @@ struct aligned_lock { * @list: List head for list of arenas * @debugfs_dir: Debugfs dentry * @flags: Arena flags - may signify error states. + * @log_index: Indices of the valid log entries in a log_group * * arena_info is a per-arena handle. Once an arena is narrowed down for an * IO, this struct is passed around for the duration of the IO. @@ -147,6 +189,7 @@ struct arena_info { struct dentry *debugfs_dir; /* Arena flags */ u32 flags; + int log_index[2]; }; /** -- GitLab From bb7119eea22c1764d3aa5edf541872cf5365f172 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 7 Apr 2017 09:34:12 +0200 Subject: [PATCH 0953/1398] scsi: sg: disable SET_FORCE_LOW_DMA commit 745dfa0d8ec26b24f3304459ff6e9eacc5c8351b upstream. The ioctl SET_FORCE_LOW_DMA has never worked since the initial git check-in, and the respective setting is nowadays handled correctly. So disable it entirely. Signed-off-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 30 +++++++++--------------------- include/scsi/sg.h | 1 - 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 184c7db1e0ca..cd9537ddc19f 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -149,7 +149,6 @@ typedef struct sg_fd { /* holds the state of a file descriptor */ struct list_head rq_list; /* head of request list */ struct fasync_struct *async_qp; /* used by asynchronous notification */ Sg_request req_arr[SG_MAX_QUEUE]; /* used as singly-linked list */ - char low_dma; /* as in parent but possibly overridden to 1 */ char force_packid; /* 1 -> pack_id input to read(), 0 -> ignored */ char cmd_q; /* 1 -> allow command queuing, 0 -> don't */ unsigned char next_cmd_len; /* 0: automatic, >0: use on next write() */ @@ -922,24 +921,14 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) /* strange ..., for backward compatibility */ return sfp->timeout_user; case SG_SET_FORCE_LOW_DMA: - result = get_user(val, ip); - if (result) - return result; - if (val) { - sfp->low_dma = 1; - if ((0 == sfp->low_dma) && !sfp->res_in_use) { - val = (int) sfp->reserve.bufflen; - sg_remove_scat(sfp, &sfp->reserve); - sg_build_reserve(sfp, val); - } - } else { - if (atomic_read(&sdp->detaching)) - return -ENODEV; - sfp->low_dma = sdp->device->host->unchecked_isa_dma; - } + /* + * N.B. This ioctl never worked properly, but failed to + * return an error value. So returning '0' to keep compability + * with legacy applications. + */ return 0; case SG_GET_LOW_DMA: - return put_user((int) sfp->low_dma, ip); + return put_user((int) sdp->device->host->unchecked_isa_dma, ip); case SG_GET_SCSI_ID: if (!access_ok(VERIFY_WRITE, p, sizeof (sg_scsi_id_t))) return -EFAULT; @@ -1860,6 +1849,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) int sg_tablesize = sfp->parentdp->sg_tablesize; int blk_size = buff_size, order; gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; + struct sg_device *sdp = sfp->parentdp; if (blk_size < 0) return -EFAULT; @@ -1885,7 +1875,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) scatter_elem_sz_prev = num; } - if (sfp->low_dma) + if (sdp->device->host->unchecked_isa_dma) gfp_mask |= GFP_DMA; if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) @@ -2148,8 +2138,6 @@ sg_add_sfp(Sg_device * sdp) sfp->timeout = SG_DEFAULT_TIMEOUT; sfp->timeout_user = SG_DEFAULT_TIMEOUT_USER; sfp->force_packid = SG_DEF_FORCE_PACK_ID; - sfp->low_dma = (SG_DEF_FORCE_LOW_DMA == 0) ? - sdp->device->host->unchecked_isa_dma : 1; sfp->cmd_q = SG_DEF_COMMAND_Q; sfp->keep_orphan = SG_DEF_KEEP_ORPHAN; sfp->parentdp = sdp; @@ -2608,7 +2596,7 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) jiffies_to_msecs(fp->timeout), fp->reserve.bufflen, (int) fp->reserve.k_use_sg, - (int) fp->low_dma); + (int) sdp->device->host->unchecked_isa_dma); seq_printf(s, " cmd_q=%d f_packid=%d k_orphan=%d closed=0\n", (int) fp->cmd_q, (int) fp->force_packid, (int) fp->keep_orphan); diff --git a/include/scsi/sg.h b/include/scsi/sg.h index 3afec7032448..20bc71c3e0b8 100644 --- a/include/scsi/sg.h +++ b/include/scsi/sg.h @@ -197,7 +197,6 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */ #define SG_DEFAULT_RETRIES 0 /* Defaults, commented if they differ from original sg driver */ -#define SG_DEF_FORCE_LOW_DMA 0 /* was 1 -> memory below 16MB on i386 */ #define SG_DEF_FORCE_PACK_ID 0 #define SG_DEF_KEEP_ORPHAN 0 #define SG_DEF_RESERVED_SIZE SG_SCATTER_SZ /* load time option */ -- GitLab From d8a3170db0deca6bfee32ad77f492caba9f6791d Mon Sep 17 00:00:00 2001 From: Li Jinyue Date: Thu, 14 Dec 2017 17:04:54 +0800 Subject: [PATCH 0954/1398] futex: Prevent overflow by strengthen input validation commit fbe0e839d1e22d88810f3ee3e2f1479be4c0aa4a upstream. UBSAN reports signed integer overflow in kernel/futex.c: UBSAN: Undefined behaviour in kernel/futex.c:2041:18 signed integer overflow: 0 - -2147483648 cannot be represented in type 'int' Add a sanity check to catch negative values of nr_wake and nr_requeue. Signed-off-by: Li Jinyue Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: dvhart@infradead.org Link: https://lkml.kernel.org/r/1513242294-31786-1-git-send-email-lijinyue@huawei.com Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/futex.c b/kernel/futex.c index 88bad86180ac..bb2265ae5cbc 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1711,6 +1711,9 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, struct futex_q *this, *next; WAKE_Q(wake_q); + if (nr_wake < 0 || nr_requeue < 0) + return -EINVAL; + if (requeue_pi) { /* * Requeue PI only works on two distinct uaddrs. This -- GitLab From e4ff9f294629b65df3f7982cd4faf86b2856b9d7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 9 Jan 2018 23:11:03 +0100 Subject: [PATCH 0955/1398] ALSA: seq: Make ioctls race-free commit b3defb791b26ea0683a93a4f49c77ec45ec96f10 upstream. The ALSA sequencer ioctls have no protection against racy calls while the concurrent operations may lead to interfere with each other. As reported recently, for example, the concurrent calls of setting client pool with a combination of write calls may lead to either the unkillable dead-lock or UAF. As a slightly big hammer solution, this patch introduces the mutex to make each ioctl exclusive. Although this may reduce performance via parallel ioctl calls, usually it's not demanded for sequencer usages, hence it should be negligible. Reported-by: Luo Quan Reviewed-by: Kees Cook Reviewed-by: Greg Kroah-Hartman Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_clientmgr.c | 3 +++ sound/core/seq/seq_clientmgr.h | 1 + 2 files changed, 4 insertions(+) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 45ef5915462c..16580a82e1c8 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -221,6 +221,7 @@ static struct snd_seq_client *seq_create_client1(int client_index, int poolsize) rwlock_init(&client->ports_lock); mutex_init(&client->ports_mutex); INIT_LIST_HEAD(&client->ports_list_head); + mutex_init(&client->ioctl_mutex); /* find free slot in the client table */ spin_lock_irqsave(&clients_lock, flags); @@ -2127,7 +2128,9 @@ static long snd_seq_ioctl(struct file *file, unsigned int cmd, return -EFAULT; } + mutex_lock(&client->ioctl_mutex); err = handler->func(client, &buf); + mutex_unlock(&client->ioctl_mutex); if (err >= 0) { /* Some commands includes a bug in 'dir' field. */ if (handler->cmd == SNDRV_SEQ_IOCTL_SET_QUEUE_CLIENT || diff --git a/sound/core/seq/seq_clientmgr.h b/sound/core/seq/seq_clientmgr.h index c6614254ef8a..0611e1e0ed5b 100644 --- a/sound/core/seq/seq_clientmgr.h +++ b/sound/core/seq/seq_clientmgr.h @@ -61,6 +61,7 @@ struct snd_seq_client { struct list_head ports_list_head; rwlock_t ports_lock; struct mutex ports_mutex; + struct mutex ioctl_mutex; int convert32; /* convert 32->64bit */ /* output pool */ -- GitLab From b9e168a0c629761774df687e88fca7e0e47381bf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Jan 2018 23:48:05 +0100 Subject: [PATCH 0956/1398] ALSA: pcm: Remove yet superfluous WARN_ON() commit 23b19b7b50fe1867da8d431eea9cd3e4b6328c2c upstream. muldiv32() contains a snd_BUG_ON() (which is morphed as WARN_ON() with debug option) for checking the case of 0 / 0. This would be helpful if this happens only as a logical error; however, since the hw refine is performed with any data set provided by user, the inconsistent values that can trigger such a condition might be passed easily. Actually, syzbot caught this by passing some zero'ed old hw_params ioctl. So, having snd_BUG_ON() there is simply superfluous and rather harmful to give unnecessary confusions. Let's get rid of it. Reported-by: syzbot+7e6ee55011deeebce15d@syzkaller.appspotmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_lib.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index e685e779a4b8..9306604f5070 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -578,7 +578,6 @@ static inline unsigned int muldiv32(unsigned int a, unsigned int b, { u_int64_t n = (u_int64_t) a * b; if (c == 0) { - snd_BUG_ON(!n); *r = 0; return UINT_MAX; } -- GitLab From fae704d5bd29771974e8cfe5f84480d9871b18cb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Jan 2018 08:34:28 +0100 Subject: [PATCH 0957/1398] ALSA: hda - Apply headphone noise quirk for another Dell XPS 13 variant commit e4c9fd10eb21376f44723c40ad12395089251c28 upstream. There is another Dell XPS 13 variant (SSID 1028:082a) that requires the existing fixup for reducing the headphone noise. This patch adds the quirk entry for that. BugLink: http://lkml.kernel.org/r/CAHXyb9ZCZJzVisuBARa+UORcjRERV8yokez=DP1_5O5isTz0ZA@mail.gmail.com Reported-and-tested-by: Francisco G. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4ef3b0067876..71a058fcf884 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5617,6 +5617,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), + SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), -- GitLab From 4b6e681f595224618feae64cb8299224e218f1eb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Jan 2018 10:53:18 +0100 Subject: [PATCH 0958/1398] ALSA: hda - Apply the existing quirk to iMac 14,1 commit 031f335cda879450095873003abb03ae8ed3b74a upstream. iMac 14,1 requires the same quirk as iMac 12,2, using GPIO 2 and 3 for headphone and speaker output amps. Add the codec SSID quirk entry (106b:0600) accordingly. BugLink: http://lkml.kernel.org/r/CAEw6Zyteav09VGHRfD5QwsfuWv5a43r0tFBNbfcHXoNrxVz7ew@mail.gmail.com Reported-by: Freaky Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_cirrus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 80bbadc83721..d6e079f4ec09 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -408,6 +408,7 @@ static const struct snd_pci_quirk cs420x_fixup_tbl[] = { /*SND_PCI_QUIRK(0x8086, 0x7270, "IMac 27 Inch", CS420X_IMAC27),*/ /* codec SSID */ + SND_PCI_QUIRK(0x106b, 0x0600, "iMac 14,1", CS420X_IMAC27_122), SND_PCI_QUIRK(0x106b, 0x1c00, "MacBookPro 8,1", CS420X_MBP81), SND_PCI_QUIRK(0x106b, 0x2000, "iMac 12,2", CS420X_IMAC27_122), SND_PCI_QUIRK(0x106b, 0x2800, "MacBookPro 10,1", CS420X_MBP101), -- GitLab From 676109b28cadbb9c6e6fedee733f3b2111923f17 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Jan 2018 23:19:49 +0100 Subject: [PATCH 0959/1398] timers: Unconditionally check deferrable base commit ed4bbf7910b28ce3c691aef28d245585eaabda06 upstream. When the timer base is checked for expired timers then the deferrable base must be checked as well. This was missed when making the deferrable base independent of base::nohz_active. Fixes: ced6d5c11d3e ("timers: Use deferrable base independent of base::nohz_active") Signed-off-by: Thomas Gleixner Cc: Anna-Maria Gleixner Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Paul McKenney Cc: rt@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/time/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index e872f7f05e8a..2d5cc7dfee14 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1696,7 +1696,7 @@ void run_local_timers(void) hrtimer_run_queues(); /* Raise the softirq only if required. */ if (time_before(jiffies, base->clk)) { - if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) + if (!IS_ENABLED(CONFIG_NO_HZ_COMMON)) return; /* CPU is awake, so check the deferrable base. */ base++; -- GitLab From e4dc05ab8f5af1e26064f674fb475f44e687b397 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Dec 2017 18:13:05 -0600 Subject: [PATCH 0960/1398] af_key: fix buffer overread in verify_address_len() commit 06b335cb51af018d5feeff5dd4fd53847ddb675a upstream. If a message sent to a PF_KEY socket ended with one of the extensions that takes a 'struct sadb_address' but there were not enough bytes remaining in the message for the ->sa_family member of the 'struct sockaddr' which is supposed to follow, then verify_address_len() read past the end of the message, into uninitialized memory. Fix it by returning -EINVAL in this case. This bug was found using syzkaller with KMSAN. Reproducer: #include #include #include int main() { int sock = socket(PF_KEY, SOCK_RAW, PF_KEY_V2); char buf[24] = { 0 }; struct sadb_msg *msg = (void *)buf; struct sadb_address *addr = (void *)(msg + 1); msg->sadb_msg_version = PF_KEY_V2; msg->sadb_msg_type = SADB_DELETE; msg->sadb_msg_len = 3; addr->sadb_address_len = 1; addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; write(sock, buf, 24); } Reported-by: Alexander Potapenko Signed-off-by: Eric Biggers Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/key/af_key.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index 94bf810ad242..ea81a58a4ff6 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -401,6 +401,11 @@ static int verify_address_len(const void *p) #endif int len; + if (sp->sadb_address_len < + DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family), + sizeof(uint64_t))) + return -EINVAL; + switch (addr->sa_family) { case AF_INET: len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t)); -- GitLab From 0476e6d0b7521d78cbdc9e99e6cbbf616d849042 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Dec 2017 18:15:23 -0600 Subject: [PATCH 0961/1398] af_key: fix buffer overread in parse_exthdrs() commit 4e765b4972af7b07adcb1feb16e7a525ce1f6b28 upstream. If a message sent to a PF_KEY socket ended with an incomplete extension header (fewer than 4 bytes remaining), then parse_exthdrs() read past the end of the message, into uninitialized memory. Fix it by returning -EINVAL in this case. Reproducer: #include #include #include int main() { int sock = socket(PF_KEY, SOCK_RAW, PF_KEY_V2); char buf[17] = { 0 }; struct sadb_msg *msg = (void *)buf; msg->sadb_msg_version = PF_KEY_V2; msg->sadb_msg_type = SADB_DELETE; msg->sadb_msg_len = 2; write(sock, buf, 17); } Signed-off-by: Eric Biggers Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/key/af_key.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index ea81a58a4ff6..6482b001f19a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -516,6 +516,9 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void * uint16_t ext_type; int ext_len; + if (len < sizeof(*ehdr)) + return -EINVAL; + ext_len = ehdr->sadb_ext_len; ext_len *= sizeof(uint64_t); ext_type = ehdr->sadb_ext_type; -- GitLab From d303d0ca9afb8e5f644e1392c684d44308bab96e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 26 Nov 2017 15:31:04 +0200 Subject: [PATCH 0962/1398] iser-target: Fix possible use-after-free in connection establishment error commit cd52cb26e7ead5093635e98e07e221e4df482d34 upstream. In case we fail to establish the connection we must drain our pre-posted login recieve work request before continuing safely with connection teardown. Fixes: a060b5629ab0 ("IB/core: generic RDMA READ/WRITE API") Reported-by: Amrani, Ram Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 39d28375aa37..0983470929bd 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -747,6 +747,7 @@ isert_connect_error(struct rdma_cm_id *cma_id) { struct isert_conn *isert_conn = cma_id->qp->qp_context; + ib_drain_qp(isert_conn->qp); list_del_init(&isert_conn->node); isert_conn->cm_id = NULL; isert_put_conn(isert_conn); -- GitLab From 997231f9fd7a4a8ad7c862736ab571e99d3800cb Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Mon, 20 Mar 2017 16:42:48 +0100 Subject: [PATCH 0963/1398] scsi: hpsa: fix volume offline state commit eb94588dabec82e012281608949a860f64752914 upstream. In a previous patch a hpsa_scsi_dev_t.volume_offline update line has been removed, so let us put it back.. Fixes: 85b29008d8 (hpsa: update check for logical volume status) Signed-off-by: Tomas Henzl Acked-by: Don Brace Signed-off-by: Martin K. Petersen Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hpsa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 99623701fc3d..0b8db8a74d50 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -3857,6 +3857,7 @@ static int hpsa_update_device_info(struct ctlr_info *h, if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) hpsa_get_ioaccel_status(h, scsi3addr, this_device); volume_offline = hpsa_volume_offline(h, scsi3addr); + this_device->volume_offline = volume_offline; if (volume_offline == HPSA_LV_FAILED) { rc = HPSA_LV_FAILED; dev_err(&h->pdev->dev, -- GitLab From 1ad4f2872c3b93216313a18bbf6e9f6d90e573d3 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 10 May 2017 21:03:37 +0800 Subject: [PATCH 0964/1398] sched/deadline: Zero out positive runtime after throttling constrained tasks commit ae83b56a56f8d9643dedbee86b457fa1c5d42f59 upstream. When a contrained task is throttled by dl_check_constrained_dl(), it may carry the remaining positive runtime, as a result when dl_task_timer() fires and calls replenish_dl_entity(), it will not be replenished correctly due to the positive dl_se->runtime. This patch assigns its runtime to 0 if positive after throttling. Signed-off-by: Xunlei Pang Signed-off-by: Peter Zijlstra (Intel) Acked-by: Daniel Bristot de Oliveira Cc: Juri Lelli Cc: Linus Torvalds Cc: Luca Abeni Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Fixes: df8eac8cafce ("sched/deadline: Throttle a constrained deadline task activated after the deadline) Link: http://lkml.kernel.org/r/1494421417-27550-1-git-send-email-xlpang@redhat.com Signed-off-by: Ingo Molnar Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- kernel/sched/deadline.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index df5c32a0c6ed..3042881169b4 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -723,6 +723,8 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se) if (unlikely(dl_se->dl_boosted || !start_dl_timer(p))) return; dl_se->dl_throttled = 1; + if (dl_se->runtime > 0) + dl_se->runtime = 0; } } -- GitLab From abf67b1e788194a2d13a8e77f05a44cbf9eae655 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 12 Jan 2018 17:49:25 +0000 Subject: [PATCH 0965/1398] x86/retpoline: Fill RSB on context switch for affected CPUs commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream. On context switch from a shallow call stack to a deeper one, as the CPU does 'ret' up the deeper side it may encounter RSB entries (predictions for where the 'ret' goes to) which were populated in userspace. This is problematic if neither SMEP nor KPTI (the latter of which marks userspace pages as NX for the kernel) are active, as malicious code in userspace may then be executed speculatively. Overwrite the CPU's return prediction stack with calls which are predicted to return to an infinite loop, to "capture" speculation if this happens. This is required both for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI. On Skylake+ the problem is slightly different, and an *underflow* of the RSB may cause errant branch predictions to occur. So there it's not so much overwrite, as *filling* the RSB to attempt to prevent it getting empty. This is only a partial solution for Skylake+ since there are many other conditions which may result in the RSB becoming empty. The full solution on Skylake+ is to use IBRS, which will prevent the problem even when the RSB becomes empty. With IBRS, the RSB-stuffing will not be required on context switch. [ tglx: Added missing vendor check and slighty massaged comments and changelog ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 11 +++++++++ arch/x86/entry/entry_64.S | 11 +++++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 36 ++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bdc9aeaf2e45..a76dc738ec61 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -229,6 +229,17 @@ ENTRY(__switch_to_asm) movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popl %esi popl %edi diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b9c901ce6582..3a15023bdf1a 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -427,6 +427,17 @@ ENTRY(__switch_to_asm) movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popq %r15 popq %r14 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4467568a531b..2f60cb5e9360 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -200,6 +200,7 @@ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 49d25ddf0e9f..8cacf62ec458 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -22,6 +22,7 @@ #include #include #include +#include static void __init spectre_v2_select_mitigation(void); @@ -154,6 +155,23 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_NONE; } +/* Check for Skylake-like CPUs (for RSB handling) */ +static bool __init is_skylake_era(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6) { + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: + return true; + } + } + return false; +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -212,6 +230,24 @@ static void __init spectre_v2_select_mitigation(void) spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); + + /* + * If neither SMEP or KPTI are available, there is a risk of + * hitting userspace addresses in the RSB after a context switch + * from a shallow call stack to a deeper one. To prevent this fill + * the entire RSB, even when using IBRS. + * + * Skylake era CPUs have a separate issue with *underflow* of the + * RSB, when they will predict 'ret' targets from the generic BTB. + * The proper mitigation for this is IBRS. If IBRS is not supported + * or deactivated in favour of retpolines the RSB fill on context + * switch is required. + */ + if ((!boot_cpu_has(X86_FEATURE_KAISER) && + !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Filling RSB on context switch\n"); + } } #undef pr_fmt -- GitLab From b73d68788f79a4906a5ef977fca58297a3d22482 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Sat, 13 Jan 2018 17:27:30 -0600 Subject: [PATCH 0966/1398] x86/retpoline: Add LFENCE to the retpoline/RSB filling RSB macros commit 28d437d550e1e39f805d99f9f8ac399c778827b7 upstream. The PAUSE instruction is currently used in the retpoline and RSB filling macros as a speculation trap. The use of PAUSE was originally suggested because it showed a very, very small difference in the amount of cycles/time used to execute the retpoline as compared to LFENCE. On AMD, the PAUSE instruction is not a serializing instruction, so the pause/jmp loop will use excess power as it is speculated over waiting for return to mispredict to the correct target. The RSB filling macro is applicable to AMD, and, if software is unable to verify that LFENCE is serializing on AMD (possible when running under a hypervisor), the generic retpoline support will be used and, so, is also applicable to AMD. Keep the current usage of PAUSE for Intel, but add an LFENCE instruction to the speculation trap for AMD. The same sequence has been adopted by GCC for the GCC generated retpolines. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Acked-by: David Woodhouse Acked-by: Arjan van de Ven Cc: Rik van Riel Cc: Andi Kleen Cc: Paul Turner Cc: Peter Zijlstra Cc: Tim Chen Cc: Jiri Kosina Cc: Dave Hansen Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Dan Williams Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Kees Cook Link: https://lkml.kernel.org/r/20180113232730.31060.36287.stgit@tlendack-t1.amdoffice.net Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 402a11c803c3..7b45d8424150 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -11,7 +11,7 @@ * Fill the CPU return stack buffer. * * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; jmp' loop to capture speculative execution. + * infinite 'pause; lfence; jmp' loop to capture speculative execution. * * This is required in various cases for retpoline and IBRS-based * mitigations for the Spectre variant 2 vulnerability. Sometimes to @@ -38,11 +38,13 @@ call 772f; \ 773: /* speculation trap */ \ pause; \ + lfence; \ jmp 773b; \ 772: \ call 774f; \ 775: /* speculation trap */ \ pause; \ + lfence; \ jmp 775b; \ 774: \ dec reg; \ @@ -73,6 +75,7 @@ call .Ldo_rop_\@ .Lspec_trap_\@: pause + lfence jmp .Lspec_trap_\@ .Ldo_rop_\@: mov \reg, (%_ASM_SP) @@ -165,6 +168,7 @@ " .align 16\n" \ "901: call 903f;\n" \ "902: pause;\n" \ + " lfence;\n" \ " jmp 902b;\n" \ " .align 16\n" \ "903: addl $4, %%esp;\n" \ -- GitLab From 13ccac5de85305cd1388ffa8b1e20b010373d76f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 15 Jan 2018 08:17:08 -0600 Subject: [PATCH 0967/1398] objtool: Improve error message for bad file argument commit 385d11b152c4eb638eeb769edcb3249533bb9a00 upstream. If a nonexistent file is supplied to objtool, it complains with a non-helpful error: open: No such file or directory Improve it to: objtool: Can't open 'foo': No such file or directory Reported-by: Markus Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/406a3d00a21225eee2819844048e17f68523ccf6.1516025651.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/elf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index d897702ce742..faacf0c89976 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "elf.h" #include "warn.h" @@ -370,7 +371,8 @@ struct elf *elf_open(const char *name) elf->fd = open(name, O_RDONLY); if (elf->fd == -1) { - perror("open"); + fprintf(stderr, "objtool: Can't open '%s': %s\n", + name, strerror(errno)); goto err; } -- GitLab From a96cf98dda3f06a5e5eb3d393dba35a9424a7e13 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 16 Jan 2018 16:42:25 +0100 Subject: [PATCH 0968/1398] x86/cpufeature: Move processor tracing out of scattered features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4fdec2034b7540dda461c6ba33325dfcff345c64 upstream. Processor tracing is already enumerated in word 9 (CPUID[7,0].EBX), so do not duplicate it in the scattered features word. Besides being more tidy, this will be useful for KVM when it presents processor tracing to the guests. KVM selects host features that are supported by both the host kernel (depending on command line options, CPU errata, or whatever) and KVM. Whenever a full feature word exists, KVM's code is written in the expectation that the CPUID bit number matches the X86_FEATURE_* bit number, but this is not the case for X86_FEATURE_INTEL_PT. Signed-off-by: Paolo Bonzini Cc: Borislav Petkov Cc: Linus Torvalds Cc: Luwei Kang Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/1516117345-34561-1-git-send-email-pbonzini@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/scattered.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 2f60cb5e9360..8537a21acd8b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -197,7 +197,6 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ @@ -236,6 +235,7 @@ #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 1db8dc490b66..b0dd9aec183d 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,7 +31,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, -- GitLab From eee0cba7b02f21110a443867401b41dc25a60777 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 16 Jan 2018 12:52:28 -0800 Subject: [PATCH 0969/1398] module: Add retpoline tag to VERMAGIC commit 6cfb521ac0d5b97470883ff9b7facae264b7ab12 upstream. Add a marker for retpoline to the module VERMAGIC. This catches the case when a non RETPOLINE compiled module gets loaded into a retpoline kernel, making it insecure. It doesn't handle the case when retpoline has been runtime disabled. Even in this case the match of the retcompile status will be enforced. This implies that even with retpoline run time disabled all modules loaded need to be recompiled. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Acked-by: David Woodhouse Cc: rusty@rustcorp.com.au Cc: arjan.van.de.ven@intel.com Cc: jeyu@kernel.org Cc: torvalds@linux-foundation.org Link: https://lkml.kernel.org/r/20180116205228.4890-1-andi@firstfloor.org Signed-off-by: Greg Kroah-Hartman --- include/linux/vermagic.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 6f8fbcf10dfb..a3d04934aa96 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -24,10 +24,16 @@ #ifndef MODULE_ARCH_VERMAGIC #define MODULE_ARCH_VERMAGIC "" #endif +#ifdef RETPOLINE +#define MODULE_VERMAGIC_RETPOLINE "retpoline " +#else +#define MODULE_VERMAGIC_RETPOLINE "" +#endif #define VERMAGIC_STRING \ UTS_RELEASE " " \ MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ - MODULE_ARCH_VERMAGIC + MODULE_ARCH_VERMAGIC \ + MODULE_VERMAGIC_RETPOLINE -- GitLab From 5ab44e8f0f0d6cde6f7b77eff73ca27bce499950 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 12 Jan 2018 14:31:35 -0600 Subject: [PATCH 0970/1398] x86/mm/pkeys: Fix fill_sig_info_pkey commit beacd6f7ed5e2915959442245b3b2480c2e37490 upstream. SEGV_PKUERR is a signal specific si_code which happens to have the same numeric value as several others: BUS_MCEERR_AR, ILL_ILLTRP, FPE_FLTOVF, TRAP_HWBKPT, CLD_TRAPPED, POLL_ERR, SEGV_THREAD_ID, as such it is not safe to just test the si_code the signal number must also be tested to prevent a false positive in fill_sig_info_pkey. This error was by inspection, and BUS_MCEERR_AR appears to be a real candidate for confusion. So pass in si_signo and check for SIG_SEGV to verify that it is actually a SEGV_PKUERR Fixes: 019132ff3daf ("x86/mm/pkeys: Fill in pkey field in siginfo") Signed-off-by: "Eric W. Biederman" Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Dave Hansen Cc: Oleg Nesterov Cc: Al Viro Link: https://lkml.kernel.org/r/20180112203135.4669-2-ebiederm@xmission.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/fault.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8b5ff88aa4f8..74dea7f14c20 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -191,14 +191,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really * faulted on a pte with its pkey=4. */ -static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) +static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info, + u32 *pkey) { /* This is effectively an #ifdef */ if (!boot_cpu_has(X86_FEATURE_OSPKE)) return; /* Fault not from Protection Keys: nothing to do */ - if (si_code != SEGV_PKUERR) + if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV)) return; /* * force_sig_info_fault() is called from a number of @@ -237,7 +238,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, lsb = PAGE_SHIFT; info.si_addr_lsb = lsb; - fill_sig_info_pkey(si_code, &info, pkey); + fill_sig_info_pkey(si_signo, si_code, &info, pkey); force_sig_info(si_signo, &info, tsk); } -- GitLab From 02802dfc82a2bd88a359c4ac85807c4d4d08f9bb Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 22 Dec 2017 00:27:55 -0500 Subject: [PATCH 0971/1398] x86/tsc: Fix erroneous TSC rate on Skylake Xeon commit b511203093489eb1829cb4de86e8214752205ac6 upstream. The INTEL_FAM6_SKYLAKE_X hardcoded crystal_khz value of 25MHZ is problematic: - SKX workstations (with same model # as server variants) use a 24 MHz crystal. This results in a -4.0% time drift rate on SKX workstations. - SKX servers subject the crystal to an EMI reduction circuit that reduces its actual frequency by (approximately) -0.25%. This results in -1 second per 10 minute time drift as compared to network time. This issue can also trigger a timer and power problem, on configurations that use the LAPIC timer (versus the TSC deadline timer). Clock ticks scheduled with the LAPIC timer arrive a few usec before the time they are expected (according to the slow TSC). This causes Linux to poll-idle, when it should be in an idle power saving state. The idle and clock code do not graciously recover from this error, sometimes resulting in significant polling and measurable power impact. Stop using native_calibrate_tsc() for INTEL_FAM6_SKYLAKE_X. native_calibrate_tsc() will return 0, boot will run with tsc_khz = cpu_khz, and the TSC refined calibration will update tsc_khz to correct for the difference. [ tglx: Sanitized change log ] Fixes: 6baf3d61821f ("x86/tsc: Add additional Intel CPU models to the crystal quirk list") Signed-off-by: Len Brown Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: Prarit Bhargava Link: https://lkml.kernel.org/r/ff6dcea166e8ff8f2f6a03c17beab2cb436aa779.1513920414.git.len.brown@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tsc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 44bf5cf417d3..d07a9390023e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -693,7 +693,6 @@ unsigned long native_calibrate_tsc(void) case INTEL_FAM6_KABYLAKE_DESKTOP: crystal_khz = 24000; /* 24.0 MHz */ break; - case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ATOM_DENVERTON: crystal_khz = 25000; /* 25.0 MHz */ break; -- GitLab From 5b13f593565f98a68f8e97415a16589d9826fa79 Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Fri, 17 Nov 2017 15:29:21 -0800 Subject: [PATCH 0972/1398] pipe: avoid round_pipe_size() nr_pages overflow on 32-bit commit d3f14c485867cfb2e0c48aa88c41d0ef4bf5209c upstream. round_pipe_size() contains a right-bit-shift expression which may overflow, which would cause undefined results in a subsequent roundup_pow_of_two() call. static inline unsigned int round_pipe_size(unsigned int size) { unsigned long nr_pages; nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; } PAGE_SIZE is defined as (1UL << PAGE_SHIFT), so: - 4 bytes wide on 32-bit (0 to 0xffffffff) - 8 bytes wide on 64-bit (0 to 0xffffffffffffffff) That means that 32-bit round_pipe_size(), nr_pages may overflow to 0: size=0x00000000 nr_pages=0x0 size=0x00000001 nr_pages=0x1 size=0xfffff000 nr_pages=0xfffff size=0xfffff001 nr_pages=0x0 << ! size=0xffffffff nr_pages=0x0 << ! This is bad because roundup_pow_of_two(n) is undefined when n == 0! 64-bit is not a problem as the unsigned int size is 4 bytes wide (similar to 32-bit) and the larger, 8 byte wide unsigned long, is sufficient to handle the largest value of the bit shift expression: size=0xffffffff nr_pages=100000 Modify round_pipe_size() to return 0 if n == 0 and updates its callers to handle accordingly. Link: http://lkml.kernel.org/r/1507658689-11669-3-git-send-email-joe.lawrence@redhat.com Signed-off-by: Joe Lawrence Reported-by: Mikulas Patocka Reviewed-by: Mikulas Patocka Cc: Al Viro Cc: Jens Axboe Cc: Michael Kerrisk Cc: Randy Dunlap Cc: Josh Poimboeuf Signed-off-by: Andrew Morton Signed-off-by: Dong Jinguang Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 8e0d9f26dfad..9faecf1b4a27 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1018,13 +1018,19 @@ const struct file_operations pipefifo_fops = { /* * Currently we rely on the pipe array holding a power-of-2 number - * of pages. + * of pages. Returns 0 on error. */ static inline unsigned int round_pipe_size(unsigned int size) { unsigned long nr_pages; + if (size < pipe_min_size) + size = pipe_min_size; + nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (nr_pages == 0) + return 0; + return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; } @@ -1040,6 +1046,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) long ret = 0; size = round_pipe_size(arg); + if (size == 0) + return -EINVAL; nr_pages = size >> PAGE_SHIFT; if (!nr_pages) @@ -1123,13 +1131,18 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, size_t *lenp, loff_t *ppos) { + unsigned int rounded_pipe_max_size; int ret; ret = proc_dointvec_minmax(table, write, buf, lenp, ppos); if (ret < 0 || !write) return ret; - pipe_max_size = round_pipe_size(pipe_max_size); + rounded_pipe_max_size = round_pipe_size(pipe_max_size); + if (rounded_pipe_max_size == 0) + return -EINVAL; + + pipe_max_size = rounded_pipe_max_size; return ret; } -- GitLab From c557481a9491fe3c711ce2dd490fecac55950eab Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 16 Jan 2018 12:20:18 +0100 Subject: [PATCH 0973/1398] x86/apic/vector: Fix off by one in error path commit 45d55e7bac4028af93f5fa324e69958a0b868e96 upstream. Keith reported the following warning: WARNING: CPU: 28 PID: 1420 at kernel/irq/matrix.c:222 irq_matrix_remove_managed+0x10f/0x120 x86_vector_free_irqs+0xa1/0x180 x86_vector_alloc_irqs+0x1e4/0x3a0 msi_domain_alloc+0x62/0x130 The reason for this is that if the vector allocation fails the error handling code tries to free the failed vector as well, which causes the above imbalance warning to trigger. Adjust the error path to handle this correctly. Fixes: b5dc8e6c21e7 ("x86/irq: Use hierarchical irqdomain to manage CPU interrupt vectors") Reported-by: Keith Busch Signed-off-by: Thomas Gleixner Tested-by: Keith Busch Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801161217300.1823@nanos Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f3557a1eb562..b5229abd1629 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -361,14 +361,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irq_data->chip_data = data; irq_data->hwirq = virq + i; err = assign_irq_vector_policy(virq + i, node, data, info); - if (err) + if (err) { + irq_data->chip_data = NULL; + free_apic_chip_data(data); goto error; + } } return 0; error: - x86_vector_free_irqs(domain, virq, i + 1); + x86_vector_free_irqs(domain, virq, i); return err; } -- GitLab From 9792f9b483cddf47ec355fbc165c763fc7c24c32 Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Sun, 9 Apr 2017 20:02:37 -0700 Subject: [PATCH 0974/1398] perf tools: Fix build with ARCH=x86_64 commit 7a759cd8e8272ee18922838ee711219c7c796a31 upstream. With commit: 0a943cb10ce78 (tools build: Add HOSTARCH Makefile variable) when building for ARCH=x86_64, ARCH=x86_64 is passed to perf instead of ARCH=x86, so the perf build process searchs header files from tools/arch/x86_64/include, which doesn't exist. The following build failure is seen: In file included from util/event.c:2:0: tools/include/uapi/linux/mman.h:4:27: fatal error: uapi/asm/mman.h: No such file or directory compilation terminated. Fix this issue by using SRCARCH instead of ARCH in perf, just like the main kernel Makefile and tools/objtool's. Signed-off-by: Jiada Wang Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Eugeniu Rosca Cc: Jan Stancek Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rui Teng Cc: Sukadev Bhattiprolu Cc: Wang Nan Fixes: 0a943cb10ce7 ("tools build: Add HOSTARCH Makefile variable") Link: http://lkml.kernel.org/r/1491793357-14977-2-git-send-email-jiada_wang@mentor.com Signed-off-by: Arnaldo Carvalho de Melo Cc: Tuomas Tynkkynen Signed-off-by: Greg Kroah-Hartman --- tools/perf/Makefile.config | 38 ++++++++++++++++++------------------- tools/perf/Makefile.perf | 2 +- tools/perf/arch/Build | 2 +- tools/perf/pmu-events/Build | 4 ++-- tools/perf/tests/Build | 2 +- tools/perf/util/header.c | 2 +- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index cffdd9cf3ebf..ff375310efe4 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -19,18 +19,18 @@ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) include $(srctree)/tools/scripts/Makefile.arch -$(call detected_var,ARCH) +$(call detected_var,SRCARCH) NO_PERF_REGS := 1 # Additional ARCH settings for ppc -ifeq ($(ARCH),powerpc) +ifeq ($(SRCARCH),powerpc) NO_PERF_REGS := 0 LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 endif # Additional ARCH settings for x86 -ifeq ($(ARCH),x86) +ifeq ($(SRCARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated @@ -43,12 +43,12 @@ ifeq ($(ARCH),x86) NO_PERF_REGS := 0 endif -ifeq ($(ARCH),arm) +ifeq ($(SRCARCH),arm) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-arm endif -ifeq ($(ARCH),arm64) +ifeq ($(SRCARCH),arm64) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif @@ -61,7 +61,7 @@ endif # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures # to the check. -ifneq ($(ARCH),$(filter $(ARCH),x86 arm)) +ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm)) NO_LIBDW_DWARF_UNWIND := 1 endif @@ -115,9 +115,9 @@ endif FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf -FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi # include ARCH specific config --include $(src-perf)/arch/$(ARCH)/Makefile +-include $(src-perf)/arch/$(SRCARCH)/Makefile ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET @@ -205,12 +205,12 @@ ifeq ($(DEBUG),0) endif CFLAGS += -I$(src-perf)/util/include -CFLAGS += -I$(src-perf)/arch/$(ARCH)/include +CFLAGS += -I$(src-perf)/arch/$(SRCARCH)/include CFLAGS += -I$(srctree)/tools/include/uapi CFLAGS += -I$(srctree)/tools/include/ -CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi -CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/ -CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/ +CFLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi +CFLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/ +CFLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/ # $(obj-perf) for generated common-cmds.h # $(obj-perf)/util for generated bison/flex headers @@ -321,7 +321,7 @@ ifndef NO_LIBELF ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) - msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); + msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled); NO_DWARF := 1 else CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) @@ -346,7 +346,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_BPF_PROLOGUE $(call detected,CONFIG_BPF_PROLOGUE) else - msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset()); + msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset()); endif else msg := $(warning DWARF support is off, BPF prologue is disabled); @@ -372,7 +372,7 @@ ifdef PERF_HAVE_JITDUMP endif endif -ifeq ($(ARCH),powerpc) +ifeq ($(SRCARCH),powerpc) ifndef NO_DWARF CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX endif @@ -453,7 +453,7 @@ else endif ifndef NO_LOCAL_LIBUNWIND - ifeq ($(ARCH),$(filter $(ARCH),arm arm64)) + ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64)) $(call feature_check,libunwind-debug-frame) ifneq ($(feature-libunwind-debug-frame), 1) msg := $(warning No debug_frame support found in libunwind); @@ -717,7 +717,7 @@ ifeq (${IS_64_BIT}, 1) NO_PERF_READ_VDSO32 := 1 endif endif - ifneq ($(ARCH), x86) + ifneq ($(SRCARCH), x86) NO_PERF_READ_VDSOX32 := 1 endif ifndef NO_PERF_READ_VDSOX32 @@ -746,7 +746,7 @@ ifdef LIBBABELTRACE endif ifndef NO_AUXTRACE - ifeq ($(ARCH),x86) + ifeq ($(SRCARCH),x86) ifeq ($(feature-get_cpuid), 0) msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); NO_AUXTRACE := 1 @@ -793,7 +793,7 @@ sysconfdir = $(prefix)/etc ETC_PERFCONFIG = etc/perfconfig endif ifndef lib -ifeq ($(ARCH)$(IS_64_BIT), x861) +ifeq ($(SRCARCH)$(IS_64_BIT), x861) lib = lib64 else lib = lib diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index ef52d1e3d431..2b92ffef554b 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -192,7 +192,7 @@ endif ifeq ($(config),0) include $(srctree)/tools/scripts/Makefile.arch --include arch/$(ARCH)/Makefile +-include arch/$(SRCARCH)/Makefile endif # The FEATURE_DUMP_EXPORT holds location of the actual diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build index 109eb75cf7de..d9b6af837c7d 100644 --- a/tools/perf/arch/Build +++ b/tools/perf/arch/Build @@ -1,2 +1,2 @@ libperf-y += common.o -libperf-y += $(ARCH)/ +libperf-y += $(SRCARCH)/ diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 9213a1273697..999a4e878162 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -2,7 +2,7 @@ hostprogs := jevents jevents-y += json.o jsmn.o jevents.o pmu-events-y += pmu-events.o -JDIR = pmu-events/arch/$(ARCH) +JDIR = pmu-events/arch/$(SRCARCH) JSON = $(shell [ -d $(JDIR) ] && \ find $(JDIR) -name '*.json' -o -name 'mapfile.csv') # @@ -10,4 +10,4 @@ JSON = $(shell [ -d $(JDIR) ] && \ # directory and create tables in pmu-events.c. # $(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS) - $(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V) + $(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 8a4ce492f7b2..546250a273e7 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -71,7 +71,7 @@ $(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/B $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ $(Q)echo ';' >> $@ -ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc)) +ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 5337f49db361..28bdb48357f0 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -826,7 +826,7 @@ static int write_group_desc(int fd, struct perf_header *h __maybe_unused, /* * default get_cpuid(): nothing gets recorded - * actual implementation must be in arch/$(ARCH)/util/header.c + * actual implementation must be in arch/$(SRCARCH)/util/header.c */ int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused) { -- GitLab From 607b86e173522b806d6721a4dba0b291178d5e4c Mon Sep 17 00:00:00 2001 From: Nir Perry Date: Thu, 11 Jan 2018 23:43:26 -0800 Subject: [PATCH 0975/1398] Input: ALPS - fix multi-touch decoding on SS4 plus touchpads commit 4d94e776bd29670f01befa27e12df784fa05fa2e upstream. The fix for handling two-finger scroll (i4a646580f793 - "Input: ALPS - fix two-finger scroll breakage in right side on ALPS touchpad") introduced a minor "typo" that broke decoding of multi-touch events are decoded on some ALPS touchpads. For example, tapping with three-fingers can no longer be used to emulate middle-mouse-button (the kernel doesn't recognize this as the proper event, and doesn't report it correctly to userspace). This affects touchpads that use SS4 "plus" protocol variant, like those found on Dell E7270 & E7470 laptops (tested on E7270). First, probably the code in alps_decode_ss4_v2() for case SS4_PACKET_ID_MULTI used inconsistent indices to "f->mt[]". You can see 0 & 1 are used for the "if" part but 2 & 3 are used for the "else" part. Second, in the previous patch, new macros were introduced to decode X coordinates specific to the SS4 "plus" variant, but the macro to define the maximum X value wasn't changed accordingly. The macros to decode X values for "plus" variant are effectively shifted right by 1 bit, but the max wasn't shifted too. This causes the driver to incorrectly handle "no data" cases, which also interfered with how multi-touch was handled. Fixes: 4a646580f793 ("Input: ALPS - fix two-finger scroll breakage...") Signed-off-by: Nir Perry Reviewed-by: Masaki Ota Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/alps.c | 23 +++++++++++++---------- drivers/input/mouse/alps.h | 10 ++++++---- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index f26807c75be4..af83d2e34913 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1247,29 +1247,32 @@ static int alps_decode_ss4_v2(struct alps_fields *f, case SS4_PACKET_ID_MULTI: if (priv->flags & ALPS_BUTTONPAD) { if (IS_SS4PLUS_DEV(priv->dev_id)) { - f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0); - f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + f->mt[2].x = SS4_PLUS_BTL_MF_X_V2(p, 0); + f->mt[3].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + no_data_x = SS4_PLUS_MFPACKET_NO_AX_BL; } else { f->mt[2].x = SS4_BTL_MF_X_V2(p, 0); f->mt[3].x = SS4_BTL_MF_X_V2(p, 1); + no_data_x = SS4_MFPACKET_NO_AX_BL; } + no_data_y = SS4_MFPACKET_NO_AY_BL; f->mt[2].y = SS4_BTL_MF_Y_V2(p, 0); f->mt[3].y = SS4_BTL_MF_Y_V2(p, 1); - no_data_x = SS4_MFPACKET_NO_AX_BL; - no_data_y = SS4_MFPACKET_NO_AY_BL; } else { if (IS_SS4PLUS_DEV(priv->dev_id)) { - f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0); - f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1); + f->mt[2].x = SS4_PLUS_STD_MF_X_V2(p, 0); + f->mt[3].x = SS4_PLUS_STD_MF_X_V2(p, 1); + no_data_x = SS4_PLUS_MFPACKET_NO_AX; } else { - f->mt[0].x = SS4_STD_MF_X_V2(p, 0); - f->mt[1].x = SS4_STD_MF_X_V2(p, 1); + f->mt[2].x = SS4_STD_MF_X_V2(p, 0); + f->mt[3].x = SS4_STD_MF_X_V2(p, 1); + no_data_x = SS4_MFPACKET_NO_AX; } + no_data_y = SS4_MFPACKET_NO_AY; + f->mt[2].y = SS4_STD_MF_Y_V2(p, 0); f->mt[3].y = SS4_STD_MF_Y_V2(p, 1); - no_data_x = SS4_MFPACKET_NO_AX; - no_data_y = SS4_MFPACKET_NO_AY; } f->first_mp = 0; diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h index 793123717145..9bc2babd9256 100644 --- a/drivers/input/mouse/alps.h +++ b/drivers/input/mouse/alps.h @@ -120,10 +120,12 @@ enum SS4_PACKET_ID { #define SS4_IS_5F_DETECTED(_b) ((_b[2] & 0x10) == 0x10) -#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */ -#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */ -#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coordinate value */ -#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coordinate value */ +#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */ +#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */ +#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coord value */ +#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coord value */ +#define SS4_PLUS_MFPACKET_NO_AX 4080 /* SS4 PLUS, X */ +#define SS4_PLUS_MFPACKET_NO_AX_BL 4088 /* Buttonless SS4 PLUS, X */ /* * enum V7_PACKET_ID - defines the packet type for V7 -- GitLab From 9be13b3357e12398bac998725437be64f0acc6fb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Jan 2018 17:20:18 -0800 Subject: [PATCH 0976/1398] Input: 88pm860x-ts - fix child-node lookup commit 906bf7daa0618d0ef39f4872ca42218c29a3631f upstream. Fix child node-lookup during probe, which ended up searching the whole device tree depth-first starting at parent rather than just matching on its children. To make things worse, the parent node was prematurely freed, while the child node was leaked. Fixes: 2e57d56747e6 ("mfd: 88pm860x: Device tree support") Signed-off-by: Johan Hovold Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/88pm860x-ts.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c index 251ff2aa0633..7a0dbce4dae9 100644 --- a/drivers/input/touchscreen/88pm860x-ts.c +++ b/drivers/input/touchscreen/88pm860x-ts.c @@ -126,7 +126,7 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, int data, n, ret; if (!np) return -ENODEV; - np = of_find_node_by_name(np, "touch"); + np = of_get_child_by_name(np, "touch"); if (!np) { dev_err(&pdev->dev, "Can't find touch node\n"); return -EINVAL; @@ -144,13 +144,13 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, if (data) { ret = pm860x_reg_write(i2c, PM8607_GPADC_MISC1, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } /* set tsi prebias time */ if (!of_property_read_u32(np, "marvell,88pm860x-tsi-prebias", &data)) { ret = pm860x_reg_write(i2c, PM8607_TSI_PREBIAS, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } /* set prebias & prechg time of pen detect */ data = 0; @@ -161,10 +161,18 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, if (data) { ret = pm860x_reg_write(i2c, PM8607_PD_PREBIAS, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } of_property_read_u32(np, "marvell,88pm860x-resistor-X", res_x); + + of_node_put(np); + return 0; + +err_put_node: + of_node_put(np); + + return -EINVAL; } #else #define pm860x_touch_dt_init(x, y, z) (-1) -- GitLab From eaabab6468b395e5d5fb66d7fd3363f96de3e61f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Jan 2018 17:17:48 -0800 Subject: [PATCH 0977/1398] Input: twl6040-vibra - fix child-node lookup commit dcaf12a8b0bbdbfcfa2be8dff2c4948d9844b4ad upstream. Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at parent rather than just matching on its children. Later sanity checks on node properties (which would likely be missing) should prevent this from causing much trouble however, especially as the original premature free of the parent node has already been fixed separately (but that "fix" was apparently never backported to stable). Fixes: e7ec014a47e4 ("Input: twl6040-vibra - update for device tree support") Fixes: c52c545ead97 ("Input: twl6040-vibra - fix DT node memory management") Signed-off-by: Johan Hovold Acked-by: Peter Ujfalusi Tested-by: H. Nikolaus Schaller (on Pyra OMAP5 hardware) Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/twl6040-vibra.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c index 5690eb7ff954..15e0d352c4cc 100644 --- a/drivers/input/misc/twl6040-vibra.c +++ b/drivers/input/misc/twl6040-vibra.c @@ -248,8 +248,7 @@ static int twl6040_vibra_probe(struct platform_device *pdev) int vddvibr_uV = 0; int error; - of_node_get(twl6040_core_dev->of_node); - twl6040_core_node = of_find_node_by_name(twl6040_core_dev->of_node, + twl6040_core_node = of_get_child_by_name(twl6040_core_dev->of_node, "vibra"); if (!twl6040_core_node) { dev_err(&pdev->dev, "parent of node is missing?\n"); -- GitLab From cb513d1414f910f3c30781cd5491ca57db2cae2f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Jan 2018 17:15:06 -0800 Subject: [PATCH 0978/1398] Input: twl4030-vibra - fix sibling-node lookup commit 5b189201993ab03001a398de731045bfea90c689 upstream. A helper purported to look up a child node based on its name was using the wrong of-helper and ended up prematurely freeing the parent of-node while searching the whole device tree depth-first starting at the parent node. Fixes: 64b9e4d803b1 ("input: twl4030-vibra: Support for DT booted kernel") Fixes: e661d0a04462 ("Input: twl4030-vibra - fix ERROR: Bad of_node_put() warning") Signed-off-by: Johan Hovold Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/twl4030-vibra.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c index caa5a62c42fb..15929d862459 100644 --- a/drivers/input/misc/twl4030-vibra.c +++ b/drivers/input/misc/twl4030-vibra.c @@ -178,12 +178,14 @@ static SIMPLE_DEV_PM_OPS(twl4030_vibra_pm_ops, twl4030_vibra_suspend, twl4030_vibra_resume); static bool twl4030_vibra_check_coexist(struct twl4030_vibra_data *pdata, - struct device_node *node) + struct device_node *parent) { + struct device_node *node; + if (pdata && pdata->coexist) return true; - node = of_find_node_by_name(node, "codec"); + node = of_get_child_by_name(parent, "codec"); if (node) { of_node_put(node); return true; -- GitLab From 9a50ea0ce7cc2f3b0115942aa313fcbce7f5183a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 18 Jan 2018 15:53:10 -0500 Subject: [PATCH 0979/1398] tracing: Fix converting enum's from the map in trace_event_eval_update() commit 1ebe1eaf2f02784921759992ae1fde1a9bec8fd0 upstream. Since enums do not get converted by the TRACE_EVENT macro into their values, the event format displaces the enum name and not the value. This breaks tools like perf and trace-cmd that need to interpret the raw binary data. To solve this, an enum map was created to convert these enums into their actual numbers on boot up. This is done by TRACE_EVENTS() adding a TRACE_DEFINE_ENUM() macro. Some enums were not being converted. This was caused by an optization that had a bug in it. All calls get checked against this enum map to see if it should be converted or not, and it compares the call's system to the system that the enum map was created under. If they match, then they call is processed. To cut down on the number of iterations needed to find the maps with a matching system, since calls and maps are grouped by system, when a match is made, the index into the map array is saved, so that the next call, if it belongs to the same system as the previous call, could start right at that array index and not have to scan all the previous arrays. The problem was, the saved index was used as the variable to know if this is a call in a new system or not. If the index was zero, it was assumed that the call is in a new system and would keep incrementing the saved index until it found a matching system. The issue arises when the first matching system was at index zero. The next map, if it belonged to the same system, would then think it was the first match and increment the index to one. If the next call belong to the same system, it would begin its search of the maps off by one, and miss the first enum that should be converted. This left a single enum not converted properly. Also add a comment to describe exactly what that index was for. It took me a bit too long to figure out what I was thinking when debugging this issue. Link: http://lkml.kernel.org/r/717BE572-2070-4C1E-9902-9F2E0FEDA4F8@oracle.com Fixes: 0c564a538aa93 ("tracing: Add TRACE_DEFINE_ENUM() macro to map enums to their values") Reported-by: Chuck Lever Teste-by: Chuck Lever Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 03c0a48c3ac4..9549ed120556 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2200,6 +2200,7 @@ void trace_event_enum_update(struct trace_enum_map **map, int len) { struct trace_event_call *call, *p; const char *last_system = NULL; + bool first = false; int last_i; int i; @@ -2207,15 +2208,28 @@ void trace_event_enum_update(struct trace_enum_map **map, int len) list_for_each_entry_safe(call, p, &ftrace_events, list) { /* events are usually grouped together with systems */ if (!last_system || call->class->system != last_system) { + first = true; last_i = 0; last_system = call->class->system; } + /* + * Since calls are grouped by systems, the likelyhood that the + * next call in the iteration belongs to the same system as the + * previous call is high. As an optimization, we skip seaching + * for a map[] that matches the call's system if the last call + * was from the same system. That's what last_i is for. If the + * call has the same system as the previous call, then last_i + * will be the index of the first map[] that has a matching + * system. + */ for (i = last_i; i < len; i++) { if (call->class->system == map[i]->system) { /* Save the first system if need be */ - if (!last_i) + if (first) { last_i = i; + first = false; + } update_event_printk(call, map[i]); } } -- GitLab From 969e2145eb4a069a60e8b090aa7e8b8aa0efc365 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Jan 2018 11:12:05 +0100 Subject: [PATCH 0980/1398] phy: work around 'phys' references to usb-nop-xceiv devices commit b7563e2796f8b23c98afcfea7363194227fa089d upstream. Stefan Wahren reports a problem with a warning fix that was merged for v4.15: we had lots of device nodes with a 'phys' property pointing to a device node that is not compliant with the binding documented in Documentation/devicetree/bindings/phy/phy-bindings.txt This generally works because USB HCD drivers that support both the generic phy subsystem and the older usb-phy subsystem ignore most errors from phy_get() and related calls and then use the usb-phy driver instead. However, it turns out that making the usb-nop-xceiv device compatible with the generic-phy binding changes the phy_get() return code from -EINVAL to -EPROBE_DEFER, and the dwc2 usb controller driver for bcm2835 now returns -EPROBE_DEFER from its probe function rather than ignoring the failure, breaking all USB support on raspberry-pi when CONFIG_GENERIC_PHY is enabled. The same code is used in the dwc3 driver and the usb_add_hcd() function, so a reasonable assumption would be that many other platforms are affected as well. I have reviewed all the related patches and concluded that "usb-nop-xceiv" is the only USB phy that is affected by the change, and since it is by far the most commonly referenced phy, all the other USB phy drivers appear to be used in ways that are are either safe in DT (they don't use the 'phys' property), or in the driver (they already ignore -EPROBE_DEFER from generic-phy when usb-phy is available). To work around the problem, this adds a special case to _of_phy_get() so we ignore any PHY node that is compatible with "usb-nop-xceiv", as we know that this can never load no matter how much we defer. In the future, we might implement a generic-phy driver for "usb-nop-xceiv" and then remove this workaround. Since we generally want older kernels to also want to work with the fixed devicetree files, it would be good to backport the patch into stable kernels as well (3.13+ are possibly affected), even though they don't contain any of the patches that may have caused regressions. Fixes: 014d6da6cb25 ARM: dts: bcm283x: Fix DTC warnings about missing phy-cells Fixes: c5bbf358b790 arm: dts: nspire: Add missing #phy-cells to usb-nop-xceiv Fixes: 44e5dced2ef6 arm: dts: marvell: Add missing #phy-cells to usb-nop-xceiv Fixes: f568f6f554b8 ARM: dts: omap: Add missing #phy-cells to usb-nop-xceiv Fixes: d745d5f277bf ARM: dts: imx51-zii-rdu1: Add missing #phy-cells to usb-nop-xceiv Fixes: 915fbe59cbf2 ARM: dts: imx: Add missing #phy-cells to usb-nop-xceiv Link: https://marc.info/?l=linux-usb&m=151518314314753&w=2 Link: https://patchwork.kernel.org/patch/10158145/ Cc: Felipe Balbi Cc: Eric Anholt Tested-by: Stefan Wahren Acked-by: Rob Herring Tested-by: Hans Verkuil Acked-by: Kishon Vijay Abraham I Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/phy/phy-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index a268f4d6f3e9..48a365e303e5 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -395,6 +395,10 @@ static struct phy *_of_phy_get(struct device_node *np, int index) if (ret) return ERR_PTR(-ENODEV); + /* This phy type handled by the usb-phy subsystem for now */ + if (of_device_is_compatible(args.np, "usb-nop-xceiv")) + return ERR_PTR(-ENODEV); + mutex_lock(&phy_provider_mutex); phy_provider = of_phy_provider_lookup(args.np); if (IS_ERR(phy_provider) || !try_module_get(phy_provider->owner)) { -- GitLab From 1f32f15ec73c76e5136ca9c42bc26c9526700c44 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 19 Jan 2018 14:32:08 +0100 Subject: [PATCH 0981/1398] ARM: sunxi_defconfig: Enable CMA commit c13e7f313da33d1488355440f1a10feb1897480a upstream. The DRM driver most notably, but also out of tree drivers (for now) like the VPU or GPU drivers, are quite big consumers of large, contiguous memory buffers. However, the sunxi_defconfig doesn't enable CMA in order to mitigate that, which makes them almost unusable. Enable it to make sure it somewhat works. Signed-off-by: Maxime Ripard Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/configs/sunxi_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 714da336ec86..5d49b60841e3 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -11,6 +11,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 CONFIG_AEABI=y CONFIG_HIGHMEM=y +CONFIG_CMA=y CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_CPU_FREQ=y @@ -35,6 +36,7 @@ CONFIG_CAN_SUN4I=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DMA_CMA=y CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_AHCI_SUNXI=y -- GitLab From 19f47eafe10c2ced548c381970e2cf7b3db77ba7 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 4 Jan 2018 17:53:12 +0100 Subject: [PATCH 0982/1398] ARM: dts: kirkwood: fix pin-muxing of MPP7 on OpenBlocks A7 commit 56aeb07c914a616ab84357d34f8414a69b140cdf upstream. MPP7 is currently muxed as "gpio", but this function doesn't exist for MPP7, only "gpo" is available. This causes the following error: kirkwood-pinctrl f1010000.pin-controller: unsupported function gpio on pin mpp7 pinctrl core: failed to register map default (6): invalid type given kirkwood-pinctrl f1010000.pin-controller: error claiming hogs: -22 kirkwood-pinctrl f1010000.pin-controller: could not claim hogs: -22 kirkwood-pinctrl f1010000.pin-controller: unable to register pinctrl driver kirkwood-pinctrl: probe of f1010000.pin-controller failed with error -22 So the pinctrl driver is not probed, all device drivers (including the UART driver) do a -EPROBE_DEFER, and therefore the system doesn't really boot (well, it boots, but with no UART, and no devices that require pin-muxing). Back when the Device Tree file for this board was introduced, the definition was already wrong. The pinctrl driver also always described as "gpo" this function for MPP7. However, between Linux 4.10 and 4.11, a hog pin failing to be muxed was turned from a simple warning to a hard error that caused the entire pinctrl driver probe to bail out. This is probably the result of commit 6118714275f0a ("pinctrl: core: Fix pinctrl_register_and_init() with pinctrl_enable()"). This commit fixes the Device Tree to use the proper "gpo" function for MPP7, which fixes the boot of OpenBlocks A7, which was broken since Linux 4.11. Fixes: f24b56cbcd9d ("ARM: kirkwood: add support for OpenBlocks A7 platform") Signed-off-by: Thomas Petazzoni Reviewed-by: Andrew Lunn Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/kirkwood-openblocks_a7.dts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts index cf2f5240e176..27cc913ca0f5 100644 --- a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts +++ b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts @@ -53,7 +53,8 @@ }; pinctrl: pin-controller@10000 { - pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header>; + pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header + &pmx_gpio_header_gpo>; pinctrl-names = "default"; pmx_uart0: pmx-uart0 { @@ -85,11 +86,16 @@ * ground. */ pmx_gpio_header: pmx-gpio-header { - marvell,pins = "mpp17", "mpp7", "mpp29", "mpp28", + marvell,pins = "mpp17", "mpp29", "mpp28", "mpp35", "mpp34", "mpp40"; marvell,function = "gpio"; }; + pmx_gpio_header_gpo: pxm-gpio-header-gpo { + marvell,pins = "mpp7"; + marvell,function = "gpo"; + }; + pmx_gpio_init: pmx-init { marvell,pins = "mpp38"; marvell,function = "gpio"; -- GitLab From 23d68eddd857d9bbc68ebd8d22255999c155823a Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Mon, 15 Jan 2018 16:31:19 +0100 Subject: [PATCH 0983/1398] can: peak: fix potential bug in packet fragmentation commit d8a243af1a68395e07ac85384a2740d4134c67f4 upstream. In some rare conditions when running one PEAK USB-FD interface over a non high-speed USB controller, one useless USB fragment might be sent. This patch fixes the way a USB command is fragmented when its length is greater than 64 bytes and when the underlying USB controller is not a high-speed one. Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 304732550f0a..7f5ec40e2b4d 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -184,7 +184,7 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) void *cmd_head = pcan_usb_fd_cmd_buffer(dev); int err = 0; u8 *packet_ptr; - int i, n = 1, packet_len; + int packet_len; ptrdiff_t cmd_len; /* usb device unregistered? */ @@ -201,17 +201,13 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) } packet_ptr = cmd_head; + packet_len = cmd_len; /* firmware is not able to re-assemble 512 bytes buffer in full-speed */ - if ((dev->udev->speed != USB_SPEED_HIGH) && - (cmd_len > PCAN_UFD_LOSPD_PKT_SIZE)) { - packet_len = PCAN_UFD_LOSPD_PKT_SIZE; - n += cmd_len / packet_len; - } else { - packet_len = cmd_len; - } + if (unlikely(dev->udev->speed != USB_SPEED_HIGH)) + packet_len = min(packet_len, PCAN_UFD_LOSPD_PKT_SIZE); - for (i = 0; i < n; i++) { + do { err = usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, PCAN_USBPRO_EP_CMDOUT), @@ -224,7 +220,12 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) } packet_ptr += packet_len; - } + cmd_len -= packet_len; + + if (cmd_len < PCAN_UFD_LOSPD_PKT_SIZE) + packet_len = cmd_len; + + } while (packet_len > 0); return err; } -- GitLab From 43c3e093c26d7707b36241d579a35eaa0f4cd91f Mon Sep 17 00:00:00 2001 From: Xi Kangjie Date: Thu, 18 Jan 2018 16:34:00 -0800 Subject: [PATCH 0984/1398] scripts/gdb/linux/tasks.py: fix get_thread_info commit 883d50f56d263f70fd73c0d96b09eb36c34e9305 upstream. Since kernel 4.9, the thread_info has been moved into task_struct, no longer locates at the bottom of kernel stack. See commits c65eacbe290b ("sched/core: Allow putting thread_info into task_struct") and 15f4eae70d36 ("x86: Move thread_info into task_struct"). Before fix: (gdb) set $current = $lx_current() (gdb) p $lx_thread_info($current) $1 = {flags = 1470918301} (gdb) p $current.thread_info $2 = {flags = 2147483648} After fix: (gdb) p $lx_thread_info($current) $1 = {flags = 2147483648} (gdb) p $current.thread_info $2 = {flags = 2147483648} Link: http://lkml.kernel.org/r/20180118210159.17223-1-imxikangjie@gmail.com Fixes: 15f4eae70d36 ("x86: Move thread_info into task_struct") Signed-off-by: Xi Kangjie Acked-by: Jan Kiszka Acked-by: Kieran Bingham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- scripts/gdb/linux/tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py index 1bf949c43b76..f6ab3ccf698f 100644 --- a/scripts/gdb/linux/tasks.py +++ b/scripts/gdb/linux/tasks.py @@ -96,6 +96,8 @@ def get_thread_info(task): thread_info_addr = task.address + ia64_task_size thread_info = thread_info_addr.cast(thread_info_ptr_type) else: + if task.type.fields()[0].type == thread_info_type.get_type(): + return task['thread_info'] thread_info = task['stack'].cast(thread_info_ptr_type) return thread_info.dereference() -- GitLab From 8a3f4baaa4c3fa73821c775d684bdc0120ef8e59 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 18 Jan 2018 16:34:05 -0800 Subject: [PATCH 0985/1398] proc: fix coredump vs read /proc/*/stat race commit 8bb2ee192e482c5d500df9f2b1b26a560bd3026f upstream. do_task_stat() accesses IP and SP of a task without bumping reference count of a stack (which became an entity with independent lifetime at some point). Steps to reproduce: #include #include #include #include #include #include #include #include int main(void) { setrlimit(RLIMIT_CORE, &(struct rlimit){}); while (1) { char buf[64]; char buf2[4096]; pid_t pid; int fd; pid = fork(); if (pid == 0) { *(volatile int *)0 = 0; } snprintf(buf, sizeof(buf), "/proc/%u/stat", pid); fd = open(buf, O_RDONLY); read(fd, buf2, sizeof(buf2)); close(fd); waitpid(pid, NULL, 0); } return 0; } BUG: unable to handle kernel paging request at 0000000000003fd8 IP: do_task_stat+0x8b4/0xaf0 PGD 800000003d73e067 P4D 800000003d73e067 PUD 3d558067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 PID: 1417 Comm: a.out Not tainted 4.15.0-rc8-dirty #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc27 04/01/2014 RIP: 0010:do_task_stat+0x8b4/0xaf0 Call Trace: proc_single_show+0x43/0x70 seq_read+0xe6/0x3b0 __vfs_read+0x1e/0x120 vfs_read+0x84/0x110 SyS_read+0x3d/0xa0 entry_SYSCALL_64_fastpath+0x13/0x6c RIP: 0033:0x7f4d7928cba0 RSP: 002b:00007ffddb245158 EFLAGS: 00000246 Code: 03 b7 a0 01 00 00 4c 8b 4c 24 70 4c 8b 44 24 78 4c 89 74 24 18 e9 91 f9 ff ff f6 45 4d 02 0f 84 fd f7 ff ff 48 8b 45 40 48 89 ef <48> 8b 80 d8 3f 00 00 48 89 44 24 20 e8 9b 97 eb ff 48 89 44 24 RIP: do_task_stat+0x8b4/0xaf0 RSP: ffffc90000607cc8 CR2: 0000000000003fd8 John Ogness said: for my tests I added an else case to verify that the race is hit and correctly mitigated. Link: http://lkml.kernel.org/r/20180116175054.GA11513@avx2 Signed-off-by: Alexey Dobriyan Reported-by: "Kohli, Gaurav" Tested-by: John Ogness Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/array.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index c932ec454625..794b52a6c20d 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -423,8 +423,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * safe because the task has stopped executing permanently. */ if (permitted && (task->flags & PF_DUMPCORE)) { - eip = KSTK_EIP(task); - esp = KSTK_ESP(task); + if (try_get_task_stack(task)) { + eip = KSTK_EIP(task); + esp = KSTK_ESP(task); + put_task_stack(task); + } } } -- GitLab From d314f3bc7f3d0853e38364c95b1d30fb4e33e989 Mon Sep 17 00:00:00 2001 From: Xinyu Lin Date: Sun, 17 Dec 2017 20:13:39 +0800 Subject: [PATCH 0986/1398] libata: apply MAX_SEC_1024 to all LITEON EP1 series devices commit db5ff909798ef0099004ad50a0ff5fde92426fd1 upstream. LITEON EP1 has the same timeout issues as CX1 series devices. Revert max_sectors to the value of 1024. Fixes: e0edc8c54646 ("libata: apply MAX_SEC_1024 to all CX1-JB*-HP devices") Signed-off-by: Xinyu Lin Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 33e363dcc63b..aee39524375c 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4322,6 +4322,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { * https://bugzilla.kernel.org/show_bug.cgi?id=121671 */ { "LITEON CX1-JB*-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 }, + { "LITEON EP1-*", NULL, ATA_HORKAGE_MAX_SEC_1024 }, /* Devices we expect to fail diagnostics */ -- GitLab From ca2d736867200b931ca61383af2fd68bb5fd2ecb Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 11 Jan 2018 09:53:35 +0900 Subject: [PATCH 0987/1398] workqueue: avoid hard lockups in show_workqueue_state() commit 62635ea8c18f0f62df4cc58379e4f1d33afd5801 upstream. show_workqueue_state() can print out a lot of messages while being in atomic context, e.g. sysrq-t -> show_workqueue_state(). If the console device is slow it may end up triggering NMI hard lockup watchdog. Signed-off-by: Sergey Senozhatsky Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 181c2ad0cb54..ebfea5f94b66 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "workqueue_internal.h" @@ -4424,6 +4425,12 @@ void show_workqueue_state(void) if (pwq->nr_active || !list_empty(&pwq->delayed_works)) show_pwq(pwq); spin_unlock_irqrestore(&pwq->pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering + * hard lockup. + */ + touch_nmi_watchdog(); } } @@ -4451,6 +4458,12 @@ void show_workqueue_state(void) pr_cont("\n"); next_pool: spin_unlock_irqrestore(&pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering + * hard lockup. + */ + touch_nmi_watchdog(); } rcu_read_unlock_sched(); -- GitLab From cabf6294a6dc615324f8269b5839de2020dbdaf3 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 20 Dec 2017 09:56:06 +0000 Subject: [PATCH 0988/1398] dm btree: fix serious bug in btree_split_beneath() commit bc68d0a43560e950850fc69b58f0f8254b28f6d6 upstream. When inserting a new key/value pair into a btree we walk down the spine of btree nodes performing the following 2 operations: i) space for a new entry ii) adjusting the first key entry if the new key is lower than any in the node. If the _root_ node is full, the function btree_split_beneath() allocates 2 new nodes, and redistibutes the root nodes entries between them. The root node is left with 2 entries corresponding to the 2 new nodes. btree_split_beneath() then adjusts the spine to point to one of the two new children. This means the first key is never adjusted if the new key was lower, ie. operation (ii) gets missed out. This can result in the new key being 'lost' for a period; until another low valued key is inserted that will uncover it. This is a serious bug, and quite hard to make trigger in normal use. A reproducing test case ("thin create devices-in-reverse-order") is available as part of the thin-provision-tools project: https://github.com/jthornber/thin-provisioning-tools/blob/master/functional-tests/device-mapper/dm-tests.scm#L593 Fix the issue by changing btree_split_beneath() so it no longer adjusts the spine. Instead it unlocks both the new nodes, and lets the main loop in btree_insert_raw() relock the appropriate one and make any neccessary adjustments. Reported-by: Monty Pavel Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-btree.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 7a75b5010f73..e4ececd3df00 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -678,23 +678,8 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) pn->keys[1] = rn->keys[0]; memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64)); - /* - * rejig the spine. This is ugly, since it knows too - * much about the spine - */ - if (s->nodes[0] != new_parent) { - unlock_block(s->info, s->nodes[0]); - s->nodes[0] = new_parent; - } - if (key < le64_to_cpu(rn->keys[0])) { - unlock_block(s->info, right); - s->nodes[1] = left; - } else { - unlock_block(s->info, left); - s->nodes[1] = right; - } - s->count = 2; - + unlock_block(s->info, left); + unlock_block(s->info, right); return 0; } -- GitLab From 2904adc5b1c08894f954bff9e30eb5facb3b6591 Mon Sep 17 00:00:00 2001 From: Dennis Yang Date: Tue, 12 Dec 2017 18:21:40 +0800 Subject: [PATCH 0989/1398] dm thin metadata: THIN_MAX_CONCURRENT_LOCKS should be 6 commit 490ae017f54e55bde382d45ea24bddfb6d1a0aaf upstream. For btree removal, there is a corner case that a single thread could takes 6 locks which is more than THIN_MAX_CONCURRENT_LOCKS(5) and leads to deadlock. A btree removal might eventually call rebalance_children()->rebalance3() to rebalance entries of three neighbor child nodes when shadow_spine has already acquired two write locks. In rebalance3(), it tries to shadow and acquire the write locks of all three child nodes. However, shadowing a child node requires acquiring a read lock of the original child node and a write lock of the new block. Although the read lock will be released after block shadowing, shadowing the third child node in rebalance3() could still take the sixth lock. (2 write locks for shadow_spine + 2 write locks for the first two child nodes's shadow + 1 write lock for the last child node's shadow + 1 read lock for the last child node) Signed-off-by: Dennis Yang Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin-metadata.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 4477bf930cf4..e976f4f39334 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -81,10 +81,14 @@ #define SECTOR_TO_BLOCK_SHIFT 3 /* + * For btree insert: * 3 for btree insert + * 2 for btree lookup used within space map + * For btree remove: + * 2 for shadow spine + + * 4 for rebalance 3 child node */ -#define THIN_MAX_CONCURRENT_LOCKS 5 +#define THIN_MAX_CONCURRENT_LOCKS 6 /* This should be plenty */ #define SPACE_MAP_ROOT_SIZE 128 -- GitLab From ddfaa7acd7a27355200457eb56767cf5fcafc1d0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 16 Jan 2018 10:23:47 +0000 Subject: [PATCH 0990/1398] arm64: KVM: Fix SMCCC handling of unimplemented SMC/HVC calls commit acfb3b883f6d6a4b5d27ad7fdded11f6a09ae6dd upstream. KVM doesn't follow the SMCCC when it comes to unimplemented calls, and inject an UNDEF instead of returning an error. Since firmware calls are now used for security mitigation, they are becoming more common, and the undef is counter productive. Instead, let's follow the SMCCC which states that -1 must be returned to the caller when getting an unknown function number. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/handle_exit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 85baadab02d3..2e6e9e99977b 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -44,7 +44,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_psci_call(vcpu); if (ret < 0) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } @@ -53,7 +53,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } -- GitLab From 0d92cf7f29e6ea8565546c5685b34e633300d8e8 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 26 Dec 2017 23:43:54 -0600 Subject: [PATCH 0991/1398] x86/cpu, x86/pti: Do not enable PTI on AMD processors commit 694d99d40972f12e59a3696effee8a376b79d7c8 upstream. AMD processors are not subject to the types of attacks that the kernel page table isolation feature protects against. The AMD microarchitecture does not allow memory references, including speculative references, that access higher privileged data when running in a lesser privileged mode when that access would result in a page fault. Disable page table isolation by default on AMD processors by not setting the X86_BUG_CPU_INSECURE feature, which controls whether X86_FEATURE_PTI is set. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Dave Hansen Cc: Andy Lutomirski Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20171227054354.20369.94587.stgit@tlendack-t1.amdoffice.net Cc: Nick Lowe Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7b9ae04ddf5d..d198ae02f2b7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -883,8 +883,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - /* Assume for now that ALL x86 CPUs are insecure */ - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + if (c->x86_vendor != X86_VENDOR_AMD) + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); -- GitLab From 87ac29717de8abaa3199eda5ef3c04e2924a6fdc Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 5 Dec 2016 12:56:38 -0700 Subject: [PATCH 0992/1398] usbip: fix warning in vhci_hcd_probe/lockdep_init_map commit 918b8ac55b6c809b70aa05c279087109584e393e upstream. vhci_hcd calls sysfs_create_group() with dynamically allocated sysfs attributes triggering the lock-class key not persistent warning. Call sysfs_attr_init() for dynamically allocated sysfs attributes to fix it. vhci_hcd vhci_hcd: USB/IP Virtual Host Controller vhci_hcd vhci_hcd: new USB bus registered, assigned bus number 2 BUG: key ffff88006a7e8d18 not in .data! ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at kernel/locking/lockdep.c:3131 lockdep_init_map+0x60c/0x770 DEBUG_LOCKS_WARN_ON(1)[ 1.567044] Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.9.0-rc7+ #58 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff88006bce6eb8 ffffffff81f96c8a ffffffff00000a02 1ffff1000d79cd6a ffffed000d79cd62 000000046bce6ed8 0000000041b58ab3 ffffffff8598af40 ffffffff81f969f8 0000000000000000 0000000041b58ab3 0000000000000200 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x292/0x398 lib/dump_stack.c:51 [] __warn+0x19f/0x1e0 kernel/panic.c:550 [] warn_slowpath_fmt+0xc5/0x110 kernel/panic.c:565 [] lockdep_init_map+0x60c/0x770 kernel/locking/lockdep.c:3131 [] __kernfs_create_file+0x114/0x2a0 fs/kernfs/file.c:954 [] sysfs_add_file_mode_ns+0x225/0x520 fs/sysfs/file.c:305 [< inline >] create_files fs/sysfs/group.c:64 [] internal_create_group+0x239/0x8f0 fs/sysfs/group.c:134 [] sysfs_create_group+0x1f/0x30 fs/sysfs/group.c:156 [] vhci_start+0x5b4/0x7a0 drivers/usb/usbip/vhci_hcd.c:978 [] usb_add_hcd+0x8da/0x1c60 drivers/usb/core/hcd.c:2867 [] vhci_hcd_probe+0x97/0x130 drivers/usb/usbip/vhci_hcd.c:1103 --- --- ---[ end trace c33c7b202cf3aac8 ]--- Signed-off-by: Shuah Khan Reported-by: Andrey Konovalov Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c index c404017c1b5a..b96e5b189269 100644 --- a/drivers/usb/usbip/vhci_sysfs.c +++ b/drivers/usb/usbip/vhci_sysfs.c @@ -361,6 +361,7 @@ static void set_status_attr(int id) status->attr.attr.name = status->name; status->attr.attr.mode = S_IRUGO; status->attr.show = status_show; + sysfs_attr_init(&status->attr.attr); } static int init_status_attrs(void) -- GitLab From c5aa687060a85d849574ad014639f93b1dc98ea0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 18 Jan 2018 16:28:26 +0100 Subject: [PATCH 0993/1398] x86/mce: Make machine check speculation protected commit 6f41c34d69eb005e7848716bbcafc979b35037d5 upstream. The machine check idtentry uses an indirect branch directly from the low level code. This evades the speculation protection. Replace it by a direct call into C code and issue the indirect call there so the compiler can apply the proper speculation protection. Signed-off-by: Thomas Gleixner Reviewed-by:Borislav Petkov Reviewed-by: David Woodhouse Niced-by: Peter Zijlstra Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801181626290.1847@nanos Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 2 +- arch/x86/include/asm/traps.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 5 +++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3a15023bdf1a..e729e1528584 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1064,7 +1064,7 @@ idtentry async_page_fault do_async_page_fault has_error_code=1 #endif #ifdef CONFIG_X86_MCE -idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) +idtentry machine_check do_mce has_error_code=0 paranoid=1 #endif /* diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 01fd0a7f48cd..688315b7a922 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -92,6 +92,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *, long); #endif +dotraplinkage void do_mce(struct pt_regs *, long); static inline int get_si_code(unsigned long condition) { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8ca5f8ad008e..fe5cd6ea1f0e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1754,6 +1754,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; +dotraplinkage void do_mce(struct pt_regs *regs, long error_code) +{ + machine_check_vector(regs, error_code); +} + /* * Called for each booted CPU to set up machine checks. * Must be called with preempt off: -- GitLab From 09402d83395f6b377841bcf9460aeee37501486d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 19 Jan 2018 01:14:21 +0900 Subject: [PATCH 0994/1398] retpoline: Introduce start/end markers of indirect thunk commit 736e80a4213e9bbce40a7c050337047128b472ac upstream. Introduce start/end markers of __x86_indirect_thunk_* functions. To make it easy, consolidate .text.__x86.indirect_thunk.* sections to one .text.__x86.indirect_thunk section and put it in the end of kernel text section and adds __indirect_thunk_start/end so that other subsystem (e.g. kprobes) can identify it. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Andi Kleen Cc: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/151629206178.10241.6828804696410044771.stgit@devbox Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 3 +++ arch/x86/kernel/vmlinux.lds.S | 7 +++++++ arch/x86/lib/retpoline.S | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7b45d8424150..19ba5ad19c65 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -194,6 +194,9 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +extern char __indirect_thunk_start[]; +extern char __indirect_thunk_end[]; + /* * On VMEXIT we must ensure that no RSB predictions learned in the guest * can be followed in the host, by overwriting the RSB completely. Both diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index dbf67f64d5ec..c7194e97c3d4 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -105,6 +105,13 @@ SECTIONS SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) + +#ifdef CONFIG_RETPOLINE + __indirect_thunk_start = .; + *(.text.__x86.indirect_thunk) + __indirect_thunk_end = .; +#endif + /* End of text section */ _etext = .; } :text = 0x9090 diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index cb45c6cb465f..d3415dc30f82 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -9,7 +9,7 @@ #include .macro THUNK reg - .section .text.__x86.indirect_thunk.\reg + .section .text.__x86.indirect_thunk ENTRY(__x86_indirect_thunk_\reg) CFI_STARTPROC -- GitLab From 36ad6ba501d626f5bac505e6f70ffc54292b76d9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 19 Jan 2018 01:14:51 +0900 Subject: [PATCH 0995/1398] kprobes/x86: Blacklist indirect thunk functions for kprobes commit c1804a236894ecc942da7dc6c5abe209e56cba93 upstream. Mark __x86_indirect_thunk_* functions as blacklist for kprobes because those functions can be called from anywhere in the kernel including blacklist functions of kprobes. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Andi Kleen Cc: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/151629209111.10241.5444852823378068683.stgit@devbox Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/retpoline.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index d3415dc30f82..dfb2ba91b670 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -25,7 +25,8 @@ ENDPROC(__x86_indirect_thunk_\reg) * than one per register with the correct names. So we do it * the simple and nasty way... */ -#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) +#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) +#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) #define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) GENERATE_THUNK(_ASM_AX) -- GitLab From 4b71be496642d3c7908a0e40a3073ce5bcf87330 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 19 Jan 2018 01:15:20 +0900 Subject: [PATCH 0996/1398] kprobes/x86: Disable optimizing on the function jumps to indirect thunk commit c86a32c09f8ced67971a2310e3b0dda4d1749007 upstream. Since indirect jump instructions will be replaced by jump to __x86_indirect_thunk_*, those jmp instruction must be treated as an indirect jump. Since optprobe prohibits to optimize probes in the function which uses an indirect jump, it also needs to find out the function which jump to __x86_indirect_thunk_* and disable optimization. Add a check that the jump target address is between the __indirect_thunk_start/end when optimizing kprobe. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Andi Kleen Cc: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/151629212062.10241.6991266100233002273.stgit@devbox Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/opt.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 4d74f7386a61..dc20da1c78f0 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "common.h" @@ -192,7 +193,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src) } /* Check whether insn is indirect jump */ -static int insn_is_indirect_jump(struct insn *insn) +static int __insn_is_indirect_jump(struct insn *insn) { return ((insn->opcode.bytes[0] == 0xff && (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ @@ -226,6 +227,26 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) return (start <= target && target <= start + len); } +static int insn_is_indirect_jump(struct insn *insn) +{ + int ret = __insn_is_indirect_jump(insn); + +#ifdef CONFIG_RETPOLINE + /* + * Jump to x86_indirect_thunk_* is treated as an indirect jump. + * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with + * older gcc may use indirect jump. So we add this check instead of + * replace indirect-jump check. + */ + if (!ret) + ret = insn_jump_into_range(insn, + (unsigned long)__indirect_thunk_start, + (unsigned long)__indirect_thunk_end - + (unsigned long)__indirect_thunk_start); +#endif + return ret; +} + /* Decode whole function to ensure any instructions don't jump into target */ static int can_optimize(unsigned long paddr) { -- GitLab From b9f8b5935394362e68147cb2792c5f74ff3fc4ca Mon Sep 17 00:00:00 2001 From: "zhenwei.pi" Date: Thu, 18 Jan 2018 09:04:52 +0800 Subject: [PATCH 0997/1398] x86/pti: Document fix wrong index commit 98f0fceec7f84d80bc053e49e596088573086421 upstream. In section <2. Runtime Cost>, fix wrong index. Signed-off-by: zhenwei.pi Signed-off-by: Thomas Gleixner Cc: dave.hansen@linux.intel.com Link: https://lkml.kernel.org/r/1516237492-27739-1-git-send-email-zhenwei.pi@youruncloud.com Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/pti.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt index d11eff61fc9a..5cd58439ad2d 100644 --- a/Documentation/x86/pti.txt +++ b/Documentation/x86/pti.txt @@ -78,7 +78,7 @@ this protection comes at a cost: non-PTI SYSCALL entry code, so requires mapping fewer things into the userspace page tables. The downside is that stacks must be switched at entry time. - d. Global pages are disabled for all kernel structures not + c. Global pages are disabled for all kernel structures not mapped into both kernel and userspace page tables. This feature of the MMU allows different processes to share TLB entries mapping the kernel. Losing the feature means more -- GitLab From 06d7342d8498b7426ab50368436417cdf6ed1923 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 17 Jan 2018 14:53:28 -0800 Subject: [PATCH 0998/1398] x86/retpoline: Optimize inline assembler for vmexit_fill_RSB commit 3f7d875566d8e79c5e0b2c9a413e91b2c29e0854 upstream. The generated assembler for the C fill RSB inline asm operations has several issues: - The C code sets up the loop register, which is then immediately overwritten in __FILL_RETURN_BUFFER with the same value again. - The C code also passes in the iteration count in another register, which is not used at all. Remove these two unnecessary operations. Just rely on the single constant passed to the macro for the iterations. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: dave.hansen@intel.com Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: arjan@linux.intel.com Link: https://lkml.kernel.org/r/20180117225328.15414-1-andi@firstfloor.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 19ba5ad19c65..4ad41087ce0e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -206,16 +206,17 @@ extern char __indirect_thunk_end[]; static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - unsigned long loops = RSB_CLEAR_LOOPS / 2; + unsigned long loops; asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE ALTERNATIVE("jmp 910f", __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), X86_FEATURE_RETPOLINE) "910:" - : "=&r" (loops), ASM_CALL_CONSTRAINT - : "r" (loops) : "memory" ); + : "=r" (loops), ASM_CALL_CONSTRAINT + : : "memory" ); #endif } + #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ -- GitLab From 60249fe9050bad1bd814dfb1c4ed5fc9155ef5cb Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 29 Oct 2017 16:27:21 +0100 Subject: [PATCH 0999/1398] MIPS: AR7: ensure the port type's FCR value is used commit 0a5191efe06b5103909206e4fbcff81d30283f8e upstream. Since commit aef9a7bd9b67 ("serial/uart/8250: Add tunable RX interrupt trigger I/F of FIFO buffers"), the port's default FCR value isn't used in serial8250_do_set_termios anymore, but copied over once in serial8250_config_port and then modified as needed. Unfortunately, serial8250_config_port will never be called if the port is shared between kernel and userspace, and the port's flag doesn't have UPF_BOOT_AUTOCONF, which would trigger a serial8250_config_port as well. This causes garbled output from userspace: [ 5.220000] random: procd urandom read with 49 bits of entropy available ers [kee Fix this by forcing it to be configured on boot, resulting in the expected output: [ 5.250000] random: procd urandom read with 50 bits of entropy available Press the [f] key and hit [enter] to enter failsafe mode Press the [1], [2], [3] or [4] key and hit [enter] to select the debug level Fixes: aef9a7bd9b67 ("serial/uart/8250: Add tunable RX interrupt trigger I/F of FIFO buffers") Signed-off-by: Jonas Gorski Cc: Greg Kroah-Hartman Cc: Yoshihiro YUNOMAE Cc: Florian Fainelli Cc: Nicolas Schichan Cc: linux-mips@linux-mips.org Cc: linux-serial@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17544/ Signed-off-by: Ralf Baechle Cc: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/ar7/platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index 3446b6fb3acb..9da4e2292fc7 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -576,7 +576,7 @@ static int __init ar7_register_uarts(void) uart_port.type = PORT_AR7; uart_port.uartclk = clk_get_rate(bus_clk) / 2; uart_port.iotype = UPIO_MEM32; - uart_port.flags = UPF_FIXED_TYPE; + uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF; uart_port.regshift = 2; uart_port.line = 0; -- GitLab From 79584a4221253611a4d033087f373b046350df9f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 23 Jan 2018 19:57:10 +0100 Subject: [PATCH 1000/1398] Linux 4.9.78 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index aba553531d6a..8a6f158a1176 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 77 +SUBLEVEL = 78 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 29eadc4b5c1301222b8c2aaa03b199d8e6e7b738 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 23 Jan 2018 14:34:38 -0800 Subject: [PATCH 1001/1398] ANDROID: xattr: Pass EOPNOTSUPP to permission2 The permission call for xattr operations happens regardless of whether or not the xattr functions are implemented. The xattr functions currently don't have support for permission2. Passing EOPNOTSUPP as the mount point in xattr_permission allows us to return EOPNOTSUPP early in permission2, if the filesystem supports it. Change-Id: I9d07e4cd633cf40af60450ffbff7ac5c1b4e8c2c Signed-off-by: Daniel Rosenberg Bug: 35848445 --- fs/sdcardfs/inode.c | 2 ++ fs/xattr.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index e3a023aeb1e6..87392331646a 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -629,6 +629,8 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma struct inode tmp; struct sdcardfs_inode_data *top = top_data_get(SDCARDFS_I(inode)); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); if (!top) return -EINVAL; diff --git a/fs/xattr.c b/fs/xattr.c index 932b9061a3a2..1b00babeaa67 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -130,7 +130,7 @@ xattr_permission(struct inode *inode, const char *name, int mask) return -EPERM; } - return inode_permission(inode, mask); + return inode_permission2(ERR_PTR(-EOPNOTSUPP), inode, mask); } int -- GitLab From 97be16f631ce5430fcf8b3047ad14bfc34cbb540 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 8 Aug 2017 17:08:55 +0200 Subject: [PATCH 1002/1398] BACKPORT: thermal: hisilicon: constify thermal_zone_of_device_ops structures The thermal_zone_of_device_ops structure is only passed as the fourth argument to devm_thermal_zone_of_sensor_register, which is declared as const. Thus the thermal_zone_of_device_ops structure itself can be const. Done with the help of Coccinelle. Change-Id: Ib8f3bddb34ce231375c18ed907f0dc77548b6737 Signed-off-by: Julia Lawall Signed-off-by: Zhang Rui (cherry picked from commit 3fe156f1dd9909fc63d200b4d432884161ad8385) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index c5285ed34fdd..6d8906d65476 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -227,7 +227,7 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp) return 0; } -static struct thermal_zone_of_device_ops hisi_of_thermal_ops = { +static const struct thermal_zone_of_device_ops hisi_of_thermal_ops = { .get_temp = hisi_thermal_get_temp, }; -- GitLab From 077d1d3db0cbd8fc0dbde346688080ee25dc03e2 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:44 +0200 Subject: [PATCH 1003/1398] BACKPORT: thermal/drivers/hisi: Remove the multiple sensors support By essence, the tsensor does not really support multiple sensor at the same time. It allows to set a sensor and use it to get the temperature, another sensor could be switched but with a delay of 3-5ms. It is difficult to read simultaneously several sensors without a big delay. Today, just one sensor is used, it is not necessary to deal with multiple sensors in the code. Remove them and if it is needed in the future add them on top of a code which will be clean up in the meantime. Change-Id: Ia1eaa1c3291f28871f1e22b9467d4d48f55eb4fe Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Acked-by: Wangtao (Kevin, Kirin) Signed-off-by: Eduardo Valentin (cherry picked from commit ff4ec2997df8fe7cc40513dbe5f86d9f88fb6be7) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 75 +++++++++------------------------- 1 file changed, 19 insertions(+), 56 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 6d8906d65476..aecbfe9fc8f2 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -40,6 +40,7 @@ #define HISI_TEMP_STEP (784) #define HISI_MAX_SENSORS 4 +#define HISI_DEFAULT_SENSOR 2 struct hisi_thermal_sensor { struct hisi_thermal_data *thermal; @@ -54,9 +55,8 @@ struct hisi_thermal_data { struct mutex thermal_lock; /* protects register data */ struct platform_device *pdev; struct clk *clk; - struct hisi_thermal_sensor sensors[HISI_MAX_SENSORS]; - - int irq, irq_bind_sensor; + struct hisi_thermal_sensor sensors; + int irq; bool irq_enabled; void __iomem *regs; @@ -133,7 +133,7 @@ static void hisi_thermal_enable_bind_irq_sensor mutex_lock(&data->thermal_lock); - sensor = &data->sensors[data->irq_bind_sensor]; + sensor = &data->sensors; /* setting the hdak time */ writel(0x0, data->regs + TEMP0_CFG); @@ -181,31 +181,8 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp) struct hisi_thermal_sensor *sensor = _sensor; struct hisi_thermal_data *data = sensor->thermal; - int sensor_id = -1, i; - long max_temp = 0; - *temp = hisi_thermal_get_sensor_temp(data, sensor); - sensor->sensor_temp = *temp; - - for (i = 0; i < HISI_MAX_SENSORS; i++) { - if (!data->sensors[i].tzd) - continue; - - if (data->sensors[i].sensor_temp >= max_temp) { - max_temp = data->sensors[i].sensor_temp; - sensor_id = i; - } - } - - /* If no sensor has been enabled, then skip to enable irq */ - if (sensor_id == -1) - return 0; - - mutex_lock(&data->thermal_lock); - data->irq_bind_sensor = sensor_id; - mutex_unlock(&data->thermal_lock); - dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%d, thres=%d\n", sensor->id, data->irq_enabled, *temp, sensor->thres_temp); /* @@ -218,7 +195,7 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp) return 0; } - if (max_temp < sensor->thres_temp) { + if (*temp < sensor->thres_temp) { data->irq_enabled = true; hisi_thermal_enable_bind_irq_sensor(data); enable_irq(data->irq); @@ -245,22 +222,16 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) { struct hisi_thermal_data *data = dev; struct hisi_thermal_sensor *sensor; - int i; mutex_lock(&data->thermal_lock); - sensor = &data->sensors[data->irq_bind_sensor]; + sensor = &data->sensors; dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", sensor->thres_temp); mutex_unlock(&data->thermal_lock); - for (i = 0; i < HISI_MAX_SENSORS; i++) { - if (!data->sensors[i].tzd) - continue; - - thermal_zone_device_update(data->sensors[i].tzd, - THERMAL_EVENT_UNSPECIFIED); - } + thermal_zone_device_update(data->sensors.tzd, + THERMAL_EVENT_UNSPECIFIED); return IRQ_HANDLED; } @@ -317,7 +288,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) { struct hisi_thermal_data *data; struct resource *res; - int i; int ret; data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); @@ -359,14 +329,13 @@ static int hisi_thermal_probe(struct platform_device *pdev) hisi_thermal_enable_bind_irq_sensor(data); data->irq_enabled = true; - for (i = 0; i < HISI_MAX_SENSORS; ++i) { - ret = hisi_thermal_register_sensor(pdev, data, - &data->sensors[i], i); - if (ret) - dev_err(&pdev->dev, - "failed to register thermal sensor: %d\n", ret); - else - hisi_thermal_toggle_sensor(&data->sensors[i], true); + ret = hisi_thermal_register_sensor(pdev, data, + &data->sensors, + HISI_DEFAULT_SENSOR); + if (ret) { + dev_err(&pdev->dev, "failed to register thermal sensor: %d\n", + ret); + return ret; } ret = devm_request_threaded_irq(&pdev->dev, data->irq, @@ -378,6 +347,8 @@ static int hisi_thermal_probe(struct platform_device *pdev) return ret; } + hisi_thermal_toggle_sensor(&data->sensors, true); + enable_irq(data->irq); return 0; @@ -386,17 +357,9 @@ static int hisi_thermal_probe(struct platform_device *pdev) static int hisi_thermal_remove(struct platform_device *pdev) { struct hisi_thermal_data *data = platform_get_drvdata(pdev); - int i; - - for (i = 0; i < HISI_MAX_SENSORS; i++) { - struct hisi_thermal_sensor *sensor = &data->sensors[i]; - - if (!sensor->tzd) - continue; - - hisi_thermal_toggle_sensor(sensor, false); - } + struct hisi_thermal_sensor *sensor = &data->sensors; + hisi_thermal_toggle_sensor(sensor, false); hisi_thermal_disable_sensor(data); clk_disable_unprepare(data->clk); -- GitLab From be9b8f4a7009ea8f2f372752cbfd2a49330fd04e Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:48 +0200 Subject: [PATCH 1004/1398] BACKPORT: thermal/drivers/hisi: Remove pointless lock The threaded interrupt inspect the sensors structure to look in the temp threshold field, but this field is read-only in all the code, except in the probe function before the threaded interrupt is set. In other words there is not race window in the threaded interrupt when reading the field value. Change-Id: I457c3b546a43bcf72a3f3d8b5cb1a27172f6a834 Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Signed-off-by: Eduardo Valentin (cherry picked from commit 2d4fa7b4c6f8080ced2e8237c9f46fb1fc110d64) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index aecbfe9fc8f2..0c2966c285b1 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -221,14 +221,10 @@ static irqreturn_t hisi_thermal_alarm_irq(int irq, void *dev) static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) { struct hisi_thermal_data *data = dev; - struct hisi_thermal_sensor *sensor; - - mutex_lock(&data->thermal_lock); - sensor = &data->sensors; + struct hisi_thermal_sensor *sensor = &data->sensors; dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", sensor->thres_temp); - mutex_unlock(&data->thermal_lock); thermal_zone_device_update(data->sensors.tzd, THERMAL_EVENT_UNSPECIFIED); -- GitLab From e37256ce150d331b5710f399caa343579477e715 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:49 +0200 Subject: [PATCH 1005/1398] BACKPORT: thermal/drivers/hisi: Encapsulate register writes into helpers Hopefully, the function name can help to clarify the semantic of the operations when writing in the register. Change-Id: I2f5c5f20c1f52763b94e4998f1c45d118b35c53e Signed-off-by: Daniel Lezcano Signed-off-by: Eduardo Valentin (cherry picked from commit 1e11b014271ceccb5ea04ae58f4829ac8209a86d) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 92 ++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 22 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 0c2966c285b1..a803043f37ae 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -26,6 +26,7 @@ #include "thermal_core.h" +#define TEMP0_LAG (0x0) #define TEMP0_TH (0x4) #define TEMP0_RST_TH (0x8) #define TEMP0_CFG (0xC) @@ -96,6 +97,56 @@ static inline long hisi_thermal_round_temp(int temp) hisi_thermal_temp_to_step(temp)); } +static inline void hisi_thermal_set_lag(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_LAG); +} + +static inline void hisi_thermal_alarm_clear(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_INT_CLR); +} + +static inline void hisi_thermal_alarm_enable(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_INT_EN); +} + +static inline void hisi_thermal_alarm_set(void __iomem *addr, int temp) +{ + writel(hisi_thermal_temp_to_step(temp) | 0x0FFFFFF00, addr + TEMP0_TH); +} + +static inline void hisi_thermal_reset_set(void __iomem *addr, int temp) +{ + writel(hisi_thermal_temp_to_step(temp), addr + TEMP0_RST_TH); +} + +static inline void hisi_thermal_reset_enable(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_RST_MSK); +} + +static inline void hisi_thermal_enable(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_EN); +} + +static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor) +{ + writel((sensor << 12), addr + TEMP0_CFG); +} + +static inline int hisi_thermal_get_temperature(void __iomem *addr) +{ + return hisi_thermal_step_to_temp(readl(addr + TEMP0_VALUE)); +} + +static inline void hisi_thermal_hdak_set(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_CFG); +} + static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, struct hisi_thermal_sensor *sensor) { @@ -104,22 +155,21 @@ static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, mutex_lock(&data->thermal_lock); /* disable interrupt */ - writel(0x0, data->regs + TEMP0_INT_EN); - writel(0x1, data->regs + TEMP0_INT_CLR); + hisi_thermal_alarm_enable(data->regs, 0); + hisi_thermal_alarm_clear(data->regs, 1); /* disable module firstly */ - writel(0x0, data->regs + TEMP0_EN); + hisi_thermal_enable(data->regs, 0); /* select sensor id */ - writel((sensor->id << 12), data->regs + TEMP0_CFG); + hisi_thermal_sensor_select(data->regs, sensor->id); /* enable module */ - writel(0x1, data->regs + TEMP0_EN); + hisi_thermal_enable(data->regs, 1); usleep_range(3000, 5000); - val = readl(data->regs + TEMP0_VALUE); - val = hisi_thermal_step_to_temp(val); + val = hisi_thermal_get_temperature(data->regs); mutex_unlock(&data->thermal_lock); @@ -136,28 +186,26 @@ static void hisi_thermal_enable_bind_irq_sensor sensor = &data->sensors; /* setting the hdak time */ - writel(0x0, data->regs + TEMP0_CFG); + hisi_thermal_hdak_set(data->regs, 0); /* disable module firstly */ - writel(0x0, data->regs + TEMP0_RST_MSK); - writel(0x0, data->regs + TEMP0_EN); + hisi_thermal_reset_enable(data->regs, 0); + hisi_thermal_enable(data->regs, 0); /* select sensor id */ - writel((sensor->id << 12), data->regs + TEMP0_CFG); + hisi_thermal_sensor_select(data->regs, sensor->id); /* enable for interrupt */ - writel(hisi_thermal_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00, - data->regs + TEMP0_TH); + hisi_thermal_alarm_set(data->regs, sensor->thres_temp); - writel(hisi_thermal_temp_to_step(HISI_TEMP_RESET), - data->regs + TEMP0_RST_TH); + hisi_thermal_reset_set(data->regs, HISI_TEMP_RESET); /* enable module */ - writel(0x1, data->regs + TEMP0_RST_MSK); - writel(0x1, data->regs + TEMP0_EN); + hisi_thermal_reset_enable(data->regs, 1); + hisi_thermal_enable(data->regs, 1); - writel(0x0, data->regs + TEMP0_INT_CLR); - writel(0x1, data->regs + TEMP0_INT_EN); + hisi_thermal_alarm_clear(data->regs, 0); + hisi_thermal_alarm_enable(data->regs, 1); usleep_range(3000, 5000); @@ -169,9 +217,9 @@ static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) mutex_lock(&data->thermal_lock); /* disable sensor module */ - writel(0x0, data->regs + TEMP0_INT_EN); - writel(0x0, data->regs + TEMP0_RST_MSK); - writel(0x0, data->regs + TEMP0_EN); + hisi_thermal_enable(data->regs, 0); + hisi_thermal_alarm_enable(data->regs, 0); + hisi_thermal_reset_enable(data->regs, 0); mutex_unlock(&data->thermal_lock); } -- GitLab From fc8f6e2239fd3e3fda5e3cc004964fca48f006e9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:50 +0200 Subject: [PATCH 1006/1398] BACKPORT: thermal/drivers/hisi: Fix configuration register setting The TEMP0_CFG configuration register contains different field to set up the temperature controller. However in the code, nothing prevents a setup to overwrite the previous one: eg. writing the hdak value overwrites the sensor selection, the sensor selection overwrites the hdak value. In order to prevent such thing, use a regmap-like mechanism by reading the value before, set the corresponding bits and write the result. Change-Id: Iecb3eb216337430842c1eb7eb5ff4cc35e8fcc43 Signed-off-by: Daniel Lezcano Signed-off-by: Eduardo Valentin (cherry picked from commit b424315a287c70eeb5f920f84c92492bd2f5658e) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index a803043f37ae..31b52ebf426f 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -30,6 +30,8 @@ #define TEMP0_TH (0x4) #define TEMP0_RST_TH (0x8) #define TEMP0_CFG (0xC) +#define TEMP0_CFG_SS_MSK (0xF000) +#define TEMP0_CFG_HDAK_MSK (0x30) #define TEMP0_EN (0x10) #define TEMP0_INT_EN (0x14) #define TEMP0_INT_CLR (0x18) @@ -132,19 +134,41 @@ static inline void hisi_thermal_enable(void __iomem *addr, int value) writel(value, addr + TEMP0_EN); } -static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor) +static inline int hisi_thermal_get_temperature(void __iomem *addr) { - writel((sensor << 12), addr + TEMP0_CFG); + return hisi_thermal_step_to_temp(readl(addr + TEMP0_VALUE)); } -static inline int hisi_thermal_get_temperature(void __iomem *addr) +/* + * Temperature configuration register - Sensor selection + * + * Bits [19:12] + * + * 0x0: local sensor (default) + * 0x1: remote sensor 1 (ACPU cluster 1) + * 0x2: remote sensor 2 (ACPU cluster 0) + * 0x3: remote sensor 3 (G3D) + */ +static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor) { - return hisi_thermal_step_to_temp(readl(addr + TEMP0_VALUE)); + writel((readl(addr + TEMP0_CFG) & ~TEMP0_CFG_SS_MSK) | + (sensor << 12), addr + TEMP0_CFG); } +/* + * Temperature configuration register - Hdak conversion polling interval + * + * Bits [5:4] + * + * 0x0 : 0.768 ms + * 0x1 : 6.144 ms + * 0x2 : 49.152 ms + * 0x3 : 393.216 ms + */ static inline void hisi_thermal_hdak_set(void __iomem *addr, int value) { - writel(value, addr + TEMP0_CFG); + writel((readl(addr + TEMP0_CFG) & ~TEMP0_CFG_HDAK_MSK) | + (value << 4), addr + TEMP0_CFG); } static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, -- GitLab From 3cdf15a158b9df077d54132bcc255626908838a7 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:51 +0200 Subject: [PATCH 1007/1398] BACKPORT: thermal/drivers/hisi: Remove costly sensor inspection The sensor is all setup, bind, resetted, acked, etc... every single second. That was the way to workaround a problem with the interrupt bouncing again and again. With the following changes, we fix all in one: - Do the setup, one time, at probe time - Add the IRQF_ONESHOT, ack the interrupt in the threaded handler - Remove the interrupt handler - Set the correct value for the LAG register - Remove all the irq_enabled stuff in the code as the interruption handling is fixed - Remove the 3ms delay - Reorder the initialization routine to be in the right order It ends up to a nicer code and more efficient, the 3-5ms delay is removed from the get_temp() path. Change-Id: I73a357caed3a34cd1f4de594c724577b4146683c Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Tested-by: Leo Yan Signed-off-by: Eduardo Valentin (cherry picked from commit 10d7e9a9181f4637640f388d334c6740c1b5d0e8) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 203 +++++++++++++++------------------ 1 file changed, 93 insertions(+), 110 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 31b52ebf426f..7c5d4647cb5e 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -41,6 +41,7 @@ #define HISI_TEMP_BASE (-60000) #define HISI_TEMP_RESET (100000) #define HISI_TEMP_STEP (784) +#define HISI_TEMP_LAG (3500) #define HISI_MAX_SENSORS 4 #define HISI_DEFAULT_SENSOR 2 @@ -60,8 +61,6 @@ struct hisi_thermal_data { struct clk *clk; struct hisi_thermal_sensor sensors; int irq; - bool irq_enabled; - void __iomem *regs; }; @@ -99,9 +98,40 @@ static inline long hisi_thermal_round_temp(int temp) hisi_thermal_temp_to_step(temp)); } +/* + * The lag register contains 5 bits encoding the temperature in steps. + * + * Each time the temperature crosses the threshold boundary, an + * interrupt is raised. It could be when the temperature is going + * above the threshold or below. However, if the temperature is + * fluctuating around this value due to the load, we can receive + * several interrupts which may not desired. + * + * We can setup a temperature representing the delta between the + * threshold and the current temperature when the temperature is + * decreasing. + * + * For instance: the lag register is 5°C, the threshold is 65°C, when + * the temperature reaches 65°C an interrupt is raised and when the + * temperature decrease to 65°C - 5°C another interrupt is raised. + * + * A very short lag can lead to an interrupt storm, a long lag + * increase the latency to react to the temperature changes. In our + * case, that is not really a problem as we are polling the + * temperature. + * + * [0:4] : lag register + * + * The temperature is coded in steps, cf. HISI_TEMP_STEP. + * + * Min : 0x00 : 0.0 °C + * Max : 0x1F : 24.3 °C + * + * The 'value' parameter is in milliCelsius. + */ static inline void hisi_thermal_set_lag(void __iomem *addr, int value) { - writel(value, addr + TEMP0_LAG); + writel((value / HISI_TEMP_STEP) & 0x1F, addr + TEMP0_LAG); } static inline void hisi_thermal_alarm_clear(void __iomem *addr, int value) @@ -171,71 +201,6 @@ static inline void hisi_thermal_hdak_set(void __iomem *addr, int value) (value << 4), addr + TEMP0_CFG); } -static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, - struct hisi_thermal_sensor *sensor) -{ - long val; - - mutex_lock(&data->thermal_lock); - - /* disable interrupt */ - hisi_thermal_alarm_enable(data->regs, 0); - hisi_thermal_alarm_clear(data->regs, 1); - - /* disable module firstly */ - hisi_thermal_enable(data->regs, 0); - - /* select sensor id */ - hisi_thermal_sensor_select(data->regs, sensor->id); - - /* enable module */ - hisi_thermal_enable(data->regs, 1); - - usleep_range(3000, 5000); - - val = hisi_thermal_get_temperature(data->regs); - - mutex_unlock(&data->thermal_lock); - - return val; -} - -static void hisi_thermal_enable_bind_irq_sensor - (struct hisi_thermal_data *data) -{ - struct hisi_thermal_sensor *sensor; - - mutex_lock(&data->thermal_lock); - - sensor = &data->sensors; - - /* setting the hdak time */ - hisi_thermal_hdak_set(data->regs, 0); - - /* disable module firstly */ - hisi_thermal_reset_enable(data->regs, 0); - hisi_thermal_enable(data->regs, 0); - - /* select sensor id */ - hisi_thermal_sensor_select(data->regs, sensor->id); - - /* enable for interrupt */ - hisi_thermal_alarm_set(data->regs, sensor->thres_temp); - - hisi_thermal_reset_set(data->regs, HISI_TEMP_RESET); - - /* enable module */ - hisi_thermal_reset_enable(data->regs, 1); - hisi_thermal_enable(data->regs, 1); - - hisi_thermal_alarm_clear(data->regs, 0); - hisi_thermal_alarm_enable(data->regs, 1); - - usleep_range(3000, 5000); - - mutex_unlock(&data->thermal_lock); -} - static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) { mutex_lock(&data->thermal_lock); @@ -253,25 +218,10 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp) struct hisi_thermal_sensor *sensor = _sensor; struct hisi_thermal_data *data = sensor->thermal; - *temp = hisi_thermal_get_sensor_temp(data, sensor); - - dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%d, thres=%d\n", - sensor->id, data->irq_enabled, *temp, sensor->thres_temp); - /* - * Bind irq to sensor for two cases: - * Reenable alarm IRQ if temperature below threshold; - * if irq has been enabled, always set it; - */ - if (data->irq_enabled) { - hisi_thermal_enable_bind_irq_sensor(data); - return 0; - } + *temp = hisi_thermal_get_temperature(data->regs); - if (*temp < sensor->thres_temp) { - data->irq_enabled = true; - hisi_thermal_enable_bind_irq_sensor(data); - enable_irq(data->irq); - } + dev_dbg(&data->pdev->dev, "id=%d, temp=%d, thres=%d\n", + sensor->id, *temp, sensor->thres_temp); return 0; } @@ -280,26 +230,27 @@ static const struct thermal_zone_of_device_ops hisi_of_thermal_ops = { .get_temp = hisi_thermal_get_temp, }; -static irqreturn_t hisi_thermal_alarm_irq(int irq, void *dev) +static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) { struct hisi_thermal_data *data = dev; + struct hisi_thermal_sensor *sensor = &data->sensors; + int temp; - disable_irq_nosync(irq); - data->irq_enabled = false; + hisi_thermal_alarm_clear(data->regs, 1); - return IRQ_WAKE_THREAD; -} + temp = hisi_thermal_get_temperature(data->regs); -static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) -{ - struct hisi_thermal_data *data = dev; - struct hisi_thermal_sensor *sensor = &data->sensors; + if (temp >= sensor->thres_temp) { + dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n", + temp, sensor->thres_temp); - dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", - sensor->thres_temp); + thermal_zone_device_update(data->sensors.tzd, + THERMAL_EVENT_UNSPECIFIED); - thermal_zone_device_update(data->sensors.tzd, - THERMAL_EVENT_UNSPECIFIED); + } else if (temp < sensor->thres_temp) { + dev_crit(&data->pdev->dev, "THERMAL ALARM stopped: %d < %d\n", + temp, sensor->thres_temp); + } return IRQ_HANDLED; } @@ -352,6 +303,40 @@ static void hisi_thermal_toggle_sensor(struct hisi_thermal_sensor *sensor, on ? THERMAL_DEVICE_ENABLED : THERMAL_DEVICE_DISABLED); } +static int hisi_thermal_setup(struct hisi_thermal_data *data) +{ + struct hisi_thermal_sensor *sensor; + + sensor = &data->sensors; + + /* disable module firstly */ + hisi_thermal_reset_enable(data->regs, 0); + hisi_thermal_enable(data->regs, 0); + + /* select sensor id */ + hisi_thermal_sensor_select(data->regs, sensor->id); + + /* setting the hdak time */ + hisi_thermal_hdak_set(data->regs, 0); + + /* setting lag value between current temp and the threshold */ + hisi_thermal_set_lag(data->regs, HISI_TEMP_LAG); + + /* enable for interrupt */ + hisi_thermal_alarm_set(data->regs, sensor->thres_temp); + + hisi_thermal_reset_set(data->regs, HISI_TEMP_RESET); + + /* enable module */ + hisi_thermal_reset_enable(data->regs, 1); + hisi_thermal_enable(data->regs, 1); + + hisi_thermal_alarm_clear(data->regs, 0); + hisi_thermal_alarm_enable(data->regs, 1); + + return 0; +} + static int hisi_thermal_probe(struct platform_device *pdev) { struct hisi_thermal_data *data; @@ -394,9 +379,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) return ret; } - hisi_thermal_enable_bind_irq_sensor(data); - data->irq_enabled = true; - ret = hisi_thermal_register_sensor(pdev, data, &data->sensors, HISI_DEFAULT_SENSOR); @@ -406,10 +388,15 @@ static int hisi_thermal_probe(struct platform_device *pdev) return ret; } - ret = devm_request_threaded_irq(&pdev->dev, data->irq, - hisi_thermal_alarm_irq, + ret = hisi_thermal_setup(data); + if (ret) { + dev_err(&pdev->dev, "Failed to setup the sensor: %d\n", ret); + return ret; + } + + ret = devm_request_threaded_irq(&pdev->dev, data->irq, NULL, hisi_thermal_alarm_irq_thread, - 0, "hisi_thermal", data); + IRQF_ONESHOT, "hisi_thermal", data); if (ret < 0) { dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); return ret; @@ -417,8 +404,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) hisi_thermal_toggle_sensor(&data->sensors, true); - enable_irq(data->irq); - return 0; } @@ -440,7 +425,6 @@ static int hisi_thermal_suspend(struct device *dev) struct hisi_thermal_data *data = dev_get_drvdata(dev); hisi_thermal_disable_sensor(data); - data->irq_enabled = false; clk_disable_unprepare(data->clk); @@ -456,8 +440,7 @@ static int hisi_thermal_resume(struct device *dev) if (ret) return ret; - data->irq_enabled = true; - hisi_thermal_enable_bind_irq_sensor(data); + hisi_thermal_setup(data); return 0; } -- GitLab From 72050106c375f43f833e3ab0f22ca25b107db7f2 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:52 +0200 Subject: [PATCH 1008/1398] BACKPORT: thermal/drivers/hisi: Rename and remove unused field Rename the 'sensors' field to 'sensor' as we describe only one sensor. Remove the 'sensor_temp' as it is no longer used. Change-Id: I1cbef35e4c0958b4882ae189e9c67d42f2cb627d Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Tested-by: Leo Yan Signed-off-by: Eduardo Valentin (cherry picked from commit 609f26dcbb340d7d6b30b1951c8b525611a97c20) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 7c5d4647cb5e..0b1f56a81b9f 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -49,8 +49,6 @@ struct hisi_thermal_sensor { struct hisi_thermal_data *thermal; struct thermal_zone_device *tzd; - - long sensor_temp; uint32_t id; uint32_t thres_temp; }; @@ -59,9 +57,9 @@ struct hisi_thermal_data { struct mutex thermal_lock; /* protects register data */ struct platform_device *pdev; struct clk *clk; - struct hisi_thermal_sensor sensors; - int irq; + struct hisi_thermal_sensor sensor; void __iomem *regs; + int irq; }; /* @@ -233,7 +231,7 @@ static const struct thermal_zone_of_device_ops hisi_of_thermal_ops = { static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) { struct hisi_thermal_data *data = dev; - struct hisi_thermal_sensor *sensor = &data->sensors; + struct hisi_thermal_sensor *sensor = &data->sensor; int temp; hisi_thermal_alarm_clear(data->regs, 1); @@ -244,7 +242,7 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n", temp, sensor->thres_temp); - thermal_zone_device_update(data->sensors.tzd, + thermal_zone_device_update(data->sensor.tzd, THERMAL_EVENT_UNSPECIFIED); } else if (temp < sensor->thres_temp) { @@ -307,7 +305,7 @@ static int hisi_thermal_setup(struct hisi_thermal_data *data) { struct hisi_thermal_sensor *sensor; - sensor = &data->sensors; + sensor = &data->sensor; /* disable module firstly */ hisi_thermal_reset_enable(data->regs, 0); @@ -380,7 +378,7 @@ static int hisi_thermal_probe(struct platform_device *pdev) } ret = hisi_thermal_register_sensor(pdev, data, - &data->sensors, + &data->sensor, HISI_DEFAULT_SENSOR); if (ret) { dev_err(&pdev->dev, "failed to register thermal sensor: %d\n", @@ -402,7 +400,7 @@ static int hisi_thermal_probe(struct platform_device *pdev) return ret; } - hisi_thermal_toggle_sensor(&data->sensors, true); + hisi_thermal_toggle_sensor(&data->sensor, true); return 0; } @@ -410,7 +408,7 @@ static int hisi_thermal_probe(struct platform_device *pdev) static int hisi_thermal_remove(struct platform_device *pdev) { struct hisi_thermal_data *data = platform_get_drvdata(pdev); - struct hisi_thermal_sensor *sensor = &data->sensors; + struct hisi_thermal_sensor *sensor = &data->sensor; hisi_thermal_toggle_sensor(sensor, false); hisi_thermal_disable_sensor(data); -- GitLab From aa3276c930da739a2b527fd6dcd498b3c6d22336 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:53 +0200 Subject: [PATCH 1009/1398] BACKPORT: thermal/drivers/hisi: Convert long to int There is no point to specify the temperature as long variable, the int is enough. Replace all long variables to int, so making the code consistent. Change-Id: Iabed1d48fa61963bb2ef7ec13b018970fe96cff6 Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Signed-off-by: Eduardo Valentin (cherry picked from commit bc02ef6d98a601a39f735cbbb67807fb752b5138) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 0b1f56a81b9f..30bc69921dca 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -85,12 +85,12 @@ static inline int hisi_thermal_step_to_temp(int step) return HISI_TEMP_BASE + (step * HISI_TEMP_STEP); } -static inline long hisi_thermal_temp_to_step(long temp) +static inline int hisi_thermal_temp_to_step(int temp) { return (temp - HISI_TEMP_BASE) / HISI_TEMP_STEP; } -static inline long hisi_thermal_round_temp(int temp) +static inline int hisi_thermal_round_temp(int temp) { return hisi_thermal_step_to_temp( hisi_thermal_temp_to_step(temp)); -- GitLab From 00c4d2c93929b421884221c587bc3d1cdd6ba9a9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:54 +0200 Subject: [PATCH 1010/1398] BACKPORT: thermal/drivers/hisi: Remove thermal data back pointer The presence of the thermal data pointer in the sensor structure has the unique purpose of accessing the thermal data in the interrupt handler. The sensor pointer is passed when registering the interrupt handler, replace the cookie by the thermal data pointer, so the back pointer is no longer needed. Change-Id: I7b107fcbef723cc74b24aa45420ce12454d5800a Signed-off-by: Daniel Lezcano Signed-off-by: Eduardo Valentin (cherry picked from commit 81d7cb7946f03d339244e1a822b7da246d8c79ff) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 30bc69921dca..e09b52313ceb 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -47,7 +47,6 @@ #define HISI_DEFAULT_SENSOR 2 struct hisi_thermal_sensor { - struct hisi_thermal_data *thermal; struct thermal_zone_device *tzd; uint32_t id; uint32_t thres_temp; @@ -211,10 +210,10 @@ static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) mutex_unlock(&data->thermal_lock); } -static int hisi_thermal_get_temp(void *_sensor, int *temp) +static int hisi_thermal_get_temp(void *__data, int *temp) { - struct hisi_thermal_sensor *sensor = _sensor; - struct hisi_thermal_data *data = sensor->thermal; + struct hisi_thermal_data *data = __data; + struct hisi_thermal_sensor *sensor = &data->sensor; *temp = hisi_thermal_get_temperature(data->regs); @@ -262,10 +261,10 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev, const struct thermal_trip *trip; sensor->id = index; - sensor->thermal = data; sensor->tzd = devm_thermal_zone_of_sensor_register(&pdev->dev, - sensor->id, sensor, &hisi_of_thermal_ops); + sensor->id, data, + &hisi_of_thermal_ops); if (IS_ERR(sensor->tzd)) { ret = PTR_ERR(sensor->tzd); sensor->tzd = NULL; -- GitLab From 980897084b7faf3a0bf5957809e0ff7477af8f6b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:55 +0200 Subject: [PATCH 1011/1398] BACKPORT: thermal/drivers/hisi: Remove mutex_lock in the code The mutex is used to protect against writes in the configuration register. That happens at probe time, with no possible race yet. Then when the module is unloaded and at suspend/resume. When the module is unloaded, it is an userspace operation, thus via a process. Suspending the system goes through the freezer to suspend all the tasks synchronously before continuing. So it is not possible to hit the suspend ops in this driver while we are unloading it. The resume is the same situation than the probe. In other words, even if there are several places where we write the configuration register, there is no situation where we can write it at the same time, so far as I can judge Change-Id: Ie71c809f485240dbf463b142fe0f3ab1d2fae2ae Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Tested-by: Leo Yan Signed-off-by: Eduardo Valentin (cherry picked from commit cc1ab38d2b2ad3290426c2234cf83e7a323554c7) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index e09b52313ceb..90e91fedc76d 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -53,7 +53,6 @@ struct hisi_thermal_sensor { }; struct hisi_thermal_data { - struct mutex thermal_lock; /* protects register data */ struct platform_device *pdev; struct clk *clk; struct hisi_thermal_sensor sensor; @@ -200,14 +199,10 @@ static inline void hisi_thermal_hdak_set(void __iomem *addr, int value) static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) { - mutex_lock(&data->thermal_lock); - /* disable sensor module */ hisi_thermal_enable(data->regs, 0); hisi_thermal_alarm_enable(data->regs, 0); hisi_thermal_reset_enable(data->regs, 0); - - mutex_unlock(&data->thermal_lock); } static int hisi_thermal_get_temp(void *__data, int *temp) @@ -344,7 +339,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) if (!data) return -ENOMEM; - mutex_init(&data->thermal_lock); data->pdev = pdev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -- GitLab From 90d49124b9fd52ceea44ddb48b8bb92ece0686d1 Mon Sep 17 00:00:00 2001 From: Kevin Wangtao Date: Thu, 19 Oct 2017 19:05:56 +0200 Subject: [PATCH 1012/1398] BACKPORT: thermal/drivers/hisi: Move the clk setup in the corresponding functions The sensor's clock is enabled and disabled outside of the probe and disable function. Moving the corresponding action in the hisi_thermal_setup() and hisi_thermal_disable_sensor(), factors out some lines of code and makes the code more symmetric. Change-Id: If40b040a6b54187065b1d99c40b203f96fd5840e Signed-off-by: Kevin Wangtao Tested-by: Daniel Lezcano # hikey6220 Signed-off-by: Daniel Lezcano Signed-off-by: Eduardo Valentin (cherry picked from commit 943c0f6abfb6010065b80943356975885a33233c) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 90e91fedc76d..373bb6d094a1 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -203,6 +203,8 @@ static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) hisi_thermal_enable(data->regs, 0); hisi_thermal_alarm_enable(data->regs, 0); hisi_thermal_reset_enable(data->regs, 0); + + clk_disable_unprepare(data->clk); } static int hisi_thermal_get_temp(void *__data, int *temp) @@ -297,9 +299,13 @@ static void hisi_thermal_toggle_sensor(struct hisi_thermal_sensor *sensor, static int hisi_thermal_setup(struct hisi_thermal_data *data) { - struct hisi_thermal_sensor *sensor; + struct hisi_thermal_sensor *sensor = &data->sensor; + int ret; - sensor = &data->sensor; + /* enable clock for tsensor */ + ret = clk_prepare_enable(data->clk); + if (ret) + return ret; /* disable module firstly */ hisi_thermal_reset_enable(data->regs, 0); @@ -363,13 +369,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) return ret; } - /* enable clock for thermal */ - ret = clk_prepare_enable(data->clk); - if (ret) { - dev_err(&pdev->dev, "failed to enable thermal clk: %d\n", ret); - return ret; - } - ret = hisi_thermal_register_sensor(pdev, data, &data->sensor, HISI_DEFAULT_SENSOR); @@ -405,7 +404,6 @@ static int hisi_thermal_remove(struct platform_device *pdev) hisi_thermal_toggle_sensor(sensor, false); hisi_thermal_disable_sensor(data); - clk_disable_unprepare(data->clk); return 0; } @@ -417,23 +415,14 @@ static int hisi_thermal_suspend(struct device *dev) hisi_thermal_disable_sensor(data); - clk_disable_unprepare(data->clk); - return 0; } static int hisi_thermal_resume(struct device *dev) { struct hisi_thermal_data *data = dev_get_drvdata(dev); - int ret; - ret = clk_prepare_enable(data->clk); - if (ret) - return ret; - - hisi_thermal_setup(data); - - return 0; + return hisi_thermal_setup(data); } #endif -- GitLab From 53b1c65b9926c62252507d4a5c3f2956a9813cf7 Mon Sep 17 00:00:00 2001 From: Kevin Wangtao Date: Thu, 19 Oct 2017 19:05:57 +0200 Subject: [PATCH 1013/1398] BACKPORT: thermal/drivers/hisi: Use round up step value Use round up division to ensure the programmed value of threshold and the lag are not less than what we set, and in order to keep the accuracy while using round up division, the step value should be a rounded up value. There is no need to use hisi_thermal_round_temp. Change-Id: I1b064d0a000fde3e64f42ac152b54ffdacb45d5e Signed-off-by: Kevin Wangtao Tested-by: Daniel Lezcano # hikey6220 Signed-off-by: Daniel Lezcano Signed-off-by: Eduardo Valentin (cherry picked from commit e42bbe1160c3b5fcab8d5afcc49d8647adff6c9c) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 373bb6d094a1..65f9e5c80741 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -40,7 +40,7 @@ #define HISI_TEMP_BASE (-60000) #define HISI_TEMP_RESET (100000) -#define HISI_TEMP_STEP (784) +#define HISI_TEMP_STEP (785) #define HISI_TEMP_LAG (3500) #define HISI_MAX_SENSORS 4 @@ -63,19 +63,19 @@ struct hisi_thermal_data { /* * The temperature computation on the tsensor is as follow: * Unit: millidegree Celsius - * Step: 255/200 (0.7843) + * Step: 200/255 (0.7843) * Temperature base: -60°C * - * The register is programmed in temperature steps, every step is 784 + * The register is programmed in temperature steps, every step is 785 * millidegree and begins at -60 000 m°C * * The temperature from the steps: * - * Temp = TempBase + (steps x 784) + * Temp = TempBase + (steps x 785) * * and the steps from the temperature: * - * steps = (Temp - TempBase) / 784 + * steps = (Temp - TempBase) / 785 * */ static inline int hisi_thermal_step_to_temp(int step) @@ -85,13 +85,7 @@ static inline int hisi_thermal_step_to_temp(int step) static inline int hisi_thermal_temp_to_step(int temp) { - return (temp - HISI_TEMP_BASE) / HISI_TEMP_STEP; -} - -static inline int hisi_thermal_round_temp(int temp) -{ - return hisi_thermal_step_to_temp( - hisi_thermal_temp_to_step(temp)); + return DIV_ROUND_UP(temp - HISI_TEMP_BASE, HISI_TEMP_STEP); } /* @@ -127,7 +121,7 @@ static inline int hisi_thermal_round_temp(int temp) */ static inline void hisi_thermal_set_lag(void __iomem *addr, int value) { - writel((value / HISI_TEMP_STEP) & 0x1F, addr + TEMP0_LAG); + writel(DIV_ROUND_UP(value, HISI_TEMP_STEP) & 0x1F, addr + TEMP0_LAG); } static inline void hisi_thermal_alarm_clear(void __iomem *addr, int value) @@ -274,7 +268,7 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev, for (i = 0; i < of_thermal_get_ntrips(sensor->tzd); i++) { if (trip[i].type == THERMAL_TRIP_PASSIVE) { - sensor->thres_temp = hisi_thermal_round_temp(trip[i].temperature); + sensor->thres_temp = trip[i].temperature; break; } } -- GitLab From 10af9dc92b61558db76e37a268e228aeafa66af1 Mon Sep 17 00:00:00 2001 From: Kevin Wangtao Date: Sun, 22 Oct 2017 10:54:32 +0200 Subject: [PATCH 1014/1398] BACKPORT: thermal/drivers/hisi: Put platform code together Reorganize the code for next patches by moving the functions upper in the file which will prevent a forward declaration. There is no functional change here. Change-Id: I3eb1665e91bf768a2536653a29699db1460b4643 Signed-off-by: Kevin Wangtao Tested-by: Daniel Lezcano # hikey6220 Signed-off-by: Daniel Lezcano Signed-off-by: Eduardo Valentin (cherry picked from commit a0678da82ca01771db57f53bfb7f0a5a7494900e) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 76 +++++++++++++++++----------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 65f9e5c80741..a0b7e26e4796 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -201,6 +201,44 @@ static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) clk_disable_unprepare(data->clk); } +static int hisi_thermal_setup(struct hisi_thermal_data *data) +{ + struct hisi_thermal_sensor *sensor = &data->sensor; + int ret; + + /* enable clock for tsensor */ + ret = clk_prepare_enable(data->clk); + if (ret) + return ret; + + /* disable module firstly */ + hisi_thermal_reset_enable(data->regs, 0); + hisi_thermal_enable(data->regs, 0); + + /* select sensor id */ + hisi_thermal_sensor_select(data->regs, sensor->id); + + /* setting the hdak time */ + hisi_thermal_hdak_set(data->regs, 0); + + /* setting lag value between current temp and the threshold */ + hisi_thermal_set_lag(data->regs, HISI_TEMP_LAG); + + /* enable for interrupt */ + hisi_thermal_alarm_set(data->regs, sensor->thres_temp); + + hisi_thermal_reset_set(data->regs, HISI_TEMP_RESET); + + /* enable module */ + hisi_thermal_reset_enable(data->regs, 1); + hisi_thermal_enable(data->regs, 1); + + hisi_thermal_alarm_clear(data->regs, 0); + hisi_thermal_alarm_enable(data->regs, 1); + + return 0; +} + static int hisi_thermal_get_temp(void *__data, int *temp) { struct hisi_thermal_data *data = __data; @@ -291,44 +329,6 @@ static void hisi_thermal_toggle_sensor(struct hisi_thermal_sensor *sensor, on ? THERMAL_DEVICE_ENABLED : THERMAL_DEVICE_DISABLED); } -static int hisi_thermal_setup(struct hisi_thermal_data *data) -{ - struct hisi_thermal_sensor *sensor = &data->sensor; - int ret; - - /* enable clock for tsensor */ - ret = clk_prepare_enable(data->clk); - if (ret) - return ret; - - /* disable module firstly */ - hisi_thermal_reset_enable(data->regs, 0); - hisi_thermal_enable(data->regs, 0); - - /* select sensor id */ - hisi_thermal_sensor_select(data->regs, sensor->id); - - /* setting the hdak time */ - hisi_thermal_hdak_set(data->regs, 0); - - /* setting lag value between current temp and the threshold */ - hisi_thermal_set_lag(data->regs, HISI_TEMP_LAG); - - /* enable for interrupt */ - hisi_thermal_alarm_set(data->regs, sensor->thres_temp); - - hisi_thermal_reset_set(data->regs, HISI_TEMP_RESET); - - /* enable module */ - hisi_thermal_reset_enable(data->regs, 1); - hisi_thermal_enable(data->regs, 1); - - hisi_thermal_alarm_clear(data->regs, 0); - hisi_thermal_alarm_enable(data->regs, 1); - - return 0; -} - static int hisi_thermal_probe(struct platform_device *pdev) { struct hisi_thermal_data *data; -- GitLab From 6fb8c5b069ce533e29285ceeddadc35792bde8a7 Mon Sep 17 00:00:00 2001 From: Kevin Wangtao Date: Sun, 22 Oct 2017 10:54:33 +0200 Subject: [PATCH 1015/1398] BACKPORT: thermal/drivers/hisi: Add platform prefix to function name As the next patches will provide support for the hikey3660's sensor, several functions with the same purpose but for different platforms will be introduced. In order to make a clear distinction between them, let's prefix the function names with the platform name. This patch has no functional changes, only name changes. Change-Id: I3364f730a80944441dc34094493146ee1990548b Signed-off-by: Kevin Wangtao Tested-by: Daniel Lezcano # hikey6220 Signed-off-by: Daniel Lezcano Signed-off-by: Eduardo Valentin (cherry picked from commit 5ed82b79e526f755bf0630a7c47a31ca2f4a7ad5) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 145 +++++++++++++++++---------------- 1 file changed, 73 insertions(+), 72 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index a0b7e26e4796..c2cb2807372d 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -26,25 +26,24 @@ #include "thermal_core.h" -#define TEMP0_LAG (0x0) -#define TEMP0_TH (0x4) -#define TEMP0_RST_TH (0x8) -#define TEMP0_CFG (0xC) -#define TEMP0_CFG_SS_MSK (0xF000) -#define TEMP0_CFG_HDAK_MSK (0x30) -#define TEMP0_EN (0x10) -#define TEMP0_INT_EN (0x14) -#define TEMP0_INT_CLR (0x18) -#define TEMP0_RST_MSK (0x1C) -#define TEMP0_VALUE (0x28) - -#define HISI_TEMP_BASE (-60000) -#define HISI_TEMP_RESET (100000) -#define HISI_TEMP_STEP (785) -#define HISI_TEMP_LAG (3500) - -#define HISI_MAX_SENSORS 4 -#define HISI_DEFAULT_SENSOR 2 +#define HI6220_TEMP0_LAG (0x0) +#define HI6220_TEMP0_TH (0x4) +#define HI6220_TEMP0_RST_TH (0x8) +#define HI6220_TEMP0_CFG (0xC) +#define HI6220_TEMP0_CFG_SS_MSK (0xF000) +#define HI6220_TEMP0_CFG_HDAK_MSK (0x30) +#define HI6220_TEMP0_EN (0x10) +#define HI6220_TEMP0_INT_EN (0x14) +#define HI6220_TEMP0_INT_CLR (0x18) +#define HI6220_TEMP0_RST_MSK (0x1C) +#define HI6220_TEMP0_VALUE (0x28) + +#define HI6220_TEMP_BASE (-60000) +#define HI6220_TEMP_RESET (100000) +#define HI6220_TEMP_STEP (785) +#define HI6220_TEMP_LAG (3500) + +#define HI6220_DEFAULT_SENSOR 2 struct hisi_thermal_sensor { struct thermal_zone_device *tzd; @@ -78,14 +77,14 @@ struct hisi_thermal_data { * steps = (Temp - TempBase) / 785 * */ -static inline int hisi_thermal_step_to_temp(int step) +static inline int hi6220_thermal_step_to_temp(int step) { - return HISI_TEMP_BASE + (step * HISI_TEMP_STEP); + return HI6220_TEMP_BASE + (step * HI6220_TEMP_STEP); } -static inline int hisi_thermal_temp_to_step(int temp) +static inline int hi6220_thermal_temp_to_step(int temp) { - return DIV_ROUND_UP(temp - HISI_TEMP_BASE, HISI_TEMP_STEP); + return DIV_ROUND_UP(temp - HI6220_TEMP_BASE, HI6220_TEMP_STEP); } /* @@ -112,51 +111,53 @@ static inline int hisi_thermal_temp_to_step(int temp) * * [0:4] : lag register * - * The temperature is coded in steps, cf. HISI_TEMP_STEP. + * The temperature is coded in steps, cf. HI6220_TEMP_STEP. * * Min : 0x00 : 0.0 °C * Max : 0x1F : 24.3 °C * * The 'value' parameter is in milliCelsius. */ -static inline void hisi_thermal_set_lag(void __iomem *addr, int value) +static inline void hi6220_thermal_set_lag(void __iomem *addr, int value) { - writel(DIV_ROUND_UP(value, HISI_TEMP_STEP) & 0x1F, addr + TEMP0_LAG); + writel(DIV_ROUND_UP(value, HI6220_TEMP_STEP) & 0x1F, + addr + HI6220_TEMP0_LAG); } -static inline void hisi_thermal_alarm_clear(void __iomem *addr, int value) +static inline void hi6220_thermal_alarm_clear(void __iomem *addr, int value) { - writel(value, addr + TEMP0_INT_CLR); + writel(value, addr + HI6220_TEMP0_INT_CLR); } -static inline void hisi_thermal_alarm_enable(void __iomem *addr, int value) +static inline void hi6220_thermal_alarm_enable(void __iomem *addr, int value) { - writel(value, addr + TEMP0_INT_EN); + writel(value, addr + HI6220_TEMP0_INT_EN); } -static inline void hisi_thermal_alarm_set(void __iomem *addr, int temp) +static inline void hi6220_thermal_alarm_set(void __iomem *addr, int temp) { - writel(hisi_thermal_temp_to_step(temp) | 0x0FFFFFF00, addr + TEMP0_TH); + writel(hi6220_thermal_temp_to_step(temp) | 0x0FFFFFF00, + addr + HI6220_TEMP0_TH); } -static inline void hisi_thermal_reset_set(void __iomem *addr, int temp) +static inline void hi6220_thermal_reset_set(void __iomem *addr, int temp) { - writel(hisi_thermal_temp_to_step(temp), addr + TEMP0_RST_TH); + writel(hi6220_thermal_temp_to_step(temp), addr + HI6220_TEMP0_RST_TH); } -static inline void hisi_thermal_reset_enable(void __iomem *addr, int value) +static inline void hi6220_thermal_reset_enable(void __iomem *addr, int value) { - writel(value, addr + TEMP0_RST_MSK); + writel(value, addr + HI6220_TEMP0_RST_MSK); } -static inline void hisi_thermal_enable(void __iomem *addr, int value) +static inline void hi6220_thermal_enable(void __iomem *addr, int value) { - writel(value, addr + TEMP0_EN); + writel(value, addr + HI6220_TEMP0_EN); } -static inline int hisi_thermal_get_temperature(void __iomem *addr) +static inline int hi6220_thermal_get_temperature(void __iomem *addr) { - return hisi_thermal_step_to_temp(readl(addr + TEMP0_VALUE)); + return hi6220_thermal_step_to_temp(readl(addr + HI6220_TEMP0_VALUE)); } /* @@ -169,10 +170,10 @@ static inline int hisi_thermal_get_temperature(void __iomem *addr) * 0x2: remote sensor 2 (ACPU cluster 0) * 0x3: remote sensor 3 (G3D) */ -static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor) +static inline void hi6220_thermal_sensor_select(void __iomem *addr, int sensor) { - writel((readl(addr + TEMP0_CFG) & ~TEMP0_CFG_SS_MSK) | - (sensor << 12), addr + TEMP0_CFG); + writel((readl(addr + HI6220_TEMP0_CFG) & ~HI6220_TEMP0_CFG_SS_MSK) | + (sensor << 12), addr + HI6220_TEMP0_CFG); } /* @@ -185,23 +186,23 @@ static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor) * 0x2 : 49.152 ms * 0x3 : 393.216 ms */ -static inline void hisi_thermal_hdak_set(void __iomem *addr, int value) +static inline void hi6220_thermal_hdak_set(void __iomem *addr, int value) { - writel((readl(addr + TEMP0_CFG) & ~TEMP0_CFG_HDAK_MSK) | - (value << 4), addr + TEMP0_CFG); + writel((readl(addr + HI6220_TEMP0_CFG) & ~HI6220_TEMP0_CFG_HDAK_MSK) | + (value << 4), addr + HI6220_TEMP0_CFG); } -static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) +static void hi6220_thermal_disable_sensor(struct hisi_thermal_data *data) { /* disable sensor module */ - hisi_thermal_enable(data->regs, 0); - hisi_thermal_alarm_enable(data->regs, 0); - hisi_thermal_reset_enable(data->regs, 0); + hi6220_thermal_enable(data->regs, 0); + hi6220_thermal_alarm_enable(data->regs, 0); + hi6220_thermal_reset_enable(data->regs, 0); clk_disable_unprepare(data->clk); } -static int hisi_thermal_setup(struct hisi_thermal_data *data) +static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data) { struct hisi_thermal_sensor *sensor = &data->sensor; int ret; @@ -212,29 +213,29 @@ static int hisi_thermal_setup(struct hisi_thermal_data *data) return ret; /* disable module firstly */ - hisi_thermal_reset_enable(data->regs, 0); - hisi_thermal_enable(data->regs, 0); + hi6220_thermal_reset_enable(data->regs, 0); + hi6220_thermal_enable(data->regs, 0); /* select sensor id */ - hisi_thermal_sensor_select(data->regs, sensor->id); + hi6220_thermal_sensor_select(data->regs, sensor->id); /* setting the hdak time */ - hisi_thermal_hdak_set(data->regs, 0); + hi6220_thermal_hdak_set(data->regs, 0); /* setting lag value between current temp and the threshold */ - hisi_thermal_set_lag(data->regs, HISI_TEMP_LAG); + hi6220_thermal_set_lag(data->regs, HI6220_TEMP_LAG); /* enable for interrupt */ - hisi_thermal_alarm_set(data->regs, sensor->thres_temp); + hi6220_thermal_alarm_set(data->regs, sensor->thres_temp); - hisi_thermal_reset_set(data->regs, HISI_TEMP_RESET); + hi6220_thermal_reset_set(data->regs, HI6220_TEMP_RESET); /* enable module */ - hisi_thermal_reset_enable(data->regs, 1); - hisi_thermal_enable(data->regs, 1); + hi6220_thermal_reset_enable(data->regs, 1); + hi6220_thermal_enable(data->regs, 1); - hisi_thermal_alarm_clear(data->regs, 0); - hisi_thermal_alarm_enable(data->regs, 1); + hi6220_thermal_alarm_clear(data->regs, 0); + hi6220_thermal_alarm_enable(data->regs, 1); return 0; } @@ -244,7 +245,7 @@ static int hisi_thermal_get_temp(void *__data, int *temp) struct hisi_thermal_data *data = __data; struct hisi_thermal_sensor *sensor = &data->sensor; - *temp = hisi_thermal_get_temperature(data->regs); + *temp = hi6220_thermal_get_temperature(data->regs); dev_dbg(&data->pdev->dev, "id=%d, temp=%d, thres=%d\n", sensor->id, *temp, sensor->thres_temp); @@ -260,11 +261,11 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) { struct hisi_thermal_data *data = dev; struct hisi_thermal_sensor *sensor = &data->sensor; - int temp; + int temp = 0; - hisi_thermal_alarm_clear(data->regs, 1); + hi6220_thermal_alarm_clear(data->regs, 1); - temp = hisi_thermal_get_temperature(data->regs); + hisi_thermal_get_temp(data, &temp); if (temp >= sensor->thres_temp) { dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n", @@ -273,7 +274,7 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) thermal_zone_device_update(data->sensor.tzd, THERMAL_EVENT_UNSPECIFIED); - } else if (temp < sensor->thres_temp) { + } else { dev_crit(&data->pdev->dev, "THERMAL ALARM stopped: %d < %d\n", temp, sensor->thres_temp); } @@ -365,14 +366,14 @@ static int hisi_thermal_probe(struct platform_device *pdev) ret = hisi_thermal_register_sensor(pdev, data, &data->sensor, - HISI_DEFAULT_SENSOR); + HI6220_DEFAULT_SENSOR); if (ret) { dev_err(&pdev->dev, "failed to register thermal sensor: %d\n", ret); return ret; } - ret = hisi_thermal_setup(data); + ret = hi6220_thermal_enable_sensor(data); if (ret) { dev_err(&pdev->dev, "Failed to setup the sensor: %d\n", ret); return ret; @@ -397,7 +398,7 @@ static int hisi_thermal_remove(struct platform_device *pdev) struct hisi_thermal_sensor *sensor = &data->sensor; hisi_thermal_toggle_sensor(sensor, false); - hisi_thermal_disable_sensor(data); + hi6220_thermal_disable_sensor(data); return 0; } @@ -407,7 +408,7 @@ static int hisi_thermal_suspend(struct device *dev) { struct hisi_thermal_data *data = dev_get_drvdata(dev); - hisi_thermal_disable_sensor(data); + hi6220_thermal_disable_sensor(data); return 0; } @@ -416,7 +417,7 @@ static int hisi_thermal_resume(struct device *dev) { struct hisi_thermal_data *data = dev_get_drvdata(dev); - return hisi_thermal_setup(data); + return hi6220_thermal_enable_sensor(data); } #endif -- GitLab From 107ec61ae38c2f213fa7a6d52615c7f1697ff76e Mon Sep 17 00:00:00 2001 From: Kevin Wangtao Date: Sun, 22 Oct 2017 10:54:34 +0200 Subject: [PATCH 1016/1398] BACKPORT: thermal/drivers/hisi: Prepare to add support for other hisi platforms For platform compatibility, add the tsensor ops to a thermal data structure. Each platform has its own probe function to register proper tsensor ops function to the pointer, platform related resource request are also implemented in the platform probe function. Change-Id: I5c9f006884aa3ebe5df7a07cde41980a62168e84 Signed-off-by: Kevin Wangtao Tested-by: Daniel Lezcano # hikey6220 Signed-off-by: Daniel Lezcano Signed-off-by: Eduardo Valentin (cherry picked from commit a160a465297362c515db28848b79eb876ceab9c0) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 132 ++++++++++++++++++++++----------- 1 file changed, 87 insertions(+), 45 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index c2cb2807372d..b862506a588b 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "thermal_core.h" @@ -30,7 +31,7 @@ #define HI6220_TEMP0_TH (0x4) #define HI6220_TEMP0_RST_TH (0x8) #define HI6220_TEMP0_CFG (0xC) -#define HI6220_TEMP0_CFG_SS_MSK (0xF000) +#define HI6220_TEMP0_CFG_SS_MSK (0xF000) #define HI6220_TEMP0_CFG_HDAK_MSK (0x30) #define HI6220_TEMP0_EN (0x10) #define HI6220_TEMP0_INT_EN (0x14) @@ -41,7 +42,7 @@ #define HI6220_TEMP_BASE (-60000) #define HI6220_TEMP_RESET (100000) #define HI6220_TEMP_STEP (785) -#define HI6220_TEMP_LAG (3500) +#define HI6220_TEMP_LAG (3500) #define HI6220_DEFAULT_SENSOR 2 @@ -52,6 +53,10 @@ struct hisi_thermal_sensor { }; struct hisi_thermal_data { + int (*get_temp)(struct hisi_thermal_data *data); + int (*enable_sensor)(struct hisi_thermal_data *data); + int (*disable_sensor)(struct hisi_thermal_data *data); + int (*irq_handler)(struct hisi_thermal_data *data); struct platform_device *pdev; struct clk *clk; struct hisi_thermal_sensor sensor; @@ -192,7 +197,18 @@ static inline void hi6220_thermal_hdak_set(void __iomem *addr, int value) (value << 4), addr + HI6220_TEMP0_CFG); } -static void hi6220_thermal_disable_sensor(struct hisi_thermal_data *data) +static int hi6220_thermal_irq_handler(struct hisi_thermal_data *data) +{ + hi6220_thermal_alarm_clear(data->regs, 1); + return 0; +} + +static int hi6220_thermal_get_temp(struct hisi_thermal_data *data) +{ + return hi6220_thermal_get_temperature(data->regs); +} + +static int hi6220_thermal_disable_sensor(struct hisi_thermal_data *data) { /* disable sensor module */ hi6220_thermal_enable(data->regs, 0); @@ -200,6 +216,8 @@ static void hi6220_thermal_disable_sensor(struct hisi_thermal_data *data) hi6220_thermal_reset_enable(data->regs, 0); clk_disable_unprepare(data->clk); + + return 0; } static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data) @@ -240,12 +258,48 @@ static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data) return 0; } +static int hi6220_thermal_probe(struct hisi_thermal_data *data) +{ + struct platform_device *pdev = data->pdev; + struct device *dev = &pdev->dev; + struct resource *res; + int ret; + + data->get_temp = hi6220_thermal_get_temp; + data->enable_sensor = hi6220_thermal_enable_sensor; + data->disable_sensor = hi6220_thermal_disable_sensor; + data->irq_handler = hi6220_thermal_irq_handler; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->regs = devm_ioremap_resource(dev, res); + if (IS_ERR(data->regs)) { + dev_err(dev, "failed to get io address\n"); + return PTR_ERR(data->regs); + } + + data->clk = devm_clk_get(dev, "thermal_clk"); + if (IS_ERR(data->clk)) { + ret = PTR_ERR(data->clk); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get thermal clk: %d\n", ret); + return ret; + } + + data->irq = platform_get_irq(pdev, 0); + if (data->irq < 0) + return data->irq; + + data->sensor.id = HI6220_DEFAULT_SENSOR; + + return 0; +} + static int hisi_thermal_get_temp(void *__data, int *temp) { struct hisi_thermal_data *data = __data; struct hisi_thermal_sensor *sensor = &data->sensor; - *temp = hi6220_thermal_get_temperature(data->regs); + *temp = data->get_temp(data); dev_dbg(&data->pdev->dev, "id=%d, temp=%d, thres=%d\n", sensor->id, *temp, sensor->thres_temp); @@ -263,7 +317,7 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) struct hisi_thermal_sensor *sensor = &data->sensor; int temp = 0; - hi6220_thermal_alarm_clear(data->regs, 1); + data->irq_handler(data); hisi_thermal_get_temp(data, &temp); @@ -284,14 +338,11 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) static int hisi_thermal_register_sensor(struct platform_device *pdev, struct hisi_thermal_data *data, - struct hisi_thermal_sensor *sensor, - int index) + struct hisi_thermal_sensor *sensor) { int ret, i; const struct thermal_trip *trip; - sensor->id = index; - sensor->tzd = devm_thermal_zone_of_sensor_register(&pdev->dev, sensor->id, data, &hisi_of_thermal_ops); @@ -316,7 +367,7 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev, } static const struct of_device_id of_hisi_thermal_match[] = { - { .compatible = "hisilicon,tsensor" }, + { .compatible = "hisilicon,tsensor", .data = hi6220_thermal_probe }, { /* end */ } }; MODULE_DEVICE_TABLE(of, of_hisi_thermal_match); @@ -333,58 +384,48 @@ static void hisi_thermal_toggle_sensor(struct hisi_thermal_sensor *sensor, static int hisi_thermal_probe(struct platform_device *pdev) { struct hisi_thermal_data *data; - struct resource *res; + int const (*platform_probe)(struct hisi_thermal_data *); + struct device *dev = &pdev->dev; int ret; - data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; data->pdev = pdev; + platform_set_drvdata(pdev, data); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - data->regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(data->regs)) { - dev_err(&pdev->dev, "failed to get io address\n"); - return PTR_ERR(data->regs); + platform_probe = of_device_get_match_data(dev); + if (!platform_probe) { + dev_err(dev, "failed to get probe func\n"); + return -EINVAL; } - data->irq = platform_get_irq(pdev, 0); - if (data->irq < 0) - return data->irq; - - platform_set_drvdata(pdev, data); - - data->clk = devm_clk_get(&pdev->dev, "thermal_clk"); - if (IS_ERR(data->clk)) { - ret = PTR_ERR(data->clk); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, - "failed to get thermal clk: %d\n", ret); + ret = platform_probe(data); + if (ret) return ret; - } ret = hisi_thermal_register_sensor(pdev, data, - &data->sensor, - HI6220_DEFAULT_SENSOR); + &data->sensor); if (ret) { - dev_err(&pdev->dev, "failed to register thermal sensor: %d\n", - ret); + dev_err(dev, "failed to register thermal sensor: %d\n", ret); return ret; } - ret = hi6220_thermal_enable_sensor(data); + ret = data->enable_sensor(data); if (ret) { - dev_err(&pdev->dev, "Failed to setup the sensor: %d\n", ret); + dev_err(dev, "Failed to setup the sensor: %d\n", ret); return ret; } - ret = devm_request_threaded_irq(&pdev->dev, data->irq, NULL, - hisi_thermal_alarm_irq_thread, - IRQF_ONESHOT, "hisi_thermal", data); - if (ret < 0) { - dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); - return ret; + if (data->irq) { + ret = devm_request_threaded_irq(dev, data->irq, NULL, + hisi_thermal_alarm_irq_thread, + IRQF_ONESHOT, "hisi_thermal", data); + if (ret < 0) { + dev_err(dev, "failed to request alarm irq: %d\n", ret); + return ret; + } } hisi_thermal_toggle_sensor(&data->sensor, true); @@ -398,7 +439,8 @@ static int hisi_thermal_remove(struct platform_device *pdev) struct hisi_thermal_sensor *sensor = &data->sensor; hisi_thermal_toggle_sensor(sensor, false); - hi6220_thermal_disable_sensor(data); + + data->disable_sensor(data); return 0; } @@ -408,7 +450,7 @@ static int hisi_thermal_suspend(struct device *dev) { struct hisi_thermal_data *data = dev_get_drvdata(dev); - hi6220_thermal_disable_sensor(data); + data->disable_sensor(data); return 0; } @@ -417,7 +459,7 @@ static int hisi_thermal_resume(struct device *dev) { struct hisi_thermal_data *data = dev_get_drvdata(dev); - return hi6220_thermal_enable_sensor(data); + return data->enable_sensor(data); } #endif -- GitLab From f518fe49bbaa28b46734b65905214ea857412595 Mon Sep 17 00:00:00 2001 From: Kevin Wangtao Date: Sun, 22 Oct 2017 10:54:35 +0200 Subject: [PATCH 1017/1398] BACKPORT: thermal/drivers/hisi: Add support for hi3660 SoC This patch adds the support for thermal sensor on the Hi3660 SoC. Hi3660 tsensor support alarm in alarm threshold, it also has a configurable hysteresis interval, interrupt will be triggered when temperature rise above the alarm threshold or fall below the hysteresis threshold. Change-Id: Ia745bd0777e40bd46eb206031aae52bad714a801 Signed-off-by: Kevin Wangtao Tested-by: Daniel Lezcano # hikey6220 Signed-off-by: Daniel Lezcano Signed-off-by: Eduardo Valentin (cherry picked from commit 2bb60a8ea721900c13b580689d647a6423e88104) Signed-off-by: Kevin Wangtao --- drivers/thermal/hisi_thermal.c | 145 ++++++++++++++++++++++++++++++++- 1 file changed, 144 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index b862506a588b..2d855a96cdd9 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -39,12 +39,24 @@ #define HI6220_TEMP0_RST_MSK (0x1C) #define HI6220_TEMP0_VALUE (0x28) +#define HI3660_OFFSET(chan) ((chan) * 0x40) +#define HI3660_TEMP(chan) (HI3660_OFFSET(chan) + 0x1C) +#define HI3660_TH(chan) (HI3660_OFFSET(chan) + 0x20) +#define HI3660_LAG(chan) (HI3660_OFFSET(chan) + 0x28) +#define HI3660_INT_EN(chan) (HI3660_OFFSET(chan) + 0x2C) +#define HI3660_INT_CLR(chan) (HI3660_OFFSET(chan) + 0x30) + #define HI6220_TEMP_BASE (-60000) #define HI6220_TEMP_RESET (100000) #define HI6220_TEMP_STEP (785) #define HI6220_TEMP_LAG (3500) +#define HI3660_TEMP_BASE (-63780) +#define HI3660_TEMP_STEP (205) +#define HI3660_TEMP_LAG (4000) + #define HI6220_DEFAULT_SENSOR 2 +#define HI3660_DEFAULT_SENSOR 1 struct hisi_thermal_sensor { struct thermal_zone_device *tzd; @@ -92,6 +104,24 @@ static inline int hi6220_thermal_temp_to_step(int temp) return DIV_ROUND_UP(temp - HI6220_TEMP_BASE, HI6220_TEMP_STEP); } +/* + * for Hi3660, + * Step: 189/922 (0.205) + * Temperature base: -63.780°C + * + * The register is programmed in temperature steps, every step is 205 + * millidegree and begins at -63 780 m°C + */ +static inline int hi3660_thermal_step_to_temp(int step) +{ + return HI3660_TEMP_BASE + step * HI3660_TEMP_STEP; +} + +static inline int hi3660_thermal_temp_to_step(int temp) +{ + return DIV_ROUND_UP(temp - HI3660_TEMP_BASE, HI3660_TEMP_STEP); +} + /* * The lag register contains 5 bits encoding the temperature in steps. * @@ -165,6 +195,45 @@ static inline int hi6220_thermal_get_temperature(void __iomem *addr) return hi6220_thermal_step_to_temp(readl(addr + HI6220_TEMP0_VALUE)); } +/* + * [0:6] lag register + * + * The temperature is coded in steps, cf. HI3660_TEMP_STEP. + * + * Min : 0x00 : 0.0 °C + * Max : 0x7F : 26.0 °C + * + */ +static inline void hi3660_thermal_set_lag(void __iomem *addr, + int id, int value) +{ + writel(DIV_ROUND_UP(value, HI3660_TEMP_STEP) & 0x7F, + addr + HI3660_LAG(id)); +} + +static inline void hi3660_thermal_alarm_clear(void __iomem *addr, + int id, int value) +{ + writel(value, addr + HI3660_INT_CLR(id)); +} + +static inline void hi3660_thermal_alarm_enable(void __iomem *addr, + int id, int value) +{ + writel(value, addr + HI3660_INT_EN(id)); +} + +static inline void hi3660_thermal_alarm_set(void __iomem *addr, + int id, int value) +{ + writel(value, addr + HI3660_TH(id)); +} + +static inline int hi3660_thermal_get_temperature(void __iomem *addr, int id) +{ + return hi3660_thermal_step_to_temp(readl(addr + HI3660_TEMP(id))); +} + /* * Temperature configuration register - Sensor selection * @@ -203,11 +272,22 @@ static int hi6220_thermal_irq_handler(struct hisi_thermal_data *data) return 0; } +static int hi3660_thermal_irq_handler(struct hisi_thermal_data *data) +{ + hi3660_thermal_alarm_clear(data->regs, data->sensor.id, 1); + return 0; +} + static int hi6220_thermal_get_temp(struct hisi_thermal_data *data) { return hi6220_thermal_get_temperature(data->regs); } +static int hi3660_thermal_get_temp(struct hisi_thermal_data *data) +{ + return hi3660_thermal_get_temperature(data->regs, data->sensor.id); +} + static int hi6220_thermal_disable_sensor(struct hisi_thermal_data *data) { /* disable sensor module */ @@ -220,6 +300,13 @@ static int hi6220_thermal_disable_sensor(struct hisi_thermal_data *data) return 0; } +static int hi3660_thermal_disable_sensor(struct hisi_thermal_data *data) +{ + /* disable sensor module */ + hi3660_thermal_alarm_enable(data->regs, data->sensor.id, 0); + return 0; +} + static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data) { struct hisi_thermal_sensor *sensor = &data->sensor; @@ -258,6 +345,28 @@ static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data) return 0; } +static int hi3660_thermal_enable_sensor(struct hisi_thermal_data *data) +{ + unsigned int value; + struct hisi_thermal_sensor *sensor = &data->sensor; + + /* disable interrupt */ + hi3660_thermal_alarm_enable(data->regs, sensor->id, 0); + + /* setting lag value between current temp and the threshold */ + hi3660_thermal_set_lag(data->regs, sensor->id, HI3660_TEMP_LAG); + + /* set interrupt threshold */ + value = hi3660_thermal_temp_to_step(sensor->thres_temp); + hi3660_thermal_alarm_set(data->regs, sensor->id, value); + + /* enable interrupt */ + hi3660_thermal_alarm_clear(data->regs, sensor->id, 1); + hi3660_thermal_alarm_enable(data->regs, sensor->id, 1); + + return 0; +} + static int hi6220_thermal_probe(struct hisi_thermal_data *data) { struct platform_device *pdev = data->pdev; @@ -294,6 +403,33 @@ static int hi6220_thermal_probe(struct hisi_thermal_data *data) return 0; } +static int hi3660_thermal_probe(struct hisi_thermal_data *data) +{ + struct platform_device *pdev = data->pdev; + struct device *dev = &pdev->dev; + struct resource *res; + + data->get_temp = hi3660_thermal_get_temp; + data->enable_sensor = hi3660_thermal_enable_sensor; + data->disable_sensor = hi3660_thermal_disable_sensor; + data->irq_handler = hi3660_thermal_irq_handler; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->regs = devm_ioremap_resource(dev, res); + if (IS_ERR(data->regs)) { + dev_err(dev, "failed to get io address\n"); + return PTR_ERR(data->regs); + } + + data->irq = platform_get_irq(pdev, 0); + if (data->irq < 0) + return data->irq; + + data->sensor.id = HI3660_DEFAULT_SENSOR; + + return 0; +} + static int hisi_thermal_get_temp(void *__data, int *temp) { struct hisi_thermal_data *data = __data; @@ -367,7 +503,14 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev, } static const struct of_device_id of_hisi_thermal_match[] = { - { .compatible = "hisilicon,tsensor", .data = hi6220_thermal_probe }, + { + .compatible = "hisilicon,tsensor", + .data = hi6220_thermal_probe + }, + { + .compatible = "hisilicon,hi3660-tsensor", + .data = hi3660_thermal_probe + }, { /* end */ } }; MODULE_DEVICE_TABLE(of, of_hisi_thermal_match); -- GitLab From 68d447c0a37bfc24e1d2bf8397b4edf0867de7c2 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Wed, 8 Mar 2017 19:03:44 +0100 Subject: [PATCH 1018/1398] UPSTREAM: eventpoll.h: add missing epoll event masks [resend due to me forgetting to cc: linux-api the first time around I posted these back on Feb 23] From: Greg Kroah-Hartman For some reason these values are not in the uapi header file, so any libc has to define it themselves. To prevent them from needing to do this, just have the kernel provide the correct values. Reported-by: Elliott Hughes Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 7e040726850a106587485c21bdacc0bfc8a0cbed) Change-Id: I7b1370668faeeb7597288b29dde5bc6d63c95be6 Signed-off-by: Greg Hackmann --- include/uapi/linux/eventpoll.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index 1c3154913a39..bc96b14dfb2c 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -26,6 +26,19 @@ #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +/* Epoll event masks */ +#define EPOLLIN 0x00000001 +#define EPOLLPRI 0x00000002 +#define EPOLLOUT 0x00000004 +#define EPOLLERR 0x00000008 +#define EPOLLHUP 0x00000010 +#define EPOLLRDNORM 0x00000040 +#define EPOLLRDBAND 0x00000080 +#define EPOLLWRNORM 0x00000100 +#define EPOLLWRBAND 0x00000200 +#define EPOLLMSG 0x00000400 +#define EPOLLRDHUP 0x00002000 + /* Set exclusive wakeup mode for the target file descriptor */ #define EPOLLEXCLUSIVE (1 << 28) -- GitLab From 7be19854549142665ac6de1bb50f084ffb38839d Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Thu, 11 Jan 2018 11:16:32 +0800 Subject: [PATCH 1019/1398] ANDROID: sched: EAS: check energy_aware() before calling select_energy_cpu_brute() in up-migrate path In up-migrate path, select_energy_cpu_brute() was called directly without checking energy_aware(). This will make select_energy_cpu_brute() always worked even disabling energy_aware() on the asymmetric cpu capacity system. Signed-off-by: Ke Wang --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 37ab2105e839..b0573f2d8646 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -10306,7 +10306,7 @@ void check_for_migration(struct rq *rq, struct task_struct *p) int active_balance; int cpu = task_cpu(p); - if (rq->misfit_task) { + if (energy_aware() && rq->misfit_task) { if (rq->curr->state != TASK_RUNNING || rq->curr->nr_cpus_allowed == 1) return; -- GitLab From c36c940cd4aab393c09d457e2414423ac92bd339 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 9 Dec 2016 10:24:05 -0800 Subject: [PATCH 1020/1398] x86/asm/32: Make sync_core() handle missing CPUID on all 32-bit kernels commit 1c52d859cb2d417e7216d3e56bb7fea88444cec9 upstream. We support various non-Intel CPUs that don't have the CPUID instruction, so the M486 test was wrong. For now, fix it with a big hammer: handle missing CPUID on all 32-bit CPUs. Reported-by: One Thousand Gnomes Signed-off-by: Andy Lutomirski Cc: Juergen Gross Cc: Peter Zijlstra Cc: Brian Gerst Cc: Matthew Whitehead Cc: Borislav Petkov Cc: Henrique de Moraes Holschuh Cc: Andrew Cooper Cc: Boris Ostrovsky Cc: xen-devel Link: http://lkml.kernel.org/r/685bd083a7c036f7769510b6846315b17d6ba71f.1481307769.git.luto@kernel.org Signed-off-by: Thomas Gleixner Cc: "Zhang, Ning A" Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index e40b19ca486e..353f038ec645 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -596,7 +596,7 @@ static inline void sync_core(void) { int tmp; -#ifdef CONFIG_M486 +#ifdef CONFIG_X86_32 /* * Do a CPUID if available, otherwise do a jump. The jump * can conveniently enough be the jump around CPUID. -- GitLab From fb39345e73141e4ae4529168965c37024cb3acb3 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Mon, 22 Jan 2018 15:44:51 -0500 Subject: [PATCH 1021/1398] orangefs: use list_for_each_entry_safe in purge_waiting_ops commit 0afc0decf247f65b7aba666a76a0a68adf4bc435 upstream. set_op_state_purged can delete the op. Signed-off-by: Martin Brandenburg Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/orangefs/waitqueue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index abcfa3fa9992..f61b00887481 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -28,10 +28,10 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s */ void purge_waiting_ops(void) { - struct orangefs_kernel_op_s *op; + struct orangefs_kernel_op_s *op, *tmp; spin_lock(&orangefs_request_list_lock); - list_for_each_entry(op, &orangefs_request_list, list) { + list_for_each_entry_safe(op, tmp, &orangefs_request_list, list) { gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2-client-core: purging op tag %llu %s\n", llu(op->tag), -- GitLab From 5c26ee198fcacda157918a9b211ba1f111e1ed9b Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Mon, 22 Jan 2018 15:44:52 -0500 Subject: [PATCH 1022/1398] orangefs: initialize op on loop restart in orangefs_devreq_read commit a0ec1ded22e6a6bc41981fae22406835b006a66e upstream. In orangefs_devreq_read, there is a loop which picks an op off the list of pending ops. If the loop fails to find an op, there is nothing to read, and it returns EAGAIN. If the op has been given up on, the loop is restarted via a goto. The bug is that the variable which the found op is written to is not reinitialized, so if there are no more eligible ops on the list, the code runs again on the already handled op. This is triggered by interrupting a process while the op is being copied to the client-core. It's a fairly small window, but it's there. Signed-off-by: Martin Brandenburg Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/orangefs/devorangefs-req.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index fe2cbeb90772..939aa066e1ca 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -161,7 +161,7 @@ static ssize_t orangefs_devreq_read(struct file *file, struct orangefs_kernel_op_s *op, *temp; __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION; static __s32 magic = ORANGEFS_DEVREQ_MAGIC; - struct orangefs_kernel_op_s *cur_op = NULL; + struct orangefs_kernel_op_s *cur_op; unsigned long ret; /* We do not support blocking IO. */ @@ -181,6 +181,7 @@ static ssize_t orangefs_devreq_read(struct file *file, } restart: + cur_op = NULL; /* Get next op (if any) from top of list. */ spin_lock(&orangefs_request_list_lock); list_for_each_entry_safe(op, temp, &orangefs_request_list, list) { -- GitLab From ce601a07bc504b4748f8e7a34896684f79514e51 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 7 Dec 2017 14:16:49 -0700 Subject: [PATCH 1023/1398] usbip: prevent vhci_hcd driver from leaking a socket pointer address commit 2f2d0088eb93db5c649d2a5e34a3800a8a935fc5 upstream. When a client has a USB device attached over IP, the vhci_hcd driver is locally leaking a socket pointer address via the /sys/devices/platform/vhci_hcd/status file (world-readable) and in debug output when "usbip --debug port" is run. Fix it to not leak. The socket pointer address is not used at the moment and it was made visible as a convenient way to find IP address from socket pointer address by looking up /proc/net/{tcp,tcp6}. As this opens a security hole, the fix replaces socket pointer address with sockfd. Reported-by: Secunia Research Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/usbip_common.h | 1 + drivers/usb/usbip/vhci_sysfs.c | 25 +++++++++++++++---------- tools/usb/usbip/libsrc/vhci_driver.c | 8 ++++---- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h index 9f490375ac92..f0b955f8504e 100644 --- a/drivers/usb/usbip/usbip_common.h +++ b/drivers/usb/usbip/usbip_common.h @@ -271,6 +271,7 @@ struct usbip_device { /* lock for status */ spinlock_t lock; + int sockfd; struct socket *tcp_socket; struct task_struct *tcp_rx; diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c index b96e5b189269..c287ccc78fde 100644 --- a/drivers/usb/usbip/vhci_sysfs.c +++ b/drivers/usb/usbip/vhci_sysfs.c @@ -49,13 +49,17 @@ static ssize_t status_show_vhci(int pdev_nr, char *out) /* * output example: - * port sta spd dev socket local_busid - * 0000 004 000 00000000 c5a7bb80 1-2.3 - * 0001 004 000 00000000 d8cee980 2-3.4 + * port sta spd dev sockfd local_busid + * 0000 004 000 00000000 000003 1-2.3 + * 0001 004 000 00000000 000004 2-3.4 * - * IP address can be retrieved from a socket pointer address by looking - * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a - * port number and its peer IP address. + * Output includes socket fd instead of socket pointer address to + * avoid leaking kernel memory address in: + * /sys/devices/platform/vhci_hcd.0/status and in debug output. + * The socket pointer address is not used at the moment and it was + * made visible as a convenient way to find IP address from socket + * pointer address by looking up /proc/net/{tcp,tcp6}. As this opens + * a security hole, the change is made to use sockfd instead. */ for (i = 0; i < VHCI_HC_PORTS; i++) { struct vhci_device *vdev = &vhci->vdev[i]; @@ -68,13 +72,13 @@ static ssize_t status_show_vhci(int pdev_nr, char *out) if (vdev->ud.status == VDEV_ST_USED) { out += sprintf(out, "%03u %08x ", vdev->speed, vdev->devid); - out += sprintf(out, "%16p %s", - vdev->ud.tcp_socket, + out += sprintf(out, "%06u %s", + vdev->ud.sockfd, dev_name(&vdev->udev->dev)); } else { out += sprintf(out, "000 00000000 "); - out += sprintf(out, "0000000000000000 0-0"); + out += sprintf(out, "000000 0-0"); } out += sprintf(out, "\n"); @@ -125,7 +129,7 @@ static ssize_t status_show(struct device *dev, int pdev_nr; out += sprintf(out, - "port sta spd dev socket local_busid\n"); + "port sta spd dev sockfd local_busid\n"); pdev_nr = status_name_to_id(attr->attr.name); if (pdev_nr < 0) @@ -324,6 +328,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr, vdev->devid = devid; vdev->speed = speed; + vdev->ud.sockfd = sockfd; vdev->ud.tcp_socket = socket; vdev->ud.status = VDEV_ST_NOTASSIGNED; diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c index ad9204773533..1274f326242c 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.c +++ b/tools/usb/usbip/libsrc/vhci_driver.c @@ -55,12 +55,12 @@ static int parse_status(const char *value) while (*c != '\0') { int port, status, speed, devid; - unsigned long socket; + int sockfd; char lbusid[SYSFS_BUS_ID_SIZE]; - ret = sscanf(c, "%d %d %d %x %lx %31s\n", + ret = sscanf(c, "%d %d %d %x %u %31s\n", &port, &status, &speed, - &devid, &socket, lbusid); + &devid, &sockfd, lbusid); if (ret < 5) { dbg("sscanf failed: %d", ret); @@ -69,7 +69,7 @@ static int parse_status(const char *value) dbg("port %d status %d speed %d devid %x", port, status, speed, devid); - dbg("socket %lx lbusid %s", socket, lbusid); + dbg("sockfd %u lbusid %s", sockfd, lbusid); /* if a device is connected, look at it */ -- GitLab From 853c39f239eb89cfd91ee03257c624aaac0575c6 Mon Sep 17 00:00:00 2001 From: Jonathan Dieter Date: Mon, 27 Feb 2017 10:31:04 +0200 Subject: [PATCH 1024/1398] usbip: Fix implicit fallthrough warning commit cfd6ed4537a9e938fa76facecd4b9cd65b6d1563 upstream. GCC 7 now warns when switch statements fall through implicitly, and with -Werror enabled in configure.ac, that makes these tools unbuildable. We fix this by notifying the compiler that this particular case statement is meant to fall through. Reviewed-by: Peter Senna Tschudin Signed-off-by: Jonathan Dieter Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/usb/usbip/src/usbip.c b/tools/usb/usbip/src/usbip.c index d7599d943529..73d8eee8130b 100644 --- a/tools/usb/usbip/src/usbip.c +++ b/tools/usb/usbip/src/usbip.c @@ -176,6 +176,8 @@ int main(int argc, char *argv[]) break; case '?': printf("usbip: invalid option\n"); + /* Terminate after printing error */ + /* FALLTHRU */ default: usbip_usage(); goto out; -- GitLab From 69e78e7214e39c084fcafdfb83b4d13d21b0008b Mon Sep 17 00:00:00 2001 From: Jonathan Dieter Date: Mon, 27 Feb 2017 10:31:03 +0200 Subject: [PATCH 1025/1398] usbip: Fix potential format overflow in userspace tools commit e5dfa3f902b9a642ae8c6997d57d7c41e384a90b upstream. The usbip userspace tools call sprintf()/snprintf() and don't check for the return value which can lead the paths to overflow, truncating the final file in the path. More urgently, GCC 7 now warns that these aren't checked with -Wformat-overflow, and with -Werror enabled in configure.ac, that makes these tools unbuildable. This patch fixes these problems by replacing sprintf() with snprintf() in one place and adding checks for the return value of snprintf(). Reviewed-by: Peter Senna Tschudin Signed-off-by: Jonathan Dieter Acked-by: Shuah Khan Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/libsrc/usbip_common.c | 9 ++++++- tools/usb/usbip/libsrc/usbip_host_common.c | 28 ++++++++++++++++++---- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/tools/usb/usbip/libsrc/usbip_common.c b/tools/usb/usbip/libsrc/usbip_common.c index ac73710473de..1517a232ab18 100644 --- a/tools/usb/usbip/libsrc/usbip_common.c +++ b/tools/usb/usbip/libsrc/usbip_common.c @@ -215,9 +215,16 @@ int read_usb_interface(struct usbip_usb_device *udev, int i, struct usbip_usb_interface *uinf) { char busid[SYSFS_BUS_ID_SIZE]; + int size; struct udev_device *sif; - sprintf(busid, "%s:%d.%d", udev->busid, udev->bConfigurationValue, i); + size = snprintf(busid, sizeof(busid), "%s:%d.%d", + udev->busid, udev->bConfigurationValue, i); + if (size < 0 || (unsigned int)size >= sizeof(busid)) { + err("busid length %i >= %lu or < 0", size, + (long unsigned)sizeof(busid)); + return -1; + } sif = udev_device_new_from_subsystem_sysname(udev_context, "usb", busid); if (!sif) { diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c index 9d415228883d..6ff7b601f854 100644 --- a/tools/usb/usbip/libsrc/usbip_host_common.c +++ b/tools/usb/usbip/libsrc/usbip_host_common.c @@ -40,13 +40,20 @@ struct udev *udev_context; static int32_t read_attr_usbip_status(struct usbip_usb_device *udev) { char status_attr_path[SYSFS_PATH_MAX]; + int size; int fd; int length; char status; int value = 0; - snprintf(status_attr_path, SYSFS_PATH_MAX, "%s/usbip_status", - udev->path); + size = snprintf(status_attr_path, sizeof(status_attr_path), + "%s/usbip_status", udev->path); + if (size < 0 || (unsigned int)size >= sizeof(status_attr_path)) { + err("usbip_status path length %i >= %lu or < 0", size, + (long unsigned)sizeof(status_attr_path)); + return -1; + } + fd = open(status_attr_path, O_RDONLY); if (fd < 0) { @@ -218,6 +225,7 @@ int usbip_export_device(struct usbip_exported_device *edev, int sockfd) { char attr_name[] = "usbip_sockfd"; char sockfd_attr_path[SYSFS_PATH_MAX]; + int size; char sockfd_buff[30]; int ret; @@ -237,10 +245,20 @@ int usbip_export_device(struct usbip_exported_device *edev, int sockfd) } /* only the first interface is true */ - snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s", - edev->udev.path, attr_name); + size = snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s", + edev->udev.path, attr_name); + if (size < 0 || (unsigned int)size >= sizeof(sockfd_attr_path)) { + err("exported device path length %i >= %lu or < 0", size, + (long unsigned)sizeof(sockfd_attr_path)); + return -1; + } - snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd); + size = snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd); + if (size < 0 || (unsigned int)size >= sizeof(sockfd_buff)) { + err("socket length %i >= %lu or < 0", size, + (long unsigned)sizeof(sockfd_buff)); + return -1; + } ret = write_sysfs_attribute(sockfd_attr_path, sockfd_buff, strlen(sockfd_buff)); -- GitLab From 40bf2c0c1c9ec9c3a17afac43fcd18b39759defd Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 16 Jan 2018 19:30:14 +0100 Subject: [PATCH 1026/1398] can: af_can: can_rcv(): replace WARN_ONCE by pr_warn_once commit 8cb68751c115d176ec851ca56ecfbb411568c9e8 upstream. If an invalid CAN frame is received, from a driver or from a tun interface, a Kernel warning is generated. This patch replaces the WARN_ONCE by a simple pr_warn_once, so that a kernel, bootet with panic_on_warn, does not panic. A printk seems to be more appropriate here. Reported-by: syzbot+4386709c0c1284dca827@syzkaller.appspotmail.com Suggested-by: Dmitry Vyukov Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Oliver Hartkopp Signed-off-by: Greg Kroah-Hartman --- net/can/af_can.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 5488e4a6ccd0..6ff7df79e006 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -722,13 +722,12 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, if (unlikely(!net_eq(dev_net(dev), &init_net))) goto drop; - if (WARN_ONCE(dev->type != ARPHRD_CAN || - skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN, - "PF_CAN: dropped non conform CAN skbuf: " - "dev type %d, len %d, datalen %d\n", - dev->type, skb->len, cfd->len)) + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU || + cfd->len > CAN_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n", + dev->type, skb->len, cfd->len); goto drop; + } can_receive(skb, dev); return NET_RX_SUCCESS; -- GitLab From 41e4aa17bc02430db764b748e990bdc392347f0d Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 16 Jan 2018 19:30:14 +0100 Subject: [PATCH 1027/1398] can: af_can: canfd_rcv(): replace WARN_ONCE by pr_warn_once commit d4689846881d160a4d12a514e991a740bcb5d65a upstream. If an invalid CANFD frame is received, from a driver or from a tun interface, a Kernel warning is generated. This patch replaces the WARN_ONCE by a simple pr_warn_once, so that a kernel, bootet with panic_on_warn, does not panic. A printk seems to be more appropriate here. Reported-by: syzbot+e3b775f40babeff6e68b@syzkaller.appspotmail.com Suggested-by: Dmitry Vyukov Acked-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Oliver Hartkopp Signed-off-by: Greg Kroah-Hartman --- net/can/af_can.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 6ff7df79e006..ac1552d8b4ad 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -745,13 +745,12 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, if (unlikely(!net_eq(dev_net(dev), &init_net))) goto drop; - if (WARN_ONCE(dev->type != ARPHRD_CAN || - skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN, - "PF_CAN: dropped non conform CAN FD skbuf: " - "dev type %d, len %d, datalen %d\n", - dev->type, skb->len, cfd->len)) + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU || + cfd->len > CANFD_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n", + dev->type, skb->len, cfd->len); goto drop; + } can_receive(skb, dev); return NET_RX_SUCCESS; -- GitLab From 45ee9d5e97a4c6814a166c4ddf5c7c3764084270 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Thu, 4 Jan 2018 18:24:33 +0000 Subject: [PATCH 1028/1398] KVM: arm/arm64: Check pagesize when allocating a hugepage at Stage 2 commit c507babf10ead4d5c8cca704539b170752a8ac84 upstream. KVM only supports PMD hugepages at stage 2 but doesn't actually check that the provided hugepage memory pagesize is PMD_SIZE before populating stage 2 entries. In cases where the backing hugepage size is smaller than PMD_SIZE (such as when using contiguous hugepages), KVM can end up creating stage 2 mappings that extend beyond the supplied memory. Fix this by checking for the pagesize of userspace vma before creating PMD hugepage at stage 2. Fixes: 66b3923a1a0f77a ("arm64: hugetlb: add support for PTE contiguous bit") Signed-off-by: Punit Agrawal Cc: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 2206e0e00934..2a35c1963f6d 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1284,7 +1284,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - if (is_vm_hugetlb_page(vma) && !logging_active) { + if (vma_kernel_pagesize(vma) && !logging_active) { hugetlb = true; gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; } else { -- GitLab From 318e17d09cbc40593ce42194ece3ecd1a464cc5e Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Tue, 25 Apr 2017 16:44:03 -0500 Subject: [PATCH 1029/1398] Prevent timer value 0 for MWAITX commit 88d879d29f9cc0de2d930b584285638cdada6625 upstream. Newer hardware has uncovered a bug in the software implementation of using MWAITX for the delay function. A value of 0 for the timer is meant to indicate that a timeout will not be used to exit MWAITX. On newer hardware this can result in MWAITX never returning, resulting in NMI soft lockup messages being printed. On older hardware, some of the other conditions under which MWAITX can exit masked this issue. The AMD APM does not currently document this and will be updated. Please refer to http://marc.info/?l=kvm&m=148950623231140 for information regarding NMI soft lockup messages on an AMD Ryzen 1800X. This has been root-caused as a 0 passed to MWAITX causing it to wait indefinitely. This change has the added benefit of avoiding the unnecessary setup of MONITORX/MWAITX when the delay value is zero. Signed-off-by: Janakarajan Natarajan Link: http://lkml.kernel.org/r/1493156643-29366-1-git-send-email-Janakarajan.Natarajan@amd.com Signed-off-by: Thomas Gleixner Signed-off-by: Davidlohr Bueso Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/delay.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 073d1f1a620b..9758524ee99f 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -93,6 +93,13 @@ static void delay_mwaitx(unsigned long __loops) { u64 start, end, delay, loops = __loops; + /* + * Timer value of 0 causes MWAITX to wait indefinitely, unless there + * is a store on the memory monitored by MONITORX. + */ + if (loops == 0) + return; + start = rdtsc_ordered(); for (;;) { -- GitLab From f5aaa5a2836d86e3b6559200422c153a4dfb6d66 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 28 Oct 2016 09:45:28 +0100 Subject: [PATCH 1030/1398] drivers: base: cacheinfo: fix x86 with CONFIG_OF enabled commit fac51482577d5e05bbb0efa8d602a3c2111098bf upstream. With CONFIG_OF enabled on x86, we get the following error on boot: " Failed to find cpu0 device node Unable to detect cache hierarchy from DT for CPU 0 " and the cacheinfo fails to get populated in the corresponding sysfs entries. This is because cache_setup_of_node looks for of_node for setting up the shared cpu_map without checking that it's already populated in the architecture specific callback. In order to indicate that the shared cpu_map is already populated, this patch introduces a boolean `cpu_map_populated` in struct cpu_cacheinfo that can be used by the generic code to skip cache_shared_cpu_map_setup. This patch also sets that boolean for x86. Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel_cacheinfo.c | 2 ++ drivers/base/cacheinfo.c | 3 +++ include/linux/cacheinfo.h | 1 + 3 files changed, 6 insertions(+) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index de6626c18e42..be6337156502 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -934,6 +934,8 @@ static int __populate_cache_leaves(unsigned int cpu) ci_leaf_init(this_leaf++, &id4_regs); __cache_cpumap_setup(cpu, idx, &id4_regs); } + this_cpu_ci->cpu_map_populated = true; + return 0; } diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index e9fd32e91668..ecde8957835a 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -106,6 +106,9 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) unsigned int index; int ret; + if (this_cpu_ci->cpu_map_populated) + return 0; + ret = cache_setup_of_node(cpu); if (ret) return ret; diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2189935075b4..a951fd10aaaa 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -71,6 +71,7 @@ struct cpu_cacheinfo { struct cacheinfo *info_list; unsigned int num_levels; unsigned int num_leaves; + bool cpu_map_populated; }; /* -- GitLab From 1d8c402e0c46ce630746e081c904068af455b1e5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 28 Oct 2016 09:45:29 +0100 Subject: [PATCH 1031/1398] drivers: base: cacheinfo: fix boot error message when acpi is enabled commit 55877ef45fbd7f975d078426866b7d1a2435dcc3 upstream. ARM64 enables both CONFIG_OF and CONFIG_ACPI and the firmware can pass both ACPI tables and the device tree. Based on the kernel parameter, one of the two will be chosen. If acpi is enabled, then device tree is not unflattened. Currently ARM64 platforms report: " Failed to find cpu0 device node Unable to detect cache hierarchy from DT for CPU 0 " which is incorrect when booting with ACPI. Also latest ACPI v6.1 has no support for cache properties/hierarchy. This patch adds check for unflattened device tree and also returns as "not supported" if ACPI is runtime enabled. It also removes the reference to DT from the error message as the cache hierarchy can be detected from the firmware(OF/DT/ACPI) Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Greg Kroah-Hartman --- drivers/base/cacheinfo.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index ecde8957835a..70e13cf06ed0 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -16,6 +16,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ +#include #include #include #include @@ -104,12 +105,16 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf, *sib_leaf; unsigned int index; - int ret; + int ret = 0; if (this_cpu_ci->cpu_map_populated) return 0; - ret = cache_setup_of_node(cpu); + if (of_have_populated_dt()) + ret = cache_setup_of_node(cpu); + else if (!acpi_disabled) + /* No cache property/hierarchy support yet in ACPI */ + ret = -ENOTSUPP; if (ret) return ret; @@ -206,8 +211,7 @@ static int detect_cache_attributes(unsigned int cpu) */ ret = cache_shared_cpu_map_setup(cpu); if (ret) { - pr_warn("Unable to detect cache hierarchy from DT for CPU %d\n", - cpu); + pr_warn("Unable to detect cache hierarchy for CPU %d\n", cpu); goto free_ci; } return 0; -- GitLab From c57664bd12997742da5e12556e328e1ec0b5b654 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:49:51 -0700 Subject: [PATCH 1032/1398] mm/mmap.c: do not blow on PROT_NONE MAP_FIXED holes in the stack commit 561b5e0709e4a248c67d024d4d94b6e31e3edf2f upstream. Commit 1be7107fbe18 ("mm: larger stack guard gap, between vmas") has introduced a regression in some rust and Java environments which are trying to implement their own stack guard page. They are punching a new MAP_FIXED mapping inside the existing stack Vma. This will confuse expand_{downwards,upwards} into thinking that the stack expansion would in fact get us too close to an existing non-stack vma which is a correct behavior wrt safety. It is a real regression on the other hand. Let's work around the problem by considering PROT_NONE mapping as a part of the stack. This is a gros hack but overflowing to such a mapping would trap anyway an we only can hope that usespace knows what it is doing and handle it propely. Fixes: 1be7107fbe18 ("mm: larger stack guard gap, between vmas") Link: http://lkml.kernel.org/r/20170705182849.GA18027@dhcp22.suse.cz Signed-off-by: Michal Hocko Debugged-by: Vlastimil Babka Cc: Ben Hutchings Cc: Willy Tarreau Cc: Oleg Nesterov Cc: Rik van Riel Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 5b48adb4aa56..45ac5b973459 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2240,7 +2240,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) gap_addr = TASK_SIZE; next = vma->vm_next; - if (next && next->vm_start < gap_addr) { + if (next && next->vm_start < gap_addr && + (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) { if (!(next->vm_flags & VM_GROWSUP)) return -ENOMEM; /* Check that both stack segments have the same anon_vma? */ @@ -2324,7 +2325,8 @@ int expand_downwards(struct vm_area_struct *vma, if (gap_addr > address) return -ENOMEM; prev = vma->vm_prev; - if (prev && prev->vm_end > gap_addr) { + if (prev && prev->vm_end > gap_addr && + (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) { if (!(prev->vm_flags & VM_GROWSDOWN)) return -ENOMEM; /* Check that both stack segments have the same anon_vma? */ -- GitLab From bc0e2174b092638f1146d792a8be71ab90a7e56d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 12 May 2017 15:46:26 -0700 Subject: [PATCH 1033/1398] hwpoison, memcg: forcibly uncharge LRU pages commit 18365225f0440d09708ad9daade2ec11275c3df9 upstream. Laurent Dufour has noticed that hwpoinsoned pages are kept charged. In his particular case he has hit a bad_page("page still charged to cgroup") when onlining a hwpoison page. While this looks like something that shouldn't happen in the first place because onlining hwpages and returning them to the page allocator makes only little sense it shows a real problem. hwpoison pages do not get freed usually so we do not uncharge them (at least not since commit 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API")). Each charge pins memcg (since e8ea14cc6ead ("mm: memcontrol: take a css reference for each charged page")) as well and so the mem_cgroup and the associated state will never go away. Fix this leak by forcibly uncharging a LRU hwpoisoned page in delete_from_lru_cache(). We also have to tweak uncharge_list because it cannot rely on zero ref count for these pages. [akpm@linux-foundation.org: coding-style fixes] Fixes: 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") Link: http://lkml.kernel.org/r/20170502185507.GB19165@dhcp22.suse.cz Signed-off-by: Michal Hocko Reported-by: Laurent Dufour Tested-by: Laurent Dufour Reviewed-by: Balbir Singh Reviewed-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 2 +- mm/memory-failure.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2a800c4a39bd..50088150fc17 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5531,7 +5531,7 @@ static void uncharge_list(struct list_head *page_list) next = page->lru.next; VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON_PAGE(page_count(page), page); + VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page); if (!page->mem_cgroup) continue; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ce7d416edab7..5aa71a82ca73 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -535,6 +535,13 @@ static int delete_from_lru_cache(struct page *p) */ ClearPageActive(p); ClearPageUnevictable(p); + + /* + * Poisoned page might never drop its ref count to 0 so we have + * to uncharge it manually from its memcg. + */ + mem_cgroup_uncharge(p); + /* * drop the page count elevated by isolate_lru_page() */ -- GitLab From 714c19ef57a5271c14ab6d1b5878f5662a0b3034 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Mon, 10 Jul 2017 15:49:44 -0700 Subject: [PATCH 1034/1398] cma: fix calculation of aligned offset commit e048cb32f69038aa1c8f11e5c1b331be4181659d upstream. The align_offset parameter is used by bitmap_find_next_zero_area_off() to represent the offset of map's base from the previous alignment boundary; the function ensures that the returned index, plus the align_offset, honors the specified align_mask. The logic introduced by commit b5be83e308f7 ("mm: cma: align to physical address, not CMA region position") has the cma driver calculate the offset to the *next* alignment boundary. In most cases, the base alignment is greater than that specified when making allocations, resulting in a zero offset whether we align up or down. In the example given with the commit, the base alignment (8MB) was half the requested alignment (16MB) so the math also happened to work since the offset is 8MB in both directions. However, when requesting allocations with an alignment greater than twice that of the base, the returned index would not be correctly aligned. Also, the align_order arguments of cma_bitmap_aligned_mask() and cma_bitmap_aligned_offset() should not be negative so the argument type was made unsigned. Fixes: b5be83e308f7 ("mm: cma: align to physical address, not CMA region position") Link: http://lkml.kernel.org/r/20170628170742.2895-1-opendmb@gmail.com Signed-off-by: Angus Clark Signed-off-by: Doug Berger Acked-by: Gregory Fong Cc: Doug Berger Cc: Angus Clark Cc: Laura Abbott Cc: Vlastimil Babka Cc: Greg Kroah-Hartman Cc: Lucas Stach Cc: Catalin Marinas Cc: Shiraz Hashim Cc: Jaewon Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Vlastimil Babka Signed-off-by: Greg Kroah-Hartman --- mm/cma.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/mm/cma.c b/mm/cma.c index c960459eda7e..397687fc51f9 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -54,7 +54,7 @@ unsigned long cma_get_size(const struct cma *cma) } static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, - int align_order) + unsigned int align_order) { if (align_order <= cma->order_per_bit) return 0; @@ -62,17 +62,14 @@ static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, } /* - * Find a PFN aligned to the specified order and return an offset represented in - * order_per_bits. + * Find the offset of the base PFN from the specified align_order. + * The value returned is represented in order_per_bits. */ static unsigned long cma_bitmap_aligned_offset(const struct cma *cma, - int align_order) + unsigned int align_order) { - if (align_order <= cma->order_per_bit) - return 0; - - return (ALIGN(cma->base_pfn, (1UL << align_order)) - - cma->base_pfn) >> cma->order_per_bit; + return (cma->base_pfn & ((1UL << align_order) - 1)) + >> cma->order_per_bit; } static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma, -- GitLab From 685cce58f1c2e4fb5dbc891edf3e1bbca217ed3e Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 15 Nov 2017 17:38:30 -0800 Subject: [PATCH 1035/1398] mm, page_alloc: fix potential false positive in __zone_watermark_ok commit b050e3769c6b4013bb937e879fc43bf1847ee819 upstream. Since commit 97a16fc82a7c ("mm, page_alloc: only enforce watermarks for order-0 allocations"), __zone_watermark_ok() check for high-order allocations will shortcut per-migratetype free list checks for ALLOC_HARDER allocations, and return true as long as there's free page of any migratetype. The intention is that ALLOC_HARDER can allocate from MIGRATE_HIGHATOMIC free lists, while normal allocations can't. However, as a side effect, the watermark check will then also return true when there are pages only on the MIGRATE_ISOLATE list, or (prior to CMA conversion to ZONE_MOVABLE) on the MIGRATE_CMA list. Since the allocation cannot actually obtain isolated pages, and might not be able to obtain CMA pages, this can result in a false positive. The condition should be rare and perhaps the outcome is not a fatal one. Still, it's better if the watermark check is correct. There also shouldn't be a performance tradeoff here. Link: http://lkml.kernel.org/r/20171102125001.23708-1-vbabka@suse.cz Fixes: 97a16fc82a7c ("mm, page_alloc: only enforce watermarks for order-0 allocations") Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Cc: Joonsoo Kim Cc: Rik van Riel Cc: David Rientjes Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fbc38888252b..546713b3f762 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2821,9 +2821,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, if (!area->nr_free) continue; - if (alloc_harder) - return true; - for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) { if (!list_empty(&area->free_list[mt])) return true; @@ -2835,6 +2832,9 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, return true; } #endif + if (alloc_harder && + !list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) + return true; } return false; } -- GitLab From 542cde0e3cc27bd4d6cbfa596d9278d3c97bc193 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 14 Dec 2016 15:06:07 -0800 Subject: [PATCH 1036/1398] ipc: msg, make msgrcv work with LONG_MIN commit 999898355e08ae3b92dfd0a08db706e0c6703d30 upstream. When LONG_MIN is passed to msgrcv, one would expect to recieve any message. But convert_mode does *msgtyp = -*msgtyp and -LONG_MIN is undefined. In particular, with my gcc -LONG_MIN produces -LONG_MIN again. So handle this case properly by assigning LONG_MAX to *msgtyp if LONG_MIN was specified as msgtyp to msgrcv. This code: long msg[] = { 100, 200 }; int m = msgget(IPC_PRIVATE, IPC_CREAT | 0644); msgsnd(m, &msg, sizeof(msg), 0); msgrcv(m, &msg, sizeof(msg), LONG_MIN, 0); produces currently nothing: msgget(IPC_PRIVATE, IPC_CREAT|0644) = 65538 msgsnd(65538, {100, "\310\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"}, 16, 0) = 0 msgrcv(65538, ... Except a UBSAN warning: UBSAN: Undefined behaviour in ipc/msg.c:745:13 negation of -9223372036854775808 cannot be represented in type 'long int': With the patch, I see what I expect: msgget(IPC_PRIVATE, IPC_CREAT|0644) = 0 msgsnd(0, {100, "\310\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"}, 16, 0) = 0 msgrcv(0, {100, "\310\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"}, 16, -9223372036854775808, 0) = 16 Link: http://lkml.kernel.org/r/20161024082633.10148-1-jslaby@suse.cz Signed-off-by: Jiri Slaby Cc: Davidlohr Bueso Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/msg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ipc/msg.c b/ipc/msg.c index e12307d0c920..ff10d43b5184 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -763,7 +763,10 @@ static inline int convert_mode(long *msgtyp, int msgflg) if (*msgtyp == 0) return SEARCH_ANY; if (*msgtyp < 0) { - *msgtyp = -*msgtyp; + if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */ + *msgtyp = LONG_MAX; + else + *msgtyp = -*msgtyp; return SEARCH_LESSEQUAL; } if (msgflg & MSG_EXCEPT) -- GitLab From 3a53accd9c397f836858defa475720a65b5dd662 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 30 Dec 2016 02:27:31 +0100 Subject: [PATCH 1037/1398] ACPI / scan: Prefer devices without _HID/_CID for _ADR matching commit c2a6bbaf0c5f90463a7011a295bbdb7e33c80b51 upstream. The way acpi_find_child_device() works currently is that, if there are two (or more) devices with the same _ADR value in the same namespace scope (which is not specifically allowed by the spec and the OS behavior in that case is not defined), the first one of them found to be present (with the help of _STA) will be returned. This covers the majority of cases, but is not sufficient if some of the devices in question have a _HID (or _CID) returning some valid ACPI/PNP device IDs (which is disallowed by the spec) and the ASL writers' expectation appears to be that the OS will match devices without a valid ACPI/PNP device ID against a given bus address first. To cover this special case as well, modify find_child_checks() to prefer devices without ACPI/PNP device IDs over devices that have them. Suggested-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki Tested-by: Hans de Goede Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/glue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 73c9c7fa9001..f06317d6fc38 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -99,13 +99,13 @@ static int find_child_checks(struct acpi_device *adev, bool check_children) return -ENODEV; /* - * If the device has a _HID (or _CID) returning a valid ACPI/PNP - * device ID, it is better to make it look less attractive here, so that - * the other device with the same _ADR value (that may not have a valid - * device ID) can be matched going forward. [This means a second spec - * violation in a row, so whatever we do here is best effort anyway.] + * If the device has a _HID returning a valid ACPI/PNP device ID, it is + * better to make it look less attractive here, so that the other device + * with the same _ADR value (that may not have a valid device ID) can be + * matched going forward. [This means a second spec violation in a row, + * so whatever we do here is best effort anyway.] */ - return sta_present && list_empty(&adev->pnp.ids) ? + return sta_present && !adev->pnp.type.platform_id ? FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE; } -- GitLab From 2915f16bdce204621695e7a0dfcd5f73b120cccb Mon Sep 17 00:00:00 2001 From: Seunghun Han Date: Wed, 26 Apr 2017 16:18:08 +0800 Subject: [PATCH 1038/1398] ACPICA: Namespace: fix operand cache leak commit 3b2d69114fefa474fca542e51119036dceb4aa6f upstream. ACPICA commit a23325b2e583556eae88ed3f764e457786bf4df6 I found some ACPI operand cache leaks in ACPI early abort cases. Boot log of ACPI operand cache leak is as follows: >[ 0.174332] ACPI: Added _OSI(Module Device) >[ 0.175504] ACPI: Added _OSI(Processor Device) >[ 0.176010] ACPI: Added _OSI(3.0 _SCP Extensions) >[ 0.177032] ACPI: Added _OSI(Processor Aggregator Device) >[ 0.178284] ACPI: SCI (IRQ16705) allocation failed >[ 0.179352] ACPI Exception: AE_NOT_ACQUIRED, Unable to install System Control Interrupt handler (20160930/evevent-131) >[ 0.180008] ACPI: Unable to start the ACPI Interpreter >[ 0.181125] ACPI Error: Could not remove SCI handler (20160930/evmisc-281) >[ 0.184068] kmem_cache_destroy Acpi-Operand: Slab cache still has objects >[ 0.185358] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.10.0-rc3 #2 >[ 0.186820] Hardware name: innotek gmb_h virtual_box/virtual_box, BIOS virtual_box 12/01/2006 >[ 0.188000] Call Trace: >[ 0.188000] ? dump_stack+0x5c/0x7d >[ 0.188000] ? kmem_cache_destroy+0x224/0x230 >[ 0.188000] ? acpi_sleep_proc_init+0x22/0x22 >[ 0.188000] ? acpi_os_delete_cache+0xa/0xd >[ 0.188000] ? acpi_ut_delete_caches+0x3f/0x7b >[ 0.188000] ? acpi_terminate+0x5/0xf >[ 0.188000] ? acpi_init+0x288/0x32e >[ 0.188000] ? __class_create+0x4c/0x80 >[ 0.188000] ? video_setup+0x7a/0x7a >[ 0.188000] ? do_one_initcall+0x4e/0x1b0 >[ 0.188000] ? kernel_init_freeable+0x194/0x21a >[ 0.188000] ? rest_init+0x80/0x80 >[ 0.188000] ? kernel_init+0xa/0x100 >[ 0.188000] ? ret_from_fork+0x25/0x30 When early abort is occurred due to invalid ACPI information, Linux kernel terminates ACPI by calling acpi_terminate() function. The function calls acpi_ns_terminate() function to delete namespace data and ACPI operand cache (acpi_gbl_module_code_list). But the deletion code in acpi_ns_terminate() function is wrapped in ACPI_EXEC_APP definition, therefore the code is only executed when the definition exists. If the define doesn't exist, ACPI operand cache (acpi_gbl_module_code_list) is leaked, and stack dump is shown in kernel log. This causes a security threat because the old kernel (<= 4.9) shows memory locations of kernel functions in stack dump, therefore kernel ASLR can be neutralized. To fix ACPI operand leak for enhancing security, I made a patch which removes the ACPI_EXEC_APP define in acpi_ns_terminate() function for executing the deletion code unconditionally. Link: https://github.com/acpica/acpica/commit/a23325b2 Signed-off-by: Seunghun Han Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki Acked-by: Lee, Chun-Yi Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/nsutils.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c index 691814dfed31..943702dd9517 100644 --- a/drivers/acpi/acpica/nsutils.c +++ b/drivers/acpi/acpica/nsutils.c @@ -594,25 +594,20 @@ struct acpi_namespace_node *acpi_ns_validate_handle(acpi_handle handle) void acpi_ns_terminate(void) { acpi_status status; + union acpi_operand_object *prev; + union acpi_operand_object *next; ACPI_FUNCTION_TRACE(ns_terminate); -#ifdef ACPI_EXEC_APP - { - union acpi_operand_object *prev; - union acpi_operand_object *next; + /* Delete any module-level code blocks */ - /* Delete any module-level code blocks */ - - next = acpi_gbl_module_code_list; - while (next) { - prev = next; - next = next->method.mutex; - prev->method.mutex = NULL; /* Clear the Mutex (cheated) field */ - acpi_ut_remove_reference(prev); - } + next = acpi_gbl_module_code_list; + while (next) { + prev = next; + next = next->method.mutex; + prev->method.mutex = NULL; /* Clear the Mutex (cheated) field */ + acpi_ut_remove_reference(prev); } -#endif /* * Free the entire namespace -- all nodes and all objects -- GitLab From 2c3184ea80322347287bc7e57f782d77f478e73c Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Sun, 3 Dec 2017 12:12:45 -0800 Subject: [PATCH 1039/1398] netfilter: nfnetlink_cthelper: Add missing permission checks commit 4b380c42f7d00a395feede754f0bc2292eebe6e5 upstream. The capability check in nfnetlink_rcv() verifies that the caller has CAP_NET_ADMIN in the namespace that "owns" the netlink socket. However, nfnl_cthelper_list is shared by all net namespaces on the system. An unprivileged user can create user and net namespaces in which he holds CAP_NET_ADMIN to bypass the netlink_net_capable() check: $ nfct helper list nfct v1.4.4: netlink error: Operation not permitted $ vpnns -- nfct helper list { .name = ftp, .queuenum = 0, .l3protonum = 2, .l4protonum = 6, .priv_data_len = 24, .status = enabled, }; Add capable() checks in nfnetlink_cthelper, as this is cleaner than trying to generalize the solution. Signed-off-by: Kevin Cernekee Signed-off-by: Pablo Neira Ayuso Acked-by: Michal Kubecek Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink_cthelper.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 28d065394c09..3f499126727c 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -392,6 +393,9 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, struct nfnl_cthelper *nlcth; int ret = 0; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) return -EINVAL; @@ -595,6 +599,9 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, struct nfnl_cthelper *nlcth; bool tuple_set = false; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nfnl_cthelper_dump_table, @@ -661,6 +668,9 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, struct nfnl_cthelper *nlcth, *n; int j = 0, ret; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); -- GitLab From 898eeca02a55e354c42a7aa5cdfebf16c3742f44 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Tue, 5 Dec 2017 15:42:41 -0800 Subject: [PATCH 1040/1398] netfilter: xt_osf: Add missing permission checks commit 916a27901de01446bcf57ecca4783f6cff493309 upstream. The capability check in nfnetlink_rcv() verifies that the caller has CAP_NET_ADMIN in the namespace that "owns" the netlink socket. However, xt_osf_fingers is shared by all net namespaces on the system. An unprivileged user can create user and net namespaces in which he holds CAP_NET_ADMIN to bypass the netlink_net_capable() check: vpnns -- nfnl_osf -f /tmp/pf.os vpnns -- nfnl_osf -f /tmp/pf.os -d These non-root operations successfully modify the systemwide OS fingerprint list. Add new capable() checks so that they can't. Signed-off-by: Kevin Cernekee Signed-off-by: Pablo Neira Ayuso Acked-by: Michal Kubecek Signed-off-by: Greg Kroah-Hartman --- net/netfilter/xt_osf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 2455b69b5810..b589a62e68a2 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -69,6 +70,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl, struct xt_osf_finger *kf = NULL, *sf; int err = 0; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (!osf_attrs[OSF_ATTR_FINGER]) return -EINVAL; @@ -113,6 +117,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl, struct xt_osf_finger *sf; int err = -ENOENT; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (!osf_attrs[OSF_ATTR_FINGER]) return -EINVAL; -- GitLab From b7d25282b75ea7da56ca7b1a11f7a9c4970fc5e0 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 22 Jun 2017 16:47:34 -0400 Subject: [PATCH 1041/1398] reiserfs: fix race in prealloc discard commit 08db141b5313ac2f64b844fb5725b8d81744b417 upstream. The main loop in __discard_prealloc is protected by the reiserfs write lock which is dropped across schedules like the BKL it replaced. The problem is that it checks the value, calls a routine that schedules, and then adjusts the state. As a result, two threads that are calling reiserfs_prealloc_discard at the same time can race when one calls reiserfs_free_prealloc_block, the lock is dropped, and the other calls reiserfs_free_prealloc_block with the same block number. In the right circumstances, it can cause the prealloc count to go negative. Signed-off-by: Jeff Mahoney Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/bitmap.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index dc198bc64c61..73705d4bb069 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -513,9 +513,17 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th, "inode has negative prealloc blocks count."); #endif while (ei->i_prealloc_count > 0) { - reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block); - ei->i_prealloc_block++; + b_blocknr_t block_to_free; + + /* + * reiserfs_free_prealloc_block can drop the write lock, + * which could allow another caller to free the same block. + * We can protect against it by modifying the prealloc + * state before calling it. + */ + block_to_free = ei->i_prealloc_block++; ei->i_prealloc_count--; + reiserfs_free_prealloc_block(th, inode, block_to_free); dirty = 1; } if (dirty) -- GitLab From 0ccfbd4d6f02ff010dcc59e439f19864ad3a4efa Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 22 Jun 2017 16:35:04 -0400 Subject: [PATCH 1042/1398] reiserfs: don't preallocate blocks for extended attributes commit 54930dfeb46e978b447af0fb8ab4e181c1bf9d7a upstream. Most extended attributes will fit in a single block. More importantly, we drop the reference to the inode while holding the transaction open so the preallocated blocks aren't released. As a result, the inode may be evicted before it's removed from the transaction's prealloc list which can cause memory corruption. Signed-off-by: Jeff Mahoney Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 73705d4bb069..edc8ef78b63f 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1136,7 +1136,7 @@ static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint) hint->prealloc_size = 0; if (!hint->formatted_node && hint->preallocate) { - if (S_ISREG(hint->inode->i_mode) + if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode) && hint->inode->i_size >= REISERFS_SB(hint->th->t_super)->s_alloc_options. preallocmin * hint->inode->i_sb->s_blocksize) -- GitLab From 7b50205cf8b9c7bd10572c97f7ca9fecfd9aec7b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 13 Jun 2017 13:35:51 +0200 Subject: [PATCH 1043/1398] fs/fcntl: f_setown, avoid undefined behaviour commit fc3dc67471461c0efcb1ed22fb7595121d65fad9 upstream. fcntl(0, F_SETOWN, 0x80000000) triggers: UBSAN: Undefined behaviour in fs/fcntl.c:118:7 negation of -2147483648 cannot be represented in type 'int': CPU: 1 PID: 18261 Comm: syz-executor Not tainted 4.8.1-0-syzkaller #1 ... Call Trace: ... [] ? f_setown+0x1d8/0x200 [] ? SyS_fcntl+0x999/0xf30 [] ? entry_SYSCALL_64_fastpath+0x23/0xc1 Fix that by checking the arg parameter properly (against INT_MAX) before "who = -who". And return immediatelly with -EINVAL in case it is wrong. Note that according to POSIX we can return EINVAL: http://pubs.opengroup.org/onlinepubs/9699919799/functions/fcntl.html [EINVAL] The cmd argument is F_SETOWN and the value of the argument is not valid as a process or process group identifier. [v2] returns an error, v1 used to fail silently [v3] implement proper check for the bad value INT_MIN Signed-off-by: Jiri Slaby Cc: Jeff Layton Cc: "J. Bruce Fields" Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- fs/fcntl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fcntl.c b/fs/fcntl.c index 1493ceb0477d..ec03cf620fd7 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -114,6 +114,10 @@ void f_setown(struct file *filp, unsigned long arg, int force) int who = arg; type = PIDTYPE_PID; if (who < 0) { + /* avoid overflow below */ + if (who == INT_MIN) + return; + type = PIDTYPE_PGID; who = -who; } -- GitLab From c41bb027ed63db9e7a6b2c4c3c5e2e4df1ccb8cf Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 9 Oct 2017 13:33:19 +0200 Subject: [PATCH 1044/1398] scsi: libiscsi: fix shifting of DID_REQUEUE host byte commit eef9ffdf9cd39b2986367bc8395e2772bc1284ba upstream. The SCSI host byte should be shifted left by 16 in order to have scsi_decide_disposition() do the right thing (.i.e. requeue the command). Signed-off-by: Johannes Thumshirn Fixes: 661134ad3765 ("[SCSI] libiscsi, bnx2i: make bound ep check common") Cc: Lee Duncan Cc: Hannes Reinecke Cc: Bart Van Assche Cc: Chris Leech Acked-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libiscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index a530f08592cd..4abd3fce5ab6 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1727,7 +1727,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { reason = FAILURE_SESSION_IN_RECOVERY; - sc->result = DID_REQUEUE; + sc->result = DID_REQUEUE << 16; goto fault; } -- GitLab From e62b0c661f65e985327f4bc542688630265b0311 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 24 Jan 2018 15:28:17 +0100 Subject: [PATCH 1045/1398] Revert "module: Add retpoline tag to VERMAGIC" commit 5132ede0fe8092b043dae09a7cc32b8ae7272baa upstream. This reverts commit 6cfb521ac0d5b97470883ff9b7facae264b7ab12. Turns out distros do not want to make retpoline as part of their "ABI", so this patch should not have been merged. Sorry Andi, this was my fault, I suggested it when your original patch was the "correct" way of doing this instead. Reported-by: Jiri Kosina Fixes: 6cfb521ac0d5 ("module: Add retpoline tag to VERMAGIC") Acked-by: Andi Kleen Cc: Thomas Gleixner Cc: David Woodhouse Cc: rusty@rustcorp.com.au Cc: arjan.van.de.ven@intel.com Cc: jeyu@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/vermagic.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index a3d04934aa96..6f8fbcf10dfb 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -24,16 +24,10 @@ #ifndef MODULE_ARCH_VERMAGIC #define MODULE_ARCH_VERMAGIC "" #endif -#ifdef RETPOLINE -#define MODULE_VERMAGIC_RETPOLINE "retpoline " -#else -#define MODULE_VERMAGIC_RETPOLINE "" -#endif #define VERMAGIC_STRING \ UTS_RELEASE " " \ MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ - MODULE_ARCH_VERMAGIC \ - MODULE_VERMAGIC_RETPOLINE + MODULE_ARCH_VERMAGIC -- GitLab From 19a7db1e2ef38865a704ea4dfd178b02a8026ada Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 May 2017 14:51:51 -0700 Subject: [PATCH 1046/1398] mm: fix 100% CPU kswapd busyloop on unreclaimable nodes commit c73322d098e4b6f5f0f0fa1330bf57e218775539 upstream. Patch series "mm: kswapd spinning on unreclaimable nodes - fixes and cleanups". Jia reported a scenario in which the kswapd of a node indefinitely spins at 100% CPU usage. We have seen similar cases at Facebook. The kernel's current method of judging its ability to reclaim a node (or whether to back off and sleep) is based on the amount of scanned pages in proportion to the amount of reclaimable pages. In Jia's and our scenarios, there are no reclaimable pages in the node, however, and the condition for backing off is never met. Kswapd busyloops in an attempt to restore the watermarks while having nothing to work with. This series reworks the definition of an unreclaimable node based not on scanning but on whether kswapd is able to actually reclaim pages in MAX_RECLAIM_RETRIES (16) consecutive runs. This is the same criteria the page allocator uses for giving up on direct reclaim and invoking the OOM killer. If it cannot free any pages, kswapd will go to sleep and leave further attempts to direct reclaim invocations, which will either make progress and re-enable kswapd, or invoke the OOM killer. Patch #1 fixes the immediate problem Jia reported, the remainder are smaller fixlets, cleanups, and overall phasing out of the old method. Patch #6 is the odd one out. It's a nice cleanup to get_scan_count(), and directly related to #5, but in itself not relevant to the series. If the whole series is too ambitious for 4.11, I would consider the first three patches fixes, the rest cleanups. This patch (of 9): Jia He reports a problem with kswapd spinning at 100% CPU when requesting more hugepages than memory available in the system: $ echo 4000 >/proc/sys/vm/nr_hugepages top - 13:42:59 up 3:37, 1 user, load average: 1.09, 1.03, 1.01 Tasks: 1 total, 1 running, 0 sleeping, 0 stopped, 0 zombie %Cpu(s): 0.0 us, 12.5 sy, 0.0 ni, 85.5 id, 2.0 wa, 0.0 hi, 0.0 si, 0.0 st KiB Mem: 31371520 total, 30915136 used, 456384 free, 320 buffers KiB Swap: 6284224 total, 115712 used, 6168512 free. 48192 cached Mem PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 76 root 20 0 0 0 0 R 100.0 0.000 217:17.29 kswapd3 At that time, there are no reclaimable pages left in the node, but as kswapd fails to restore the high watermarks it refuses to go to sleep. Kswapd needs to back away from nodes that fail to balance. Up until commit 1d82de618ddd ("mm, vmscan: make kswapd reclaim in terms of nodes") kswapd had such a mechanism. It considered zones whose theoretically reclaimable pages it had reclaimed six times over as unreclaimable and backed away from them. This guard was erroneously removed as the patch changed the definition of a balanced node. However, simply restoring this code wouldn't help in the case reported here: there *are* no reclaimable pages that could be scanned until the threshold is met. Kswapd would stay awake anyway. Introduce a new and much simpler way of backing off. If kswapd runs through MAX_RECLAIM_RETRIES (16) cycles without reclaiming a single page, make it back off from the node. This is the same number of shots direct reclaim takes before declaring OOM. Kswapd will go to sleep on that node until a direct reclaimer manages to reclaim some pages, thus proving the node reclaimable again. [hannes@cmpxchg.org: check kswapd failure against the cumulative nr_reclaimed count] Link: http://lkml.kernel.org/r/20170306162410.GB2090@cmpxchg.org [shakeelb@google.com: fix condition for throttle_direct_reclaim] Link: http://lkml.kernel.org/r/20170314183228.20152-1-shakeelb@google.com Link: http://lkml.kernel.org/r/20170228214007.5621-2-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Signed-off-by: Shakeel Butt Reported-by: Jia He Tested-by: Jia He Acked-by: Michal Hocko Acked-by: Hillf Danton Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Dmitry Shmidt Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 2 ++ mm/internal.h | 6 ++++++ mm/page_alloc.c | 9 ++------ mm/vmscan.c | 47 ++++++++++++++++++++++++++++-------------- mm/vmstat.c | 2 +- 5 files changed, 43 insertions(+), 23 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 490f5a83f947..e3d7754f25f0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -633,6 +633,8 @@ typedef struct pglist_data { int kswapd_order; enum zone_type kswapd_classzone_idx; + int kswapd_failures; /* Number of 'reclaimed == 0' runs */ + #ifdef CONFIG_COMPACTION int kcompactd_max_order; enum zone_type kcompactd_classzone_idx; diff --git a/mm/internal.h b/mm/internal.h index 34a5459e5989..3e2d01694747 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -73,6 +73,12 @@ static inline void set_page_refcounted(struct page *page) extern unsigned long highest_memmap_pfn; +/* + * Maximum number of reclaim retries without progress before the OOM + * killer is consider the only way forward. + */ +#define MAX_RECLAIM_RETRIES 16 + /* * in mm/vmscan.c: */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 546713b3f762..94018ea5f935 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3421,12 +3421,6 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask) return false; } -/* - * Maximum number of reclaim retries without any progress before OOM killer - * is consider as the only way to move forward. - */ -#define MAX_RECLAIM_RETRIES 16 - /* * Checks whether it makes sense to retry the reclaim to make a forward progress * for the given allocation request. @@ -4385,7 +4379,8 @@ void show_free_areas(unsigned int filter) K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), K(node_page_state(pgdat, NR_UNSTABLE_NFS)), node_page_state(pgdat, NR_PAGES_SCANNED), - !pgdat_reclaimable(pgdat) ? "yes" : "no"); + pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ? + "yes" : "no"); } for_each_populated_zone(zone) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 30a88b945a44..f118dc23f662 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2606,6 +2606,15 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, sc->nr_scanned - nr_scanned, sc)); + /* + * Kswapd gives up on balancing particular nodes after too + * many failures to reclaim anything from them and goes to + * sleep. On reclaim progress, reset the failure counter. A + * successful direct reclaim run will revive a dormant kswapd. + */ + if (reclaimable) + pgdat->kswapd_failures = 0; + return reclaimable; } @@ -2680,10 +2689,6 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) GFP_KERNEL | __GFP_HARDWALL)) continue; - if (sc->priority != DEF_PRIORITY && - !pgdat_reclaimable(zone->zone_pgdat)) - continue; /* Let kswapd poll it */ - /* * If we already have plenty of memory free for * compaction in this zone, don't free any more. @@ -2820,7 +2825,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, return 0; } -static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) +static bool allow_direct_reclaim(pg_data_t *pgdat) { struct zone *zone; unsigned long pfmemalloc_reserve = 0; @@ -2828,6 +2833,9 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) int i; bool wmark_ok; + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return true; + for (i = 0; i <= ZONE_NORMAL; i++) { zone = &pgdat->node_zones[i]; if (!managed_zone(zone) || @@ -2908,7 +2916,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, /* Throttle based on the first usable node */ pgdat = zone->zone_pgdat; - if (pfmemalloc_watermark_ok(pgdat)) + if (allow_direct_reclaim(pgdat)) goto out; break; } @@ -2930,14 +2938,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, */ if (!(gfp_mask & __GFP_FS)) { wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, - pfmemalloc_watermark_ok(pgdat), HZ); + allow_direct_reclaim(pgdat), HZ); goto check_pending; } /* Throttle until kswapd wakes the process */ wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, - pfmemalloc_watermark_ok(pgdat)); + allow_direct_reclaim(pgdat)); check_pending: if (fatal_signal_pending(current)) @@ -3116,7 +3124,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx) /* * The throttled processes are normally woken up in balance_pgdat() as - * soon as pfmemalloc_watermark_ok() is true. But there is a potential + * soon as allow_direct_reclaim() is true. But there is a potential * race between when kswapd checks the watermarks and a process gets * throttled. There is also a potential race if processes get * throttled, kswapd wakes, a large process exits thereby balancing the @@ -3130,6 +3138,10 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx) if (waitqueue_active(&pgdat->pfmemalloc_wait)) wake_up_all(&pgdat->pfmemalloc_wait); + /* Hopeless node, leave it to direct reclaim */ + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return true; + for (i = 0; i <= classzone_idx; i++) { struct zone *zone = pgdat->node_zones + i; @@ -3216,9 +3228,9 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) count_vm_event(PAGEOUTRUN); do { + unsigned long nr_reclaimed = sc.nr_reclaimed; bool raise_priority = true; - sc.nr_reclaimed = 0; sc.reclaim_idx = classzone_idx; /* @@ -3297,7 +3309,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) * able to safely make forward progress. Wake them */ if (waitqueue_active(&pgdat->pfmemalloc_wait) && - pfmemalloc_watermark_ok(pgdat)) + allow_direct_reclaim(pgdat)) wake_up_all(&pgdat->pfmemalloc_wait); /* Check if kswapd should be suspending */ @@ -3308,10 +3320,14 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) * Raise priority if scanning rate is too low or there was no * progress in reclaiming pages */ - if (raise_priority || !sc.nr_reclaimed) + nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; + if (raise_priority || !nr_reclaimed) sc.priority--; } while (sc.priority >= 1); + if (!sc.nr_reclaimed) + pgdat->kswapd_failures++; + out: /* * Return the order kswapd stopped reclaiming at as @@ -3511,6 +3527,10 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) if (!waitqueue_active(&pgdat->kswapd_wait)) return; + /* Hopeless node, leave it to direct reclaim */ + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return; + /* Only wake kswapd if all zones are unbalanced */ for (z = 0; z <= classzone_idx; z++) { zone = pgdat->node_zones + z; @@ -3781,9 +3801,6 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages) return NODE_RECLAIM_FULL; - if (!pgdat_reclaimable(pgdat)) - return NODE_RECLAIM_FULL; - /* * Do not scan if the allocation should not be delayed. */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 6a088df04b29..3863b5d6d598 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1421,7 +1421,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n node_unreclaimable: %u" "\n start_pfn: %lu" "\n node_inactive_ratio: %u", - !pgdat_reclaimable(zone->zone_pgdat), + pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES, zone->zone_start_pfn, zone->zone_pgdat->inactive_ratio); seq_putc(m, '\n'); -- GitLab From 42f0aba58e00aedc395460e621896532d009c64f Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Fri, 19 Jan 2018 09:43:39 -0800 Subject: [PATCH 1047/1398] Input: trackpoint - force 3 buttons if 0 button is reported commit f5d07b9e98022d50720e38aa936fc11c67868ece upstream. Lenovo introduced trackpoint compatible sticks with minimum PS/2 commands. They supposed to reply with 0x02, 0x03, or 0x04 in response to the "Read Extended ID" command, so we would know not to try certain extended commands. Unfortunately even some trackpoints reporting the original IBM version (0x01 firmware 0x0e) now respond with incorrect data to the "Get Extended Buttons" command: thinkpad_acpi: ThinkPad BIOS R0DET87W (1.87 ), EC unknown thinkpad_acpi: Lenovo ThinkPad E470, model 20H1004SGE psmouse serio2: trackpoint: IBM TrackPoint firmware: 0x0e, buttons: 0/0 Since there are no trackpoints without buttons, let's assume the trackpoint has 3 buttons when we get 0 response to the extended buttons query. Signed-off-by: Aaron Ma Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=196253 Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/trackpoint.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 7e2dc5e56632..0b49f29bf0da 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -383,6 +383,9 @@ int trackpoint_detect(struct psmouse *psmouse, bool set_properties) if (trackpoint_read(&psmouse->ps2dev, TP_EXT_BTN, &button_info)) { psmouse_warn(psmouse, "failed to get extended button data, assuming 3 buttons\n"); button_info = 0x33; + } else if (!button_info) { + psmouse_warn(psmouse, "got 0 in extended button data, assuming 3 buttons\n"); + button_info = 0x33; } psmouse->private = kzalloc(sizeof(struct trackpoint_data), GFP_KERNEL); -- GitLab From d680db722516dbf2d564d2118127270b88b1b663 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Thu, 25 Jan 2018 19:39:44 -0500 Subject: [PATCH 1048/1398] orangefs: fix deadlock; do not write i_size in read_iter commit 6793f1c450b1533a5e9c2493490de771d38b24f9 upstream. After do_readv_writev, the inode cache is invalidated anyway, so i_size will never be read. It will be fetched from the server which will also know about updates from other machines. Fixes deadlock on 32-bit SMP. See https://marc.info/?l=linux-fsdevel&m=151268557427760&w=2 Signed-off-by: Martin Brandenburg Cc: Al Viro Cc: Mike Marshall Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/orangefs/file.c | 7 ++----- fs/orangefs/orangefs-kernel.h | 11 ----------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 02cc6139ec90..5b2cbe567365 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -446,7 +446,7 @@ ssize_t orangefs_inode_read(struct inode *inode, static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; - loff_t pos = *(&iocb->ki_pos); + loff_t pos = iocb->ki_pos; ssize_t rc = 0; BUG_ON(iocb->private); @@ -485,9 +485,6 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite } } - if (file->f_pos > i_size_read(file->f_mapping->host)) - orangefs_i_size_write(file->f_mapping->host, file->f_pos); - rc = generic_write_checks(iocb, iter); if (rc <= 0) { @@ -501,7 +498,7 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite * pos to the end of the file, so we will wait till now to set * pos... */ - pos = *(&iocb->ki_pos); + pos = iocb->ki_pos; rc = do_readv_writev(ORANGEFS_IO_WRITE, file, diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 45dd8f27b2ac..f28381a7cd12 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -570,17 +570,6 @@ do { \ sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE; \ } while (0) -static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size) -{ -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - inode_lock(inode); -#endif - i_size_write(inode, i_size); -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - inode_unlock(inode); -#endif -} - static inline void orangefs_set_timeout(struct dentry *dentry) { unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000; -- GitLab From 1be7d46e775c2fbec57bbce7190b66fbb7c58d1b Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Sun, 20 Aug 2017 13:26:04 +0200 Subject: [PATCH 1049/1398] um: link vmlinux with -no-pie commit 883354afbc109c57f925ccc19840055193da0cc0 upstream. Debian's gcc defaults to pie. The global Makefile already defines the -fno-pie option. Link UML dynamic kernel image also with -no-pie to fix the build. Signed-off-by: Thomas Meyer Signed-off-by: Richard Weinberger Cc: Bernie Innocenti Signed-off-by: Greg Kroah-Hartman --- arch/um/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/um/Makefile b/arch/um/Makefile index 0ca46ededfc7..9c150ccb35d2 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -117,7 +117,7 @@ archheaders: archprepare: include/generated/user_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static -LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib +LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie) CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ $(call cc-option, -fno-stack-protector,) \ -- GitLab From 9a0be5afbfbb1d14efdc98a6615fc52082243bd1 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 26 Jan 2018 16:23:02 +0000 Subject: [PATCH 1050/1398] vsyscall: Fix permissions for emulate mode with KAISER/PTI The backport of KAISER to 4.4 turned vsyscall emulate mode into native mode. Add a vsyscall_pgprot variable to hold the correct page protections, like Borislav and Hugh did for 3.2 and 3.18. Cc: Borislav Petkov Cc: Hugh Dickins Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vsyscall/vsyscall_64.c | 7 ++++--- arch/x86/include/asm/vsyscall.h | 1 + arch/x86/mm/kaiser.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 6bb7e92c6d50..0174290b2857 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -46,6 +46,7 @@ static enum { EMULATE, NATIVE, NONE } vsyscall_mode = #else EMULATE; #endif +unsigned long vsyscall_pgprot = __PAGE_KERNEL_VSYSCALL; static int __init vsyscall_setup(char *str) { @@ -336,11 +337,11 @@ void __init map_vsyscall(void) extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); + if (vsyscall_mode != NATIVE) + vsyscall_pgprot = __PAGE_KERNEL_VVAR; if (vsyscall_mode != NONE) __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, - vsyscall_mode == NATIVE - ? PAGE_KERNEL_VSYSCALL - : PAGE_KERNEL_VVAR); + __pgprot(vsyscall_pgprot)); BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index 4865e10dbb55..9ee85066f407 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -13,6 +13,7 @@ extern void map_vsyscall(void); */ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); extern bool vsyscall_enabled(void); +extern unsigned long vsyscall_pgprot; #else static inline void map_vsyscall(void) {} static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index a8ade08a9bf5..ec678aafa3f8 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -344,7 +344,7 @@ void __init kaiser_init(void) if (vsyscall_enabled()) kaiser_add_user_map_early((void *)VSYSCALL_ADDR, PAGE_SIZE, - __PAGE_KERNEL_VSYSCALL); + vsyscall_pgprot); for_each_possible_cpu(cpu) { void *percpu_vaddr = __per_cpu_user_mapped_start + -- GitLab From 5f6c581bcb3cd5a248acbbf8a91930d13d6474f1 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Wed, 8 Mar 2017 19:03:44 +0100 Subject: [PATCH 1051/1398] eventpoll.h: add missing epoll event masks commit 7e040726850a106587485c21bdacc0bfc8a0cbed upstream. [resend due to me forgetting to cc: linux-api the first time around I posted these back on Feb 23] From: Greg Kroah-Hartman For some reason these values are not in the uapi header file, so any libc has to define it themselves. To prevent them from needing to do this, just have the kernel provide the correct values. Reported-by: Elliott Hughes Signed-off-by: Greg Hackmann Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/eventpoll.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index 1c3154913a39..bc96b14dfb2c 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -26,6 +26,19 @@ #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +/* Epoll event masks */ +#define EPOLLIN 0x00000001 +#define EPOLLPRI 0x00000002 +#define EPOLLOUT 0x00000004 +#define EPOLLERR 0x00000008 +#define EPOLLHUP 0x00000010 +#define EPOLLRDNORM 0x00000040 +#define EPOLLRDBAND 0x00000080 +#define EPOLLWRNORM 0x00000100 +#define EPOLLWRBAND 0x00000200 +#define EPOLLMSG 0x00000400 +#define EPOLLRDHUP 0x00002000 + /* Set exclusive wakeup mode for the target file descriptor */ #define EPOLLEXCLUSIVE (1 << 28) -- GitLab From 5bb5ae9718f64c3fe09f2210a7a5628055529425 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Fri, 26 Jan 2018 15:14:16 +0300 Subject: [PATCH 1052/1398] dccp: don't restart ccid2_hc_tx_rto_expire() if sk in closed state [ Upstream commit dd5684ecae3bd8e44b644f50e2c12c7e57fdfef5 ] ccid2_hc_tx_rto_expire() timer callback always restarts the timer again and can run indefinitely (unless it is stopped outside), and after commit 120e9dabaf55 ("dccp: defer ccid_hc_tx_delete() at dismantle time"), which moved ccid_hc_tx_delete() (also includes sk_stop_timer()) from dccp_destroy_sock() to sk_destruct(), this started to happen quite often. The timer prevents releasing the socket, as a result, sk_destruct() won't be called. Found with LTP/dccp_ipsec tests running on the bonding device, which later couldn't be unloaded after the tests were completed: unregister_netdevice: waiting for bond0 to become free. Usage count = 148 Fixes: 2a91aa396739 ("[DCCP] CCID2: Initial CCID2 (TCP-Like) implementation") Signed-off-by: Alexey Kodanev Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ccids/ccid2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 5e3a7302f774..7753681195c1 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) ccid2_pr_debug("RTO_EXPIRE\n"); + if (sk->sk_state == DCCP_CLOSED) + goto out; + /* back-off timer */ hc->tx_rto <<= 1; if (hc->tx_rto > DCCP_RTO_MAX) -- GitLab From 8b0d3e81cdecb92b12c9d7924749a6f62f855790 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 22 Jan 2018 20:06:42 +0000 Subject: [PATCH 1053/1398] ipv6: Fix getsockopt() for sockets with default IPV6_AUTOFLOWLABEL [ Upstream commit e9191ffb65d8e159680ce0ad2224e1acbde6985c ] Commit 513674b5a2c9 ("net: reevalulate autoflowlabel setting after sysctl setting") removed the initialisation of ipv6_pinfo::autoflowlabel and added a second flag to indicate whether this field or the net namespace default should be used. The getsockopt() handling for this case was not updated, so it currently returns 0 for all sockets for which IPV6_AUTOFLOWLABEL is not explicitly enabled. Fix it to return the effective value, whether that has been set at the socket or net namespace level. Fixes: 513674b5a2c9 ("net: reevalulate autoflowlabel setting after sysctl ...") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ipv6.h | 1 + net/ipv6/ip6_output.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 615ce0abba9c..e64210c98c2b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -290,6 +290,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); +bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); static inline void fl6_sock_release(struct ip6_flowlabel *fl) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 388584b8ff31..6d000c0001fa 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -156,7 +156,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } -static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) { if (!np->autoflowlabel_set) return ip6_default_np_autolabel(net); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 6e3871c7f8f7..bcea985dd76b 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1316,7 +1316,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_AUTOFLOWLABEL: - val = np->autoflowlabel; + val = ip6_autoflowlabel(sock_net(sk), np); break; default: -- GitLab From fb50d8c9169ee0e90cd7a733d65cb86876d795c7 Mon Sep 17 00:00:00 2001 From: Mike Maloney Date: Wed, 10 Jan 2018 12:45:10 -0500 Subject: [PATCH 1054/1398] ipv6: fix udpv6 sendmsg crash caused by too small MTU [ Upstream commit 749439bfac6e1a2932c582e2699f91d329658196 ] The logic in __ip6_append_data() assumes that the MTU is at least large enough for the headers. A device's MTU may be adjusted after being added while sendmsg() is processing data, resulting in __ip6_append_data() seeing any MTU. For an mtu smaller than the size of the fragmentation header, the math results in a negative 'maxfraglen', which causes problems when refragmenting any previous skb in the skb_write_queue, leaving it possibly malformed. Instead sendmsg returns EINVAL when the mtu is calculated to be less than IPV6_MIN_MTU. Found by syzkaller: kernel BUG at ./include/linux/skbuff.h:2064! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 14216 Comm: syz-executor5 Not tainted 4.13.0-rc4+ #2 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801d0b68580 task.stack: ffff8801ac6b8000 RIP: 0010:__skb_pull include/linux/skbuff.h:2064 [inline] RIP: 0010:__ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 RSP: 0018:ffff8801ac6bf570 EFLAGS: 00010216 RAX: 0000000000010000 RBX: 0000000000000028 RCX: ffffc90003cce000 RDX: 00000000000001b8 RSI: ffffffff839df06f RDI: ffff8801d9478ca0 RBP: ffff8801ac6bf780 R08: ffff8801cc3f1dbc R09: 0000000000000000 R10: ffff8801ac6bf7a0 R11: 43cb4b7b1948a9e7 R12: ffff8801cc3f1dc8 R13: ffff8801cc3f1d40 R14: 0000000000001036 R15: dffffc0000000000 FS: 00007f43d740c700(0000) GS:ffff8801dc100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7834984000 CR3: 00000001d79b9000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ip6_finish_skb include/net/ipv6.h:911 [inline] udp_v6_push_pending_frames+0x255/0x390 net/ipv6/udp.c:1093 udpv6_sendmsg+0x280d/0x31a0 net/ipv6/udp.c:1363 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 SYSC_sendto+0x352/0x5a0 net/socket.c:1750 SyS_sendto+0x40/0x50 net/socket.c:1718 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4512e9 RSP: 002b:00007f43d740bc08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000007180a8 RCX: 00000000004512e9 RDX: 000000000000002e RSI: 0000000020d08000 RDI: 0000000000000005 RBP: 0000000000000086 R08: 00000000209c1000 R09: 000000000000001c R10: 0000000000040800 R11: 0000000000000216 R12: 00000000004b9c69 R13: 00000000ffffffff R14: 0000000000000005 R15: 00000000202c2000 Code: 9e 01 fe e9 c5 e8 ff ff e8 7f 9e 01 fe e9 4a ea ff ff 48 89 f7 e8 52 9e 01 fe e9 aa eb ff ff e8 a8 b6 cf fd 0f 0b e8 a1 b6 cf fd <0f> 0b 49 8d 45 78 4d 8d 45 7c 48 89 85 78 fe ff ff 49 8d 85 ba RIP: __skb_pull include/linux/skbuff.h:2064 [inline] RSP: ffff8801ac6bf570 RIP: __ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 RSP: ffff8801ac6bf570 Reported-by: syzbot Signed-off-by: Mike Maloney Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6d000c0001fa..af98bbe7af0f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1260,14 +1260,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(&rt->dst); + READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); else mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); + READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } + if (mtu < IPV6_MIN_MTU) + return -EINVAL; cork->base.fragsize = mtu; if (dst_allfrag(rt->dst.path)) cork->base.flags |= IPCORK_ALLFRAG; -- GitLab From c2ceff11b46e6eed2a3138811864aaa645754127 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Jan 2018 22:31:18 -0800 Subject: [PATCH 1055/1398] ipv6: ip6_make_skb() needs to clear cork.base.dst [ Upstream commit 95ef498d977bf44ac094778fd448b98af158a3e6 ] In my last patch, I missed fact that cork.base.dst was not initialized in ip6_make_skb() : If ip6_setup_cork() returns an error, we might attempt a dst_release() on some random pointer. Fixes: 862c03ee1deb ("ipv6: fix possible mem leaks in ipv6_make_skb()") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index af98bbe7af0f..2e3db3619858 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1800,6 +1800,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, cork.base.flags = 0; cork.base.addr = 0; cork.base.opt = NULL; + cork.base.dst = NULL; v6_cork.opt = NULL; err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); if (err) { -- GitLab From 283498b4ca3534777cd28d17ebffd2e8f3fb0716 Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Mon, 15 Jan 2018 13:24:28 -0500 Subject: [PATCH 1056/1398] lan78xx: Fix failure in USB Full Speed [ Upstream commit a5b1379afbfabf91e3a689e82ac619a7157336b3 ] Fix initialize the uninitialized tx_qlen to an appropriate value when USB Full Speed is used. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 9c257ffedb15..c53385a0052f 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2197,6 +2197,7 @@ static int lan78xx_reset(struct lan78xx_net *dev) buf = DEFAULT_BURST_CAP_SIZE / FS_USB_PKT_SIZE; dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE; dev->rx_qlen = 4; + dev->tx_qlen = 4; } ret = lan78xx_write_reg(dev, BURST_CAP, buf); -- GitLab From 0ae16964f2154266f411b639ef101869ac3ae0b5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 19 Jan 2018 11:50:46 +0100 Subject: [PATCH 1057/1398] net: igmp: fix source address check for IGMPv3 reports [ Upstream commit ad23b750933ea7bf962678972a286c78a8fa36aa ] Commit "net: igmp: Use correct source address on IGMPv3 reports" introduced a check to validate the source address of locally generated IGMPv3 packets. Instead of checking the local interface address directly, it uses inet_ifa_match(fl4->saddr, ifa), which checks if the address is on the local subnet (or equal to the point-to-point address if used). This breaks for point-to-point interfaces, so check against ifa->ifa_local directly. Cc: Kevin Cernekee Fixes: a46182b00290 ("net: igmp: Use correct source address on IGMPv3 reports") Reported-by: Sebastian Gottschall Signed-off-by: Felix Fietkau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/igmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 7bff0c65046f..9c7a4cea1628 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -332,7 +332,7 @@ static __be32 igmpv3_get_srcaddr(struct net_device *dev, return htonl(INADDR_ANY); for_ifa(in_dev) { - if (inet_ifa_match(fl4->saddr, ifa)) + if (fl4->saddr == ifa->ifa_local) return fl4->saddr; } endfor_ifa(in_dev); -- GitLab From a44d91150f3365968f6d83abfb76c4dd92bda168 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jan 2018 19:59:19 -0800 Subject: [PATCH 1058/1398] net: qdisc_pkt_len_init() should be more robust [ Upstream commit 7c68d1a6b4db9012790af7ac0f0fdc0d2083422a ] Without proper validation of DODGY packets, we might very well feed qdisc_pkt_len_init() with invalid GSO packets. tcp_hdrlen() might access out-of-bound data, so let's use skb_header_pointer() and proper checks. Whole story is described in commit d0c081b49137 ("flow_dissector: properly cap thoff field") We have the goal of validating DODGY packets earlier in the stack, so we might very well revert this fix in the future. Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Jason Wang Reported-by: syzbot+9da69ebac7dddd804552@syzkaller.appspotmail.com Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 09007a71c8dd..67b5d4d8acb1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3083,10 +3083,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) hdr_len = skb_transport_header(skb) - skb_mac_header(skb); /* + transport layer */ - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - hdr_len += tcp_hdrlen(skb); - else - hdr_len += sizeof(struct udphdr); + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + const struct tcphdr *th; + struct tcphdr _tcphdr; + + th = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_tcphdr), &_tcphdr); + if (likely(th)) + hdr_len += __tcp_hdrlen(th); + } else { + struct udphdr _udphdr; + + if (skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_udphdr), &_udphdr)) + hdr_len += sizeof(struct udphdr); + } if (shinfo->gso_type & SKB_GSO_DODGY) gso_segs = DIV_ROUND_UP(skb->len - hdr_len, -- GitLab From cf67be7a1a21c4d15593d7bacff5a59e30749b74 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 18 Jan 2018 16:14:26 -0500 Subject: [PATCH 1059/1398] net: tcp: close sock if net namespace is exiting [ Upstream commit 4ee806d51176ba7b8ff1efd81f271d7252e03a1d ] When a tcp socket is closed, if it detects that its net namespace is exiting, close immediately and do not wait for FIN sequence. For normal sockets, a reference is taken to their net namespace, so it will never exit while the socket is open. However, kernel sockets do not take a reference to their net namespace, so it may begin exiting while the kernel socket is still open. In this case if the kernel socket is a tcp socket, it will stay open trying to complete its close sequence. The sock's dst(s) hold a reference to their interface, which are all transferred to the namespace's loopback interface when the real interfaces are taken down. When the namespace tries to take down its loopback interface, it hangs waiting for all references to the loopback interface to release, which results in messages like: unregister_netdevice: waiting for lo to become free. Usage count = 1 These messages continue until the socket finally times out and closes. Since the net namespace cleanup holds the net_mutex while calling its registered pernet callbacks, any new net namespace initialization is blocked until the current net namespace finishes exiting. After this change, the tcp socket notices the exiting net namespace, and closes immediately, releasing its dst(s) and their reference to the loopback interface, which lets the net namespace continue exiting. Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1711407 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=97811 Signed-off-by: Dan Streetman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/net_namespace.h | 10 ++++++++++ net/ipv4/tcp.c | 3 +++ net/ipv4/tcp_timer.c | 15 +++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 0940598c002f..23102da24dd9 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -213,6 +213,11 @@ int net_eq(const struct net *net1, const struct net *net2) return net1 == net2; } +static inline int check_net(const struct net *net) +{ + return atomic_read(&net->count) != 0; +} + void net_drop_ns(void *); #else @@ -237,6 +242,11 @@ int net_eq(const struct net *net1, const struct net *net2) return 1; } +static inline int check_net(const struct net *net) +{ + return 1; +} + #define net_drop_ns NULL #endif diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 05d2bde00864..7efa6b062049 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2215,6 +2215,9 @@ void tcp_close(struct sock *sk, long timeout) tcp_send_active_reset(sk, GFP_ATOMIC); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); + } else if (!check_net(sock_net(sk))) { + /* Not possible to send reset; just close */ + tcp_set_state(sk, TCP_CLOSE); } } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 74db43b47917..69523389f067 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -50,11 +50,19 @@ static void tcp_write_err(struct sock *sk) * to prevent DoS attacks. It is called when a retransmission timeout * or zero probe timeout occurs on orphaned socket. * + * Also close if our net namespace is exiting; in that case there is no + * hope of ever communicating again since all netns interfaces are already + * down (or about to be down), and we need to release our dst references, + * which have been moved to the netns loopback interface, so the namespace + * can finish exiting. This condition is only possible if we are a kernel + * socket, as those do not hold references to the namespace. + * * Criteria is still not confirmed experimentally and may change. * We kill the socket, if: * 1. If number of orphaned sockets exceeds an administratively configured * limit. * 2. If we have strong memory pressure. + * 3. If our net namespace is exiting. */ static int tcp_out_of_resources(struct sock *sk, bool do_reset) { @@ -83,6 +91,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; } + + if (!check_net(sock_net(sk))) { + /* Not possible to send reset; just close */ + tcp_done(sk); + return 1; + } + return 0; } -- GitLab From 1bd21b158e07e0b8c5a2ce832305a0ebfe42c480 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 22 Jan 2018 18:06:37 +0100 Subject: [PATCH 1060/1398] pppoe: take ->needed_headroom of lower device into account on xmit [ Upstream commit 02612bb05e51df8489db5e94d0cf8d1c81f87b0c ] In pppoe_sendmsg(), reserving dev->hard_header_len bytes of headroom was probably fine before the introduction of ->needed_headroom in commit f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom"). But now, virtual devices typically advertise the size of their overhead in dev->needed_headroom, so we must also take it into account in skb_reserve(). Allocation size of skb is also updated to take dev->needed_tailroom into account and replace the arbitrary 32 bytes with the real size of a PPPoE header. This issue was discovered by syzbot, who connected a pppoe socket to a gre device which had dev->header_ops->create == ipgre_header and dev->hard_header_len == 0. Therefore, PPPoE didn't reserve any headroom, and dev_hard_header() crashed when ipgre_header() tried to prepend its header to skb->data. skbuff: skb_under_panic: text:000000001d390b3a len:31 put:24 head:00000000d8ed776f data:000000008150e823 tail:0x7 end:0xc0 dev:gre0 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:104! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 3670 Comm: syzkaller801466 Not tainted 4.15.0-rc7-next-20180115+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:skb_panic+0x162/0x1f0 net/core/skbuff.c:100 RSP: 0018:ffff8801d9bd7840 EFLAGS: 00010282 RAX: 0000000000000083 RBX: ffff8801d4f083c0 RCX: 0000000000000000 RDX: 0000000000000083 RSI: 1ffff1003b37ae92 RDI: ffffed003b37aefc RBP: ffff8801d9bd78a8 R08: 1ffff1003b37ae8a R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff86200de0 R13: ffffffff84a981ad R14: 0000000000000018 R15: ffff8801d2d34180 FS: 00000000019c4880(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000208bc000 CR3: 00000001d9111001 CR4: 00000000001606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_under_panic net/core/skbuff.c:114 [inline] skb_push+0xce/0xf0 net/core/skbuff.c:1714 ipgre_header+0x6d/0x4e0 net/ipv4/ip_gre.c:879 dev_hard_header include/linux/netdevice.h:2723 [inline] pppoe_sendmsg+0x58e/0x8b0 drivers/net/ppp/pppoe.c:890 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg+0xca/0x110 net/socket.c:640 sock_write_iter+0x31a/0x5d0 net/socket.c:909 call_write_iter include/linux/fs.h:1775 [inline] do_iter_readv_writev+0x525/0x7f0 fs/read_write.c:653 do_iter_write+0x154/0x540 fs/read_write.c:932 vfs_writev+0x18a/0x340 fs/read_write.c:977 do_writev+0xfc/0x2a0 fs/read_write.c:1012 SYSC_writev fs/read_write.c:1085 [inline] SyS_writev+0x27/0x30 fs/read_write.c:1082 entry_SYSCALL_64_fastpath+0x29/0xa0 Admittedly PPPoE shouldn't be allowed to run on non Ethernet-like interfaces, but reserving space for ->needed_headroom is a more fundamental issue that needs to be addressed first. Same problem exists for __pppoe_xmit(), which also needs to take dev->needed_headroom into account in skb_cow_head(). Fixes: f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom") Reported-by: syzbot+ed0838d0fa4c4f2b528e20286e6dc63effc7c14d@syzkaller.appspotmail.com Signed-off-by: Guillaume Nault Reviewed-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/pppoe.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 4ddae8118c85..dc36c2ec1d10 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -842,6 +842,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, struct pppoe_hdr *ph; struct net_device *dev; char *start; + int hlen; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { @@ -860,16 +861,16 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, if (total_len > (dev->mtu + dev->hard_header_len)) goto end; - - skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32, - 0, GFP_KERNEL); + hlen = LL_RESERVED_SPACE(dev); + skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len + + dev->needed_tailroom, 0, GFP_KERNEL); if (!skb) { error = -ENOMEM; goto end; } /* Reserve space for headers. */ - skb_reserve(skb, dev->hard_header_len); + skb_reserve(skb, hlen); skb_reset_network_header(skb); skb->dev = dev; @@ -930,7 +931,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) /* Copy the data if there is no space for the header or if it's * read-only. */ - if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len)) + if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph))) goto abort; __skb_push(skb, sizeof(*ph)); -- GitLab From 0f51492d1bd5f7376d8175edd266b808d58df21f Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Fri, 26 Jan 2018 01:53:26 +0100 Subject: [PATCH 1061/1398] r8169: fix memory corruption on retrieval of hardware statistics. [ Upstream commit a78e93661c5fd30b9e1dee464b2f62f966883ef7 ] Hardware statistics retrieval hurts in tight invocation loops. Avoid extraneous write and enforce strict ordering of writes targeted to the tally counters dump area address registers. Signed-off-by: Francois Romieu Tested-by: Oliver Freyermuth Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 2c4350a1c629..298b74ebc1e9 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -2222,19 +2222,14 @@ static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd) void __iomem *ioaddr = tp->mmio_addr; dma_addr_t paddr = tp->counters_phys_addr; u32 cmd; - bool ret; RTL_W32(CounterAddrHigh, (u64)paddr >> 32); + RTL_R32(CounterAddrHigh); cmd = (u64)paddr & DMA_BIT_MASK(32); RTL_W32(CounterAddrLow, cmd); RTL_W32(CounterAddrLow, cmd | counter_cmd); - ret = rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); - - RTL_W32(CounterAddrLow, 0); - RTL_W32(CounterAddrHigh, 0); - - return ret; + return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); } static bool rtl8169_reset_counters(struct net_device *dev) -- GitLab From 8e3534ea657e8e072e480b4a36c743458c5c852d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 15 Jan 2018 17:02:00 +0800 Subject: [PATCH 1062/1398] sctp: do not allow the v4 socket to bind a v4mapped v6 address [ Upstream commit c5006b8aa74599ce19104b31d322d2ea9ff887cc ] The check in sctp_sockaddr_af is not robust enough to forbid binding a v4mapped v6 addr on a v4 socket. The worse thing is that v4 socket's bind_verify would not convert this v4mapped v6 addr to a v4 addr. syzbot even reported a crash as the v4 socket bound a v6 addr. This patch is to fix it by doing the common sa.sa_family check first, then AF_INET check for v4mapped v6 addrs. Fixes: 7dab83de50c7 ("sctp: Support ipv6only AF_INET6 sockets.") Reported-by: syzbot+7b7b518b1228d2743963@syzkaller.appspotmail.com Acked-by: Neil Horman Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7181ce6c62bf..b9260d4029d8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -332,16 +332,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, if (len < sizeof (struct sockaddr)) return NULL; + if (!opt->pf->af_supported(addr->sa.sa_family, opt)) + return NULL; + /* V4 mapped address are really of AF_INET family */ if (addr->sa.sa_family == AF_INET6 && - ipv6_addr_v4mapped(&addr->v6.sin6_addr)) { - if (!opt->pf->af_supported(AF_INET, opt)) - return NULL; - } else { - /* Does this PF support this AF? */ - if (!opt->pf->af_supported(addr->sa.sa_family, opt)) - return NULL; - } + ipv6_addr_v4mapped(&addr->v6.sin6_addr) && + !opt->pf->af_supported(AF_INET, opt)) + return NULL; /* If we get this far, af is valid. */ af = sctp_get_af_specific(addr->sa.sa_family); -- GitLab From 2f056e7def4236bf5279da38410c93acee164d5a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 15 Jan 2018 17:01:36 +0800 Subject: [PATCH 1063/1398] sctp: return error if the asoc has been peeled off in sctp_wait_for_sndbuf [ Upstream commit a0ff660058b88d12625a783ce9e5c1371c87951f ] After commit cea0cc80a677 ("sctp: use the right sk after waking up from wait_buf sleep"), it may change to lock another sk if the asoc has been peeled off in sctp_wait_for_sndbuf. However, the asoc's new sk could be already closed elsewhere, as it's in the sendmsg context of the old sk that can't avoid the new sk's closing. If the sk's last one refcnt is held by this asoc, later on after putting this asoc, the new sk will be freed, while under it's own lock. This patch is to revert that commit, but fix the old issue by returning error under the old sk's lock. Fixes: cea0cc80a677 ("sctp: use the right sk after waking up from wait_buf sleep") Reported-by: syzbot+ac6ea7baa4432811eb50@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b9260d4029d8..c472b8391dde 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -83,7 +83,7 @@ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, - size_t msg_len, struct sock **orig_sk); + size_t msg_len); static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); @@ -1956,7 +1956,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); if (!sctp_wspace(asoc)) { /* sk can be changed by peel off when waiting for buf. */ - err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk); + err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); if (err) { if (err == -ESRCH) { /* asoc is already dead. */ @@ -7439,12 +7439,12 @@ void sctp_sock_rfree(struct sk_buff *skb) /* Helper function to wait for space in the sndbuf. */ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, - size_t msg_len, struct sock **orig_sk) + size_t msg_len) { struct sock *sk = asoc->base.sk; - int err = 0; long current_timeo = *timeo_p; DEFINE_WAIT(wait); + int err = 0; pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, *timeo_p, msg_len); @@ -7473,17 +7473,13 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, release_sock(sk); current_timeo = schedule_timeout(current_timeo); lock_sock(sk); - if (sk != asoc->base.sk) { - release_sock(sk); - sk = asoc->base.sk; - lock_sock(sk); - } + if (sk != asoc->base.sk) + goto do_error; *timeo_p = current_timeo; } out: - *orig_sk = sk; finish_wait(&asoc->wait, &wait); /* Release the association's refcnt. */ -- GitLab From 0e52703d0746ee35326623c5442e9eec0139ffeb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 10 Jan 2018 12:50:25 -0800 Subject: [PATCH 1064/1398] tipc: fix a memory leak in tipc_nl_node_get_link() [ Upstream commit 59b36613e85fb16ebf9feaf914570879cd5c2a21 ] When tipc_node_find_by_name() fails, the nlmsg is not freed. While on it, switch to a goto label to properly free it. Fixes: be9c086715c ("tipc: narrow down exposure of struct tipc_node") Reported-by: Dmitry Vyukov Cc: Jon Maloy Cc: Ying Xue Signed-off-by: Cong Wang Acked-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/node.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 27753325e06e..5b3e1ea37b6d 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1848,36 +1848,38 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) if (strcmp(name, tipc_bclink_name) == 0) { err = tipc_nl_add_bc_link(net, &msg); - if (err) { - nlmsg_free(msg.skb); - return err; - } + if (err) + goto err_free; } else { int bearer_id; struct tipc_node *node; struct tipc_link *link; node = tipc_node_find_by_name(net, name, &bearer_id); - if (!node) - return -EINVAL; + if (!node) { + err = -EINVAL; + goto err_free; + } tipc_node_read_lock(node); link = node->links[bearer_id].link; if (!link) { tipc_node_read_unlock(node); - nlmsg_free(msg.skb); - return -EINVAL; + err = -EINVAL; + goto err_free; } err = __tipc_nl_add_link(net, &msg, link, 0); tipc_node_read_unlock(node); - if (err) { - nlmsg_free(msg.skb); - return err; - } + if (err) + goto err_free; } return genlmsg_reply(msg.skb, info); + +err_free: + nlmsg_free(msg.skb); + return err; } int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) -- GitLab From 66c16a22e3b141152c2bc85236b48372b2b1e984 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 22 Jan 2018 16:06:37 -0500 Subject: [PATCH 1065/1398] vmxnet3: repair memory leak [ Upstream commit 848b159835ddef99cc4193083f7e786c3992f580 ] with the introduction of commit b0eb57cb97e7837ebb746404c2c58c6f536f23fa, it appears that rq->buf_info is improperly handled. While it is heap allocated when an rx queue is setup, and freed when torn down, an old line of code in vmxnet3_rq_destroy was not properly removed, leading to rq->buf_info[0] being set to NULL prior to its being freed, causing a memory leak, which eventually exhausts the system on repeated create/destroy operations (for example, when the mtu of a vmxnet3 interface is changed frequently. Fix is pretty straight forward, just move the NULL set to after the free. Tested by myself with successful results Applies to net, and should likely be queued for stable, please Signed-off-by: Neil Horman Reported-By: boyang@redhat.com CC: boyang@redhat.com CC: Shrikrishna Khare CC: "VMware, Inc." CC: David S. Miller Acked-by: Shrikrishna Khare Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index ef83ae3b0a44..4afba17e2403 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1616,7 +1616,6 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, rq->rx_ring[i].basePA); rq->rx_ring[i].base = NULL; } - rq->buf_info[i] = NULL; } if (rq->data_ring.base) { @@ -1638,6 +1637,7 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, (rq->rx_ring[0].size + rq->rx_ring[1].size); dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0], rq->buf_info_pa); + rq->buf_info[0] = rq->buf_info[1] = NULL; } } -- GitLab From 014510b11781c7ba17b4e2c25720ce15233c712f Mon Sep 17 00:00:00 2001 From: Jim Westfall Date: Sun, 14 Jan 2018 04:18:50 -0800 Subject: [PATCH 1066/1398] net: Allow neigh contructor functions ability to modify the primary_key [ Upstream commit 096b9854c04df86f03b38a97d40b6506e5730919 ] Use n->primary_key instead of pkey to account for the possibility that a neigh constructor function may have modified the primary_key value. Signed-off-by: Jim Westfall Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f45f6198851f..7b315663f840 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -496,7 +496,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) nht = neigh_hash_grow(tbl, nht->hash_shift + 1); - hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); + hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift); if (n->parms->dead) { rc = ERR_PTR(-EINVAL); @@ -508,7 +508,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, n1 != NULL; n1 = rcu_dereference_protected(n1->next, lockdep_is_held(&tbl->lock))) { - if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { + if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { if (want_ref) neigh_hold(n1); rc = n1; -- GitLab From 260eb694b5a4bd3424a7c445128fb5a784e95e3d Mon Sep 17 00:00:00 2001 From: Jim Westfall Date: Sun, 14 Jan 2018 04:18:51 -0800 Subject: [PATCH 1067/1398] ipv4: Make neigh lookup keys for loopback/point-to-point devices be INADDR_ANY [ Upstream commit cd9ff4de0107c65d69d02253bb25d6db93c3dbc1 ] Map all lookup neigh keys to INADDR_ANY for loopback/point-to-point devices to avoid making an entry for every remote ip the device needs to talk to. This used the be the old behavior but became broken in a263b3093641f (ipv4: Make neigh lookups directly in output packet path) and later removed in 0bb4087cbec0 (ipv4: Fix neigh lookup keying over loopback/point-to-point devices) because it was broken. Signed-off-by: Jim Westfall Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/arp.h | 3 +++ net/ipv4/arp.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/arp.h b/include/net/arp.h index 5e0f891d476c..1b3f86981757 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -19,6 +19,9 @@ static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32 static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + key = INADDR_ANY; + return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 51b27ae09fbd..e60517eb1c3a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neighbour *neigh, const void *pkey) static int arp_constructor(struct neighbour *neigh) { - __be32 addr = *(__be32 *)neigh->primary_key; + __be32 addr; struct net_device *dev = neigh->dev; struct in_device *in_dev; struct neigh_parms *parms; + u32 inaddr_any = INADDR_ANY; + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len); + + addr = *(__be32 *)neigh->primary_key; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (!in_dev) { -- GitLab From 00f9e47c6f9d9d25e1bf9cd5f58652d74e36d567 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 10 Jan 2018 16:24:45 +0100 Subject: [PATCH 1068/1398] ppp: unlock all_ppp_mutex before registering device [ Upstream commit 0171c41835591e9aa2e384b703ef9a6ae367c610 ] ppp_dev_uninit(), which is the .ndo_uninit() handler of PPP devices, needs to lock pn->all_ppp_mutex. Therefore we mustn't call register_netdevice() with pn->all_ppp_mutex already locked, or we'd deadlock in case register_netdevice() fails and calls .ndo_uninit(). Fortunately, we can unlock pn->all_ppp_mutex before calling register_netdevice(). This lock protects pn->units_idr, which isn't used in the device registration process. However, keeping pn->all_ppp_mutex locked during device registration did ensure that no device in transient state would be published in pn->units_idr. In practice, unlocking it before calling register_netdevice() doesn't change this property: ppp_unit_register() is called with 'ppp_mutex' locked and all searches done in pn->units_idr hold this lock too. Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion") Reported-and-tested-by: syzbot+367889b9c9e279219175@syzkaller.appspotmail.com Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/ppp_generic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index b883af93929c..fc4c2ccc3d22 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1002,17 +1002,18 @@ static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set) if (!ifname_is_set) snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ppp->file.index); + mutex_unlock(&pn->all_ppp_mutex); + ret = register_netdevice(ppp->dev); if (ret < 0) goto err_unit; atomic_inc(&ppp_unit_count); - mutex_unlock(&pn->all_ppp_mutex); - return 0; err_unit: + mutex_lock(&pn->all_ppp_mutex); unit_put(&pn->units_idr, ppp->file.index); err: mutex_unlock(&pn->all_ppp_mutex); -- GitLab From 1711ba166e5f7148bc41b5e24f7f282ffc055515 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 19 Jan 2018 20:23:50 +0100 Subject: [PATCH 1069/1398] be2net: restore properly promisc mode after queues reconfiguration [ Upstream commit 52acf06451930eb4cefabd5ecea56e2d46c32f76 ] The commit 622190669403 ("be2net: Request RSS capability of Rx interface depending on number of Rx rings") modified be_update_queues() so the IFACE (HW representation of the netdevice) is destroyed and then re-created. This causes a regression because potential promiscuous mode is not restored properly during be_open() because the driver thinks that the HW has promiscuous mode already enabled. Note that Lancer is not affected by this bug because RX-filter flags are disabled during be_close() for this chipset. Cc: Sathya Perla Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Fixes: 622190669403 ("be2net: Request RSS capability of Rx interface depending on number of Rx rings") Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 1644896568c4..b2eeecb26939 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4733,6 +4733,15 @@ int be_update_queues(struct be_adapter *adapter) be_schedule_worker(adapter); + /* + * The IF was destroyed and re-created. We need to clear + * all promiscuous flags valid for the destroyed IF. + * Without this promisc mode is not restored during + * be_open() because the driver thinks that it is + * already enabled in HW. + */ + adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS; + if (netif_running(netdev)) status = be_open(netdev); -- GitLab From cc99c6d59adf7daf62522b09e25af72267c997c8 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 18 Jan 2018 20:51:12 +0300 Subject: [PATCH 1070/1398] ip6_gre: init dev->mtu and dev->hard_header_len correctly [ Upstream commit 128bb975dc3c25d00de04e503e2fe0a780d04459 ] Commit b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions") moved dev->mtu initialization from ip6gre_tunnel_setup() to ip6gre_tunnel_init(), as a result, the previously set values, before ndo_init(), are reset in the following cases: * rtnl_create_link() can update dev->mtu from IFLA_MTU parameter. * ip6gre_tnl_link_config() is invoked before ndo_init() in netlink and ioctl setup, so ndo_init() can reset MTU adjustments with the lower device MTU as well, dev->mtu and dev->hard_header_len. Not applicable for ip6gretap because it has one more call to ip6gre_tnl_link_config(tunnel, 1) in ip6gre_tap_init(). Fix the first case by updating dev->mtu with 'tb[IFLA_MTU]' parameter if a user sets it manually on a device creation, and fix the second one by moving ip6gre_tnl_link_config() call after register_netdevice(). Fixes: b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions") Fixes: db2ec95d1ba4 ("ip6_gre: Fix MTU setting") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c46066c5dc27..db2613b4a049 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -337,11 +337,12 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, nt->dev = dev; nt->net = dev_net(dev); - ip6gre_tnl_link_config(nt, 1); if (register_netdevice(dev) < 0) goto failed_free; + ip6gre_tnl_link_config(nt, 1); + /* Can use a lockless transmit, unless we generate output sequences */ if (!(nt->parms.o_flags & TUNNEL_SEQ)) dev->features |= NETIF_F_LLTX; @@ -1263,7 +1264,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { - struct ip6_tnl *tunnel; int ret; ret = ip6gre_tunnel_init_common(dev); @@ -1272,10 +1272,6 @@ static int ip6gre_tap_init(struct net_device *dev) dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - tunnel = netdev_priv(dev); - - ip6gre_tnl_link_config(tunnel, 1); - return 0; } @@ -1370,7 +1366,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, nt->dev = dev; nt->net = dev_net(dev); - ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); dev->features |= GRE6_FEATURES; dev->hw_features |= GRE6_FEATURES; @@ -1396,6 +1391,11 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, if (err) goto out; + ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); + + if (tb[IFLA_MTU]) + ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + dev_hold(dev); ip6gre_tunnel_link(ign, nt); -- GitLab From 3110e2134c9718a1f3c091873769d1d9b621bb87 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 19 Jan 2018 09:29:18 -0500 Subject: [PATCH 1071/1398] gso: validate gso_type in GSO handlers [ Upstream commit 121d57af308d0cf943f08f4738d24d3966c38cd9 ] Validate gso_type during segmentation as SKB_GSO_DODGY sources may pass packets where the gso_type does not match the contents. Syzkaller was able to enter the SCTP gso handler with a packet of gso_type SKB_GSO_TCPV4. On entry of transport layer gso handlers, verify that the gso_type matches the transport protocol. Fixes: 90017accff61 ("sctp: Add GSO support") Link: http://lkml.kernel.org/r/<001a1137452496ffc305617e5fe0@google.com> Reported-by: syzbot+fee64147a25aecd48055@syzkaller.appspotmail.com Signed-off-by: Willem de Bruijn Acked-by: Jason Wang Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_offload.c | 3 +++ net/ipv4/udp_offload.c | 3 +++ net/ipv6/tcpv6_offload.c | 3 +++ net/ipv6/udp_offload.c | 3 +++ net/sctp/offload.c | 3 +++ 5 files changed, 15 insertions(+) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index bc68da38ea86..366b1becff9d 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)) + return ERR_PTR(-EINVAL); + if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6401574cd638..f4f616eaaeb8 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -205,6 +205,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + goto out; + if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index d883c9204c01..278e49cd67d4 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, { struct tcphdr *th; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) + return ERR_PTR(-EINVAL); + if (!pskb_may_pull(skb, sizeof(*th))) return ERR_PTR(-EINVAL); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index e7d378c032cb..2bd2087bd105 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -55,6 +55,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, const struct ipv6hdr *ipv6h; struct udphdr *uh; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + goto out; + if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 4f5a2b580aa5..6300f28c9588 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -44,6 +44,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); struct sctphdr *sh; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) + goto out; + sh = sctp_hdr(skb); if (!pskb_may_pull(skb, sizeof(*sh))) goto out; -- GitLab From 1105145cb3d5eee496de1d55aaea160c78e1c5b5 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 24 Jan 2018 10:02:09 +0100 Subject: [PATCH 1072/1398] mlxsw: spectrum_router: Don't log an error on missing neighbor [ Upstream commit 1ecdaea02ca6bfacf2ecda500dc1af51e9780c42 ] Driver periodically samples all neighbors configured in device in order to update the kernel regarding their state. When finding an entry configured in HW that doesn't show in neigh_lookup() driver logs an error message. This introduces a race when removing multiple neighbors - it's possible that a given entry would still be configured in HW as its removal is still being processed but is already removed from the kernel's neighbor tables. Simply remove the error message and gracefully accept such events. Fixes: c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table") Fixes: 60f040ca11b9 ("mlxsw: spectrum_router: Periodically dump active IPv6 neighbours") Signed-off-by: Yuval Mintz Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 8aa91ddff287..16556011d571 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -765,11 +765,8 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, dipn = htonl(dip); dev = mlxsw_sp->rifs[rif]->dev; n = neigh_lookup(&arp_tbl, &dipn, dev); - if (!n) { - netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n", - &dip); + if (!n) return; - } netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip); neigh_event_send(n, NULL); -- GitLab From 18717ee28ef5c0285ff969d1e9357529a8a9233f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 15 Jan 2018 11:37:29 -0800 Subject: [PATCH 1073/1398] tun: fix a memory leak for tfile->tx_array [ Upstream commit 4df0bfc79904b7169dc77dcce44598b1545721f9 ] tfile->tun could be detached before we close the tun fd, via tun_detach_all(), so it should not be used to check for tfile->tx_array. As Jason suggested, we probably have to clean it up unconditionally both in __tun_deatch() and tun_detach_all(), but this requires to check if it is initialized or not. Currently skb_array_cleanup() doesn't have such a check, so I check it in the caller and introduce a helper function, it is a bit ugly but we can always improve it in net-next. Reported-by: Dmitry Vyukov Fixes: 1576d9860599 ("tun: switch to use skb array for tx") Cc: Jason Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 518cbfbc8b65..eb6dc28e5e52 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -525,6 +525,14 @@ static void tun_queue_purge(struct tun_file *tfile) skb_queue_purge(&tfile->sk.sk_error_queue); } +static void tun_cleanup_tx_array(struct tun_file *tfile) +{ + if (tfile->tx_array.ring.queue) { + skb_array_cleanup(&tfile->tx_array); + memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); + } +} + static void __tun_detach(struct tun_file *tfile, bool clean) { struct tun_file *ntfile; @@ -566,8 +574,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - if (tun) - skb_array_cleanup(&tfile->tx_array); + tun_cleanup_tx_array(tfile); sock_put(&tfile->sk); } } @@ -606,11 +613,13 @@ static void tun_detach_all(struct net_device *dev) /* Drop read queue */ tun_queue_purge(tfile); sock_put(&tfile->sk); + tun_cleanup_tx_array(tfile); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { tun_enable_queue(tfile); tun_queue_purge(tfile); sock_put(&tfile->sk); + tun_cleanup_tx_array(tfile); } BUG_ON(tun->numdisabled != 0); @@ -2363,6 +2372,8 @@ static int tun_chr_open(struct inode *inode, struct file * file) sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); + memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); + return 0; } -- GitLab From eecfa2eeefe3caef4c8d6b2284f6066c76ab26a1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jan 2018 14:21:13 -0800 Subject: [PATCH 1074/1398] flow_dissector: properly cap thoff field [ Upstream commit d0c081b49137cd3200f2023c0875723be66e7ce5 ] syzbot reported yet another crash [1] that is caused by insufficient validation of DODGY packets. Two bugs are happening here to trigger the crash. 1) Flow dissection leaves with incorrect thoff field. 2) skb_probe_transport_header() sets transport header to this invalid thoff, even if pointing after skb valid data. 3) qdisc_pkt_len_init() reads out-of-bound data because it trusts tcp_hdrlen(skb) Possible fixes : - Full flow dissector validation before injecting bad DODGY packets in the stack. This approach was attempted here : https://patchwork.ozlabs.org/patch/ 861874/ - Have more robust functions in the core. This might be needed anyway for stable versions. This patch fixes the flow dissection issue. [1] CPU: 1 PID: 3144 Comm: syzkaller271204 Not tainted 4.15.0-rc4-mm1+ #49 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:355 [inline] kasan_report+0x23b/0x360 mm/kasan/report.c:413 __asan_report_load2_noabort+0x14/0x20 mm/kasan/report.c:432 __tcp_hdrlen include/linux/tcp.h:35 [inline] tcp_hdrlen include/linux/tcp.h:40 [inline] qdisc_pkt_len_init net/core/dev.c:3160 [inline] __dev_queue_xmit+0x20d3/0x2200 net/core/dev.c:3465 dev_queue_xmit+0x17/0x20 net/core/dev.c:3554 packet_snd net/packet/af_packet.c:2943 [inline] packet_sendmsg+0x3ad5/0x60a0 net/packet/af_packet.c:2968 sock_sendmsg_nosec net/socket.c:628 [inline] sock_sendmsg+0xca/0x110 net/socket.c:638 sock_write_iter+0x31a/0x5d0 net/socket.c:907 call_write_iter include/linux/fs.h:1776 [inline] new_sync_write fs/read_write.c:469 [inline] __vfs_write+0x684/0x970 fs/read_write.c:482 vfs_write+0x189/0x510 fs/read_write.c:544 SYSC_write fs/read_write.c:589 [inline] SyS_write+0xef/0x220 fs/read_write.c:581 entry_SYSCALL_64_fastpath+0x1f/0x96 Fixes: 34fad54c2537 ("net: __skb_flow_dissect() must cap its return value") Fixes: a6e544b0a88b ("flow_dissector: Jump to exit code in __skb_flow_dissect") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reported-by: syzbot Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/flow_dissector.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 32e4e0158846..862d63ec56e4 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -550,8 +550,8 @@ bool __skb_flow_dissect(const struct sk_buff *skb, out_good: ret = true; - key_control->thoff = (u16)nhoff; out: + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; @@ -559,7 +559,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb, out_bad: ret = false; - key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); goto out; } EXPORT_SYMBOL(__skb_flow_dissect); -- GitLab From dc1932c69835928dcb6259c744043dd03028cdee Mon Sep 17 00:00:00 2001 From: Xiao Liang Date: Mon, 22 Jan 2018 14:12:52 +0800 Subject: [PATCH 1075/1398] perf/x86/amd/power: Do not load AMD power module on !AMD platforms commit 40d4071ce2d20840d224b4a77b5dc6f752c9ab15 upstream. The AMD power module can be loaded on non AMD platforms, but unload fails with the following Oops: BUG: unable to handle kernel NULL pointer dereference at (null) IP: __list_del_entry_valid+0x29/0x90 Call Trace: perf_pmu_unregister+0x25/0xf0 amd_power_pmu_exit+0x1c/0xd23 [power] SyS_delete_module+0x1a8/0x2b0 ? exit_to_usermode_loop+0x8f/0xb0 entry_SYSCALL_64_fastpath+0x20/0x83 Return -ENODEV instead of 0 from the module init function if the CPU does not match. Fixes: c7ab62bfbe0e ("perf/x86/amd/power: Add AMD accumulated power reporting mechanism") Signed-off-by: Xiao Liang Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20180122061252.6394-1-xiliang@redhat.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index 9842270ed2f2..21a4e4127f43 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -277,7 +277,7 @@ static int __init amd_power_pmu_init(void) int ret; if (!x86_match_cpu(cpu_match)) - return 0; + return -ENODEV; if (!boot_cpu_has(X86_FEATURE_ACC_POWER)) return -ENODEV; -- GitLab From 9f3a6cadf494f378b7839de14063793259ad5626 Mon Sep 17 00:00:00 2001 From: Jia Zhang Date: Tue, 23 Jan 2018 11:41:32 +0100 Subject: [PATCH 1076/1398] x86/microcode/intel: Extend BDW late-loading further with LLC size check commit 7e702d17ed138cf4ae7c00e8c00681ed464587c7 upstream. Commit b94b73733171 ("x86/microcode/intel: Extend BDW late-loading with a revision check") reduced the impact of erratum BDF90 for Broadwell model 79. The impact can be reduced further by checking the size of the last level cache portion per core. Tony: "The erratum says the problem only occurs on the large-cache SKUs. So we only need to avoid the update if we are on a big cache SKU that is also running old microcode." For more details, see erratum BDF90 in document #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family Specification Update) from September 2017. Fixes: b94b73733171 ("x86/microcode/intel: Extend BDW late-loading with a revision check") Signed-off-by: Jia Zhang Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Acked-by: Tony Luck Link: https://lkml.kernel.org/r/1516321542-31161-1-git-send-email-zhang.jia@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/intel.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index ac3e636ad586..f90f17610f62 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -40,6 +40,9 @@ #include #include +/* last level cache size per core */ +static int llc_size_per_core; + /* * Temporary microcode blobs pointers storage. We note here during early load * the pointers to microcode blobs we've got from whatever storage (detached @@ -1053,12 +1056,14 @@ static bool is_blacklisted(unsigned int cpu) /* * Late loading on model 79 with microcode revision less than 0x0b000021 - * may result in a system hang. This behavior is documented in item - * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family). + * and LLC size per core bigger than 2.5MB may result in a system hang. + * This behavior is documented in item BDF90, #334165 (Intel Xeon + * Processor E7-8800/4800 v4 Product Family). */ if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X && c->x86_mask == 0x01 && + llc_size_per_core > 2621440 && c->microcode < 0x0b000021) { pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); @@ -1125,6 +1130,15 @@ static struct microcode_ops microcode_intel_ops = { .microcode_fini_cpu = microcode_fini_cpu, }; +static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c) +{ + u64 llc_size = c->x86_cache_size * 1024; + + do_div(llc_size, c->x86_max_cores); + + return (int)llc_size; +} + struct microcode_ops * __init init_intel_microcode(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -1135,6 +1149,8 @@ struct microcode_ops * __init init_intel_microcode(void) return NULL; } + llc_size_per_core = calc_llc_size_per_core(c); + return µcode_intel_ops; } -- GitLab From c98ff7299b404f110167883695f81080723e6e15 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Jan 2018 14:54:32 +0100 Subject: [PATCH 1077/1398] hrtimer: Reset hrtimer cpu base proper on CPU hotplug commit d5421ea43d30701e03cadc56a38854c36a8b4433 upstream. The hrtimer interrupt code contains a hang detection and mitigation mechanism, which prevents that a long delayed hrtimer interrupt causes a continous retriggering of interrupts which prevent the system from making progress. If a hang is detected then the timer hardware is programmed with a certain delay into the future and a flag is set in the hrtimer cpu base which prevents newly enqueued timers from reprogramming the timer hardware prior to the chosen delay. The subsequent hrtimer interrupt after the delay clears the flag and resumes normal operation. If such a hang happens in the last hrtimer interrupt before a CPU is unplugged then the hang_detected flag is set and stays that way when the CPU is plugged in again. At that point the timer hardware is not armed and it cannot be armed because the hang_detected flag is still active, so nothing clears that flag. As a consequence the CPU does not receive hrtimer interrupts and no timers expire on that CPU which results in RCU stalls and other malfunctions. Clear the flag along with some other less critical members of the hrtimer cpu base to ensure starting from a clean state when a CPU is plugged in. Thanks to Paul, Sebastian and Anna-Maria for their help to get down to the root cause of that hard to reproduce heisenbug. Once understood it's trivial and certainly justifies a brown paperbag. Fixes: 41d2e4949377 ("hrtimer: Tune hrtimer_interrupt hang logic") Reported-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Sewior Cc: Anna-Maria Gleixner Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801261447590.2067@nanos Signed-off-by: Greg Kroah-Hartman --- kernel/time/hrtimer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index eeb7f2f5698d..54fd2fed36e9 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -652,7 +652,9 @@ static void hrtimer_reprogram(struct hrtimer *timer, static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { base->expires_next.tv64 = KTIME_MAX; + base->hang_detected = 0; base->hres_active = 0; + base->next_timer = NULL; } /* @@ -1610,6 +1612,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) timerqueue_init_head(&cpu_base->clock_base[i].active); } + cpu_base->active_bases = 0; cpu_base->cpu = cpu; hrtimer_init_hres(cpu_base); return 0; -- GitLab From c964ad34f6d9c5c907e5cc2f1a855d044346aa8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Jan 2018 02:48:54 +0100 Subject: [PATCH 1078/1398] x86: bpf_jit: small optimization in emit_bpf_tail_call() [ upstream commit 84ccac6e7854ebbfb56d2fc6d5bef9be49bb304c ] Saves 4 bytes replacing following instructions : lea rax, [rsi + rdx * 8 + offsetof(...)] mov rax, qword ptr [rax] cmp rax, 0 by : mov rax, [rsi + rdx * 8 + offsetof(...)] test rax, rax Signed-off-by: Eric Dumazet Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/x86/net/bpf_jit_comp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 15f743615923..ece29e27faa0 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -281,7 +281,7 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ offsetof(struct bpf_array, map.max_entries)); EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ -#define OFFSET1 47 /* number of bytes to jump */ +#define OFFSET1 43 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -290,21 +290,20 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 36 +#define OFFSET2 32 EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */ /* prog = array->ptrs[index]; */ - EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ + EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ offsetof(struct bpf_array, ptrs)); - EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ /* if (prog == NULL) * goto out; */ - EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */ + EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ #define OFFSET3 10 EMIT2(X86_JE, OFFSET3); /* je out */ label3 = cnt; -- GitLab From 5226bb3b95515d7f6f2e1c11ac78b612e0056342 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 29 Jan 2018 02:48:55 +0100 Subject: [PATCH 1079/1398] bpf: fix bpf_tail_call() x64 JIT [ upstream commit 90caccdd8cc0215705f18b92771b449b01e2474a ] - bpf prog_array just like all other types of bpf array accepts 32-bit index. Clarify that in the comment. - fix x64 JIT of bpf_tail_call which was incorrectly loading 8 instead of 4 bytes - tighten corresponding check in the interpreter to stay consistent The JIT bug can be triggered after introduction of BPF_F_NUMA_NODE flag in commit 96eabe7a40aa in 4.14. Before that the map_flags would stay zero and though JIT code is wrong it will check bounds correctly. Hence two fixes tags. All other JITs don't have this problem. Signed-off-by: Alexei Starovoitov Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation") Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper") Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/x86/net/bpf_jit_comp.c | 4 ++-- kernel/bpf/core.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index ece29e27faa0..7840331d3056 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -278,9 +278,9 @@ static void emit_bpf_tail_call(u8 **pprog) /* if (index >= array->map.max_entries) * goto out; */ - EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ + EMIT2(0x89, 0xD2); /* mov edx, edx */ + EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); - EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ #define OFFSET1 43 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index aa6d98154106..ab9576b3bde5 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -715,7 +715,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_prog *prog; - u64 index = BPF_R3; + u32 index = BPF_R3; if (unlikely(index >= array->map.max_entries)) goto out; -- GitLab From a3d6dd6a66c1bf01a36926705db4687c7d0d4734 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 29 Jan 2018 02:48:56 +0100 Subject: [PATCH 1080/1398] bpf: introduce BPF_JIT_ALWAYS_ON config [ upstream commit 290af86629b25ffd1ed6232c4e9107da031705cb ] The BPF interpreter has been used as part of the spectre 2 attack CVE-2017-5715. A quote from goolge project zero blog: "At this point, it would normally be necessary to locate gadgets in the host kernel code that can be used to actually leak data by reading from an attacker-controlled location, shifting and masking the result appropriately and then using the result of that as offset to an attacker-controlled address for a load. But piecing gadgets together and figuring out which ones work in a speculation context seems annoying. So instead, we decided to use the eBPF interpreter, which is built into the host kernel - while there is no legitimate way to invoke it from inside a VM, the presence of the code in the host kernel's text section is sufficient to make it usable for the attack, just like with ordinary ROP gadgets." To make attacker job harder introduce BPF_JIT_ALWAYS_ON config option that removes interpreter from the kernel in favor of JIT-only mode. So far eBPF JIT is supported by: x64, arm64, arm32, sparc64, s390, powerpc64, mips64 The start of JITed program is randomized and code page is marked as read-only. In addition "constant blinding" can be turned on with net.core.bpf_jit_harden v2->v3: - move __bpf_prog_ret0 under ifdef (Daniel) v1->v2: - fix init order, test_bpf and cBPF (Daniel's feedback) - fix offloaded bpf (Jakub's feedback) - add 'return 0' dummy in case something can invoke prog->bpf_func - retarget bpf tree. For bpf-next the patch would need one extra hunk. It will be sent when the trees are merged back to net-next Considered doing: int bpf_jit_enable __read_mostly = BPF_EBPF_JIT_DEFAULT; but it seems better to land the patch as-is and in bpf-next remove bpf_jit_enable global variable from all JITs, consolidate in one place and remove this jit_init() function. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- init/Kconfig | 7 +++++++ kernel/bpf/core.c | 18 ++++++++++++++++++ lib/test_bpf.c | 11 +++++++---- net/core/filter.c | 6 ++---- net/core/sysctl_net_core.c | 6 ++++++ net/socket.c | 9 +++++++++ 6 files changed, 49 insertions(+), 8 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 34407f15e6d3..b331feeabda4 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1609,6 +1609,13 @@ config BPF_SYSCALL Enable the bpf() system call that allows to manipulate eBPF programs and maps via file descriptors. +config BPF_JIT_ALWAYS_ON + bool "Permanently enable BPF JIT and remove BPF interpreter" + depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT + help + Enables BPF JIT and removes BPF interpreter to avoid + speculative execution of BPF instructions by the interpreter + config SHMEM bool "Use full shmem filesystem" if EXPERT default y diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ab9576b3bde5..64c4b13952f0 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -458,6 +458,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) } EXPORT_SYMBOL_GPL(__bpf_call_base); +#ifndef CONFIG_BPF_JIT_ALWAYS_ON /** * __bpf_prog_run - run eBPF program on a given context * @ctx: is the data we are operating on @@ -923,6 +924,13 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) } STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */ +#else +static unsigned int __bpf_prog_ret0(void *ctx, const struct bpf_insn *insn) +{ + return 0; +} +#endif + bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { @@ -970,7 +978,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) */ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) { +#ifndef CONFIG_BPF_JIT_ALWAYS_ON fp->bpf_func = (void *) __bpf_prog_run; +#else + fp->bpf_func = (void *) __bpf_prog_ret0; +#endif /* eBPF JITs can rewrite the program in case constant * blinding is active. However, in case of error during @@ -979,6 +991,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) * be JITed, but falls back to the interpreter. */ fp = bpf_int_jit_compile(fp); +#ifdef CONFIG_BPF_JIT_ALWAYS_ON + if (!fp->jited) { + *err = -ENOTSUPP; + return fp; + } +#endif bpf_prog_lock_ro(fp); /* The tail call compatibility check can only be done at diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 2e385026915c..98da7520a6aa 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5646,9 +5646,8 @@ static struct bpf_prog *generate_filter(int which, int *err) return NULL; } } - /* We don't expect to fail. */ if (*err) { - pr_cont("FAIL to attach err=%d len=%d\n", + pr_cont("FAIL to prog_create err=%d len=%d\n", *err, fprog.len); return NULL; } @@ -5671,6 +5670,10 @@ static struct bpf_prog *generate_filter(int which, int *err) * checks. */ fp = bpf_prog_select_runtime(fp, err); + if (*err) { + pr_cont("FAIL to select_runtime err=%d\n", *err); + return NULL; + } break; } @@ -5856,8 +5859,8 @@ static __init int test_bpf(void) pass_cnt++; continue; } - - return err; + err_cnt++; + continue; } pr_cont("jited:%u ", fp->jited); diff --git a/net/core/filter.c b/net/core/filter.c index 4eb4ce0aeef4..6e0d17bc8382 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1005,11 +1005,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) */ goto out_err_free; - /* We are guaranteed to never error here with cBPF to eBPF - * transitions, since there's no issue with type compatibility - * checks on program arrays. - */ fp = bpf_prog_select_runtime(fp, &err); + if (err) + goto out_err_free; kfree(old_prog); return fp; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a7f05f0130e8..1b4619008c4e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -292,7 +292,13 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_enable, .maxlen = sizeof(int), .mode = 0644, +#ifndef CONFIG_BPF_JIT_ALWAYS_ON .proc_handler = proc_dointvec +#else + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &one, +#endif }, # ifdef CONFIG_HAVE_EBPF_JIT { diff --git a/net/socket.c b/net/socket.c index 05f13b24572c..bd3b33988ee0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2548,6 +2548,15 @@ static int __init sock_init(void) core_initcall(sock_init); /* early initcall */ +static int __init jit_init(void) +{ +#ifdef CONFIG_BPF_JIT_ALWAYS_ON + bpf_jit_enable = 1; +#endif + return 0; +} +pure_initcall(jit_init); + #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { -- GitLab From fcabc6d008856356258f86e96bfcf3806acf9f38 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 29 Jan 2018 02:48:57 +0100 Subject: [PATCH 1081/1398] bpf: arsh is not supported in 32 bit alu thus reject it [ upstream commit 7891a87efc7116590eaba57acc3c422487802c6f ] The following snippet was throwing an 'unknown opcode cc' warning in BPF interpreter: 0: (18) r0 = 0x0 2: (7b) *(u64 *)(r10 -16) = r0 3: (cc) (u32) r0 s>>= (u32) r0 4: (95) exit Although a number of JITs do support BPF_ALU | BPF_ARSH | BPF_{K,X} generation, not all of them do and interpreter does neither. We can leave existing ones and implement it later in bpf-next for the remaining ones, but reject this properly in verifier for the time being. Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") Reported-by: syzbot+93c4904c5c70348a6890@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 19c44cf59bb2..8e0bf890ffb7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1843,6 +1843,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EINVAL; } + if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) { + verbose("BPF_ARSH not supported for 32 bit ALU\n"); + return -EINVAL; + } + if ((opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; -- GitLab From 5cb917aa1f1e03df9a4c29b363e3900d73508fa8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 29 Jan 2018 02:48:58 +0100 Subject: [PATCH 1082/1398] bpf: avoid false sharing of map refcount with max_entries [ upstream commit be95a845cc4402272994ce290e3ad928aff06cb9 ] In addition to commit b2157399cc98 ("bpf: prevent out-of-bounds speculation") also change the layout of struct bpf_map such that false sharing of fast-path members like max_entries is avoided when the maps reference counter is altered. Therefore enforce them to be placed into separate cachelines. pahole dump after change: struct bpf_map { const struct bpf_map_ops * ops; /* 0 8 */ struct bpf_map * inner_map_meta; /* 8 8 */ void * security; /* 16 8 */ enum bpf_map_type map_type; /* 24 4 */ u32 key_size; /* 28 4 */ u32 value_size; /* 32 4 */ u32 max_entries; /* 36 4 */ u32 map_flags; /* 40 4 */ u32 pages; /* 44 4 */ u32 id; /* 48 4 */ int numa_node; /* 52 4 */ bool unpriv_array; /* 56 1 */ /* XXX 7 bytes hole, try to pack */ /* --- cacheline 1 boundary (64 bytes) --- */ struct user_struct * user; /* 64 8 */ atomic_t refcnt; /* 72 4 */ atomic_t usercnt; /* 76 4 */ struct work_struct work; /* 80 32 */ char name[16]; /* 112 16 */ /* --- cacheline 2 boundary (128 bytes) --- */ /* size: 128, cachelines: 2, members: 17 */ /* sum members: 121, holes: 1, sum holes: 7 */ }; Now all entries in the first cacheline are read only throughout the life time of the map, set up once during map creation. Overall struct size and number of cachelines doesn't change from the reordering. struct bpf_map is usually first member and embedded in map structs in specific map implementations, so also avoid those members to sit at the end where it could potentially share the cacheline with first map values e.g. in the array since remote CPUs could trigger map updates just as well for those (easily dirtying members like max_entries intentionally as well) while having subsequent values in cache. Quoting from Google's Project Zero blog [1]: Additionally, at least on the Intel machine on which this was tested, bouncing modified cache lines between cores is slow, apparently because the MESI protocol is used for cache coherence [8]. Changing the reference counter of an eBPF array on one physical CPU core causes the cache line containing the reference counter to be bounced over to that CPU core, making reads of the reference counter on all other CPU cores slow until the changed reference counter has been written back to memory. Because the length and the reference counter of an eBPF array are stored in the same cache line, this also means that changing the reference counter on one physical CPU core causes reads of the eBPF array's length to be slow on other physical CPU cores (intentional false sharing). While this doesn't 'control' the out-of-bounds speculation through masking the index as in commit b2157399cc98, triggering a manipulation of the map's reference counter is really trivial, so lets not allow to easily affect max_entries from it. Splitting to separate cachelines also generally makes sense from a performance perspective anyway in that fast-path won't have a cache miss if the map gets pinned, reused in other progs, etc out of control path, thus also avoids unintentional false sharing. [1] https://googleprojectzero.blogspot.ch/2018/01/reading-privileged-memory-with-side.html Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 75ffd3b2149e..7995940d4187 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -36,7 +36,10 @@ struct bpf_map_ops { }; struct bpf_map { - atomic_t refcnt; + /* 1st cacheline with read-mostly members of which some + * are also accessed in fast-path (e.g. ops, max_entries). + */ + const struct bpf_map_ops *ops ____cacheline_aligned; enum bpf_map_type map_type; u32 key_size; u32 value_size; @@ -44,10 +47,15 @@ struct bpf_map { u32 map_flags; u32 pages; bool unpriv_array; - struct user_struct *user; - const struct bpf_map_ops *ops; - struct work_struct work; + /* 7 bytes hole */ + + /* 2nd cacheline with misc members to avoid false sharing + * particularly with refcounting. + */ + struct user_struct *user ____cacheline_aligned; + atomic_t refcnt; atomic_t usercnt; + struct work_struct work; }; struct bpf_map_type_list { -- GitLab From 4606077802f2c6ef7aff5185d9f7d99a50784ffd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Jan 2018 02:48:59 +0100 Subject: [PATCH 1083/1398] bpf: fix divides by zero [ upstream commit c366287ebd698ef5e3de300d90cd62ee9ee7373e ] Divides by zero are not nice, lets avoid them if possible. Also do_div() seems not needed when dealing with 32bit operands, but this seems a minor detail. Fixes: bd4cf0ed331a ("net: filter: rework/optimize internal BPF interpreter's instruction set") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 64c4b13952f0..879ca844ba1d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -642,7 +642,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) DST = tmp; CONT; ALU_MOD_X: - if (unlikely(SRC == 0)) + if (unlikely((u32)SRC == 0)) return 0; tmp = (u32) DST; DST = do_div(tmp, (u32) SRC); @@ -661,7 +661,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) DST = div64_u64(DST, SRC); CONT; ALU_DIV_X: - if (unlikely(SRC == 0)) + if (unlikely((u32)SRC == 0)) return 0; tmp = (u32) DST; do_div(tmp, (u32) SRC); -- GitLab From 265d7657c9baf09d57eb386d0374e912e9649626 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 29 Jan 2018 02:49:00 +0100 Subject: [PATCH 1084/1398] bpf: fix 32-bit divide by zero [ upstream commit 68fda450a7df51cff9e5a4d4a4d9d0d5f2589153 ] due to some JITs doing if (src_reg == 0) check in 64-bit mode for div/mod operations mask upper 32-bits of src register before doing the check Fixes: 622582786c9e ("net: filter: x86: internal BPF JIT") Fixes: 7a12b5031c6b ("sparc64: Add eBPF JIT.") Reported-by: syzbot+48340bb518e88849e2e3@syzkaller.appspotmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 18 ++++++++++++++++++ net/core/filter.c | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8e0bf890ffb7..5c56ff9eeed2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3391,6 +3391,24 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) || + insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { + /* due to JIT bugs clear upper 32-bits of src register + * before div/mod operation + */ + insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg); + insn_buf[1] = *insn; + cnt = 2; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + if (insn->code != (BPF_JMP | BPF_CALL)) continue; diff --git a/net/core/filter.c b/net/core/filter.c index 6e0d17bc8382..e8c89d2d2bc0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -441,6 +441,10 @@ static int bpf_convert_filter(struct sock_filter *prog, int len, convert_bpf_extensions(fp, &insn)) break; + if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || + fp->code == (BPF_ALU | BPF_MOD | BPF_X)) + *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); + *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); break; -- GitLab From f531fbb06a56361d4a9807bbb16db4facc8537d3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 29 Jan 2018 02:49:01 +0100 Subject: [PATCH 1085/1398] bpf: reject stores into ctx via st and xadd [ upstream commit f37a8cb84cce18762e8f86a70bd6a49a66ab964c ] Alexei found that verifier does not reject stores into context via BPF_ST instead of BPF_STX. And while looking at it, we also should not allow XADD variant of BPF_STX. The context rewriter is only assuming either BPF_LDX_MEM- or BPF_STX_MEM-type operations, thus reject anything other than that so that assumptions in the rewriter properly hold. Add test cases as well for BPF selftests. Fixes: d691f9e8d440 ("bpf: allow programs to write to certain skb fields") Reported-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5c56ff9eeed2..076e4a0ff95e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -702,6 +702,13 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); } +static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = &env->cur_state.regs[regno]; + + return reg->type == PTR_TO_CTX; +} + static int check_ptr_alignment(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int off, int size) { @@ -896,6 +903,12 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EACCES; } + if (is_ctx_reg(env, insn->dst_reg)) { + verbose("BPF_XADD stores into R%d context is not allowed\n", + insn->dst_reg); + return -EACCES; + } + /* check whether atomic_add can read the memory */ err = check_mem_access(env, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); @@ -3012,6 +3025,12 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; + if (is_ctx_reg(env, insn->dst_reg)) { + verbose("BPF_ST stores into R%d context is not allowed\n", + insn->dst_reg); + return -EACCES; + } + /* check that memory (dst_reg + off) is writeable */ err = check_mem_access(env, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -- GitLab From f12d0602633decf073796f3aaa59eec7ff2da9e2 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 22 Jan 2018 20:11:06 +0000 Subject: [PATCH 1086/1398] nfsd: auth: Fix gid sorting when rootsquash enabled commit 1995266727fa8143897e89b55f5d3c79aa828420 upstream. Commit bdcf0a423ea1 ("kernel: make groups_sort calling a responsibility group_info allocators") appears to break nfsd rootsquash in a pretty major way. It adds a call to groups_sort() inside the loop that copies/squashes gids, which means the valid gids are sorted along with the following garbage. The net result is that the highest numbered valid gids are replaced with any lower-valued garbage gids, possibly including 0. We should sort only once, after filling in all the gids. Fixes: bdcf0a423ea1 ("kernel: make groups_sort calling a responsibility ...") Signed-off-by: Ben Hutchings Acked-by: J. Bruce Fields Signed-off-by: Linus Torvalds Cc: Wolfgang Walter Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/auth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 75f942ae5176..81c018e5c31e 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -59,10 +59,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) gi->gid[i] = exp->ex_anon_gid; else gi->gid[i] = rqgi->gid[i]; - - /* Each thread allocates its own gi, no race */ - groups_sort(gi); } + + /* Each thread allocates its own gi, no race */ + groups_sort(gi); } else { gi = get_group_info(rqgi); } -- GitLab From 6c6f924f9c6294944ee6efb1bbd8cdb853582e50 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 31 Jan 2018 12:55:57 +0100 Subject: [PATCH 1087/1398] Linux 4.9.79 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8a6f158a1176..4a7e6dff1c2e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 78 +SUBLEVEL = 79 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 3e3abc76c509688a47f30e507f65e8188357ba1f Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 29 Jan 2018 21:31:21 -0800 Subject: [PATCH 1088/1398] ANDROID: sdcardfs: Use lower getattr times/size We now use the lower filesystem's getattr for time and size related information. Change-Id: I3dd05614a0c2837a13eeb033444fbdf070ddce2a Signed-off-by: Daniel Rosenberg Bug: 72007585 --- fs/sdcardfs/inode.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 87392331646a..9cdb396d9d37 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -823,8 +823,8 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct return err; } -static int sdcardfs_fillattr(struct vfsmount *mnt, - struct inode *inode, struct kstat *stat) +static int sdcardfs_fillattr(struct vfsmount *mnt, struct inode *inode, + struct kstat *lower_stat, struct kstat *stat) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); struct sdcardfs_inode_data *top = top_data_get(info); @@ -840,12 +840,12 @@ static int sdcardfs_fillattr(struct vfsmount *mnt, stat->uid = make_kuid(&init_user_ns, top->d_uid); stat->gid = make_kgid(&init_user_ns, get_gid(mnt, sb, top)); stat->rdev = inode->i_rdev; - stat->size = i_size_read(inode); - stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; - stat->ctime = inode->i_ctime; - stat->blksize = (1 << inode->i_blkbits); - stat->blocks = inode->i_blocks; + stat->size = lower_stat->size; + stat->atime = lower_stat->atime; + stat->mtime = lower_stat->mtime; + stat->ctime = lower_stat->ctime; + stat->blksize = lower_stat->blksize; + stat->blocks = lower_stat->blocks; data_put(top); return 0; } @@ -871,8 +871,7 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, goto out; sdcardfs_copy_and_fix_attrs(d_inode(dentry), d_inode(lower_path.dentry)); - err = sdcardfs_fillattr(mnt, d_inode(dentry), stat); - stat->blocks = lower_stat.blocks; + err = sdcardfs_fillattr(mnt, d_inode(dentry), &lower_stat, stat); out: sdcardfs_put_lower_path(dentry, &lower_path); return err; -- GitLab From bf71113b96a21aacb6e4bce8a85a34e0919d0362 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 23 Jan 2018 17:51:26 -0800 Subject: [PATCH 1089/1398] Revert "ANDROID: sdcardfs: notify lower file of opens" This reverts commit f18c44dc552e2ed0655ed5ec49b578da4dd30588. Instead of calling notify within sdcardfs, which reverse the order of notifications during an open with truncate, we'll make fs_notify worry about it. Change-Id: Ic634401c0f223500066300a4df8b1453a0b35b60 Bug: 70706497 Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/file.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index dd76ecf33cf3..5ac0b0bbb0ec 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -18,7 +18,6 @@ * General Public License. */ -#include #include "sdcardfs.h" #ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE #include @@ -260,7 +259,6 @@ static int sdcardfs_open(struct inode *inode, struct file *file) fput(lower_file); /* fput calls dput for lower_dentry */ } } else { - fsnotify_open(lower_file); sdcardfs_set_lower_file(file, lower_file); } -- GitLab From 3d7d3cd5f654b545c51ee14274788e14d33d2433 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 23 Jan 2018 15:02:50 -0800 Subject: [PATCH 1090/1398] ANDROID: fsnotify: Notify lower fs of open If the filesystem being watched supports d_canonical_path, notify the lower filesystem of the open as well. Change-Id: I2b1739e068afbaf5eb39950516072bff8345ebfe Signed-off-by: Daniel Rosenberg Bug: 70706497 --- include/linux/fsnotify.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index e5f03a4d8430..ff51592c4792 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -213,12 +213,20 @@ static inline void fsnotify_modify(struct file *file) static inline void fsnotify_open(struct file *file) { struct path *path = &file->f_path; + struct path lower_path; struct inode *inode = path->dentry->d_inode; + __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; + if (path->dentry->d_op && path->dentry->d_op->d_canonical_path) { + path->dentry->d_op->d_canonical_path(path, &lower_path); + fsnotify_parent(&lower_path, NULL, mask); + fsnotify(lower_path.dentry->d_inode, mask, &lower_path, FSNOTIFY_EVENT_PATH, NULL, 0); + path_put(&lower_path); + } fsnotify_parent(path, NULL, mask); fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); } -- GitLab From 47af77b1dced92d853913694415c7f6a8bbcf4e3 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 1 Feb 2018 16:52:22 -0800 Subject: [PATCH 1091/1398] ANDROID: sdcardfs: Protect set_top If the top is changed while we're attempting to use it, it's possible that the reference will be put while we are in the process of grabbing a reference. Now we grab a spinlock to protect grabbing our reference count. Additionally, we now set the inode_info's top value to point to it's own data when initializing, which makes tracking changes easier. Change-Id: If15748c786ce4c0480ab8c5051a92523aff284d2 Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/derived_perm.c | 28 +++++++++++++--------------- fs/sdcardfs/main.c | 6 ++---- fs/sdcardfs/sdcardfs.h | 26 ++++++++++++++++++-------- fs/sdcardfs/super.c | 3 +++ 4 files changed, 36 insertions(+), 27 deletions(-) diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index fffaad4701cf..0b3b22334e54 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -32,23 +32,20 @@ static void inherit_derived_state(struct inode *parent, struct inode *child) ci->data->under_android = pi->data->under_android; ci->data->under_cache = pi->data->under_cache; ci->data->under_obb = pi->data->under_obb; - set_top(ci, pi->top_data); } /* helper function for derived state */ void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, - uid_t uid, bool under_android, - struct sdcardfs_inode_data *top) + uid_t uid) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); info->data->perm = perm; info->data->userid = userid; info->data->d_uid = uid; - info->data->under_android = under_android; + info->data->under_android = false; info->data->under_cache = false; info->data->under_obb = false; - set_top(info, top); } /* While renaming, there is a point where we want the path from dentry, @@ -58,8 +55,8 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name) { struct sdcardfs_inode_info *info = SDCARDFS_I(d_inode(dentry)); - struct sdcardfs_inode_data *parent_data = - SDCARDFS_I(d_inode(parent))->data; + struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent)); + struct sdcardfs_inode_data *parent_data = parent_info->data; appid_t appid; unsigned long user_num; int err; @@ -80,13 +77,15 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, inherit_derived_state(d_inode(parent), d_inode(dentry)); /* Files don't get special labels */ - if (!S_ISDIR(d_inode(dentry)->i_mode)) + if (!S_ISDIR(d_inode(dentry)->i_mode)) { + set_top(info, parent_info); return; + } /* Derive custom permissions based on parent and current node */ switch (parent_data->perm) { case PERM_INHERIT: case PERM_ANDROID_PACKAGE_CACHE: - /* Already inherited above */ + set_top(info, parent_info); break; case PERM_PRE_ROOT: /* Legacy internal layout places users at top level */ @@ -96,7 +95,6 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, info->data->userid = 0; else info->data->userid = user_num; - set_top(info, info->data); break; case PERM_ROOT: /* Assume masked off by default. */ @@ -104,24 +102,24 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, /* App-specific directories inside; let anyone traverse */ info->data->perm = PERM_ANDROID; info->data->under_android = true; - set_top(info, info->data); + } else { + set_top(info, parent_info); } break; case PERM_ANDROID: if (qstr_case_eq(name, &q_data)) { /* App-specific directories inside; let anyone traverse */ info->data->perm = PERM_ANDROID_DATA; - set_top(info, info->data); } else if (qstr_case_eq(name, &q_obb)) { /* App-specific directories inside; let anyone traverse */ info->data->perm = PERM_ANDROID_OBB; info->data->under_obb = true; - set_top(info, info->data); /* Single OBB directory is always shared */ } else if (qstr_case_eq(name, &q_media)) { /* App-specific directories inside; let anyone traverse */ info->data->perm = PERM_ANDROID_MEDIA; - set_top(info, info->data); + } else { + set_top(info, parent_info); } break; case PERM_ANDROID_OBB: @@ -132,13 +130,13 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, if (appid != 0 && !is_excluded(name->name, parent_data->userid)) info->data->d_uid = multiuser_get_uid(parent_data->userid, appid); - set_top(info, info->data); break; case PERM_ANDROID_PACKAGE: if (qstr_case_eq(name, &q_cache)) { info->data->perm = PERM_ANDROID_PACKAGE_CACHE; info->data->under_cache = true; } + set_top(info, parent_info); break; } } diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index ac27bb301c5e..e4fd3fbb05e6 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -341,13 +341,11 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, mutex_lock(&sdcardfs_super_list_lock); if (sb_info->options.multiuser) { setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT, - sb_info->options.fs_user_id, AID_ROOT, - false, SDCARDFS_I(d_inode(sb->s_root))->data); + sb_info->options.fs_user_id, AID_ROOT); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); } else { setup_derived_state(d_inode(sb->s_root), PERM_ROOT, - sb_info->options.fs_user_id, AID_ROOT, - false, SDCARDFS_I(d_inode(sb->s_root))->data); + sb_info->options.fs_user_id, AID_ROOT); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); } fixup_tmp_permissions(d_inode(sb->s_root)); diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 3da9fe94b772..610466ad153d 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -201,6 +201,7 @@ struct sdcardfs_inode_info { struct sdcardfs_inode_data *data; /* top folder for ownership */ + spinlock_t top_lock; struct sdcardfs_inode_data *top_data; struct inode vfs_inode; @@ -380,7 +381,12 @@ static inline struct sdcardfs_inode_data *data_get( static inline struct sdcardfs_inode_data *top_data_get( struct sdcardfs_inode_info *info) { - return data_get(info->top_data); + struct sdcardfs_inode_data *top_data; + + spin_lock(&info->top_lock); + top_data = data_get(info->top_data); + spin_unlock(&info->top_lock); + return top_data; } extern void data_release(struct kref *ref); @@ -402,15 +408,20 @@ static inline void release_own_data(struct sdcardfs_inode_info *info) } static inline void set_top(struct sdcardfs_inode_info *info, - struct sdcardfs_inode_data *top) + struct sdcardfs_inode_info *top_owner) { - struct sdcardfs_inode_data *old_top = info->top_data; + struct sdcardfs_inode_data *old_top; + struct sdcardfs_inode_data *new_top = NULL; + + if (top_owner) + new_top = top_data_get(top_owner); - if (top) - data_get(top); - info->top_data = top; + spin_lock(&info->top_lock); + old_top = info->top_data; + info->top_data = new_top; if (old_top) data_put(old_top); + spin_unlock(&info->top_lock); } static inline int get_gid(struct vfsmount *mnt, @@ -516,8 +527,7 @@ struct limit_search { }; extern void setup_derived_state(struct inode *inode, perm_t perm, - userid_t userid, uid_t uid, bool under_android, - struct sdcardfs_inode_data *top); + userid_t userid, uid_t uid); extern void get_derived_permission(struct dentry *parent, struct dentry *dentry); extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name); extern void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit); diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index 87d6f836592e..cffcdb11cb8a 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -215,6 +215,9 @@ static struct inode *sdcardfs_alloc_inode(struct super_block *sb) i->data = d; kref_init(&d->refcount); + i->top_data = d; + spin_lock_init(&i->top_lock); + kref_get(&d->refcount); i->vfs_inode.i_version = 1; return &i->vfs_inode; -- GitLab From 56bc086358cac1a2949783646eabd57447b9d672 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 5 Jan 2018 16:26:00 -0800 Subject: [PATCH 1092/1398] loop: fix concurrent lo_open/lo_release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ae6650163c66a7eff1acd6eb8b0f752dcfa8eba5 upstream. 范龙飞 reports that KASAN can report a use-after-free in __lock_acquire. The reason is due to insufficient serialization in lo_release(), which will continue to use the loop device even after it has decremented the lo_refcnt to zero. In the meantime, another process can come in, open the loop device again as it is being shut down. Confusion ensues. Reported-by: 范龙飞 Signed-off-by: Linus Torvalds Signed-off-by: Jens Axboe Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 24d6cefceb32..402254d26247 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1558,9 +1558,8 @@ static int lo_open(struct block_device *bdev, fmode_t mode) return err; } -static void lo_release(struct gendisk *disk, fmode_t mode) +static void __lo_release(struct loop_device *lo) { - struct loop_device *lo = disk->private_data; int err; if (atomic_dec_return(&lo->lo_refcnt)) @@ -1586,6 +1585,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode) mutex_unlock(&lo->lo_ctl_mutex); } +static void lo_release(struct gendisk *disk, fmode_t mode) +{ + mutex_lock(&loop_index_mutex); + __lo_release(disk->private_data); + mutex_unlock(&loop_index_mutex); +} + static const struct block_device_operations lo_fops = { .owner = THIS_MODULE, .open = lo_open, -- GitLab From 20e6f5bdf542cb2510bd54e69eed6fe395f6c026 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 28 May 2017 10:53:10 +0300 Subject: [PATCH 1093/1398] net/mlx5: Define interface bits for fencing UMR wqe commit 1410a90ae449061b7e1ae19d275148f36948801b upstream. HW can implement UMR wqe re-transmission in various ways. Thus, add HCA cap to distinguish the needed fence for UMR to make sure that the wqe wouldn't fail on mkey checks. Signed-off-by: Max Gurtovoy Acked-by: Leon Romanovsky Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford Cc: Marta Rybczynska Signed-off-by: Greg Kroah-Hartman --- include/linux/mlx5/mlx5_ifc.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 25ed105bbcfb..20ee90c47cd5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -737,6 +737,12 @@ enum { MLX5_CAP_PORT_TYPE_ETH = 0x1, }; +enum { + MLX5_CAP_UMR_FENCE_STRONG = 0x0, + MLX5_CAP_UMR_FENCE_SMALL = 0x1, + MLX5_CAP_UMR_FENCE_NONE = 0x2, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x80]; @@ -838,7 +844,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 striding_rq[0x1]; u8 reserved_at_201[0x2]; u8 ipoib_basic_offloads[0x1]; - u8 reserved_at_205[0xa]; + u8 reserved_at_205[0x5]; + u8 umr_fence[0x2]; + u8 reserved_at_20c[0x3]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; -- GitLab From 2a7076e71575b741f61f33f93be5ea284242a603 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 28 May 2017 10:53:11 +0300 Subject: [PATCH 1094/1398] RDMA/mlx5: set UMR wqe fence according to HCA cap commit 6e8484c5cf07c7ee632587e98c1a12d319dacb7c upstream. Cache the needed umr_fence and set the wqe ctrl segmennt accordingly. Signed-off-by: Max Gurtovoy Acked-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford Cc: Marta Rybczynska Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 14 +++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +- drivers/infiniband/hw/mlx5/qp.c | 59 +++++++++++----------------- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a2120ff0ef4c..5e29fbd3a5a0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2575,6 +2575,18 @@ static int create_umr_res(struct mlx5_ib_dev *dev) return ret; } +static u8 mlx5_get_umr_fence(u8 umr_fence_cap) +{ + switch (umr_fence_cap) { + case MLX5_CAP_UMR_FENCE_NONE: + return MLX5_FENCE_MODE_NONE; + case MLX5_CAP_UMR_FENCE_SMALL: + return MLX5_FENCE_MODE_INITIATOR_SMALL; + default: + return MLX5_FENCE_MODE_STRONG_ORDERING; + } +} + static int create_dev_resources(struct mlx5_ib_resources *devr) { struct ib_srq_init_attr attr; @@ -3101,6 +3113,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) mlx5_ib_internal_fill_odp_caps(dev); + dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); + if (MLX5_CAP_GEN(mdev, imaicl)) { dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 86e1e08125ff..d5cc954e8ac2 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -345,7 +345,7 @@ struct mlx5_ib_qp { struct mlx5_ib_wq rq; u8 sq_signal_bits; - u8 fm_cache; + u8 next_fence; struct mlx5_ib_wq sq; /* serialize qp state modifications @@ -643,6 +643,7 @@ struct mlx5_ib_dev { struct list_head qp_list; /* Array with num_ports elements */ struct mlx5_ib_port *port; + u8 umr_fence; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 2665414b4875..fdd156101a72 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3755,24 +3755,6 @@ static void mlx5_bf_copy(u64 __iomem *dst, u64 *src, } } -static u8 get_fence(u8 fence, struct ib_send_wr *wr) -{ - if (unlikely(wr->opcode == IB_WR_LOCAL_INV && - wr->send_flags & IB_SEND_FENCE)) - return MLX5_FENCE_MODE_STRONG_ORDERING; - - if (unlikely(fence)) { - if (wr->send_flags & IB_SEND_FENCE) - return MLX5_FENCE_MODE_SMALL_AND_FENCE; - else - return fence; - } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) { - return MLX5_FENCE_MODE_FENCE; - } - - return 0; -} - static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, struct ib_send_wr *wr, unsigned *idx, @@ -3801,8 +3783,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, u8 size, unsigned idx, u64 wr_id, - int nreq, u8 fence, u8 next_fence, - u32 mlx5_opcode) + int nreq, u8 fence, u32 mlx5_opcode) { u8 opmod = 0; @@ -3810,7 +3791,6 @@ static void finish_wqe(struct mlx5_ib_qp *qp, mlx5_opcode | ((u32)opmod << 24)); ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8)); ctrl->fm_ce_se |= fence; - qp->fm_cache = next_fence; if (unlikely(qp->wq_sig)) ctrl->signature = wq_sig(ctrl); @@ -3870,7 +3850,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - fence = qp->fm_cache; num_sge = wr->num_sge; if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); @@ -3887,6 +3866,19 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } + if (wr->opcode == IB_WR_LOCAL_INV || + wr->opcode == IB_WR_REG_MR) { + fence = dev->umr_fence; + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + } else if (wr->send_flags & IB_SEND_FENCE) { + if (qp->next_fence) + fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; + else + fence = MLX5_FENCE_MODE_FENCE; + } else { + fence = qp->next_fence; + } + switch (ibqp->qp_type) { case IB_QPT_XRC_INI: xrc = seg; @@ -3913,7 +3905,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; case IB_WR_LOCAL_INV: - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); set_linv_wr(qp, &seg, &size); @@ -3921,7 +3912,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_WR_REG_MR: - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_REG_MR; ctrl->imm = cpu_to_be32(reg_wr(wr)->key); err = set_reg_wr(qp, reg_wr(wr), &seg, &size); @@ -3944,9 +3934,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_UMR); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_UMR); /* * SET_PSV WQEs are not signaled and solicited * on error @@ -3971,9 +3960,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_SET_PSV); err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); if (err) { @@ -3983,7 +3971,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire, mr->sig->psv_wire.psv_idx, &seg, &size); @@ -3993,9 +3980,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_SET_PSV); + qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; num_sge = 0; goto skip_psv; @@ -4100,8 +4087,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - get_fence(fence, wr), next_fence, + qp->next_fence = next_fence; + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence, mlx5_ib_opcode[wr->opcode]); skip_psv: if (0) -- GitLab From efe3f94f83d2b03abf2fc071ec59bc668761bac5 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 21 Dec 2017 11:11:31 +1030 Subject: [PATCH 1095/1398] tools/gpio: Fix build error with musl libc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1696784eb7b52b13b62d160c028ef2c2c981d4f2 upstream. The GPIO tools build fails when using a buildroot toolchain that uses musl as it's C library: arm-broomstick-linux-musleabi-gcc -Wp,-MD,./.gpio-event-mon.o.d \ -Wp,-MT,gpio-event-mon.o -O2 -Wall -g -D_GNU_SOURCE \ -Iinclude -D"BUILD_STR(s)=#s" -c -o gpio-event-mon.o gpio-event-mon.c gpio-event-mon.c:30:6: error: unknown type name ‘u_int32_t’; did you mean ‘uint32_t’? u_int32_t handleflags, ^~~~~~~~~ uint32_t The glibc headers installed on my laptop include sys/types.h in unistd.h, but it appears that musl does not. Fixes: 97f69747d8b1 ("tools/gpio: add the gpio-event-mon tool") Signed-off-by: Joel Stanley Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- tools/gpio/gpio-event-mon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 1c14c2595158..4b36323ea64b 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -23,6 +23,7 @@ #include #include #include +#include #include int monitor_device(const char *device_name, -- GitLab From 241c04f75e90b6e0eaf099d53c68a77ed5767835 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Fri, 12 Jan 2018 13:16:08 +0100 Subject: [PATCH 1096/1398] gpio: stmpe: i2c transfer are forbiden in atomic context commit b888fb6f2a278442933e3bfab70262e9a5365fb3 upstream. Move the workaround from stmpe_gpio_irq_unmask() which is executed in atomic context to stmpe_gpio_irq_sync_unlock() which is not. It fixes the following issue: [ 1.500000] BUG: scheduling while atomic: swapper/1/0x00000002 [ 1.500000] CPU: 0 PID: 1 Comm: swapper Not tainted 4.15.0-rc2-00020-gbd4301f-dirty #28 [ 1.520000] Hardware name: STM32 (Device Tree Support) [ 1.520000] [<0000bfc9>] (unwind_backtrace) from [<0000b347>] (show_stack+0xb/0xc) [ 1.530000] [<0000b347>] (show_stack) from [<0001fc49>] (__schedule_bug+0x39/0x58) [ 1.530000] [<0001fc49>] (__schedule_bug) from [<00168211>] (__schedule+0x23/0x2b2) [ 1.550000] [<00168211>] (__schedule) from [<001684f7>] (schedule+0x57/0x64) [ 1.550000] [<001684f7>] (schedule) from [<0016a513>] (schedule_timeout+0x137/0x164) [ 1.550000] [<0016a513>] (schedule_timeout) from [<00168b91>] (wait_for_common+0x8d/0xfc) [ 1.570000] [<00168b91>] (wait_for_common) from [<00139753>] (stm32f4_i2c_xfer+0xe9/0xfe) [ 1.580000] [<00139753>] (stm32f4_i2c_xfer) from [<00138545>] (__i2c_transfer+0x111/0x148) [ 1.590000] [<00138545>] (__i2c_transfer) from [<001385cf>] (i2c_transfer+0x53/0x70) [ 1.590000] [<001385cf>] (i2c_transfer) from [<001388a5>] (i2c_smbus_xfer+0x12f/0x36e) [ 1.600000] [<001388a5>] (i2c_smbus_xfer) from [<00138b49>] (i2c_smbus_read_byte_data+0x1f/0x2a) [ 1.610000] [<00138b49>] (i2c_smbus_read_byte_data) from [<00124fdd>] (__stmpe_reg_read+0xd/0x24) [ 1.620000] [<00124fdd>] (__stmpe_reg_read) from [<001252b3>] (stmpe_reg_read+0x19/0x24) [ 1.630000] [<001252b3>] (stmpe_reg_read) from [<0002c4d1>] (unmask_irq+0x17/0x22) [ 1.640000] [<0002c4d1>] (unmask_irq) from [<0002c57f>] (irq_startup+0x6f/0x78) [ 1.650000] [<0002c57f>] (irq_startup) from [<0002b7a1>] (__setup_irq+0x319/0x47c) [ 1.650000] [<0002b7a1>] (__setup_irq) from [<0002bad3>] (request_threaded_irq+0x6b/0xe8) [ 1.660000] [<0002bad3>] (request_threaded_irq) from [<0002d0b9>] (devm_request_threaded_irq+0x3b/0x6a) [ 1.670000] [<0002d0b9>] (devm_request_threaded_irq) from [<001446e7>] (mmc_gpiod_request_cd_irq+0x49/0x8a) [ 1.680000] [<001446e7>] (mmc_gpiod_request_cd_irq) from [<0013d45d>] (mmc_start_host+0x49/0x60) [ 1.690000] [<0013d45d>] (mmc_start_host) from [<0013e40b>] (mmc_add_host+0x3b/0x54) [ 1.700000] [<0013e40b>] (mmc_add_host) from [<00148119>] (mmci_probe+0x4d1/0x60c) [ 1.710000] [<00148119>] (mmci_probe) from [<000f903b>] (amba_probe+0x7b/0xbe) [ 1.720000] [<000f903b>] (amba_probe) from [<001170e5>] (driver_probe_device+0x169/0x1f8) [ 1.730000] [<001170e5>] (driver_probe_device) from [<001171b7>] (__driver_attach+0x43/0x5c) [ 1.740000] [<001171b7>] (__driver_attach) from [<0011618d>] (bus_for_each_dev+0x3d/0x46) [ 1.740000] [<0011618d>] (bus_for_each_dev) from [<001165cd>] (bus_add_driver+0xcd/0x124) [ 1.740000] [<001165cd>] (bus_add_driver) from [<00117713>] (driver_register+0x4d/0x7a) [ 1.760000] [<00117713>] (driver_register) from [<001fc765>] (do_one_initcall+0xbd/0xe8) [ 1.770000] [<001fc765>] (do_one_initcall) from [<001fc88b>] (kernel_init_freeable+0xfb/0x134) [ 1.780000] [<001fc88b>] (kernel_init_freeable) from [<00167ee3>] (kernel_init+0x7/0x9c) [ 1.790000] [<00167ee3>] (kernel_init) from [<00009b65>] (ret_from_fork+0x11/0x2c) Signed-off-by: Alexandre TORGUE Signed-off-by: Patrice Chotard Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-stmpe.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index adba614b3965..abb5a2752511 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -190,6 +190,16 @@ static void stmpe_gpio_irq_sync_unlock(struct irq_data *d) }; int i, j; + /* + * STMPE1600: to be able to get IRQ from pins, + * a read must be done on GPMR register, or a write in + * GPSR or GPCR registers + */ + if (stmpe->partnum == STMPE1600) { + stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_LSB]); + stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_CSB]); + } + for (i = 0; i < CACHE_NR_REGS; i++) { /* STMPE801 and STMPE1600 don't have RE and FE registers */ if ((stmpe->partnum == STMPE801 || @@ -227,21 +237,11 @@ static void stmpe_gpio_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct stmpe_gpio *stmpe_gpio = gpiochip_get_data(gc); - struct stmpe *stmpe = stmpe_gpio->stmpe; int offset = d->hwirq; int regoffset = offset / 8; int mask = BIT(offset % 8); stmpe_gpio->regs[REG_IE][regoffset] |= mask; - - /* - * STMPE1600 workaround: to be able to get IRQ from pins, - * a read must be done on GPMR register, or a write in - * GPSR or GPCR registers - */ - if (stmpe->partnum == STMPE1600) - stmpe_reg_read(stmpe, - stmpe->regs[STMPE_IDX_GPMR_LSB + regoffset]); } static void stmpe_dbg_show_one(struct seq_file *s, -- GitLab From cc1fa4a7b653a4f0a4c95a26ce842d699e8e4b1a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 22 Jan 2018 13:19:28 +0100 Subject: [PATCH 1097/1398] gpio: Fix kernel stack leak to userspace commit 24bd3efc9d1efb5f756a7c6f807a36ddb6adc671 upstream. The GPIO event descriptor was leaking kernel stack to userspace because we don't zero the variable before use. Ooops. Fix this. Reported-by: Arnd Bergmann Reviewed-by: Bartosz Golaszewski Reviewed-by: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 063d176baa24..f3c3680963b9 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -705,6 +705,9 @@ static irqreturn_t lineevent_irq_thread(int irq, void *p) struct gpioevent_data ge; int ret, level; + /* Do not leak kernel stack to userspace */ + memset(&ge, 0, sizeof(ge)); + ge.timestamp = ktime_get_real_ns(); level = gpiod_get_value_cansleep(le->desc); -- GitLab From f1803207b5eab7fc9540f633b3bb73a75ad82494 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 26 Nov 2017 00:16:46 +0100 Subject: [PATCH 1098/1398] crypto: ecdh - fix typo in KPP dependency of CRYPTO_ECDH commit b5b9007730ce1d90deaf25d7f678511550744bdc upstream. This fixes a typo in the CRYPTO_KPP dependency of CRYPTO_ECDH. Fixes: 3c4b23901a0c ("crypto: ecdh - Add ECDH software support") Signed-off-by: Hauke Mehrtens Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 84d71482bf08..ab0d93ab5695 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -120,7 +120,7 @@ config CRYPTO_DH config CRYPTO_ECDH tristate "ECDH algorithm" - select CRYTPO_KPP + select CRYPTO_KPP help Generic implementation of the ECDH algorithm -- GitLab From 95259cb008ba7ec88d3869db33377a111a6d9dee Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Thu, 18 Jan 2018 20:41:09 +0100 Subject: [PATCH 1099/1398] crypto: aesni - handle zero length dst buffer commit 9c674e1e2f9e24fa4392167efe343749008338e0 upstream. GCM can be invoked with a zero destination buffer. This is possible if the AAD and the ciphertext have zero lengths and only the tag exists in the source buffer (i.e. a source buffer cannot be zero). In this case, the GCM cipher only performs the authentication and no decryption operation. When the destination buffer has zero length, it is possible that no page is mapped to the SG pointing to the destination. In this case, sg_page(req->dst) is an invalid access. Therefore, page accesses should only be allowed if the req->dst->length is non-zero which is the indicator that a page must exist. This fixes a crash that can be triggered by user space via AF_ALG. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/aesni-intel_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index aa8b0672f87a..d9ae404f08c9 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -906,7 +906,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req) if (sg_is_last(req->src) && req->src->offset + req->src->length <= PAGE_SIZE && - sg_is_last(req->dst) && ++ sg_is_last(req->dst) && req->dst->length && req->dst->offset + req->dst->length <= PAGE_SIZE) { one_entry_in_sg = 1; scatterwalk_start(&src_sg_walk, req->src); -- GitLab From 1ce8e52f6f368f9d62b6320357bf7c26a1480ecb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Jan 2018 12:04:33 +0000 Subject: [PATCH 1100/1398] crypto: sha3-generic - fixes for alignment and big endian operation commit c013cee99d5a18aec8c71fee8f5f41369cd12595 upstream. Ensure that the input is byte swabbed before injecting it into the SHA3 transform. Use the get_unaligned() accessor for this so that we don't perform unaligned access inadvertently on architectures that do not support that. Fixes: 53964b9ee63b7075 ("crypto: sha3 - Add SHA-3 hash algorithm") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/sha3_generic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c index 7e8ed96236ce..a68be626017c 100644 --- a/crypto/sha3_generic.c +++ b/crypto/sha3_generic.c @@ -18,6 +18,7 @@ #include #include #include +#include #define KECCAK_ROUNDS 24 @@ -149,7 +150,7 @@ static int sha3_update(struct shash_desc *desc, const u8 *data, unsigned int i; for (i = 0; i < sctx->rsizw; i++) - sctx->st[i] ^= ((u64 *) src)[i]; + sctx->st[i] ^= get_unaligned_le64(src + 8 * i); keccakf(sctx->st); done += sctx->rsiz; @@ -174,7 +175,7 @@ static int sha3_final(struct shash_desc *desc, u8 *out) sctx->buf[sctx->rsiz - 1] |= 0x80; for (i = 0; i < sctx->rsizw; i++) - sctx->st[i] ^= ((u64 *) sctx->buf)[i]; + sctx->st[i] ^= get_unaligned_le64(sctx->buf + 8 * i); keccakf(sctx->st); -- GitLab From b7edc45f3adace20d74788dd4d5a085c134ba6be Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Tue, 2 Jan 2018 08:55:25 +0100 Subject: [PATCH 1101/1398] crypto: af_alg - whitelist mask and type commit bb30b8848c85e18ca7e371d0a869e94b3e383bdf upstream. The user space interface allows specifying the type and mask field used to allocate the cipher. Only a subset of the possible flags are intended for user space. Therefore, white-list the allowed flags. In case the user space caller uses at least one non-allowed flag, EINVAL is returned. Reported-by: syzbot Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/af_alg.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f5e18c2a4852..ca50eeb13097 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -149,7 +149,7 @@ EXPORT_SYMBOL_GPL(af_alg_release_parent); static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { - const u32 forbidden = CRYPTO_ALG_INTERNAL; + const u32 allowed = CRYPTO_ALG_KERN_DRIVER_ONLY; struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct sockaddr_alg *sa = (void *)uaddr; @@ -157,6 +157,10 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) void *private; int err; + /* If caller uses non-allowed flag, return error. */ + if ((sa->salg_feat & ~allowed) || (sa->salg_mask & ~allowed)) + return -EINVAL; + if (sock->state == SS_CONNECTED) return -EINVAL; @@ -175,9 +179,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (IS_ERR(type)) return PTR_ERR(type); - private = type->bind(sa->salg_name, - sa->salg_feat & ~forbidden, - sa->salg_mask & ~forbidden); + private = type->bind(sa->salg_name, sa->salg_feat, sa->salg_mask); if (IS_ERR(private)) { module_put(type->owner); return PTR_ERR(private); -- GitLab From ddba3c67a5b81955e34563d225a3f117354a247e Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Thu, 7 Dec 2017 12:31:56 -0800 Subject: [PATCH 1102/1398] HID: wacom: EKR: ensure devres groups at higher indexes are released commit 791ae273731fa85d3332e45064dab177ae663e80 upstream. Background: ExpressKey Remotes communicate their events via usb dongle. Each dongle can hold up to 5 pairings at one time and one EKR (identified by its serial number) can unfortunately be paired with its dongle more than once. The pairing takes place in a round-robin fashion. Input devices are only created once per EKR, when a new serial number is seen in the list of pairings. However, if a device is created for a "higher" paring index and subsequently a second pairing occurs at a lower pairing index, unpairing the remote with that serial number from any pairing index will currently cause a driver crash. This occurs infrequently, as two remotes are necessary to trigger this bug and most users have only one remote. As an illustration, to trigger the bug you need to have two remotes, and pair them in this order: 1. slot 0 -> remote 1 (input device created for remote 1) 2. slot 1 -> remote 1 (duplicate pairing - no device created) 3. slot 2 -> remote 1 (duplicate pairing - no device created) 4. slot 3 -> remote 1 (duplicate pairing - no device created) 5. slot 4 -> remote 2 (input device created for remote 2) 6. slot 0 -> remote 2 (1 destroyed and recreated at slot 1) 7. slot 1 -> remote 2 (1 destroyed and recreated at slot 2) 8. slot 2 -> remote 2 (1 destroyed and recreated at slot 3) 9. slot 3 -> remote 2 (1 destroyed and not recreated) 10. slot 4 -> remote 2 (2 was already in this slot so no changes) 11. slot 0 -> remote 1 (The current code sees remote 2 was paired over in one of the dongle slots it occupied and attempts to remove all information about remote 2 [1]. It calls wacom_remote_destroy_one for remote 2, but the destroy function assumes the lowest index is where the remote's input device was created. The code "cleans up" the other remote 2 pairings including the one which the input device was based on, assuming they were were just duplicate pairings. However, the cleanup doesn't call the devres release function for the input device that was created in slot 4). This issue is fixed by this commit. [1] Remote 2 should subsequently be re-created on the next packet from the EKR at the lowest numbered slot that it occupies (here slot 1). Fixes: f9036bd43602 ("HID: wacom: EKR: use devres groups to manage resources") Signed-off-by: Aaron Armstrong Skomra Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_sys.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index d72dfb2bbdb8..7a4d39ce51d9 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2192,23 +2192,23 @@ static void wacom_remote_destroy_one(struct wacom *wacom, unsigned int index) int i; unsigned long flags; - spin_lock_irqsave(&remote->remote_lock, flags); - remote->remotes[index].registered = false; - spin_unlock_irqrestore(&remote->remote_lock, flags); + for (i = 0; i < WACOM_MAX_REMOTES; i++) { + if (remote->remotes[i].serial == serial) { - if (remote->remotes[index].battery.battery) - devres_release_group(&wacom->hdev->dev, - &remote->remotes[index].battery.bat_desc); + spin_lock_irqsave(&remote->remote_lock, flags); + remote->remotes[i].registered = false; + spin_unlock_irqrestore(&remote->remote_lock, flags); - if (remote->remotes[index].group.name) - devres_release_group(&wacom->hdev->dev, - &remote->remotes[index]); + if (remote->remotes[i].battery.battery) + devres_release_group(&wacom->hdev->dev, + &remote->remotes[i].battery.bat_desc); + + if (remote->remotes[i].group.name) + devres_release_group(&wacom->hdev->dev, + &remote->remotes[i]); - for (i = 0; i < WACOM_MAX_REMOTES; i++) { - if (remote->remotes[i].serial == serial) { remote->remotes[i].serial = 0; remote->remotes[i].group.name = NULL; - remote->remotes[i].registered = false; remote->remotes[i].battery.battery = NULL; wacom->led.groups[i].select = WACOM_STATUS_UNKNOWN; } -- GitLab From 517931760e696c779e4d24424633c334a6447450 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:58:27 -0800 Subject: [PATCH 1103/1398] power: reset: zx-reboot: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 348c7cf5fcbcb68838255759d4cb45d039af36d2 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/power/reset/zx-reboot.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/reset/zx-reboot.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/power/reset/zx-reboot.c b/drivers/power/reset/zx-reboot.c index b0b1eb3a78c2..76153ac0706c 100644 --- a/drivers/power/reset/zx-reboot.c +++ b/drivers/power/reset/zx-reboot.c @@ -81,3 +81,7 @@ static struct platform_driver zx_reboot_driver = { }, }; module_platform_driver(zx_reboot_driver); + +MODULE_DESCRIPTION("ZTE SoCs reset driver"); +MODULE_AUTHOR("Jun Nie "); +MODULE_LICENSE("GPL v2"); -- GitLab From cb1a0b51d1031ce335d16d7528cace677f105106 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:54:52 -0800 Subject: [PATCH 1104/1398] gpio: iop: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 97b03136e1b637d7a9d2274c099e44ecf23f1103 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/gpio/gpio-iop.o see include/linux/module.h for more information This adds the license as "GPL", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-iop.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpio/gpio-iop.c b/drivers/gpio/gpio-iop.c index 98c7ff2a76e7..8d62db447ec1 100644 --- a/drivers/gpio/gpio-iop.c +++ b/drivers/gpio/gpio-iop.c @@ -58,3 +58,7 @@ static int __init iop3xx_gpio_init(void) return platform_driver_register(&iop3xx_gpio_driver); } arch_initcall(iop3xx_gpio_init); + +MODULE_DESCRIPTION("GPIO handling for Intel IOP3xx processors"); +MODULE_AUTHOR("Lennert Buytenhek "); +MODULE_LICENSE("GPL"); -- GitLab From e29997d55268c9d4d4fc2bb7833f83dc5b220cd5 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:54:26 -0800 Subject: [PATCH 1105/1398] gpio: ath79: add missing MODULE_DESCRIPTION/LICENSE commit 539340f37e6d6ed4cd93e8e18c9b2e4eafd4b842 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/gpio/gpio-ath79.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION is also added. Signed-off-by: Jesse Chan Acked-by: Alban Bedel Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-ath79.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpio-ath79.c b/drivers/gpio/gpio-ath79.c index dc37dbe4b46d..a83e97e15c14 100644 --- a/drivers/gpio/gpio-ath79.c +++ b/drivers/gpio/gpio-ath79.c @@ -323,3 +323,6 @@ static struct platform_driver ath79_gpio_driver = { }; module_platform_driver(ath79_gpio_driver); + +MODULE_DESCRIPTION("Atheros AR71XX/AR724X/AR913X GPIO API support"); +MODULE_LICENSE("GPL v2"); -- GitLab From 3a98d0753928cb8260930f2b651ecef47a0c3037 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:57:13 -0800 Subject: [PATCH 1106/1398] mtd: nand: denali_pci: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit d822401d1c6898a4a4ee03977b78b8cec402e88a upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/mtd/nand/denali_pci.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Acked-by: Masahiro Yamada Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/denali_pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/nand/denali_pci.c b/drivers/mtd/nand/denali_pci.c index de31514df282..d38527e0a2f2 100644 --- a/drivers/mtd/nand/denali_pci.c +++ b/drivers/mtd/nand/denali_pci.c @@ -119,3 +119,7 @@ static struct pci_driver denali_pci_driver = { }; module_pci_driver(denali_pci_driver); + +MODULE_DESCRIPTION("PCI driver for Denali NAND controller"); +MODULE_AUTHOR("Intel Corporation and its suppliers"); +MODULE_LICENSE("GPL v2"); -- GitLab From 30942f91b5a57a1ca7557998e901367901d1bccf Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 12 Dec 2017 14:31:30 -0500 Subject: [PATCH 1107/1398] igb: Free IRQs when device is hotplugged commit 888f22931478a05bc81ceb7295c626e1292bf0ed upstream. Recently I got a Caldigit TS3 Thunderbolt 3 dock, and noticed that upon hotplugging my kernel would immediately crash due to igb: [ 680.825801] kernel BUG at drivers/pci/msi.c:352! [ 680.828388] invalid opcode: 0000 [#1] SMP [ 680.829194] Modules linked in: igb(O) thunderbolt i2c_algo_bit joydev vfat fat btusb btrtl btbcm btintel bluetooth ecdh_generic hp_wmi sparse_keymap rfkill wmi_bmof iTCO_wdt intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul snd_pcm rtsx_pci_ms mei_me snd_timer memstick snd pcspkr mei soundcore i2c_i801 tpm_tis psmouse shpchp wmi tpm_tis_core tpm video hp_wireless acpi_pad rtsx_pci_sdmmc mmc_core crc32c_intel serio_raw rtsx_pci mfd_core xhci_pci xhci_hcd i2c_hid i2c_core [last unloaded: igb] [ 680.831085] CPU: 1 PID: 78 Comm: kworker/u16:1 Tainted: G O 4.15.0-rc3Lyude-Test+ #6 [ 680.831596] Hardware name: HP HP ZBook Studio G4/826B, BIOS P71 Ver. 01.03 06/09/2017 [ 680.832168] Workqueue: kacpi_hotplug acpi_hotplug_work_fn [ 680.832687] RIP: 0010:free_msi_irqs+0x180/0x1b0 [ 680.833271] RSP: 0018:ffffc9000030fbf0 EFLAGS: 00010286 [ 680.833761] RAX: ffff8803405f9c00 RBX: ffff88033e3d2e40 RCX: 000000000000002c [ 680.834278] RDX: 0000000000000000 RSI: 00000000000000ac RDI: ffff880340be2178 [ 680.834832] RBP: 0000000000000000 R08: ffff880340be1ff0 R09: ffff8803405f9c00 [ 680.835342] R10: 0000000000000000 R11: 0000000000000040 R12: ffff88033d63a298 [ 680.835822] R13: ffff88033d63a000 R14: 0000000000000060 R15: ffff880341959000 [ 680.836332] FS: 0000000000000000(0000) GS:ffff88034f440000(0000) knlGS:0000000000000000 [ 680.836817] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 680.837360] CR2: 000055e64044afdf CR3: 0000000001c09002 CR4: 00000000003606e0 [ 680.837954] Call Trace: [ 680.838853] pci_disable_msix+0xce/0xf0 [ 680.839616] igb_reset_interrupt_capability+0x5d/0x60 [igb] [ 680.840278] igb_remove+0x9d/0x110 [igb] [ 680.840764] pci_device_remove+0x36/0xb0 [ 680.841279] device_release_driver_internal+0x157/0x220 [ 680.841739] pci_stop_bus_device+0x7d/0xa0 [ 680.842255] pci_stop_bus_device+0x2b/0xa0 [ 680.842722] pci_stop_bus_device+0x3d/0xa0 [ 680.843189] pci_stop_and_remove_bus_device+0xe/0x20 [ 680.843627] trim_stale_devices+0xf3/0x140 [ 680.844086] trim_stale_devices+0x94/0x140 [ 680.844532] trim_stale_devices+0xa6/0x140 [ 680.845031] ? get_slot_status+0x90/0xc0 [ 680.845536] acpiphp_check_bridge.part.5+0xfe/0x140 [ 680.846021] acpiphp_hotplug_notify+0x175/0x200 [ 680.846581] ? free_bridge+0x100/0x100 [ 680.847113] acpi_device_hotplug+0x8a/0x490 [ 680.847535] acpi_hotplug_work_fn+0x1a/0x30 [ 680.848076] process_one_work+0x182/0x3a0 [ 680.848543] worker_thread+0x2e/0x380 [ 680.848963] ? process_one_work+0x3a0/0x3a0 [ 680.849373] kthread+0x111/0x130 [ 680.849776] ? kthread_create_worker_on_cpu+0x50/0x50 [ 680.850188] ret_from_fork+0x1f/0x30 [ 680.850601] Code: 43 14 85 c0 0f 84 d5 fe ff ff 31 ed eb 0f 83 c5 01 39 6b 14 0f 86 c5 fe ff ff 8b 7b 10 01 ef e8 b7 e4 d2 ff 48 83 78 70 00 74 e3 <0f> 0b 49 8d b5 a0 00 00 00 e8 62 6f d3 ff e9 c7 fe ff ff 48 8b [ 680.851497] RIP: free_msi_irqs+0x180/0x1b0 RSP: ffffc9000030fbf0 As it turns out, normally the freeing of IRQs that would fix this is called inside of the scope of __igb_close(). However, since the device is already gone by the point we try to unregister the netdevice from the driver due to a hotplug we end up seeing that the netif isn't present and thus, forget to free any of the device IRQs. So: make sure that if we're in the process of dismantling the netdev, we always allow __igb_close() to be called so that IRQs may be freed normally. Additionally, only allow igb_close() to be called from __igb_close() if it hasn't already been called for the given adapter. Signed-off-by: Lyude Paul Fixes: 9474933caf21 ("igb: close/suspend race in netif_device_detach") Cc: Todd Fujinaka Cc: Stephen Hemminger Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index ca54f7684668..3a61491421b1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3273,7 +3273,7 @@ static int __igb_close(struct net_device *netdev, bool suspending) int igb_close(struct net_device *netdev) { - if (netif_device_present(netdev)) + if (netif_device_present(netdev) || netdev->dismantle) return __igb_close(netdev, false); return 0; } -- GitLab From 036c227cdd1ca8e5e7e59e18ac87de3003f90d23 Mon Sep 17 00:00:00 2001 From: Stefan Schake Date: Fri, 10 Nov 2017 02:05:06 +0100 Subject: [PATCH 1108/1398] drm/vc4: Account for interrupts in flight [ Upstream commit 253696ccd613fbdaa5aba1de44c461a058e0a114 ] Synchronously disable the IRQ to make the following cancel_work_sync invocation effective. An interrupt in flight could enqueue further overflow mem work. As we free the binner BO immediately following vc4_irq_uninstall this caused a NULL pointer dereference in the work callback vc4_overflow_mem_work. Link: https://github.com/anholt/linux/issues/114 Signed-off-by: Stefan Schake Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1510275907-993-2-git-send-email-stschake@gmail.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_irq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 094bc6a475c1..d45a7c0a7915 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -208,6 +208,9 @@ vc4_irq_postinstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + /* Undo the effects of a previous vc4_irq_uninstall. */ + enable_irq(dev->irq); + /* Enable both the render done and out of memory interrupts. */ V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); @@ -225,6 +228,9 @@ vc4_irq_uninstall(struct drm_device *dev) /* Clear any pending interrupts we might have left. */ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); + /* Finish any interrupt handler still in flight. */ + disable_irq(dev->irq); + cancel_work_sync(&vc4->overflow_mem_work); } -- GitLab From 82e57cdce058713d3546f525140599c3b9b339fb Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Tue, 7 Nov 2017 15:17:55 +0530 Subject: [PATCH 1109/1398] cpupowerutils: bench - Fix cpu online check [ Upstream commit 53d1cd6b125fb9d69303516a1179ebc3b72f797a ] cpupower_is_cpu_online was incorrectly checking for 0. This patch fixes this by checking for 1 when the cpu is online. Signed-off-by: Abhishek Goel Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/cpupower/bench/system.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/bench/system.c b/tools/power/cpupower/bench/system.c index c25a74ae51ba..2bb3eef7d5c1 100644 --- a/tools/power/cpupower/bench/system.c +++ b/tools/power/cpupower/bench/system.c @@ -61,7 +61,7 @@ int set_cpufreq_governor(char *governor, unsigned int cpu) dprintf("set %s as cpufreq governor\n", governor); - if (cpupower_is_cpu_online(cpu) != 0) { + if (cpupower_is_cpu_online(cpu) != 1) { perror("cpufreq_cpu_exists"); fprintf(stderr, "error: cpu %u does not exist\n", cpu); return -1; -- GitLab From d8f75b4c7f259307ab9d0f4f304476e2b701c6ef Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Wed, 15 Nov 2017 14:10:02 +0530 Subject: [PATCH 1110/1398] cpupower : Fix cpupower working when cpu0 is offline [ Upstream commit dbdc468f35ee827cab2753caa1c660bdb832243a ] cpuidle_monitor used to assume that cpu0 is always online which is not a valid assumption on POWER machines. This patch fixes this by getting the cpu on which the current thread is running, instead of always using cpu0 for monitoring which may not be online. Signed-off-by: Abhishek Goel Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index 1b5da0066ebf..5b3205f16217 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -130,15 +130,18 @@ static struct cpuidle_monitor *cpuidle_register(void) { int num; char *tmp; + int this_cpu; + + this_cpu = sched_getcpu(); /* Assume idle state count is the same for all CPUs */ - cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(0); + cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(this_cpu); if (cpuidle_sysfs_monitor.hw_states_num <= 0) return NULL; for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) { - tmp = cpuidle_state_name(0, num); + tmp = cpuidle_state_name(this_cpu, num); if (tmp == NULL) continue; @@ -146,7 +149,7 @@ static struct cpuidle_monitor *cpuidle_register(void) strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1); free(tmp); - tmp = cpuidle_state_desc(0, num); + tmp = cpuidle_state_desc(this_cpu, num); if (tmp == NULL) continue; strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1); -- GitLab From 60d9b22b1ffc09a28be556e31caf7cdffe0df7ed Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:33 +0200 Subject: [PATCH 1111/1398] KVM: x86: emulator: Return to user-mode on L1 CPL=0 emulation failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1f4dcb3b213235e642088709a1c54964d23365e9 ] On this case, handle_emulation_failure() fills kvm_run with internal-error information which it expects to be delivered to user-mode for further processing. However, the code reports a wrong return-value which makes KVM to never return to user-mode on this scenario. Fixes: 6d77dbfc88e3 ("KVM: inject #UD if instruction emulation fails and exit to userspace") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d3f80cccb9aa..6c11a98860d1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5308,7 +5308,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - r = EMULATE_FAIL; + r = EMULATE_USER_EXIT; } kvm_queue_exception(vcpu, UD_VECTOR); -- GitLab From 114de9bfefa5aee862815b0ae7453d40bf5278c2 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:34 +0200 Subject: [PATCH 1112/1398] KVM: x86: Don't re-execute instruction when not passing CR2 value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9b8ae63798cb97e785a667ff27e43fa6220cb734 ] In case of instruction-decode failure or emulation failure, x86_emulate_instruction() will call reexecute_instruction() which will attempt to use the cr2 value passed to x86_emulate_instruction(). However, when x86_emulate_instruction() is called from emulate_instruction(), cr2 is not passed (passed as 0) and therefore it doesn't make sense to execute reexecute_instruction() logic at all. Fixes: 51d8b66199e9 ("KVM: cleanup emulate_instruction") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/vmx.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index cbd1d44da2d3..20cfeeb681c6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1113,7 +1113,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, static inline int emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { - return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); + return x86_emulate_instruction(vcpu, 0, + emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); } void kvm_enable_efer_bits(u64); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3ca6d15994e4..b9673bc82caa 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6257,7 +6257,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) return 1; - err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); + err = emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { ++vcpu->stat.mmio_exits; -- GitLab From ec73d16bc7ce352375052d81a22bbb53e3eccabe Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 5 Nov 2017 16:54:47 -0800 Subject: [PATCH 1113/1398] KVM: X86: Fix operand/address-size during instruction decoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3853be2603191829b442b64dac6ae8ba0c027bf9 ] Pedro reported: During tests that we conducted on KVM, we noticed that executing a "PUSH %ES" instruction under KVM produces different results on both memory and the SP register depending on whether EPT support is enabled. With EPT the SP is reduced by 4 bytes (and the written value is 0-padded) but without EPT support it is only reduced by 2 bytes. The difference can be observed when the CS.DB field is 1 (32-bit) but not when it's 0 (16-bit). The internal segment descriptor cache exist even in real/vm8096 mode. The CS.D also should be respected instead of just default operand/address-size/66H prefix/67H prefix during instruction decoding. This patch fixes it by also adjusting operand/address-size according to CS.D. Reported-by: Pedro Fonseca Tested-by: Pedro Fonseca Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Nadav Amit Cc: Pedro Fonseca Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c8f8dd8ca0a1..6f5a3b076341 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4990,6 +4990,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) bool op_prefix = false; bool has_seg_override = false; struct opcode opcode; + u16 dummy; + struct desc_struct desc; ctxt->memop.type = OP_NONE; ctxt->memopp = NULL; @@ -5008,6 +5010,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) switch (mode) { case X86EMUL_MODE_REAL: case X86EMUL_MODE_VM86: + def_op_bytes = def_ad_bytes = 2; + ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS); + if (desc.d) + def_op_bytes = def_ad_bytes = 4; + break; case X86EMUL_MODE_PROT16: def_op_bytes = def_ad_bytes = 2; break; -- GitLab From 2f9e94ef498db9e7f4d920d9dcd042fa378e3f03 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:29 +0200 Subject: [PATCH 1114/1398] KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0fc5a36dd6b345eb0d251a65c236e53bead3eef7 ] KVM uses ioapic_handled_vectors to track vectors that need to notify the IOAPIC on EOI. The problem is that IOAPIC can be reconfigured while an interrupt with old configuration is pending or running and ioapic_handled_vectors only remembers the newest configuration; thus EOI from the old interrupt is not delievered to the IOAPIC. A previous commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") addressed this issue by adding pending edge-triggered interrupts to ioapic_handled_vectors, fixing this race for edge-triggered interrupts. The commit explicitly ignored level-triggered interrupts, but this race applies to them as well: 1) IOAPIC sends a level triggered interrupt vector to VCPU0 2) VCPU0's handler deasserts the irq line and reconfigures the IOAPIC to route the vector to VCPU1. The reconfiguration rewrites only the upper 32 bits of the IOREDTBLn register. (Causes KVM to update ioapic_handled_vectors for VCPU0 and it no longer includes the vector.) 3) VCPU0 sends EOI for the vector, but it's not delievered to the IOAPIC because the ioapic_handled_vectors doesn't include the vector. 4) New interrupts are not delievered to VCPU1 because remote_irr bit is set forever. Therefore, the correct behavior is to add all pending and running interrupts to ioapic_handled_vectors. This commit introduces a slight performance hit similar to commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") for the rare case that the vector is reused by a non-IOAPIC source on VCPU0. We prefer to keep solution simple and not handle this case just as the original commit does. Fixes: db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/ioapic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 6e219e5c07d2..a7ac8688bba8 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) index == RTC_GSI) { if (kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, e->fields.dest_mode) || - (e->fields.trig_mode == IOAPIC_EDGE_TRIG && - kvm_apic_pending_eoi(vcpu, e->fields.vector))) + kvm_apic_pending_eoi(vcpu, e->fields.vector)) __set_bit(e->fields.vector, ioapic_handled_vectors); } -- GitLab From a9f2c169366752927fd0bf8f5a9b78d3315f6c85 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:32 +0200 Subject: [PATCH 1115/1398] KVM: x86: ioapic: Clear Remote IRR when entry is switched to edge-triggered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a8bfec2930525808c01f038825d1df3904638631 ] Some OSes (Linux, Xen) use this behavior to clear the Remote IRR bit for IOAPICs without an EOI register. They simulate the EOI message manually by changing the trigger mode to edge and then back to level, with the entry being masked during this. QEMU implements this feature in commit ed1263c363c9 ("ioapic: clear remote irr bit for edge-triggered interrupts") As a side effect, this commit removes an incorrect behavior where Remote IRR was cleared when the redirection table entry was rewritten. This is not consistent with the manual and also opens an opportunity for a strange behavior when a redirection table entry is modified from an interrupt handler that handles the same entry: The modification will clear the Remote IRR bit even though the interrupt handler is still running. Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Steve Rutherford Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/ioapic.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index a7ac8688bba8..4b573c8694ac 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -306,8 +306,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) } else { e->bits &= ~0xffffffffULL; e->bits |= (u32) val; - e->fields.remote_irr = 0; } + + /* + * Some OSes (Linux, Xen) assume that Remote IRR bit will + * be cleared by IOAPIC hardware when the entry is configured + * as edge-triggered. This behavior is used to simulate an + * explicit EOI on IOAPICs that don't have the EOI register. + */ + if (e->fields.trig_mode == IOAPIC_EDGE_TRIG) + e->fields.remote_irr = 0; + mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); -- GitLab From 1d3ab3b2964e927515b947c8b112a9a40ed789c4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 15 Oct 2017 21:24:49 +0200 Subject: [PATCH 1116/1398] ACPI / bus: Leave modalias empty for devices which are not present [ Upstream commit 10809bb976648ac58194a629e3d7af99e7400297 ] Most Bay and Cherry Trail devices use a generic DSDT with all possible peripheral devices present in the DSDT, with their _STA returning 0x00 or 0x0f based on AML variables which describe what is actually present on the board. Since ACPI device objects with a 0x00 status (not present) still get an entry under /sys/bus/acpi/devices, and those entry had an acpi:PNPID modalias, userspace would end up loading modules for non present hardware. This commit fixes this by leaving the modalias empty for non present devices. This results in 10 modules less being loaded with a generic distro kernel config on my Cherry Trail test-device (a GPD pocket). Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/device_sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index 7b2c48fde4e2..201c7ceb7052 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -146,6 +146,10 @@ static int create_pnp_modalias(struct acpi_device *acpi_dev, char *modalias, int count; struct acpi_hardware_id *id; + /* Avoid unnecessarily loading modules for non present devices. */ + if (!acpi_device_is_present(acpi_dev)) + return 0; + /* * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the -- GitLab From 876b31fd9815c57ae0389132f3306ff31f148092 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:33 +0200 Subject: [PATCH 1117/1398] KVM: x86: ioapic: Preserve read-only values in the redirection table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b200dded0a6974a3b69599832b2203483920ab25 ] According to 82093AA (IOAPIC) manual, Remote IRR and Delivery Status are read-only. QEMU implements the bits as RO in commit 479c2a1cb7fb ("ioapic: keep RO bits for IOAPIC entry"). Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Steve Rutherford Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/ioapic.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 4b573c8694ac..5f810bb80802 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -278,6 +278,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; bool mask_before, mask_after; + int old_remote_irr, old_delivery_status; union kvm_ioapic_redirect_entry *e; switch (ioapic->ioregsel) { @@ -300,6 +301,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) return; e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; + /* Preserve read-only fields */ + old_remote_irr = e->fields.remote_irr; + old_delivery_status = e->fields.delivery_status; if (ioapic->ioregsel & 1) { e->bits &= 0xffffffff; e->bits |= (u64) val << 32; @@ -307,6 +311,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) e->bits &= ~0xffffffffULL; e->bits |= (u32) val; } + e->fields.remote_irr = old_remote_irr; + e->fields.delivery_status = old_delivery_status; /* * Some OSes (Linux, Xen) assume that Remote IRR bit will -- GitLab From 6436981ba6d11dc13b242d48855944b73569739f Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 15 Nov 2017 21:17:55 +0000 Subject: [PATCH 1118/1398] cpufreq: Add Loongson machine dependencies [ Upstream commit 0d307935fefa6389eb726c6362351c162c949101 ] The MIPS loongson cpufreq drivers don't build unless configured for the correct machine type, due to dependency on machine specific architecture headers and symbols in machine specific platform code. More specifically loongson1-cpufreq.c uses RST_CPU_EN and RST_CPU, neither of which is defined in asm/mach-loongson32/regs-clk.h unless CONFIG_LOONGSON1_LS1B=y, and loongson2_cpufreq.c references loongson2_clockmod_table[], which is only defined in arch/mips/loongson64/lemote-2f/clock.c, i.e. when CONFIG_LEMOTE_MACH2F=y. Add these dependencies to Kconfig to avoid randconfig / allyesconfig build failures (e.g. when based on BMIPS which also has a cpufreq driver). Signed-off-by: James Hogan Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index d8b164a7c4e5..cac26fb22891 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -273,6 +273,7 @@ endif if MIPS config LOONGSON2_CPUFREQ tristate "Loongson2 CPUFreq Driver" + depends on LEMOTE_MACH2F help This option adds a CPUFreq driver for loongson processors which support software configurable cpu frequency. @@ -285,6 +286,7 @@ config LOONGSON2_CPUFREQ config LOONGSON1_CPUFREQ tristate "Loongson1 CPUFreq Driver" + depends on LOONGSON1_LS1B help This option adds a CPUFreq driver for loongson1 processors which support software configurable cpu frequency. -- GitLab From 409982cbb5eb814532953fb996ddc27bae5282a4 Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Fri, 24 Nov 2017 15:14:27 -0800 Subject: [PATCH 1119/1398] bcache: check return value of register_shrinker [ Upstream commit 6c4ca1e36cdc1a0a7a84797804b87920ccbebf51 ] register_shrinker is now __must_check, so check it to kill a warning. Caller of bch_btree_cache_alloc in super.c appropriately checks return value so this is fully plumbed through. This V2 fixes checkpatch warnings and improves the commit description, as I was too hasty getting the previous version out. Signed-off-by: Michael Lyle Reviewed-by: Vojtech Pavlik Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 2efdce07247c..cac297f8170e 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -803,7 +803,10 @@ int bch_btree_cache_alloc(struct cache_set *c) c->shrink.scan_objects = bch_mca_scan; c->shrink.seeks = 4; c->shrink.batch = c->btree_pages * 2; - register_shrinker(&c->shrink); + + if (register_shrinker(&c->shrink)) + pr_warn("bcache: %s: could not register shrinker", + __func__); return 0; } -- GitLab From 8afdbb165a792f1e89b0a2c67b54826889d3d51f Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 1 Nov 2017 19:21:55 -0400 Subject: [PATCH 1120/1398] drm/amdgpu: Fix SDMA load/unload sequence on HWS disabled mode [ Upstream commit cf21654b40968609779751b34e7923180968fe5b ] Fix the SDMA load and unload sequence as suggested by HW document. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Acked-by: Oded Gabbay Signed-off-by: Oded Gabbay Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 47 ++++++++++++++----- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 1a0a5f7cccbc..47951f4775b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -367,29 +367,50 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; + unsigned long end_jiffies; uint32_t sdma_base_addr; + uint32_t data; m = get_sdma_mqd(mqd); sdma_base_addr = get_sdma_base_addr(m); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, - m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, - m->sdma_rlc_rb_base); + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + if (m->sdma_engine_id) { + data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); + } else { + data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); + } + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, + m->sdma_rlc_doorbell); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdma_rlc_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdma_rlc_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdma_rlc_rb_rptr_addr_hi); - - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, - m->sdma_rlc_doorbell); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdma_rlc_rb_cntl); @@ -493,9 +514,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); return 0; } -- GitLab From 16980affa1b09fc536657ddb933902825ead1c38 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 1 Nov 2017 19:21:56 -0400 Subject: [PATCH 1121/1398] drm/amdkfd: Fix SDMA ring buffer size calculation [ Upstream commit d12fb13f23199faa7e536acec1db49068e5a067d ] ffs function return the position of the first bit set on 1 based. (bit zero returns 1). Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index d83de985e88c..8577a563600f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -215,8 +215,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, BUG_ON(!mm || !mqd || !q); m = get_sdma_mqd(mqd); - m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) << - SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; -- GitLab From 8275584082063726845b7ce2acd1b9d87cdee3fa Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 1 Nov 2017 19:21:57 -0400 Subject: [PATCH 1122/1398] drm/amdkfd: Fix SDMA oversubsription handling [ Upstream commit 8c946b8988acec785bcf67088b6bd0747f36d2d3 ] SDMA only supports a fixed number of queues. HWS cannot handle oversubscription. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../drm/amd/amdkfd/kfd_process_queue_manager.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index e1fb40b84c72..5425c68d0287 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -205,6 +205,24 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: + if (dev->dqm->queue_count >= + CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM) { + pr_err("Over-subscription is not allowed for SDMA.\n"); + retval = -EPERM; + goto err_create_queue; + } + + retval = create_cp_queue(pqm, dev, &q, properties, f, *qid); + if (retval != 0) + goto err_create_queue; + pqn->q = q; + pqn->kq = NULL; + retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, + &q->properties.vmid); + pr_debug("DQM returned %d for create_queue\n", retval); + print_queue(q); + break; + case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && -- GitLab From 03899a46c223510160632e991bad3ad234da386d Mon Sep 17 00:00:00 2001 From: zhangliping Date: Sat, 25 Nov 2017 22:02:12 +0800 Subject: [PATCH 1123/1398] openvswitch: fix the incorrect flow action alloc size [ Upstream commit 67c8d22a73128ff910e2287567132530abcf5b71 ] If we want to add a datapath flow, which has more than 500 vxlan outputs' action, we will get the following error reports: openvswitch: netlink: Flow action size 32832 bytes exceeds max openvswitch: netlink: Flow action size 32832 bytes exceeds max openvswitch: netlink: Actions may not be safe on all matching packets ... ... It seems that we can simply enlarge the MAX_ACTIONS_BUFSIZE to fix it, but this is not the root cause. For example, for a vxlan output action, we need about 60 bytes for the nlattr, but after it is converted to the flow action, it only occupies 24 bytes. This means that we can still support more than 1000 vxlan output actions for a single datapath flow under the the current 32k max limitation. So even if the nla_len(attr) is larger than MAX_ACTIONS_BUFSIZE, we shouldn't report EINVAL and keep it move on, as the judgement can be done by the reserve_sfa_size. Signed-off-by: zhangliping Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/flow_netlink.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 07925418c2a5..1668916bdbde 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1789,14 +1789,11 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) #define MAX_ACTIONS_BUFSIZE (32 * 1024) -static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) +static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; - if (size > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); - return ERR_PTR(-EINVAL); - } + WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); if (!sfa) @@ -1869,12 +1866,15 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = ksize(*sfa) * 2; if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) + if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { + OVS_NLERR(log, "Flow action size exceeds max %u", + MAX_ACTIONS_BUFSIZE); return ERR_PTR(-EMSGSIZE); + } new_acts_size = MAX_ACTIONS_BUFSIZE; } - acts = nla_alloc_flow_actions(new_acts_size, log); + acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) return (void *)acts; @@ -2500,7 +2500,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, { int err; - *sfa = nla_alloc_flow_actions(nla_len(attr), log); + *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); -- GitLab From 030d4676a2681a0656c205a9e58b9624a74a16f4 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Tue, 14 Nov 2017 23:20:05 +0800 Subject: [PATCH 1124/1398] mac80211: fix the update of path metric for RANN frame [ Upstream commit fbbdad5edf0bb59786a51b94a9d006bc8c2da9a2 ] The previous path metric update from RANN frame has not considered the own link metric toward the transmitting mesh STA. Fix this. Reported-by: Michael65535 Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mesh_hwmp.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index b747c9645e43..fed598a202c8 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -788,7 +788,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, struct mesh_path *mpath; u8 ttl, flags, hopcount; const u8 *orig_addr; - u32 orig_sn, metric, metric_txsta, interval; + u32 orig_sn, new_metric, orig_metric, last_hop_metric, interval; bool root_is_gate; ttl = rann->rann_ttl; @@ -799,7 +799,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, interval = le32_to_cpu(rann->rann_interval); hopcount = rann->rann_hopcount; hopcount++; - metric = le32_to_cpu(rann->rann_metric); + orig_metric = le32_to_cpu(rann->rann_metric); /* Ignore our own RANNs */ if (ether_addr_equal(orig_addr, sdata->vif.addr)) @@ -816,7 +816,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, return; } - metric_txsta = airtime_link_metric_get(local, sta); + last_hop_metric = airtime_link_metric_get(local, sta); + new_metric = orig_metric + last_hop_metric; + if (new_metric < orig_metric) + new_metric = MAX_METRIC; mpath = mesh_path_lookup(sdata, orig_addr); if (!mpath) { @@ -829,7 +832,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } if (!(SN_LT(mpath->sn, orig_sn)) && - !(mpath->sn == orig_sn && metric < mpath->rann_metric)) { + !(mpath->sn == orig_sn && new_metric < mpath->rann_metric)) { rcu_read_unlock(); return; } @@ -847,7 +850,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } mpath->sn = orig_sn; - mpath->rann_metric = metric + metric_txsta; + mpath->rann_metric = new_metric; mpath->is_root = true; /* Recording RANNs sender address to send individually * addressed PREQs destined for root mesh STA */ @@ -867,7 +870,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, orig_sn, 0, NULL, 0, broadcast_addr, hopcount, ttl, interval, - metric + metric_txsta, 0, sdata); + new_metric, 0, sdata); } rcu_read_unlock(); -- GitLab From 8cfb3965ebcd9e02365f8c8e9f4c9f19fd0be004 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 15 Nov 2017 16:20:52 -0500 Subject: [PATCH 1125/1398] btrfs: fix deadlock when writing out space cache [ Upstream commit b77000ed558daa3bef0899d29bf171b8c9b5e6a8 ] If we fail to prepare our pages for whatever reason (out of memory in our case) we need to make sure to drop the block_group->data_rwsem, otherwise hilarity ensues. Signed-off-by: Josef Bacik Reviewed-by: Omar Sandoval Reviewed-by: Liu Bo Reviewed-by: David Sterba [ add label and use existing unlocking code ] Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e4b48f377d3a..c56253a1e5b4 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1253,7 +1253,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, /* Lock all pages first so we can lock the extent safely. */ ret = io_ctl_prepare_pages(io_ctl, inode, 0); if (ret) - goto out; + goto out_unlock; lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, &cached_state); @@ -1346,6 +1346,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, out_nospc: cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list); +out_unlock: if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) up_write(&block_group->data_rwsem); -- GitLab From 90ef2c30ebd358005d892aa2895bdcbc027ddce1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Oct 2017 11:20:15 -0400 Subject: [PATCH 1126/1398] reiserfs: remove unneeded i_version bump [ Upstream commit 9f97df50c52c2887432debb6238f4e43567386a5 ] The i_version field in reiserfs is not initialized and is only ever updated here. Nothing ever views it, so just remove it. Signed-off-by: Jeff Layton Signed-off-by: Jan Kara Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 0a6ad4e71e88..e101d70d2327 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2521,7 +2521,6 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, return err; if (inode->i_size < off + len - towrite) i_size_write(inode, off + len - towrite); - inode->i_version++; inode->i_mtime = inode->i_ctime = current_time(inode); mark_inode_dirty(inode); return len - towrite; -- GitLab From b0fa04e8429ed74b6156b8fbf329a022e701072e Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Nov 2017 14:55:05 -0800 Subject: [PATCH 1127/1398] KVM: X86: Fix softlockup when get the current kvmclock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e70b57a6ce4e8b92a56a615ae79bdb2bd66035e7 ] watchdog: BUG: soft lockup - CPU#6 stuck for 22s! [qemu-system-x86:10185] CPU: 6 PID: 10185 Comm: qemu-system-x86 Tainted: G OE 4.14.0-rc4+ #4 RIP: 0010:kvm_get_time_scale+0x4e/0xa0 [kvm] Call Trace: get_time_ref_counter+0x5a/0x80 [kvm] kvm_hv_process_stimers+0x120/0x5f0 [kvm] kvm_arch_vcpu_ioctl_run+0x4b4/0x1690 [kvm] kvm_vcpu_ioctl+0x33a/0x620 [kvm] do_vfs_ioctl+0xa1/0x5d0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x1e/0xa9 This can be reproduced when running kvm-unit-tests/hyperv_stimer.flat and cpu-hotplug stress simultaneously. __this_cpu_read(cpu_tsc_khz) returns 0 (set in kvmclock_cpu_down_prep()) when the pCPU is unhotplug which results in kvm_get_time_scale() gets into an infinite loop. This patch fixes it by treating the unhotplug pCPU as not using master clock. Reviewed-by: Radim Krčmář Reviewed-by: David Hildenbrand Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c11a98860d1..e023ef981feb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1751,10 +1751,13 @@ static u64 __get_kvmclock_ns(struct kvm *kvm) /* both __this_cpu_read() and rdtsc() should be on the same cpu */ get_cpu(); - kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, - &hv_clock.tsc_shift, - &hv_clock.tsc_to_system_mul); - ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + if (__this_cpu_read(cpu_tsc_khz)) { + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + &hv_clock.tsc_shift, + &hv_clock.tsc_to_system_mul); + ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + } else + ret = ktime_get_boot_ns() + ka->kvmclock_offset; put_cpu(); -- GitLab From 5c0b19bd8cffec6822c92af223814712bdde42bd Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Nov 2017 14:52:21 -0800 Subject: [PATCH 1128/1398] KVM: VMX: Fix rflags cache during vCPU reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c37c28730bb031cc8a44a130c2555c0f3efbe2d0 ] Reported by syzkaller: *** Guest State *** CR0: actual=0x0000000080010031, shadow=0x0000000060000010, gh_mask=fffffffffffffff7 CR4: actual=0x0000000000002061, shadow=0x0000000000000000, gh_mask=ffffffffffffe8f1 CR3 = 0x000000002081e000 RSP = 0x000000000000fffa RIP = 0x0000000000000000 RFLAGS=0x00023000 DR7 = 0x00000000000000 ^^^^^^^^^^ ------------[ cut here ]------------ WARNING: CPU: 6 PID: 24431 at /home/kernel/linux/arch/x86/kvm//x86.c:7302 kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] CPU: 6 PID: 24431 Comm: reprotest Tainted: G W OE 4.14.0+ #26 RIP: 0010:kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] RSP: 0018:ffff880291d179e0 EFLAGS: 00010202 Call Trace: kvm_vcpu_ioctl+0x479/0x880 [kvm] do_vfs_ioctl+0x142/0x9a0 SyS_ioctl+0x74/0x80 entry_SYSCALL_64_fastpath+0x23/0x9a The failed vmentry is triggered by the following beautified testcase: #include #include #include #include #include #include #include long r[5]; int main() { struct kvm_debugregs dr = { 0 }; r[2] = open("/dev/kvm", O_RDONLY); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7); struct kvm_guest_debug debug = { .control = 0xf0403, .arch = { .debugreg[6] = 0x2, .debugreg[7] = 0x2 } }; ioctl(r[4], KVM_SET_GUEST_DEBUG, &debug); ioctl(r[4], KVM_RUN, 0); } which testcase tries to setup the processor specific debug registers and configure vCPU for handling guest debug events through KVM_SET_GUEST_DEBUG. The KVM_SET_GUEST_DEBUG ioctl will get and set rflags in order to set TF bit if single step is needed. All regs' caches are reset to avail and GUEST_RFLAGS vmcs field is reset to 0x2 during vCPU reset. However, the cache of rflags is not reset during vCPU reset. The function vmx_get_rflags() returns an unreset rflags cache value since the cache is marked avail, it is 0 after boot. Vmentry fails if the rflags reserved bit 1 is 0. This patch fixes it by resetting both the GUEST_RFLAGS vmcs field and its cache to 0x2 during vCPU reset. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Reviewed-by: David Hildenbrand Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Nadav Amit Cc: Dmitry Vyukov Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b9673bc82caa..178a344f55f8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5194,7 +5194,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } - vmcs_writel(GUEST_RFLAGS, 0x02); + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); kvm_rip_write(vcpu, 0xfff0); vmcs_writel(GUEST_GDTR_BASE, 0); -- GitLab From b5bfda0f8e29c7070f19c0cac4005fd85f37a84e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 22 Nov 2017 12:21:07 -0800 Subject: [PATCH 1129/1398] xfs: always free inline data before resetting inode fork during ifree [ Upstream commit 98c4f78dcdd8cec112d1cbc5e9a792ee6e5ab7a6 ] In xfs_ifree, we reset the data/attr forks to extents format without bothering to free any inline data buffer that might still be around after all the blocks have been truncated off the file. Prior to commit 43518812d2 ("xfs: remove support for inlining data/extents into the inode fork") nobody noticed because the leftover inline data after truncation was small enough to fit inside the inline buffer inside the fork itself. However, now that we've removed the inline buffer, we /always/ have to free the inline data buffer or else we leak them like crazy. This test was found by turning on kmemleak for generic/001 or generic/388. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 98ca9f1b6a07..c5f2f1e3cc4b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2429,6 +2429,24 @@ xfs_ifree_cluster( return 0; } +/* + * Free any local-format buffers sitting around before we reset to + * extents format. + */ +static inline void +xfs_ifree_local_data( + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) + return; + + ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); +} + /* * This is called to return an inode to the inode free list. * The inode should already be truncated to 0 length and have @@ -2466,6 +2484,9 @@ xfs_ifree( if (error) return error; + xfs_ifree_local_data(ip, XFS_DATA_FORK); + xfs_ifree_local_data(ip, XFS_ATTR_FORK); + VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; ip->i_d.di_dmevmask = 0; -- GitLab From c6a34556f539c69366bd7bcbeff490a6949f95ce Mon Sep 17 00:00:00 2001 From: Eduardo Otubo Date: Thu, 23 Nov 2017 15:18:35 +0100 Subject: [PATCH 1130/1398] xen-netfront: remove warning when unloading module [ Upstream commit 5b5971df3bc2775107ddad164018a8a8db633b81 ] v2: * Replace busy wait with wait_event()/wake_up_all() * Cannot garantee that at the time xennet_remove is called, the xen_netback state will not be XenbusStateClosed, so added a condition for that * There's a small chance for the xen_netback state is XenbusStateUnknown by the time the xen_netfront switches to Closed, so added a condition for that. When unloading module xen_netfront from guest, dmesg would output warning messages like below: [ 105.236836] xen:grant_table: WARNING: g.e. 0x903 still in use! [ 105.236839] deferring g.e. 0x903 (pfn 0x35805) This problem relies on netfront and netback being out of sync. By the time netfront revokes the g.e.'s netback didn't have enough time to free all of them, hence displaying the warnings on dmesg. The trick here is to make netfront to wait until netback frees all the g.e.'s and only then continue to cleanup for the module removal, and this is done by manipulating both device states. Signed-off-by: Eduardo Otubo Acked-by: Juergen Gross Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 8d498a997e25..1a9dadf7b3cc 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -86,6 +86,8 @@ struct netfront_cb { /* IRQ name is queue name with "-tx" or "-rx" appended */ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3) +static DECLARE_WAIT_QUEUE_HEAD(module_unload_q); + struct netfront_stats { u64 packets; u64 bytes; @@ -2051,10 +2053,12 @@ static void netback_changed(struct xenbus_device *dev, break; case XenbusStateClosed: + wake_up_all(&module_unload_q); if (dev->state == XenbusStateClosed) break; /* Missed the backend's CLOSING state -- fallthrough */ case XenbusStateClosing: + wake_up_all(&module_unload_q); xenbus_frontend_closed(dev); break; } @@ -2160,6 +2164,20 @@ static int xennet_remove(struct xenbus_device *dev) dev_dbg(&dev->dev, "%s\n", dev->nodename); + if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) { + xenbus_switch_state(dev, XenbusStateClosing); + wait_event(module_unload_q, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosing); + + xenbus_switch_state(dev, XenbusStateClosed); + wait_event(module_unload_q, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosed || + xenbus_read_driver_state(dev->otherend) == + XenbusStateUnknown); + } + xennet_disconnect_backend(info); unregister_netdev(info->netdev); -- GitLab From c57767b60962f4c965064aadc028db55bd0e63fb Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Thu, 10 Aug 2017 10:53:53 +0200 Subject: [PATCH 1131/1398] auxdisplay: img-ascii-lcd: Only build on archs that have IOMEM [ Upstream commit 141cbfba1d0502006463aa80f57c64086226af1a ] This avoids the MODPOST error: ERROR: "devm_ioremap_resource" [drivers/auxdisplay/img-ascii-lcd.ko] undefined! Signed-off-by: Thomas Meyer Acked-by: Randy Dunlap Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/auxdisplay/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index 10e1b9eee10e..f03cf1df8d6b 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -121,6 +121,7 @@ config CFAG12864B_RATE config IMG_ASCII_LCD tristate "Imagination Technologies ASCII LCD Display" + depends on HAS_IOMEM default y if MIPS_MALTA || MIPS_SEAD3 select SYSCON help -- GitLab From 5cd3586ca8d4347d2cd658062c994b9edd09ab59 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:12 -0400 Subject: [PATCH 1132/1398] nfsd: CLOSE SHOULD return the invalid special stateid for NFSv4.x (x>0) [ Upstream commit fb500a7cfee7f2f447d2bbf30cb59629feab6ac1 ] Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9ebb2d7c8182..7f3a11ab4191 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -63,6 +63,9 @@ static const stateid_t zero_stateid = { static const stateid_t currentstateid = { .si_generation = 1, }; +static const stateid_t close_stateid = { + .si_generation = 0xffffffffU, +}; static u64 current_sessionid = 1; @@ -5407,6 +5410,11 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_close_open_stateid(stp); mutex_unlock(&stp->st_mutex); + /* See RFC5661 sectionm 18.2.4 */ + if (stp->st_stid.sc_client->cl_minorversion) + memcpy(&close->cl_stateid, &close_stateid, + sizeof(close->cl_stateid)); + /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); out: -- GitLab From 2a7d4a723d2e04f9d155d2dcf82ed64d3acc7b10 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:15 -0400 Subject: [PATCH 1133/1398] nfsd: Ensure we check stateid validity in the seqid operation checks [ Upstream commit 9271d7e509c1bfc0b9a418caec29ec8d1ac38270 ] After taking the stateid st_mutex, we want to know that the stateid still represents valid state before performing any non-idempotent actions. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7f3a11ab4191..799c18cba6a5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5178,15 +5178,9 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ status = nfsd4_check_seqid(cstate, sop, seqid); if (status) return status; - if (stp->st_stid.sc_type == NFS4_CLOSED_STID - || stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID) - /* - * "Closed" stateid's exist *only* to return - * nfserr_replay_me from the previous step, and - * revoked delegations are kept only for free_stateid. - */ - return nfserr_bad_stateid; - mutex_lock(&stp->st_mutex); + status = nfsd4_lock_ol_stateid(stp); + if (status != nfs_ok) + return status; status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status == nfs_ok) status = nfs4_check_fh(current_fh, &stp->st_stid); -- GitLab From f35ab8e2eeb8bc9492cbaf4653e04eea9cb63648 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 6 Nov 2017 16:22:48 +0300 Subject: [PATCH 1134/1398] grace: replace BUG_ON by WARN_ONCE in exit_net hook [ Upstream commit b872285751c1af010e12d02bce7069e2061a58ca ] Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs_common/grace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index fd8c9a5bcac4..e280e9c9ebf3 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -104,7 +104,9 @@ grace_exit_net(struct net *net) { struct list_head *grace_list = net_generic(net, grace_net_id); - BUG_ON(!list_empty(grace_list)); + WARN_ONCE(!list_empty(grace_list), + "net %x %s: grace_list is not empty\n", + net->ns.inum, __func__); } static struct pernet_operations grace_net_ops = { -- GitLab From f25e222ccc50b0b52da98706c1e7da4ec090c4d7 Mon Sep 17 00:00:00 2001 From: Andrew Elble Date: Thu, 9 Nov 2017 13:41:10 -0500 Subject: [PATCH 1135/1398] nfsd: check for use of the closed special stateid [ Upstream commit ae254dac721d44c0bfebe2795df87459e2e88219 ] Prevent the use of the closed (invalid) special stateid by clients. Signed-off-by: Andrew Elble Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 799c18cba6a5..c548190ef5ef 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -72,6 +72,7 @@ static u64 current_sessionid = 1; #define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) #define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) #define CURRENT_STATEID(stateid) (!memcmp((stateid), ¤tstateid, sizeof(stateid_t))) +#define CLOSE_STATEID(stateid) (!memcmp((stateid), &close_stateid, sizeof(stateid_t))) /* forward declarations */ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); @@ -4869,7 +4870,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) struct nfs4_stid *s; __be32 status = nfserr_bad_stateid; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || + CLOSE_STATEID(stateid)) return status; /* Client debugging aid. */ if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { @@ -4927,7 +4929,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, else if (typemask & NFS4_DELEG_STID) typemask |= NFS4_REVOKED_DELEG_STID; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || + CLOSE_STATEID(stateid)) return nfserr_bad_stateid; status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn); if (status == nfserr_stale_clientid) { -- GitLab From 3b7742374f3e4d2a9891b02b13c69796d7bda090 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 13 Nov 2017 07:25:40 +0300 Subject: [PATCH 1136/1398] lockd: fix "list_add double add" caused by legacy signal interface [ Upstream commit 81833de1a46edce9ca20cfe079872ac1c20ef359 ] restart_grace() uses hardcoded init_net. It can cause to "list_add double add" in following scenario: 1) nfsd and lockd was started in several net namespaces 2) nfsd in init_net was stopped (lockd was not stopped because it have users from another net namespaces) 3) lockd got signal, called restart_grace() -> set_grace_period() and enabled lock_manager in hardcoded init_net. 4) nfsd in init_net is started again, its lockd_up() calls set_grace_period() and tries to add lock_manager into init_net 2nd time. Jeff Layton suggest: "Make it safe to call locks_start_grace multiple times on the same lock_manager. If it's already on the global grace_list, then don't try to add it again. (But we don't intentionally add twice, so for now we WARN about that case.) With this change, we also need to ensure that the nfsd4 lock manager initializes the list before we call locks_start_grace. While we're at it, move the rest of the nfsd_net initialization into nfs4_state_create_net. I see no reason to have it spread over two functions like it is today." Suggested patch was updated to generate warning in described situation. Suggested-by: Jeff Layton Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs_common/grace.c | 6 +++++- fs/nfsd/nfs4state.c | 7 ++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index e280e9c9ebf3..77d136ac8909 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -30,7 +30,11 @@ locks_start_grace(struct net *net, struct lock_manager *lm) struct list_head *grace_list = net_generic(net, grace_net_id); spin_lock(&grace_lock); - list_add(&lm->list, grace_list); + if (list_empty(&lm->list)) + list_add(&lm->list, grace_list); + else + WARN(1, "double list_add attempt detected in net %x %s\n", + net->ns.inum, (net == &init_net) ? "(init_net)" : ""); spin_unlock(&grace_lock); } EXPORT_SYMBOL_GPL(locks_start_grace); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c548190ef5ef..f463c4e0b2ea 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7012,6 +7012,10 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; nn->unconf_name_tree = RB_ROOT; + nn->boot_time = get_seconds(); + nn->grace_ended = false; + nn->nfsd4_manager.block_opens = true; + INIT_LIST_HEAD(&nn->nfsd4_manager.list); INIT_LIST_HEAD(&nn->client_lru); INIT_LIST_HEAD(&nn->close_lru); INIT_LIST_HEAD(&nn->del_recall_lru); @@ -7069,9 +7073,6 @@ nfs4_state_start_net(struct net *net) ret = nfs4_state_create_net(net); if (ret) return ret; - nn->boot_time = get_seconds(); - nn->grace_ended = false; - nn->nfsd4_manager.block_opens = true; locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", -- GitLab From d5a746cf47975a30a8ffaa799db93881512530e8 Mon Sep 17 00:00:00 2001 From: Robert Lippert Date: Mon, 27 Nov 2017 15:51:55 -0800 Subject: [PATCH 1137/1398] hwmon: (pmbus) Use 64bit math for DIRECT format values [ Upstream commit bd467e4eababe4c04272c1e646f066db02734c79 ] Power values in the 100s of watt range can easily blow past 32bit math limits when processing everything in microwatts. Use 64bit math instead to avoid these issues on common 32bit ARM BMC platforms. Fixes: 442aba78728e ("hwmon: PMBus device driver") Signed-off-by: Robert Lippert Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/pmbus/pmbus_core.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index ba59eaef2e07..d013acf3f83a 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -20,6 +20,7 @@ */ #include +#include #include #include #include @@ -476,8 +477,8 @@ static long pmbus_reg2data_linear(struct pmbus_data *data, static long pmbus_reg2data_direct(struct pmbus_data *data, struct pmbus_sensor *sensor) { - long val = (s16) sensor->data; - long m, b, R; + s64 b, val = (s16)sensor->data; + s32 m, R; m = data->info->m[sensor->class]; b = data->info->b[sensor->class]; @@ -505,11 +506,12 @@ static long pmbus_reg2data_direct(struct pmbus_data *data, R--; } while (R < 0) { - val = DIV_ROUND_CLOSEST(val, 10); + val = div_s64(val + 5LL, 10L); /* round closest */ R++; } - return (val - b) / m; + val = div_s64(val - b, m); + return clamp_val(val, LONG_MIN, LONG_MAX); } /* @@ -629,7 +631,8 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, static u16 pmbus_data2reg_direct(struct pmbus_data *data, struct pmbus_sensor *sensor, long val) { - long m, b, R; + s64 b, val64 = val; + s32 m, R; m = data->info->m[sensor->class]; b = data->info->b[sensor->class]; @@ -646,18 +649,18 @@ static u16 pmbus_data2reg_direct(struct pmbus_data *data, R -= 3; /* Adjust R and b for data in milli-units */ b *= 1000; } - val = val * m + b; + val64 = val64 * m + b; while (R > 0) { - val *= 10; + val64 *= 10; R--; } while (R < 0) { - val = DIV_ROUND_CLOSEST(val, 10); + val64 = div_s64(val64 + 5LL, 10L); /* round closest */ R++; } - return val; + return (u16)clamp_val(val64, S16_MIN, S16_MAX); } static u16 pmbus_data2reg_vid(struct pmbus_data *data, -- GitLab From 98ae1ca7534e47508719ee54657f01df55437f62 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 21 Nov 2017 20:46:49 +0100 Subject: [PATCH 1138/1398] bnxt_en: Fix an error handling path in 'bnxt_get_module_eeprom()' [ Upstream commit dea521a2b9f96e905fa2bb2f95e23ec00c2ec436 ] Error code returned by 'bnxt_read_sfp_module_eeprom_info()' is handled a few lines above when reading the A0 portion of the EEPROM. The same should be done when reading the A2 portion of the EEPROM. In order to correctly propagate an error, update 'rc' in this 2nd call as well, otherwise 0 (success) is returned. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index a7e04ff4eaed..cde4b96f3153 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1843,8 +1843,8 @@ static int bnxt_get_module_eeprom(struct net_device *dev, /* Read A2 portion of the EEPROM */ if (length) { start -= ETH_MODULE_SFF_8436_LEN; - bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, start, - length, data); + rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, + start, length, data); } return rc; } -- GitLab From 54a1fdff1b09465d4f5f06bff53fe2b1e0434673 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 23 Nov 2017 17:13:40 +0100 Subject: [PATCH 1139/1398] xfs: fortify xfs_alloc_buftarg error handling [ Upstream commit d210a9874b8f6166579408131cb74495caff1958 ] percpu_counter_init failure path doesn't clean up &btp->bt_lru list. Call list_lru_destroy in that error path. Similarly register_shrinker error path is not handled. While it is unlikely to trigger these error path, it is not impossible especially the later might fail with large NUMAs. Let's handle the failure to make the code more robust. Noticed-by: Tetsuo Handa Signed-off-by: Michal Hocko Acked-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index eca7baecc9f0..3f45d9867e10 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1785,22 +1785,27 @@ xfs_alloc_buftarg( btp->bt_bdi = blk_get_backing_dev_info(bdev); if (xfs_setsize_buftarg_early(btp, bdev)) - goto error; + goto error_free; if (list_lru_init(&btp->bt_lru)) - goto error; + goto error_free; if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) - goto error; + goto error_lru; btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; - register_shrinker(&btp->bt_shrinker); + if (register_shrinker(&btp->bt_shrinker)) + goto error_pcpu; return btp; -error: +error_pcpu: + percpu_counter_destroy(&btp->bt_io_count); +error_lru: + list_lru_destroy(&btp->bt_lru); +error_free: kmem_free(btp); return NULL; } -- GitLab From e11616d5e6c3209196c697e773cb41362419adf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 24 Nov 2017 11:39:30 +0100 Subject: [PATCH 1140/1398] drm/amdgpu: don't try to move pinned BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6edc6910ba4cd6eab309263539c8f09b8ad772bf ] Never try to move pinned BOs during CS. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f26d1fd53bef..cb505f66d3aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -416,6 +416,10 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, if (candidate == lobj) break; + /* We can't move pinned BOs here */ + if (bo->pin_count) + continue; + other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); /* Check if this BO is in one of the domains we need space for */ -- GitLab From d47907bcac94cc455a62d50e6c71ae5341cee0ea Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 29 Nov 2017 11:01:09 +0100 Subject: [PATCH 1141/1398] net: ethernet: xilinx: Mark XILINX_LL_TEMAC broken on 64-bit [ Upstream commit 15bfe05c8d6386f1a90e9340d15336e85e32aad6 ] On 64-bit (e.g. powerpc64/allmodconfig): drivers/net/ethernet/xilinx/ll_temac_main.c: In function 'temac_start_xmit_done': drivers/net/ethernet/xilinx/ll_temac_main.c:633:22: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] dev_kfree_skb_irq((struct sk_buff *)cur_p->app4); ^ cdmac_bd.app4 is u32, so it is too small to hold a kernel pointer. Note that several other fields in struct cdmac_bd are also too small to hold physical addresses on 64-bit platforms. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/xilinx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 6d68c8a8f4f2..da4ec575ccf9 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -34,6 +34,7 @@ config XILINX_AXI_EMAC config XILINX_LL_TEMAC tristate "Xilinx LL TEMAC (LocalLink Tri-mode Ethernet MAC) driver" depends on (PPC || MICROBLAZE) + depends on !64BIT || BROKEN select PHYLIB ---help--- This driver supports the Xilinx 10/100/1000 LocalLink TEMAC -- GitLab From c4ecc2f696430bbd6c4b8bf3f114ad9a86e2c9b1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 29 Nov 2017 22:34:50 +0900 Subject: [PATCH 1142/1398] quota: Check for register_shrinker() failure. [ Upstream commit 88bc0ede8d35edc969350852894dc864a2dc1859 ] register_shrinker() might return -ENOMEM error since Linux 3.12. Call panic() as with other failure checks in this function if register_shrinker() failed. Fixes: 1d3d4437eae1 ("vmscan: per-node deferred work") Signed-off-by: Tetsuo Handa Cc: Jan Kara Cc: Michal Hocko Reviewed-by: Michal Hocko Signed-off-by: Jan Kara Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/quota/dquot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 1bfac28b7e7d..f9246ac4eef8 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2985,7 +2985,8 @@ static int __init dquot_init(void) pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); - register_shrinker(&dqcache_shrinker); + if (register_shrinker(&dqcache_shrinker)) + panic("Cannot register dquot shrinker"); return 0; } -- GitLab From d2a67f7afcad12bc958f947118e94d5af05aa218 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Nov 2017 12:00:24 -0500 Subject: [PATCH 1143/1398] SUNRPC: Allow connect to return EHOSTUNREACH [ Upstream commit 4ba161a793d5f43757c35feff258d9f20a082940 ] Reported-by: Dmitry Vyukov Signed-off-by: Trond Myklebust Tested-by: Dmitry Vyukov Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e01c825bc683..d24d14ea8ba4 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2381,6 +2381,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: + case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: /* retry with existing socket, after a delay */ -- GitLab From 7b8623841f2b3845eaa5292f0b15e7f9dd0c5f42 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Wed, 29 Nov 2017 16:11:08 -0800 Subject: [PATCH 1144/1398] kmemleak: add scheduling point to kmemleak_scan() [ Upstream commit bde5f6bc68db51128f875a756e9082a6c6ff7b4c ] kmemleak_scan() will scan struct page for each node and it can be really large and resulting in a soft lockup. We have seen a soft lockup when do scan while compile kernel: watchdog: BUG: soft lockup - CPU#53 stuck for 22s! [bash:10287] [...] Call Trace: kmemleak_scan+0x21a/0x4c0 kmemleak_write+0x312/0x350 full_proxy_write+0x5a/0xa0 __vfs_write+0x33/0x150 vfs_write+0xad/0x1a0 SyS_write+0x52/0xc0 do_syscall_64+0x61/0x1a0 entry_SYSCALL64_slow_path+0x25/0x25 Fix this by adding cond_resched every MAX_SCAN_SIZE. Link: http://lkml.kernel.org/r/1511439788-20099-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Suggested-by: Catalin Marinas Acked-by: Catalin Marinas Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/kmemleak.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index d1380ed93fdf..20cf3be9a5e8 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1442,6 +1442,8 @@ static void kmemleak_scan(void) if (page_count(page) == 0) continue; scan_block(page, page + 1, NULL); + if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page)))) + cond_resched(); } } put_online_mems(); -- GitLab From 5f6a0441ca0d5dbf64ee56743028b8a9ca932e48 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:19 +0300 Subject: [PATCH 1145/1398] drm/bridge: tc358767: do no fail on hi-res displays [ Upstream commit cffd2b16c01c3431a7a7dd62e722af33490fc436 ] Do not fail data rates higher than 2.7 and more than 2 lanes. Try to fall back to 2.7Gbps and 2 lanes. Acked-by: Philipp Zabel Reviewed-by: Andrzej Hajda Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-2-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 44d476ea6d2e..14b4e58f727f 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -603,8 +603,15 @@ static int tc_get_display_props(struct tc_data *tc) ret = drm_dp_link_probe(&tc->aux, &tc->link.base); if (ret < 0) goto err_dpcd_read; - if ((tc->link.base.rate != 162000) && (tc->link.base.rate != 270000)) - goto err_dpcd_inval; + if (tc->link.base.rate != 162000 && tc->link.base.rate != 270000) { + dev_dbg(tc->dev, "Falling to 2.7 Gbps rate\n"); + tc->link.base.rate = 270000; + } + + if (tc->link.base.num_lanes > 2) { + dev_dbg(tc->dev, "Falling to 2 lanes\n"); + tc->link.base.num_lanes = 2; + } ret = drm_dp_dpcd_readb(&tc->aux, DP_MAX_DOWNSPREAD, tmp); if (ret < 0) @@ -637,9 +644,6 @@ static int tc_get_display_props(struct tc_data *tc) err_dpcd_read: dev_err(tc->dev, "failed to read DPCD: %d\n", ret); return ret; -err_dpcd_inval: - dev_err(tc->dev, "invalid DPCD\n"); - return -EINVAL; } static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) -- GitLab From 8d4bfe89aacf5dbe66fa6dd26587c7d11a9f6a7a Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:20 +0300 Subject: [PATCH 1146/1398] drm/bridge: tc358767: filter out too high modes [ Upstream commit 99fc8e963a4c0203dba26a77cf737db6081bca14 ] Pixel clock limitation for DPI is 154 MHz. Do not accept modes with higher pixel clock rate. Reviewed-by: Andrzej Hajda Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-3-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 14b4e58f727f..97087f962363 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1109,7 +1109,10 @@ static bool tc_bridge_mode_fixup(struct drm_bridge *bridge, static int tc_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - /* Accept any mode */ + /* DPI interface clock limitation: upto 154 MHz */ + if (mode->clock > 154000) + return MODE_CLOCK_HIGH; + return MODE_OK; } -- GitLab From c55908604eccc659840e52a2c17614cfe90178bb Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:21 +0300 Subject: [PATCH 1147/1398] drm/bridge: tc358767: fix DP0_MISC register set [ Upstream commit f3b8adbe1911f66fd3cab1aaa74f0f66b7ceda25 ] Remove shift from TU_SIZE_RECOMMENDED define as it used to calculate max_tu_symbols. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-4-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 97087f962363..4d348800222e 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -97,7 +97,7 @@ #define DP0_ACTIVEVAL 0x0650 #define DP0_SYNCVAL 0x0654 #define DP0_MISC 0x0658 -#define TU_SIZE_RECOMMENDED (0x3f << 16) /* LSCLK cycles per TU */ +#define TU_SIZE_RECOMMENDED (63) /* LSCLK cycles per TU */ #define BPC_6 (0 << 5) #define BPC_8 (1 << 5) @@ -716,7 +716,8 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) * Must be less than tu_size. */ max_tu_symbol = TU_SIZE_RECOMMENDED - 1; - tc_write(DP0_MISC, (max_tu_symbol << 23) | TU_SIZE_RECOMMENDED | BPC_8); + tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) | + BPC_8); return 0; err: -- GitLab From 1bdfc52c331a149e393fb199f7e361fcb8e3c9ad Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:22 +0300 Subject: [PATCH 1148/1398] drm/bridge: tc358767: fix timing calculations [ Upstream commit 66d1c3b94d5d59e4325e61a78d520f92c043d645 ] Fields in HTIM01 and HTIM02 regs should be even. Recomended thresh_dly value is max_tu_symbol. Remove set of VPCTRL0.VSDELAY as it is related to DSI input interface. Currently driver supports only DPI. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-5-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 34 ++++++++++++++++++------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 4d348800222e..821b93f68eef 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -659,6 +659,14 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) int lower_margin = mode->vsync_start - mode->vdisplay; int vsync_len = mode->vsync_end - mode->vsync_start; + /* + * Recommended maximum number of symbols transferred in a transfer unit: + * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, + * (output active video bandwidth in bytes)) + * Must be less than tu_size. + */ + max_tu_symbol = TU_SIZE_RECOMMENDED - 1; + dev_dbg(tc->dev, "set mode %dx%d\n", mode->hdisplay, mode->vdisplay); dev_dbg(tc->dev, "H margin %d,%d sync %d\n", @@ -668,13 +676,18 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) dev_dbg(tc->dev, "total: %dx%d\n", mode->htotal, mode->vtotal); - /* LCD Ctl Frame Size */ - tc_write(VPCTRL0, (0x40 << 20) /* VSDELAY */ | + /* + * LCD Ctl Frame Size + * datasheet is not clear of vsdelay in case of DPI + * assume we do not need any delay when DPI is a source of + * sync signals + */ + tc_write(VPCTRL0, (0 << 20) /* VSDELAY */ | OPXLFMT_RGB888 | FRMSYNC_DISABLED | MSF_DISABLED); - tc_write(HTIM01, (left_margin << 16) | /* H back porch */ - (hsync_len << 0)); /* Hsync */ - tc_write(HTIM02, (right_margin << 16) | /* H front porch */ - (mode->hdisplay << 0)); /* width */ + tc_write(HTIM01, (ALIGN(left_margin, 2) << 16) | /* H back porch */ + (ALIGN(hsync_len, 2) << 0)); /* Hsync */ + tc_write(HTIM02, (ALIGN(right_margin, 2) << 16) | /* H front porch */ + (ALIGN(mode->hdisplay, 2) << 0)); /* width */ tc_write(VTIM01, (upper_margin << 16) | /* V back porch */ (vsync_len << 0)); /* Vsync */ tc_write(VTIM02, (lower_margin << 16) | /* V front porch */ @@ -693,7 +706,7 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) /* DP Main Stream Attributes */ vid_sync_dly = hsync_len + left_margin + mode->hdisplay; tc_write(DP0_VIDSYNCDELAY, - (0x003e << 16) | /* thresh_dly */ + (max_tu_symbol << 16) | /* thresh_dly */ (vid_sync_dly << 0)); tc_write(DP0_TOTALVAL, (mode->vtotal << 16) | (mode->htotal)); @@ -709,13 +722,6 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) tc_write(DPIPXLFMT, VS_POL_ACTIVE_LOW | HS_POL_ACTIVE_LOW | DE_POL_ACTIVE_HIGH | SUB_CFG_TYPE_CONFIG1 | DPI_BPP_RGB888); - /* - * Recommended maximum number of symbols transferred in a transfer unit: - * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, - * (output active video bandwidth in bytes)) - * Must be less than tu_size. - */ - max_tu_symbol = TU_SIZE_RECOMMENDED - 1; tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) | BPC_8); -- GitLab From 8ae615fecee51e7ec7ee62898b2d7072764e18d6 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:23 +0300 Subject: [PATCH 1149/1398] drm/bridge: tc358767: fix AUXDATAn registers access [ Upstream commit 9217c1abbc145a77d65c476cf2004a3df02104c7 ] First four bytes should go to DP0_AUXWDATA0. Due to bug if len > 4 first four bytes was writen to DP0_AUXWDATA1 and all data get shifted by 4 bytes. Fix it. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-6-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 821b93f68eef..6d0772864935 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -318,7 +318,7 @@ static ssize_t tc_aux_transfer(struct drm_dp_aux *aux, tmp = (tmp << 8) | buf[i]; i++; if (((i % 4) == 0) || (i == size)) { - tc_write(DP0_AUXWDATA(i >> 2), tmp); + tc_write(DP0_AUXWDATA((i - 1) >> 2), tmp); tmp = 0; } } -- GitLab From f7170eb80aff7daf2221c94a92ba88dae5f971be Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:24 +0300 Subject: [PATCH 1150/1398] drm/bridge: tc358767: fix 1-lane behavior [ Upstream commit 4dbd6c03fbf88299c573d676838896c6e06aade2 ] Use drm_dp_channel_eq_ok helper Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-7-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 6d0772864935..f64f35cdc2ff 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -819,8 +819,6 @@ static int tc_main_link_setup(struct tc_data *tc) unsigned int rate; u32 dp_phy_ctrl; int timeout; - bool aligned; - bool ready; u32 value; int ret; u8 tmp[8]; @@ -965,16 +963,15 @@ static int tc_main_link_setup(struct tc_data *tc) ret = drm_dp_dpcd_read_link_status(aux, tmp + 2); if (ret < 0) goto err_dpcd_read; - ready = (tmp[2] == ((DP_CHANNEL_EQ_BITS << 4) | /* Lane1 */ - DP_CHANNEL_EQ_BITS)); /* Lane0 */ - aligned = tmp[4] & DP_INTERLANE_ALIGN_DONE; - } while ((--timeout) && !(ready && aligned)); + } while ((--timeout) && + !(drm_dp_channel_eq_ok(tmp + 2, tc->link.base.num_lanes))); if (timeout == 0) { /* Read DPCD 0x200-0x201 */ ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT, tmp, 2); if (ret < 0) goto err_dpcd_read; + dev_err(dev, "channel(s) EQ not ok\n"); dev_info(dev, "0x0200 SINK_COUNT: 0x%02x\n", tmp[0]); dev_info(dev, "0x0201 DEVICE_SERVICE_IRQ_VECTOR: 0x%02x\n", tmp[1]); @@ -985,10 +982,6 @@ static int tc_main_link_setup(struct tc_data *tc) dev_info(dev, "0x0206 ADJUST_REQUEST_LANE0_1: 0x%02x\n", tmp[6]); - if (!ready) - dev_err(dev, "Lane0/1 not ready\n"); - if (!aligned) - dev_err(dev, "Lane0/1 not aligned\n"); return -EAGAIN; } -- GitLab From 9301165c46234b06f482589dddd765e33d1136f4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 24 Sep 2017 08:01:03 +0200 Subject: [PATCH 1151/1398] drm/omap: Fix error handling path in 'omap_dmm_probe()' [ Upstream commit 8677b1ac2db021ab30bb1fa34f1e56ebe0051ec3 ] If we don't find a matching device node, we must free the memory allocated in 'omap_dmm' a few lines above. Fixes: 7cb0d6c17b96 ("drm/omap: fix TILER on OMAP5") Signed-off-by: Christophe JAILLET Signed-off-by: Tomi Valkeinen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index 4ceed7a9762f..4b83e9eeab06 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -638,7 +638,8 @@ static int omap_dmm_probe(struct platform_device *dev) match = of_match_node(dmm_of_match, dev->dev.of_node); if (!match) { dev_err(&dev->dev, "failed to find matching device node\n"); - return -ENODEV; + ret = -ENODEV; + goto fail; } omap_dmm->plat_data = match->data; -- GitLab From d96024440ee5c6126bd080e84e6e3ae5a89a39c5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Nov 2017 09:50:17 -0800 Subject: [PATCH 1152/1398] xfs: ubsan fixes [ Upstream commit 22a6c83777ac7c17d6c63891beeeac24cf5da450 ] Fix some complaints from the UBSAN about signed integer addition overflows. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_aops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index d31cd1ebd8e9..f3acecf3869d 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -391,7 +391,7 @@ xfs_map_blocks( (ip->i_df.if_flags & XFS_IFEXTENTS)); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + count > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes) count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -1295,7 +1295,7 @@ xfs_map_trim_size( if (mapping_size > size) mapping_size = size; if (offset < i_size_read(inode) && - offset + mapping_size >= i_size_read(inode)) { + (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) { /* limit mapping to block that spans EOF */ mapping_size = roundup_64(i_size_read(inode) - offset, i_blocksize(inode)); @@ -1347,7 +1347,7 @@ __xfs_get_blocks( lockmode = xfs_ilock_data_map_shared(ip); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + size > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes) size = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); -- GitLab From fde77c712ac003f023ea6ba3da30a0284c71ad92 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Tue, 28 Nov 2017 08:54:10 -0800 Subject: [PATCH 1153/1398] xfs: Properly retry failed dquot items in case of error during buffer writeback [ Upstream commit 373b0589dc8d58bc09c9a28d03611ae4fb216057 ] Once the inode item writeback errors is already fixed, it's time to fix the same problem in dquot code. Although there were no reports of users hitting this bug in dquot code (at least none I've seen), the bug is there and I was already planning to fix it when the correct approach to fix the inodes part was decided. This patch aims to fix the same problem in dquot code, regarding failed buffers being unable to be resubmitted once they are flush locked. Tested with the recently test-case sent to fstests list by Hou Tao. Reviewed-by: Brian Foster Signed-off-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_dquot.c | 14 +++++++++++--- fs/xfs/xfs_dquot_item.c | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 9d06cc30e875..7a7b3ccf2273 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1004,14 +1004,22 @@ xfs_qm_dqflush_done( * holding the lock before removing the dquot from the AIL. */ if ((lip->li_flags & XFS_LI_IN_AIL) && - lip->li_lsn == qip->qli_flush_lsn) { + ((lip->li_lsn == qip->qli_flush_lsn) || + (lip->li_flags & XFS_LI_FAILED))) { /* xfs_trans_ail_delete() drops the AIL lock. */ spin_lock(&ailp->xa_lock); - if (lip->li_lsn == qip->qli_flush_lsn) + if (lip->li_lsn == qip->qli_flush_lsn) { xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); - else + } else { + /* + * Clear the failed state since we are about to drop the + * flush lock + */ + if (lip->li_flags & XFS_LI_FAILED) + xfs_clear_li_failed(lip); spin_unlock(&ailp->xa_lock); + } } /* diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 2c7a1629e064..664dea105e76 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -137,6 +137,26 @@ xfs_qm_dqunpin_wait( wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); } +/* + * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer + * have been failed during writeback + * + * this informs the AIL that the dquot is already flush locked on the next push, + * and acquires a hold on the buffer to ensure that it isn't reclaimed before + * dirty data makes it to disk. + */ +STATIC void +xfs_dquot_item_error( + struct xfs_log_item *lip, + struct xfs_buf *bp) +{ + struct xfs_dquot *dqp; + + dqp = DQUOT_ITEM(lip)->qli_dquot; + ASSERT(!completion_done(&dqp->q_flush)); + xfs_set_li_failed(lip, bp); +} + STATIC uint xfs_qm_dquot_logitem_push( struct xfs_log_item *lip, @@ -144,13 +164,28 @@ xfs_qm_dquot_logitem_push( __acquires(&lip->li_ailp->xa_lock) { struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - struct xfs_buf *bp = NULL; + struct xfs_buf *bp = lip->li_buf; uint rval = XFS_ITEM_SUCCESS; int error; if (atomic_read(&dqp->q_pincount) > 0) return XFS_ITEM_PINNED; + /* + * The buffer containing this item failed to be written back + * previously. Resubmit the buffer for IO + */ + if (lip->li_flags & XFS_LI_FAILED) { + if (!xfs_buf_trylock(bp)) + return XFS_ITEM_LOCKED; + + if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) + rval = XFS_ITEM_FLUSHING; + + xfs_buf_unlock(bp); + return rval; + } + if (!xfs_dqlock_nowait(dqp)) return XFS_ITEM_LOCKED; @@ -242,7 +277,8 @@ static const struct xfs_item_ops xfs_dquot_item_ops = { .iop_unlock = xfs_qm_dquot_logitem_unlock, .iop_committed = xfs_qm_dquot_logitem_committed, .iop_push = xfs_qm_dquot_logitem_push, - .iop_committing = xfs_qm_dquot_logitem_committing + .iop_committing = xfs_qm_dquot_logitem_committing, + .iop_error = xfs_dquot_item_error }; /* -- GitLab From fa64914313c1437c7ba843581a0b09bc2870bec3 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Fri, 17 Nov 2017 19:14:55 -0200 Subject: [PATCH 1154/1398] scsi: aacraid: Prevent crash in case of free interrupt during scsi EH path [ Upstream commit e4717292ddebcfe231651b5aff9fa19ca158d178 ] As part of the scsi EH path, aacraid performs a reinitialization of the adapter, which encompass freeing resources and IRQs, NULLifying lots of pointers, and then initialize it all over again. We've identified a problem during the free IRQ portion of this path if CONFIG_DEBUG_SHIRQ is enabled on kernel config file. Happens that, in case this flag was set, right after free_irq() effectively clears the interrupt, it checks if it was requested as IRQF_SHARED. In positive case, it performs another call to the IRQ handler on driver. Problem is: since aacraid currently free some resources *before* freeing the IRQ, once free_irq() path calls the handler again (due to CONFIG_DEBUG_SHIRQ), aacraid crashes due to NULL pointer dereference with the following trace: aac_src_intr_message+0xf8/0x740 [aacraid] __free_irq+0x33c/0x4a0 free_irq+0x78/0xb0 aac_free_irq+0x13c/0x150 [aacraid] aac_reset_adapter+0x2e8/0x970 [aacraid] aac_eh_reset+0x3a8/0x5d0 [aacraid] scsi_try_host_reset+0x74/0x180 scsi_eh_ready_devs+0xc70/0x1510 scsi_error_handler+0x624/0xa20 This patch prevents the crash by changing the order of the deinitialization in this path of aacraid: first we clear the IRQ, then we free other resources. No functional change intended. Signed-off-by: Guilherme G. Piccoli Reviewed-by: Raghava Aditya Renukunta Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/commsup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 0aeecec1f5ea..e2962f15c189 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1416,13 +1416,13 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced) * will ensure that i/o is queisced and the card is flushed in that * case. */ + aac_free_irq(aac); aac_fib_map_free(aac); pci_free_consistent(aac->pdev, aac->comm_size, aac->comm_addr, aac->comm_phys); aac->comm_addr = NULL; aac->comm_phys = 0; kfree(aac->queues); aac->queues = NULL; - aac_free_irq(aac); kfree(aac->fsa_dev); aac->fsa_dev = NULL; quirks = aac_get_driver_ident(index)->quirks; -- GitLab From a248dc6a55b782ec8a86e3a7ee2750e60f428c45 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 20 Nov 2017 08:12:29 -0600 Subject: [PATCH 1155/1398] scsi: ufs: ufshcd: fix potential NULL pointer dereference in ufshcd_config_vreg [ Upstream commit 727535903bea924c4f73abb202c4b3e85fff0ca4 ] _vreg_ is being dereferenced before it is null checked, hence there is a potential null pointer dereference. Fix this by moving the pointer dereference after _vreg_ has been null checked. This issue was detected with the help of Coccinelle. Fixes: aa4976130934 ("ufs: Add regulator enable support") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufshcd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 530034bc2d13..2e9341233f66 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5327,12 +5327,15 @@ static int ufshcd_config_vreg(struct device *dev, struct ufs_vreg *vreg, bool on) { int ret = 0; - struct regulator *reg = vreg->reg; - const char *name = vreg->name; + struct regulator *reg; + const char *name; int min_uV, uA_load; BUG_ON(!vreg); + reg = vreg->reg; + name = vreg->name; + if (regulator_count_voltages(reg) > 0) { min_uV = on ? vreg->min_uV : 0; ret = regulator_set_voltage(reg, min_uV, vreg->max_uV); -- GitLab From 9adb2a0f9a470b59ccca26e07ed279c11200f0db Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 15 Nov 2017 14:12:30 +0200 Subject: [PATCH 1156/1398] iwlwifi: mvm: fix the TX queue hang timeout for MONITOR vif type [ Upstream commit d1b275ffec459c5ae12b5c7086c84175696e5a9f ] The MONITOR type is missing in the interface type switch. Add it. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index d04babd99b53..ff5ce1ed03c4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1040,6 +1040,8 @@ unsigned int iwl_mvm_get_wd_timeout(struct iwl_mvm *mvm, return le32_to_cpu(txq_timer->p2p_go); case NL80211_IFTYPE_P2P_DEVICE: return le32_to_cpu(txq_timer->p2p_device); + case NL80211_IFTYPE_MONITOR: + return default_timeout; default: WARN_ON(1); return mvm->cfg->base_params->wd_timeout; -- GitLab From c16c193e3abcf6ce98ac6d1e87c796070fc4fbb7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 7 Nov 2017 11:10:29 -0800 Subject: [PATCH 1157/1398] ARM: dts: NSP: Fix PPI interrupt types [ Upstream commit 5f1aa51c7a1eef1c5a60b8334e32c89904964245 ] Booting a kernel results in the kernel warning us about the following PPI interrupts configuration: [ 0.105127] smp: Bringing up secondary CPUs ... [ 0.110545] GIC: PPI11 is secure or misconfigured [ 0.110551] GIC: PPI13 is secure or misconfigured Fix this by using the appropriate edge configuration for PPI11 and PPI13, this is similar to what was fixed for Northstar (BCM5301X) in commit 0e34079cd1f6 ("ARM: dts: BCM5301X: Correct GIC_PPI interrupt flags"). Fixes: 7b2e987de207 ("ARM: NSP: add minimal Northstar Plus device tree") Fixes: 1a9d53cabaf4 ("ARM: dts: NSP: Add TWD Support to DT") Acked-by: Jon Mason Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm-nsp.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index 7c9e0fae9bb9..65e0db1d3bd7 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -85,7 +85,7 @@ timer@20200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x20200 0x100>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; }; @@ -93,7 +93,7 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0x20600 0x20>; interrupts = ; + IRQ_TYPE_EDGE_RISING)>; clocks = <&periph_clk>; }; -- GitLab From b4bfc8ef594a459a23034682a464b6212408f3f0 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sun, 16 Apr 2017 02:51:16 -0400 Subject: [PATCH 1158/1398] media: usbtv: add a new usbid [ Upstream commit 04226916d2360f56d57ad00bc48d2d1854d1e0b0 ] A new usbid of UTV007 is found in a newly bought device. The usbid is 1f71:3301. The ID on the chip is: UTV007 A89029.1 1520L18K1 Both video and audio is tested with the modified usbtv driver. Signed-off-by: Icenowy Zheng Acked-by: Lubomir Rintel Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/usbtv/usbtv-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/usb/usbtv/usbtv-core.c b/drivers/media/usb/usbtv/usbtv-core.c index dc76fd41e00f..0324633ede42 100644 --- a/drivers/media/usb/usbtv/usbtv-core.c +++ b/drivers/media/usb/usbtv/usbtv-core.c @@ -141,6 +141,7 @@ static void usbtv_disconnect(struct usb_interface *intf) static struct usb_device_id usbtv_id_table[] = { { USB_DEVICE(0x1b71, 0x3002) }, + { USB_DEVICE(0x1f71, 0x3301) }, {} }; MODULE_DEVICE_TABLE(usb, usbtv_id_table); -- GitLab From 0e216b0a0f74b06da20c03576daf6e75a77209f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 14 Nov 2017 16:18:28 +0000 Subject: [PATCH 1159/1398] usb: gadget: don't dereference g until after it has been null checked [ Upstream commit b2fc059fa549fe6881d4c1f8d698b0f50bcd16ec ] Avoid dereferencing pointer g until after g has been sanity null checked; move the assignment of cdev much later when it is required into a more local scope. Detected by CoverityScan, CID#1222135 ("Dereference before null check") Fixes: b785ea7ce662 ("usb: gadget: composite: fix ep->maxburst initialization") Signed-off-by: Colin Ian King Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 325bf21ba13b..406758ed0b23 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -150,7 +150,6 @@ int config_ep_by_speed(struct usb_gadget *g, struct usb_function *f, struct usb_ep *_ep) { - struct usb_composite_dev *cdev = get_gadget_data(g); struct usb_endpoint_descriptor *chosen_desc = NULL; struct usb_descriptor_header **speed_desc = NULL; @@ -229,8 +228,12 @@ int config_ep_by_speed(struct usb_gadget *g, _ep->maxburst = comp_desc->bMaxBurst + 1; break; default: - if (comp_desc->bMaxBurst != 0) + if (comp_desc->bMaxBurst != 0) { + struct usb_composite_dev *cdev; + + cdev = get_gadget_data(g); ERROR(cdev, "ep0 bMaxBurst must be 0\n"); + } _ep->maxburst = 1; break; } -- GitLab From ace1911b7620af13e83621049e3df9a53d38fd4e Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 25 Nov 2017 13:32:38 -0600 Subject: [PATCH 1160/1398] staging: rtl8188eu: Fix incorrect response to SIOCGIWESSID [ Upstream commit b77992d2df9e47144354d1b25328b180afa33442 ] When not associated with an AP, wifi device drivers should respond to the SIOCGIWESSID ioctl with a zero-length string for the SSID, which is the behavior expected by dhcpcd. Currently, this driver returns an error code (-1) from the ioctl call, which causes dhcpcd to assume that the device is not a wireless interface and therefore it fails to work correctly with it thereafter. This problem was reported and tested at https://github.com/lwfinger/rtl8188eu/issues/234. Signed-off-by: Larry Finger Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index 4de9dbc93380..c7bf8ab26192 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -1397,19 +1397,13 @@ static int rtw_wx_get_essid(struct net_device *dev, if ((check_fwstate(pmlmepriv, _FW_LINKED)) || (check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE))) { len = pcur_bss->Ssid.SsidLength; - - wrqu->essid.length = len; - memcpy(extra, pcur_bss->Ssid.Ssid, len); - - wrqu->essid.flags = 1; } else { - ret = -1; - goto exit; + len = 0; + *extra = 0; } - -exit: - + wrqu->essid.length = len; + wrqu->essid.flags = 1; return ret; } -- GitLab From f94b238fb856432ae84b18ddde23518b5e5e358b Mon Sep 17 00:00:00 2001 From: Stefan Schake Date: Fri, 29 Dec 2017 17:05:43 +0100 Subject: [PATCH 1161/1398] drm/vc4: Move IRQ enable to PM path [ Upstream commit ce9caf2f79a5aa170a4b6456a03db639eed9c988 ] We were calling enable_irq on bind, where it was already enabled previously by the IRQ helper. Additionally, dev->irq is not set correctly until after postinstall and so was always zero here, triggering a warning in 4.15. Fix both by moving the enable to the power management resume path, where we know there was a previous disable invocation during suspend. Fixes: 253696ccd613 ("drm/vc4: Account for interrupts in flight") Signed-off-by: Stefan Schake Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1514563543-32511-1-git-send-email-stschake@gmail.com Tested-by: Stefan Wahren Reviewed-by: Eric Anholt Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_irq.c | 3 --- drivers/gpu/drm/vc4/vc4_v3d.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index d45a7c0a7915..d96c084d3a76 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -208,9 +208,6 @@ vc4_irq_postinstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - /* Undo the effects of a previous vc4_irq_uninstall. */ - enable_irq(dev->irq); - /* Enable both the render done and out of memory interrupts. */ V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 7cc346ad9b0b..ce7c21d250cf 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -173,6 +173,9 @@ static int vc4_v3d_runtime_resume(struct device *dev) struct vc4_dev *vc4 = v3d->vc4; vc4_v3d_init_hw(vc4->dev); + + /* We disabled the IRQ as part of vc4_irq_uninstall in suspend. */ + enable_irq(vc4->dev->irq); vc4_irq_postinstall(vc4->dev); return 0; -- GitLab From 383e0620b70bc654619c12faf3fd3b2384a42a4c Mon Sep 17 00:00:00 2001 From: Dmitry Eremin Date: Thu, 25 Jan 2018 16:51:04 +0300 Subject: [PATCH 1162/1398] staging: lustre: separate a connection destroy from free struct kib_conn commit 9b046013e5837f8a58453d1e9f8e01d03adb7fe7 upstream. The logic of the original commit 4d99b2581eff ("staging: lustre: avoid intensive reconnecting for ko2iblnd") was assumed conditional free of struct kib_conn if the second argument free_conn in function kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn) is true. But this hunk of code was dropped from original commit. As result the logic works wrong and current code use struct kib_conn after free. > drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c > 3317 kiblnd_destroy_conn(conn, !peer); > ^^^^ Freed always (but should be conditionally) > 3318 > 3319 spin_lock_irqsave(lock, flags); > 3320 if (!peer) > 3321 continue; > 3322 > 3323 conn->ibc_peer = peer; > ^^^^^^^^^^^^^^ Use after free > 3324 if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE) > 3325 list_add_tail(&conn->ibc_list, > ^^^^^^^^^^^^^^ Use after free > 3326 &kiblnd_data.kib_reconn_list); > 3327 else > 3328 list_add_tail(&conn->ibc_list, > ^^^^^^^^^^^^^^ Use after free > 3329 &kiblnd_data.kib_reconn_wait); To avoid confusion this fix moved the freeing a struct kib_conn outside of the function kiblnd_destroy_conn() and free as it was intended in original commit. Fixes: 4d99b2581eff ("staging: lustre: avoid intensive reconnecting for ko2iblnd") Signed-off-by: Dmitry Eremin Reviewed-by: Andreas Dilger Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c | 7 +++---- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 2 +- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 6 ++++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index 9e8802181452..e8d9db4d8179 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -824,14 +824,15 @@ struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cm return conn; failed_2: - kiblnd_destroy_conn(conn, true); + kiblnd_destroy_conn(conn); + LIBCFS_FREE(conn, sizeof(*conn)); failed_1: LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); failed_0: return NULL; } -void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn) +void kiblnd_destroy_conn(struct kib_conn *conn) { struct rdma_cm_id *cmid = conn->ibc_cmid; struct kib_peer *peer = conn->ibc_peer; @@ -894,8 +895,6 @@ void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn) rdma_destroy_id(cmid); atomic_dec(&net->ibn_nconns); } - - LIBCFS_FREE(conn, sizeof(*conn)); } int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h index 14576977200f..30cb2f5b3c15 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h @@ -1018,7 +1018,7 @@ int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why); struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid, int state, int version); -void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn); +void kiblnd_destroy_conn(struct kib_conn *conn); void kiblnd_close_conn(struct kib_conn *conn, int error); void kiblnd_close_conn_locked(struct kib_conn *conn, int error); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index 995f2dac7f26..ea9a0c21d29d 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -3323,11 +3323,13 @@ kiblnd_connd(void *arg) spin_unlock_irqrestore(lock, flags); dropped_lock = 1; - kiblnd_destroy_conn(conn, !peer); + kiblnd_destroy_conn(conn); spin_lock_irqsave(lock, flags); - if (!peer) + if (!peer) { + kfree(conn); continue; + } conn->ibc_peer = peer; if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE) -- GitLab From 55eaecffe3d663d02084023b9fc06d5f39b97389 Mon Sep 17 00:00:00 2001 From: Gaurav Kohli Date: Tue, 23 Jan 2018 13:16:34 +0530 Subject: [PATCH 1163/1398] tty: fix data race between tty_init_dev and flush of buf commit b027e2298bd588d6fa36ed2eda97447fb3eac078 upstream. There can be a race, if receive_buf call comes before tty initialization completes in n_tty_open and tty->disc_data may be NULL. CPU0 CPU1 ---- ---- 000|n_tty_receive_buf_common() n_tty_open() -001|n_tty_receive_buf2() tty_ldisc_open.isra.3() -002|tty_ldisc_receive_buf(inline) tty_ldisc_setup() Using ldisc semaphore lock in tty_init_dev till disc_data initializes completely. Signed-off-by: Gaurav Kohli Reviewed-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 8 +++++++- drivers/tty/tty_ldisc.c | 4 ++-- include/linux/tty.h | 2 ++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 734a635e7363..8d9f9a803b42 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1543,6 +1543,9 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) "%s: %s driver does not set tty->port. This will crash the kernel later. Fix the driver!\n", __func__, tty->driver->name); + retval = tty_ldisc_lock(tty, 5 * HZ); + if (retval) + goto err_release_lock; tty->port->itty = tty; /* @@ -1553,6 +1556,7 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) retval = tty_ldisc_setup(tty, tty->link); if (retval) goto err_release_tty; + tty_ldisc_unlock(tty); /* Return the tty locked so that it cannot vanish under the caller */ return tty; @@ -1565,9 +1569,11 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) /* call the tty release_tty routine to clean out this slot */ err_release_tty: - tty_unlock(tty); + tty_ldisc_unlock(tty); tty_info_ratelimited(tty, "ldisc open failed (%d), clearing slot %d\n", retval, idx); +err_release_lock: + tty_unlock(tty); release_tty(tty, idx); return ERR_PTR(retval); } diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index b0500a0a87b8..3a9e2a2fd4c6 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -336,7 +336,7 @@ static inline void __tty_ldisc_unlock(struct tty_struct *tty) ldsem_up_write(&tty->ldisc_sem); } -static int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) +int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { int ret; @@ -347,7 +347,7 @@ static int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) return 0; } -static void tty_ldisc_unlock(struct tty_struct *tty) +void tty_ldisc_unlock(struct tty_struct *tty) { clear_bit(TTY_LDISC_HALTED, &tty->flags); __tty_ldisc_unlock(tty); diff --git a/include/linux/tty.h b/include/linux/tty.h index 40144f382516..a41244fe58d0 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -394,6 +394,8 @@ extern struct tty_struct *get_current_tty(void); /* tty_io.c */ extern int __init tty_init(void); extern const char *tty_name(const struct tty_struct *tty); +extern int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); +extern void tty_ldisc_unlock(struct tty_struct *tty); #else static inline void console_init(void) { } -- GitLab From 2ef0d2ad5ce80410a1f770cf765d3ce6a9ba45e2 Mon Sep 17 00:00:00 2001 From: OKAMOTO Yoshiaki Date: Tue, 16 Jan 2018 09:51:17 +0000 Subject: [PATCH 1164/1398] usb: option: Add support for FS040U modem commit 69341bd15018da0a662847e210f9b2380c71e623 upstream. FS040U modem is manufactured by omega, and sold by Fujisoft. This patch adds ID of the modem to use option1 driver. Interface 3 is used as qmi_wwan, so the interface is ignored. Signed-off-by: Yoshiaki Okamoto Signed-off-by: Hiroyuki Yamamoto Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index a818c43a02ec..1799aa058a5b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -383,6 +383,9 @@ static void option_instat_callback(struct urb *urb); #define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603 #define FOUR_G_SYSTEMS_PRODUCT_W100 0x9b01 +/* Fujisoft products */ +#define FUJISOFT_PRODUCT_FS040U 0x9b02 + /* iBall 3.5G connect wireless modem */ #define IBALL_3_5G_CONNECT 0x9605 @@ -1897,6 +1900,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100), .driver_info = (kernel_ulong_t)&four_g_w100_blacklist }, + {USB_DEVICE(LONGCHEER_VENDOR_ID, FUJISOFT_PRODUCT_FS040U), + .driver_info = (kernel_ulong_t)&net_intf3_blacklist}, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9801, 0xff), .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, -- GitLab From 068cc4ad2b233eaff1d60552353daee8c047f5e2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 25 Jan 2018 09:48:55 +0100 Subject: [PATCH 1165/1398] USB: serial: pl2303: new device id for Chilitag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d08dd3f3dd2ae351b793fc5b76abdbf0fd317b12 upstream. This adds a new device id for Chilitag devices to the pl2303 driver. Reported-by: "Chu.Mike [朱堅宜]" Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index a51b28379850..3da25ad267a2 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -39,6 +39,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ2) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_DCU11) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ3) }, + { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_CHILITAG) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_PHAROS) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ALDIGA) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MMX) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 3b5a15d1dc0d..123289085ee2 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -17,6 +17,7 @@ #define PL2303_PRODUCT_ID_DCU11 0x1234 #define PL2303_PRODUCT_ID_PHAROS 0xaaa0 #define PL2303_PRODUCT_ID_RSAQ3 0xaaa2 +#define PL2303_PRODUCT_ID_CHILITAG 0xaaa8 #define PL2303_PRODUCT_ID_ALDIGA 0x0611 #define PL2303_PRODUCT_ID_MMX 0x0612 #define PL2303_PRODUCT_ID_GPRS 0x0609 -- GitLab From c3b1f3137751ca44d90a416f741794465641b522 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 14 Jan 2018 16:09:00 +0100 Subject: [PATCH 1166/1398] USB: cdc-acm: Do not log urb submission errors on disconnect commit f0386c083c2ce85284dc0b419d7b89c8e567c09f upstream. When disconnected sometimes the cdc-acm driver logs errors like these: [20278.039417] cdc_acm 2-2:2.1: urb 9 failed submission with -19 [20278.042924] cdc_acm 2-2:2.1: urb 10 failed submission with -19 [20278.046449] cdc_acm 2-2:2.1: urb 11 failed submission with -19 [20278.049920] cdc_acm 2-2:2.1: urb 12 failed submission with -19 [20278.053442] cdc_acm 2-2:2.1: urb 13 failed submission with -19 [20278.056915] cdc_acm 2-2:2.1: urb 14 failed submission with -19 [20278.060418] cdc_acm 2-2:2.1: urb 15 failed submission with -19 Silence these by not logging errors when the result is -ENODEV. Signed-off-by: Hans de Goede Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index ea20b2cc189f..ec8fb55d85f8 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -375,7 +375,7 @@ static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags) res = usb_submit_urb(acm->read_urbs[index], mem_flags); if (res) { - if (res != -EPERM) { + if (res != -EPERM && res != -ENODEV) { dev_err(&acm->data->dev, "urb %d failed submission with %d\n", index, res); -- GitLab From aa6a93fd0c382cda250876825c981ed270059e50 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 18 Jan 2018 12:13:45 +0100 Subject: [PATCH 1167/1398] CDC-ACM: apply quirk for card reader commit df1cc78a52491f71d8170d513d0f6f114faa1bda upstream. This devices drops random bytes from messages if you talk to it too fast. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index ec8fb55d85f8..34d23cc99fbd 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1706,6 +1706,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0ace, 0x1611), /* ZyDAS 56K USB MODEM - new version */ .driver_info = SINGLE_RX_URB, /* firmware bug */ }, + { USB_DEVICE(0x11ca, 0x0201), /* VeriFone Mx870 Gadget Serial */ + .driver_info = SINGLE_RX_URB, + }, { USB_DEVICE(0x22b8, 0x7000), /* Motorola Q Phone */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, -- GitLab From ec719c52af164f2eaff0e5bff3b6074ded70c9bf Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 13 Dec 2017 20:34:36 +0800 Subject: [PATCH 1168/1398] USB: serial: io_edgeport: fix possible sleep-in-atomic commit c7b8f77872c73f69a16528a9eb87afefcccdc18b upstream. According to drivers/usb/serial/io_edgeport.c, the driver may sleep under a spinlock. The function call path is: edge_bulk_in_callback (acquire the spinlock) process_rcvd_data process_rcvd_status change_port_settings send_iosp_ext_cmd write_cmd_usb usb_kill_urb --> may sleep To fix it, the redundant usb_kill_urb() is removed from the error path after usb_submit_urb() fails. This possible bug is found by my static analysis tool (DSAC) and checked by my code review. Signed-off-by: Jia-Ju Bai Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_edgeport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 464db17b5328..de61271f2ba3 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -2215,7 +2215,6 @@ static int write_cmd_usb(struct edgeport_port *edge_port, /* something went wrong */ dev_err(dev, "%s - usb_submit_urb(write command) failed, status = %d\n", __func__, status); - usb_kill_urb(urb); usb_free_urb(urb); atomic_dec(&CmdUrbs); return status; -- GitLab From 4c6fcc3425e19d1f63fd5a7f4f4408ce8f688a48 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 17 Jan 2018 12:07:30 -0700 Subject: [PATCH 1169/1398] usbip: prevent bind loops on devices attached to vhci_hcd commit ef54cf0c600fb8f5737fb001a9e357edda1a1de8 upstream. usbip host binds to devices attached to vhci_hcd on the same server when user does attach over localhost or specifies the server as the remote. usbip attach -r localhost -b busid or usbip attach -r servername (or server IP) Unbind followed by bind works, however device is left in a bad state with accesses via the attached busid result in errors and system hangs during shutdown. Fix it to check and bail out if the device is already attached to vhci_hcd. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip_bind.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/usb/usbip/src/usbip_bind.c b/tools/usb/usbip/src/usbip_bind.c index fa46141ae68b..e121cfb1746a 100644 --- a/tools/usb/usbip/src/usbip_bind.c +++ b/tools/usb/usbip/src/usbip_bind.c @@ -144,6 +144,7 @@ static int bind_device(char *busid) int rc; struct udev *udev; struct udev_device *dev; + const char *devpath; /* Check whether the device with this bus ID exists. */ udev = udev_new(); @@ -152,8 +153,16 @@ static int bind_device(char *busid) err("device with the specified bus ID does not exist"); return -1; } + devpath = udev_device_get_devpath(dev); udev_unref(udev); + /* If the device is already attached to vhci_hcd - bail out */ + if (strstr(devpath, USBIP_VHCI_DRV_NAME)) { + err("bind loop detected: device: %s is attached to %s\n", + devpath, USBIP_VHCI_DRV_NAME); + return -1; + } + rc = unbind_other(busid); if (rc == UNBIND_ST_FAILED) { err("could not unbind driver from device on busid %s", busid); -- GitLab From f80079536bb6b2c733776abb19c1f3e8c48923b6 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 17 Jan 2018 12:08:03 -0700 Subject: [PATCH 1170/1398] usbip: list: don't list devices attached to vhci_hcd commit ef824501f50846589f02173d73ce3fe6021a9d2a upstream. usbip host lists devices attached to vhci_hcd on the same server when user does attach over localhost or specifies the server as the remote. usbip attach -r localhost -b busid or usbip attach -r servername (or server IP) Fix it to check and not list devices that are attached to vhci_hcd. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip_list.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c index f1b38e866dd7..d65a9f444174 100644 --- a/tools/usb/usbip/src/usbip_list.c +++ b/tools/usb/usbip/src/usbip_list.c @@ -187,6 +187,7 @@ static int list_devices(bool parsable) const char *busid; char product_name[128]; int ret = -1; + const char *devpath; /* Create libudev context. */ udev = udev_new(); @@ -209,6 +210,14 @@ static int list_devices(bool parsable) path = udev_list_entry_get_name(dev_list_entry); dev = udev_device_new_from_syspath(udev, path); + /* Ignore devices attached to vhci_hcd */ + devpath = udev_device_get_devpath(dev); + if (strstr(devpath, USBIP_VHCI_DRV_NAME)) { + dbg("Skip the device %s already attached to %s\n", + devpath, USBIP_VHCI_DRV_NAME); + continue; + } + /* Get device information. */ idVendor = udev_device_get_sysattr_value(dev, "idVendor"); idProduct = udev_device_get_sysattr_value(dev, "idProduct"); -- GitLab From 800de0fab17ac3e8656c86214d23299fedbf6ca4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 18 Jan 2018 14:46:41 +1100 Subject: [PATCH 1171/1398] USB: serial: simple: add Motorola Tetra driver commit 46fe895e22ab3845515ec06b01eaf1282b342e29 upstream. Add new Motorola Tetra (simple) driver for Motorola Solutions TETRA PEI devices. D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=0cad ProdID=9011 Rev=24.16 S: Manufacturer=Motorola Solutions Inc. S: Product=Motorola Solutions TETRA PEI interface C: #Ifs= 2 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) Note that these devices do not support the CDC SET_CONTROL_LINE_STATE request (for any interface). Reported-by: Max Schulze Tested-by: Max Schulze Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/Kconfig | 1 + drivers/usb/serial/usb-serial-simple.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig index 56ecb8b5115d..584ae8cbaf1c 100644 --- a/drivers/usb/serial/Kconfig +++ b/drivers/usb/serial/Kconfig @@ -63,6 +63,7 @@ config USB_SERIAL_SIMPLE - Google USB serial devices - HP4x calculators - a number of Motorola phones + - Motorola Tetra devices - Novatel Wireless GPS receivers - Siemens USB/MPI adapter. - ViVOtech ViVOpay USB device. diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index e98b6e57b703..6aa7ff2c1cf7 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -80,6 +80,11 @@ DEVICE(vivopay, VIVOPAY_IDS); { USB_DEVICE(0x22b8, 0x2c64) } /* Motorola V950 phone */ DEVICE(moto_modem, MOTO_IDS); +/* Motorola Tetra driver */ +#define MOTOROLA_TETRA_IDS() \ + { USB_DEVICE(0x0cad, 0x9011) } /* Motorola Solutions TETRA PEI */ +DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); + /* Novatel Wireless GPS driver */ #define NOVATEL_IDS() \ { USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */ @@ -110,6 +115,7 @@ static struct usb_serial_driver * const serial_drivers[] = { &google_device, &vivopay_device, &moto_modem_device, + &motorola_tetra_device, &novatel_gps_device, &hp4x_device, &suunto_device, @@ -125,6 +131,7 @@ static const struct usb_device_id id_table[] = { GOOGLE_IDS(), VIVOPAY_IDS(), MOTO_IDS(), + MOTOROLA_TETRA_IDS(), NOVATEL_IDS(), HP4X_IDS(), SUUNTO_IDS(), -- GitLab From f24d171a8100fb50a20fc6bb8c750fe9c452b9dc Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Tue, 9 Jan 2018 12:30:53 +0530 Subject: [PATCH 1172/1398] usb: f_fs: Prevent gadget unbind if it is already unbound commit ce5bf9a50daf2d9078b505aca1cea22e88ecb94a upstream. Upon usb composition switch there is possibility of ep0 file release happening after gadget driver bind. In case of composition switch from adb to a non-adb composition gadget will never gets bound again resulting into failure of usb device enumeration. Fix this issue by checking FFS_FL_BOUND flag and avoid extra gadget driver unbind if it is already done as part of composition switch. This fixes adb reconnection error reported on Android running v4.4 and above kernel versions. Verified on Hikey running vanilla v4.15-rc7 + few out of tree Mali patches. Reviewed-at: https://android-review.googlesource.com/#/c/582632/ Cc: Felipe Balbi Cc: Greg KH Cc: Michal Nazarewicz Cc: John Stultz Cc: Dmitry Shmidt Cc: Badhri Cc: Android Kernel Team Signed-off-by: Hemant Kumar [AmitP: Cherry-picked it from android-4.14 and updated the commit log] Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 7b107e43b1c4..d90bf57ba30e 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3725,7 +3725,8 @@ static void ffs_closed(struct ffs_data *ffs) ci = opts->func_inst.group.cg_item.ci_parent->ci_parent; ffs_dev_unlock(); - unregister_gadget_item(ci); + if (test_bit(FFS_FL_BOUND, &ffs->flags)) + unregister_gadget_item(ci); return; done: ffs_dev_unlock(); -- GitLab From 92e64a1079fac557ebb48ce266d23070924109ef Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 11 Jan 2018 13:10:16 +0100 Subject: [PATCH 1173/1398] usb: uas: unconditionally bring back host after reset commit cbeef22fd611c4f47c494b821b2b105b8af970bb upstream. Quoting Hans: If we return 1 from our post_reset handler, then our disconnect handler will be called immediately afterwards. Since pre_reset blocks all scsi requests our disconnect handler will then hang in the scsi_remove_host call. This is esp. bad because our disconnect handler hanging for ever also stops the USB subsys from enumerating any new USB devices, causes commands like lsusb to hang, etc. In practice this happens when unplugging some uas devices because the hub code may see the device as needing a warm-reset and calls usb_reset_device before seeing the disconnect. In this case uas_configure_endpoints fails with -ENODEV. We do not want to print an error for this, so this commit also silences the shost_printk for -ENODEV. ENDQUOTE However, if we do that we better drop any unconditional execution and report to the SCSI subsystem that we have undergone a reset but we are not operational now. Signed-off-by: Oliver Neukum Reported-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 9876af4ab64e..6891e9092775 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -1076,20 +1076,19 @@ static int uas_post_reset(struct usb_interface *intf) return 0; err = uas_configure_endpoints(devinfo); - if (err) { + if (err && err != ENODEV) shost_printk(KERN_ERR, shost, "%s: alloc streams error %d after reset", __func__, err); - return 1; - } + /* we must unblock the host in every case lest we deadlock */ spin_lock_irqsave(shost->host_lock, flags); scsi_report_bus_reset(shost, 0); spin_unlock_irqrestore(shost->host_lock, flags); scsi_unblock_requests(shost); - return 0; + return err ? 1 : 0; } static int uas_suspend(struct usb_interface *intf, pm_message_t message) -- GitLab From 57d4bb1beecb1b34237ddc8a08e819a0da4243b1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 12 Jan 2018 17:50:02 +1100 Subject: [PATCH 1174/1398] usb/gadget: Fix "high bandwidth" check in usb_gadget_ep_match_desc() commit 11fb37998759c48e4e4c200c974593cbeab25d3e upstream. The current code tries to test for bits that are masked out by usb_endpoint_maxp(). Instead, use the proper accessor to access the new high bandwidth bits. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index d685d82dcf48..e97539fc127e 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -913,7 +913,7 @@ int usb_gadget_ep_match_desc(struct usb_gadget *gadget, return 0; /* "high bandwidth" works only at high speed */ - if (!gadget_is_dualspeed(gadget) && usb_endpoint_maxp(desc) & (3<<11)) + if (!gadget_is_dualspeed(gadget) && usb_endpoint_maxp_mult(desc) > 1) return 0; switch (type) { -- GitLab From 9df847674ede722e12094def6520fd2ff98452df Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 26 Jan 2018 11:54:35 -0700 Subject: [PATCH 1175/1398] usbip: vhci_hcd: clear just the USB_PORT_STAT_POWER bit Upstream commit 1c9de5bf4286 ("usbip: vhci-hcd: Add USB3 SuperSpeed support") vhci_hcd clears all the bits port_status bits instead of clearing just the USB_PORT_STAT_POWER bit when it handles ClearPortFeature: USB_PORT_FEAT_POWER. This causes vhci_hcd attach to fail in a bad state, leaving device unusable by the client. The device is still attached and however client can't use it. The problem was fixed as part of larger change to add USB3 Super Speed support. This patch backports just the change to clear the USB_PORT_STAT_POWER. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 7f161b095176..dbe615ba07c9 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -300,7 +300,7 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_POWER: usbip_dbg_vhci_rh( " ClearPortFeature: USB_PORT_FEAT_POWER\n"); - dum->port_status[rhport] = 0; + dum->port_status[rhport] &= ~USB_PORT_STAT_POWER; dum->resuming = 0; break; case USB_PORT_FEAT_C_RESET: -- GitLab From 5846849a1ac75c4a4898f09f3835ac737bb888d5 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 4 Jan 2018 15:58:34 -0200 Subject: [PATCH 1176/1398] serial: imx: Only wakeup via RTSDEN bit if the system has RTS/CTS commit 38b1f0fb42f772b8c9aac53593883a18ff5eb9d7 upstream. The wakeup mechanism via RTSDEN bit relies on the system using the RTS/CTS lines, so only allow such wakeup method when the system actually has RTS/CTS support. Fixes: bc85734b126f ("serial: imx: allow waking up on RTSD") Signed-off-by: Fabio Estevam Reviewed-by: Martin Kaiser Acked-by: Fugang Duan Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index a70356dad1b7..521a6e450755 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2239,12 +2239,14 @@ static void serial_imx_enable_wakeup(struct imx_port *sport, bool on) val &= ~UCR3_AWAKEN; writel(val, sport->port.membase + UCR3); - val = readl(sport->port.membase + UCR1); - if (on) - val |= UCR1_RTSDEN; - else - val &= ~UCR1_RTSDEN; - writel(val, sport->port.membase + UCR1); + if (sport->have_rtscts) { + val = readl(sport->port.membase + UCR1); + if (on) + val |= UCR1_RTSDEN; + else + val &= ~UCR1_RTSDEN; + writel(val, sport->port.membase + UCR1); + } } static int imx_serial_port_suspend_noirq(struct device *dev) -- GitLab From 1333c3e996eb799286ee2ef2c01752da45bf926f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 7 Jan 2018 15:05:49 +0100 Subject: [PATCH 1177/1398] spi: imx: do not access registers while clocks disabled commit d593574aff0ab846136190b1729c151c736727ec upstream. Since clocks are disabled except during message transfer clocks are also disabled when spi_imx_remove gets called. Accessing registers leads to a freeeze at least on a i.MX 6ULL. Enable clocks before disabling accessing the MXC_CSPICTRL register. Fixes: 9e556dcc55774 ("spi: spi-imx: only enable the clocks when we start to transfer a message") Signed-off-by: Stefan Agner Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-imx.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index deb782f6556c..a6e34f05d44d 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1307,12 +1307,23 @@ static int spi_imx_remove(struct platform_device *pdev) { struct spi_master *master = platform_get_drvdata(pdev); struct spi_imx_data *spi_imx = spi_master_get_devdata(master); + int ret; spi_bitbang_stop(&spi_imx->bitbang); + ret = clk_enable(spi_imx->clk_per); + if (ret) + return ret; + + ret = clk_enable(spi_imx->clk_ipg); + if (ret) { + clk_disable(spi_imx->clk_per); + return ret; + } + writel(0, spi_imx->base + MXC_CSPICTRL); - clk_unprepare(spi_imx->clk_ipg); - clk_unprepare(spi_imx->clk_per); + clk_disable_unprepare(spi_imx->clk_ipg); + clk_disable_unprepare(spi_imx->clk_per); spi_imx_sdma_exit(spi_imx); spi_master_put(master); -- GitLab From 331b057d4f3ccf2290e6e651b5728db81e9249c6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 3 Feb 2018 17:05:43 +0100 Subject: [PATCH 1178/1398] Linux 4.9.80 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4a7e6dff1c2e..9550b6939076 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 79 +SUBLEVEL = 80 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 550c01d0e051461437d6e9d72f573759e7bc5047 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 5 Jan 2018 11:27:07 +0100 Subject: [PATCH 1179/1398] UPSTREAM: ANDROID: binder: remove waitqueue when thread exits. binder_poll() passes the thread->wait waitqueue that can be slept on for work. When a thread that uses epoll explicitly exits using BINDER_THREAD_EXIT, the waitqueue is freed, but it is never removed from the corresponding epoll data structure. When the process subsequently exits, the epoll cleanup code tries to access the waitlist, which results in a use-after-free. Prevent this by using POLLFREE when the thread exits. (cherry picked from commit f5cb779ba16334b45ba8946d6bfa6d9834d1527f) Change-Id: Ib34b1cbb8ab2192d78c3d9956b2f963a66ecad2e Signed-off-by: Martijn Coenen Reported-by: syzbot Cc: stable # 4.14 Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 6b4a99162198..bb48a7b6ee2d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4535,6 +4535,18 @@ static int binder_thread_release(struct binder_proc *proc, if (t) spin_lock(&t->lock); } + + /* + * If this thread used poll, make sure we remove the waitqueue + * from any epoll data structures holding it with POLLFREE. + * waitqueue_active() is safe to use here because we're holding + * the inner lock. + */ + if ((thread->looper & BINDER_LOOPER_STATE_POLL) && + waitqueue_active(&thread->wait)) { + wake_up_poll(&thread->wait, POLLHUP | POLLFREE); + } + binder_inner_proc_unlock(thread->proc); if (send_reply) -- GitLab From 20c8a0089294504692c17419fe67381689325d6d Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Tue, 28 Nov 2017 18:22:11 -0800 Subject: [PATCH 1180/1398] ANDROID: qtaguid: Fix the UAF probelm with tag_ref_tree When multiple threads is trying to tag/delete the same socket at the same time, there is a chance the tag_ref_entry of the target socket to be null before the uid_tag_data entry is freed. It is caused by the ctrl_cmd_tag function where it doesn't correctly grab the spinlocks when tagging a socket. Signed-off-by: Chenbo Feng Bug: 65853158 Change-Id: I5d89885918054cf835370a52bff2d693362ac5f0 --- net/netfilter/xt_qtaguid.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index cd8309c0936e..3dfa5a000853 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -519,13 +519,11 @@ static struct tag_ref *get_tag_ref(tag_t full_tag, DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n", full_tag); - spin_lock_bh(&uid_tag_data_tree_lock); tr_entry = lookup_tag_ref(full_tag, &utd_entry); BUG_ON(IS_ERR_OR_NULL(utd_entry)); if (!tr_entry) tr_entry = new_tag_ref(full_tag, utd_entry); - spin_unlock_bh(&uid_tag_data_tree_lock); if (utd_res) *utd_res = utd_entry; DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n", @@ -2027,6 +2025,7 @@ static int ctrl_cmd_delete(const char *input) /* Delete socket tags */ spin_lock_bh(&sock_tag_list_lock); + spin_lock_bh(&uid_tag_data_tree_lock); node = rb_first(&sock_tag_tree); while (node) { st_entry = rb_entry(node, struct sock_tag, sock_node); @@ -2056,6 +2055,7 @@ static int ctrl_cmd_delete(const char *input) list_del(&st_entry->list); } } + spin_unlock_bh(&uid_tag_data_tree_lock); spin_unlock_bh(&sock_tag_list_lock); sock_tag_tree_erase(&st_to_free_tree); @@ -2265,10 +2265,12 @@ static int ctrl_cmd_tag(const char *input) full_tag = combine_atag_with_uid(acct_tag, uid_int); spin_lock_bh(&sock_tag_list_lock); + spin_lock_bh(&uid_tag_data_tree_lock); sock_tag_entry = get_sock_stat_nl(el_socket->sk); tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry); if (IS_ERR(tag_ref_entry)) { res = PTR_ERR(tag_ref_entry); + spin_unlock_bh(&uid_tag_data_tree_lock); spin_unlock_bh(&sock_tag_list_lock); goto err_put; } @@ -2295,9 +2297,14 @@ static int ctrl_cmd_tag(const char *input) pr_err("qtaguid: ctrl_tag(%s): " "socket tag alloc failed\n", input); + BUG_ON(tag_ref_entry->num_sock_tags <= 0); + tag_ref_entry->num_sock_tags--; + free_tag_ref_from_utd_entry(tag_ref_entry, + uid_tag_data_entry); + spin_unlock_bh(&uid_tag_data_tree_lock); spin_unlock_bh(&sock_tag_list_lock); res = -ENOMEM; - goto err_tag_unref_put; + goto err_put; } /* * Hold the sk refcount here to make sure the sk pointer cannot @@ -2307,7 +2314,6 @@ static int ctrl_cmd_tag(const char *input) sock_tag_entry->sk = el_socket->sk; sock_tag_entry->pid = current->tgid; sock_tag_entry->tag = combine_atag_with_uid(acct_tag, uid_int); - spin_lock_bh(&uid_tag_data_tree_lock); pqd_entry = proc_qtu_data_tree_search( &proc_qtu_data_tree, current->tgid); /* @@ -2325,11 +2331,11 @@ static int ctrl_cmd_tag(const char *input) else list_add(&sock_tag_entry->list, &pqd_entry->sock_tag_list); - spin_unlock_bh(&uid_tag_data_tree_lock); sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree); atomic64_inc(&qtu_events.sockets_tagged); } + spin_unlock_bh(&uid_tag_data_tree_lock); spin_unlock_bh(&sock_tag_list_lock); /* We keep the ref to the sk until it is untagged */ CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->sk_refcnt=%d\n", @@ -2338,10 +2344,6 @@ static int ctrl_cmd_tag(const char *input) sockfd_put(el_socket); return 0; -err_tag_unref_put: - BUG_ON(tag_ref_entry->num_sock_tags <= 0); - tag_ref_entry->num_sock_tags--; - free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry); err_put: CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->sk_refcnt=%d\n", input, atomic_read(&el_socket->sk->sk_refcnt) - 1); -- GitLab From 8a174b4749d384865b60690cb33cf9f2dc490986 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Wed, 24 Jan 2018 09:25:24 +0000 Subject: [PATCH 1181/1398] sched/fair: prevent possible infinite loop in sched_group_energy There is a race between hotplug and energy_diff which might result in endless loop in sched_group_energy. When this happens, the end condition cannot be detected. We can store how many CPUs we need to visit at the beginning, and bail out of the energy calculation if we visit more cpus than expected. Bug: 72311797 72202633 Change-Id: I8dda75468ee1570da4071cd8165ef5131a8205d8 Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b0573f2d8646..bae83f7f1acf 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5633,10 +5633,22 @@ static int sched_group_energy(struct energy_env *eenv) { struct cpumask visit_cpus; u64 total_energy = 0; + int cpu_count; WARN_ON(!eenv->sg_top->sge); cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top)); + /* If a cpu is hotplugged in while we are in this function, + * it does not appear in the existing visit_cpus mask + * which came from the sched_group pointer of the + * sched_domain pointed at by sd_ea for either the prev + * or next cpu and was dereferenced in __energy_diff. + * Since we will dereference sd_scs later as we iterate + * through the CPUs we expect to visit, new CPUs can + * be present which are not in the visit_cpus mask. + * Guard this with cpu_count. + */ + cpu_count = cpumask_weight(&visit_cpus); while (!cpumask_empty(&visit_cpus)) { struct sched_group *sg_shared_cap = NULL; @@ -5646,6 +5658,8 @@ static int sched_group_energy(struct energy_env *eenv) /* * Is the group utilization affected by cpus outside this * sched_group? + * This sd may have groups with cpus which were not present + * when we took visit_cpus. */ sd = rcu_dereference(per_cpu(sd_scs, cpu)); @@ -5695,8 +5709,24 @@ static int sched_group_energy(struct energy_env *eenv) total_energy += sg_busy_energy + sg_idle_energy; - if (!sd->child) + if (!sd->child) { + /* + * cpu_count here is the number of + * cpus we expect to visit in this + * calculation. If we race against + * hotplug, we can have extra cpus + * added to the groups we are + * iterating which do not appear in + * the visit_cpus mask. In that case + * we are not able to calculate energy + * without restarting so we will bail + * out and use prev_cpu this time. + */ + if (!cpu_count) + return -EINVAL; cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg)); + cpu_count--; + } if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top))) goto next_cpu; @@ -5708,6 +5738,9 @@ static int sched_group_energy(struct energy_env *eenv) * If we raced with hotplug and got an sd NULL-pointer; * returning a wrong energy estimation is better than * entering an infinite loop. + * Specifically: If a cpu is unplugged after we took + * the visit_cpus mask, it no longer has an sd_scs + * pointer, so when we dereference it, we get NULL. */ if (cpumask_test_cpu(cpu, &visit_cpus)) return -EINVAL; -- GitLab From be6641a7e6f79b69446e7f1c44bab75bf20f1665 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 9 Jan 2018 03:52:05 +1100 Subject: [PATCH 1182/1398] powerpc/pseries: Add H_GET_CPU_CHARACTERISTICS flags & wrapper commit 191eccb1580939fb0d47deb405b82a85b0379070 upstream. A new hypervisor call has been defined to communicate various characteristics of the CPU to guests. Add definitions for the hcall number, flags and a wrapper function. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman [Balbir fixed conflicts in backport] Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/hvcall.h | 17 +++++++++++++++++ arch/powerpc/include/asm/plpar_wrappers.h | 14 ++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 708edebcf147..0e12cb2437d1 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -240,6 +240,7 @@ #define H_GET_HCA_INFO 0x1B8 #define H_GET_PERF_COUNT 0x1BC #define H_MANAGE_TRACE 0x1C0 +#define H_GET_CPU_CHARACTERISTICS 0x1C8 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 #define H_QUERY_INT_STATE 0x1E4 #define H_POLL_PENDING 0x1D8 @@ -306,6 +307,17 @@ #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 #define H_SET_MODE_RESOURCE_LE 4 +/* H_GET_CPU_CHARACTERISTICS return values */ +#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0 +#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1 +#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 +#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 +#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 + +#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 +#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 +#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 + #ifndef __ASSEMBLY__ /** @@ -433,6 +445,11 @@ static inline unsigned long cmo_get_page_size(void) } #endif /* CONFIG_PPC_PSERIES */ +struct h_cpu_char_result { + u64 character; + u64 behaviour; +}; + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 1b394247afc2..4e53b8570d1f 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -340,4 +340,18 @@ static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawr return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); } +static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf); + if (rc == H_SUCCESS) { + p->character = retbuf[0]; + p->behaviour = retbuf[1]; + } + + return rc; +} + #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ -- GitLab From 8fd3f98d0f4d5a9044b900de950ab02164968d27 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 1183/1398] powerpc/64: Add macros for annotating the destination of rfid/hrfid commit 50e51c13b3822d14ff6df4279423e4b7b2269bc3 upstream. The rfid/hrfid ((Hypervisor) Return From Interrupt) instruction is used for switching from the kernel to userspace, and from the hypervisor to the guest kernel. However it can and is also used for other transitions, eg. from real mode kernel code to virtual mode kernel code, and it's not always clear from the code what the destination context is. To make it clearer when reading the code, add macros which encode the expected destination context. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/exception-64e.h | 6 +++++ arch/powerpc/include/asm/exception-64s.h | 29 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index a703452d67b6..555e22d5e07f 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -209,5 +209,11 @@ exc_##label##_book3e: ori r3,r3,vector_offset@l; \ mtspr SPRN_IVOR##vector_number,r3; +#define RFI_TO_KERNEL \ + rfi + +#define RFI_TO_USER \ + rfi + #endif /* _ASM_POWERPC_EXCEPTION_64E_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 9a3eee661297..6771cbe44594 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -51,6 +51,35 @@ #define EX_PPR 88 /* SMT thread status register (priority) */ #define EX_CTR 96 +/* Macros for annotating the expected destination of (h)rfid */ + +#define RFI_TO_KERNEL \ + rfid + +#define RFI_TO_USER \ + rfid + +#define RFI_TO_USER_OR_KERNEL \ + rfid + +#define RFI_TO_GUEST \ + rfid + +#define HRFI_TO_KERNEL \ + hrfid + +#define HRFI_TO_USER \ + hrfid + +#define HRFI_TO_USER_OR_KERNEL \ + hrfid + +#define HRFI_TO_GUEST \ + hrfid + +#define HRFI_TO_UNKNOWN \ + hrfid + #ifdef CONFIG_RELOCATABLE #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ -- GitLab From 9d914324d966497f4d40cfa9333cbe55150cc09b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 1184/1398] powerpc/64: Convert fast_exception_return to use RFI_TO_USER/KERNEL commit a08f828cf47e6c605af21d2cdec68f84e799c318 upstream. Similar to the syscall return path, in fast_exception_return we may be returning to user or kernel context. We already have a test for that, because we conditionally restore r13. So use that existing test and branch, and bifurcate the return based on that. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/entry_64.S | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index caa659671599..d267d3bcde90 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -859,7 +859,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) -1: + mtspr SPRN_SRR1,r3 ld r2,_CCR(r1) @@ -872,8 +872,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r3,GPR3(r1) ld r4,GPR4(r1) ld r1,GPR1(r1) + RFI_TO_USER + b . /* prevent speculative execution */ - rfid +1: mtspr SPRN_SRR1,r3 + + ld r2,_CCR(r1) + mtcrf 0xFF,r2 + ld r2,_NIP(r1) + mtspr SPRN_SRR0,r2 + + ld r0,GPR0(r1) + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r1,GPR1(r1) + RFI_TO_KERNEL b . /* prevent speculative execution */ #endif /* CONFIG_PPC_BOOK3E */ -- GitLab From 00e40620a51ebee4ea002ec2efcd64f1960cb964 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 1185/1398] powerpc/64: Convert the syscall exit path to use RFI_TO_USER/KERNEL commit b8e90cb7bc04a509e821e82ab6ed7a8ef11ba333 upstream. In the syscall exit path we may be returning to user or kernel context. We already have a test for that, because we conditionally restore r13. So use that existing test and branch, and bifurcate the return based on that. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/entry_64.S | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d267d3bcde90..c33b69d10919 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -251,13 +251,23 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ + ld r2,GPR2(r1) + ld r1,GPR1(r1) + mtlr r4 + mtcr r5 + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r8 + RFI_TO_USER + b . /* prevent speculative execution */ + + /* exit to kernel */ 1: ld r2,GPR2(r1) ld r1,GPR1(r1) mtlr r4 mtcr r5 mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 - RFI + RFI_TO_KERNEL b . /* prevent speculative execution */ syscall_error: -- GitLab From 48cc95d4e4d6a1265b7f728182d6dc62de849b05 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 1186/1398] powerpc/64s: Convert slb_miss_common to use RFI_TO_USER/KERNEL commit c7305645eb0c1621351cfc104038831ae87c0053 upstream. In the SLB miss handler we may be returning to user or kernel. We need to add a check early on and save the result in the cr4 register, and then we bifurcate the return path based on that. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Nicholas Piggin [mpe: Backport to 4.4 based on patch from Balbir] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64s.S | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index fd68e19b9ef7..fc72f81411c4 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -655,6 +655,8 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) andi. r10,r12,MSR_RI /* check for unrecoverable exception */ beq- 2f + andi. r10,r12,MSR_PR /* check for user mode (PR != 0) */ + bne 1f /* All done -- return from exception. */ @@ -671,7 +673,23 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) ld r11,PACA_EXSLB+EX_R11(r13) ld r12,PACA_EXSLB+EX_R12(r13) ld r13,PACA_EXSLB+EX_R13(r13) - rfid + RFI_TO_KERNEL + b . /* prevent speculative execution */ + +1: +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + + RESTORE_PPR_PACA(PACA_EXSLB, r9) + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + RFI_TO_USER b . /* prevent speculative execution */ 2: mfspr r11,SPRN_SRR0 @@ -679,7 +697,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . 8: mfspr r11,SPRN_SRR0 -- GitLab From c3b82ebee6e0d92431c92ee80393c023d550c8a1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 1187/1398] powerpc/64s: Add support for RFI flush of L1-D cache commit aa8a5e0062ac940f7659394f4817c948dc8c0667 upstream. On some CPUs we can prevent the Meltdown vulnerability by flushing the L1-D cache on exit from kernel to user mode, and from hypervisor to guest. This is known to be the case on at least Power7, Power8 and Power9. At this time we do not know the status of the vulnerability on other CPUs such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale CPUs. As more information comes to light we can enable this, or other mechanisms on those CPUs. The vulnerability occurs when the load of an architecturally inaccessible memory region (eg. userspace load of kernel memory) is speculatively executed to the point where its result can influence the address of a subsequent speculatively executed load. In order for that to happen, the first load must hit in the L1, because before the load is sent to the L2 the permission check is performed. Therefore if no kernel addresses hit in the L1 the vulnerability can not occur. We can ensure that is the case by flushing the L1 whenever we return to userspace. Similarly for hypervisor vs guest. In order to flush the L1-D cache on exit, we add a section of nops at each (h)rfi location that returns to a lower privileged context, and patch that with some sequence. Newer firmwares are able to advertise to us that there is a special nop instruction that flushes the L1-D. If we do not see that advertised, we fall back to doing a displacement flush in software. For guest kernels we support migration between some CPU versions, and different CPUs may use different flush instructions. So that we are prepared to migrate to a machine with a different flush instruction activated, we may have to patch more than one flush instruction at boot if the hypervisor tells us to. In the end this patch is mostly the work of Nicholas Piggin and Michael Ellerman. However a cast of thousands contributed to analysis of the issue, earlier versions of the patch, back ports testing etc. Many thanks to all of them. Tested-by: Jon Masters Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman [Balbir - back ported to stable with changes] Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/exception-64s.h | 40 ++++++++--- arch/powerpc/include/asm/feature-fixups.h | 15 ++++ arch/powerpc/include/asm/paca.h | 10 +++ arch/powerpc/include/asm/setup.h | 13 ++++ arch/powerpc/kernel/asm-offsets.c | 4 ++ arch/powerpc/kernel/exceptions-64s.S | 86 +++++++++++++++++++++++ arch/powerpc/kernel/setup_64.c | 79 +++++++++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 9 +++ arch/powerpc/lib/feature-fixups.c | 42 +++++++++++ 9 files changed, 290 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 6771cbe44594..cab6d2a46c41 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -51,34 +51,58 @@ #define EX_PPR 88 /* SMT thread status register (priority) */ #define EX_CTR 96 -/* Macros for annotating the expected destination of (h)rfid */ +/* + * Macros for annotating the expected destination of (h)rfid + * + * The nop instructions allow us to insert one or more instructions to flush the + * L1-D cache when returning to userspace or a guest. + */ +#define RFI_FLUSH_SLOT \ + RFI_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop #define RFI_TO_KERNEL \ rfid #define RFI_TO_USER \ - rfid + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback #define RFI_TO_USER_OR_KERNEL \ - rfid + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback #define RFI_TO_GUEST \ - rfid + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback #define HRFI_TO_KERNEL \ hrfid #define HRFI_TO_USER \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #define HRFI_TO_USER_OR_KERNEL \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #define HRFI_TO_GUEST \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #define HRFI_TO_UNKNOWN \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #ifdef CONFIG_RELOCATABLE #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index ddf54f5bbdd1..7b332342071c 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -189,4 +189,19 @@ void apply_feature_fixups(void); void setup_feature_keys(void); #endif +#define RFI_FLUSH_FIXUP_SECTION \ +951: \ + .pushsection __rfi_flush_fixup,"a"; \ + .align 2; \ +952: \ + FTR_ENTRY_OFFSET 951b-952b; \ + .popsection; + + +#ifndef __ASSEMBLY__ + +extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; + +#endif + #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 6a6792bb39fb..ea43897183fd 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -205,6 +205,16 @@ struct paca_struct { struct sibling_subcore_state *sibling_subcore_state; #endif #endif +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * rfi fallback flush must be in its own cacheline to prevent + * other paca data leaking into the L1d + */ + u64 exrfi[13] __aligned(0x80); + void *rfi_flush_fallback_area; + u64 l1d_flush_congruence; + u64 l1d_flush_sets; +#endif }; #ifdef CONFIG_PPC_BOOK3S diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 654d64c9f3ac..6825a67cc3db 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -38,6 +38,19 @@ static inline void pseries_big_endian_exceptions(void) {} static inline void pseries_little_endian_exceptions(void) {} #endif /* CONFIG_PPC_PSERIES */ +void rfi_flush_enable(bool enable); + +/* These are bit flags */ +enum l1d_flush_type { + L1D_FLUSH_NONE = 0x1, + L1D_FLUSH_FALLBACK = 0x2, + L1D_FLUSH_ORI = 0x4, + L1D_FLUSH_MTTRIG = 0x8, +}; + +void __init setup_rfi_flush(enum l1d_flush_type, bool enable); +void do_rfi_flush_fixups(enum l1d_flush_type types); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c833d88c423d..64bcbd580495 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -240,6 +240,10 @@ int main(void) #ifdef CONFIG_PPC_BOOK3S_64 DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp)); DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce)); + DEFINE(PACA_RFI_FLUSH_FALLBACK_AREA, offsetof(struct paca_struct, rfi_flush_fallback_area)); + DEFINE(PACA_EXRFI, offsetof(struct paca_struct, exrfi)); + DEFINE(PACA_L1D_FLUSH_CONGRUENCE, offsetof(struct paca_struct, l1d_flush_congruence)); + DEFINE(PACA_L1D_FLUSH_SETS, offsetof(struct paca_struct, l1d_flush_sets)); #endif DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index fc72f81411c4..96db6c3adebe 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1594,6 +1594,92 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) bl kernel_bad_stack b 1b + .globl rfi_flush_fallback +rfi_flush_fallback: + SET_SCRATCH0(r13); + GET_PACA(r13); + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + rfid + + .globl hrfi_flush_fallback +hrfi_flush_fallback: + SET_SCRATCH0(r13); + GET_PACA(r13); + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + hrfid + /* * Called from arch_local_irq_enable when an interrupt needs * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a12be60181bf..849d086288c6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -678,4 +678,83 @@ static int __init disable_hardlockup_detector(void) return 0; } early_initcall(disable_hardlockup_detector); + +#ifdef CONFIG_PPC_BOOK3S_64 +static enum l1d_flush_type enabled_flush_types; +static void *l1d_flush_fallback_area; +bool rfi_flush; + +static void do_nothing(void *unused) +{ + /* + * We don't need to do the flush explicitly, just enter+exit kernel is + * sufficient, the RFI exit handlers will do the right thing. + */ +} + +void rfi_flush_enable(bool enable) +{ + if (rfi_flush == enable) + return; + + if (enable) { + do_rfi_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else + do_rfi_flush_fixups(L1D_FLUSH_NONE); + + rfi_flush = enable; +} + +static void init_fallback_flush(void) +{ + u64 l1d_size, limit; + int cpu; + + l1d_size = ppc64_caches.dsize; + limit = min(safe_stack_limit(), ppc64_rma_size); + + /* + * Align to L1d size, and size it at 2x L1d size, to catch possible + * hardware prefetch runoff. We don't have a recipe for load patterns to + * reliably avoid the prefetcher. + */ + l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit)); + memset(l1d_flush_fallback_area, 0, l1d_size * 2); + + for_each_possible_cpu(cpu) { + /* + * The fallback flush is currently coded for 8-way + * associativity. Different associativity is possible, but it + * will be treated as 8-way and may not evict the lines as + * effectively. + * + * 128 byte lines are mandatory. + */ + u64 c = l1d_size / 8; + + paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; + paca[cpu].l1d_flush_congruence = c; + paca[cpu].l1d_flush_sets = c / 128; + } +} + +void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) +{ + if (types & L1D_FLUSH_FALLBACK) { + pr_info("rfi-flush: Using fallback displacement flush\n"); + init_fallback_flush(); + } + + if (types & L1D_FLUSH_ORI) + pr_info("rfi-flush: Using ori type flush\n"); + + if (types & L1D_FLUSH_MTTRIG) + pr_info("rfi-flush: Using mttrig type flush\n"); + + enabled_flush_types = types; + + rfi_flush_enable(enable); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ #endif diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7394b770ae1f..b61fb7902018 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -132,6 +132,15 @@ SECTIONS /* Read-only data */ RODATA +#ifdef CONFIG_PPC64 + . = ALIGN(8); + __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { + __start___rfi_flush_fixup = .; + *(__rfi_flush_fixup) + __stop___rfi_flush_fixup = .; + } +#endif + EXCEPTION_TABLE(0) NOTES :kernel :notes diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 043415f0bdb1..e86bfa111f3c 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -23,6 +23,7 @@ #include #include #include +#include struct fixup_entry { unsigned long mask; @@ -115,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +#ifdef CONFIG_PPC_BOOK3S_64 +void do_rfi_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___rfi_flush_fixup), + end = PTRRELOC(&__stop___rfi_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + if (types & L1D_FLUSH_FALLBACK) + /* b .+16 to fallback flush */ + instrs[0] = 0x48000010; + + i = 0; + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + patch_instruction(dest + 1, instrs[1]); + patch_instruction(dest + 2, instrs[2]); + } + + printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ + void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; -- GitLab From 0ef9f8289edf1d335fb2bd3c162521528823b585 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 1188/1398] powerpc/64s: Support disabling RFI flush with no_rfi_flush and nopti commit bc9c9304a45480797e13a8e1df96ffcf44fb62fe upstream. Because there may be some performance overhead of the RFI flush, add kernel command line options to disable it. We add a sensibly named 'no_rfi_flush' option, but we also hijack the x86 option 'nopti'. The RFI flush is not the same as KPTI, but if we see 'nopti' we can guess that the user is trying to avoid any overhead of Meltdown mitigations, and it means we don't have to educate every one about a different command line option. Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/setup_64.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 849d086288c6..e728bb437fbd 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -682,8 +682,29 @@ early_initcall(disable_hardlockup_detector); #ifdef CONFIG_PPC_BOOK3S_64 static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; +static bool no_rfi_flush; bool rfi_flush; +static int __init handle_no_rfi_flush(char *p) +{ + pr_info("rfi-flush: disabled on command line."); + no_rfi_flush = true; + return 0; +} +early_param("no_rfi_flush", handle_no_rfi_flush); + +/* + * The RFI flush is not KPTI, but because users will see doco that says to use + * nopti we hijack that option here to also disable the RFI flush. + */ +static int __init handle_no_pti(char *p) +{ + pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); + handle_no_rfi_flush(NULL); + return 0; +} +early_param("nopti", handle_no_pti); + static void do_nothing(void *unused) { /* @@ -754,7 +775,8 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - rfi_flush_enable(enable); + if (!no_rfi_flush) + rfi_flush_enable(enable); } #endif /* CONFIG_PPC_BOOK3S_64 */ #endif -- GitLab From 7db0fff62f52c3f23c39262b9e037d8b43dfc88d Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 1189/1398] powerpc/pseries: Query hypervisor for RFI flush settings commit 8989d56878a7735dfdb234707a2fee6faf631085 upstream. A new hypervisor call is available which tells the guest settings related to the RFI flush. Use it to query the appropriate flush instruction(s), and whether the flush is required. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/setup.c | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 97aa3f332f24..1845fc611912 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -450,6 +450,39 @@ static void __init find_and_init_phbs(void) of_pci_check_probe_only(); } +static void pseries_setup_rfi_flush(void) +{ + struct h_cpu_char_result result; + enum l1d_flush_type types; + bool enable; + long rc; + + /* Enable by default */ + enable = true; + + rc = plpar_get_cpu_characteristics(&result); + if (rc == H_SUCCESS) { + types = L1D_FLUSH_NONE; + + if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) + types |= L1D_FLUSH_MTTRIG; + if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) + types |= L1D_FLUSH_ORI; + + /* Use fallback if nothing set in hcall */ + if (types == L1D_FLUSH_NONE) + types = L1D_FLUSH_FALLBACK; + + if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) + enable = false; + } else { + /* Default to fallback if case hcall is not available */ + types = L1D_FLUSH_FALLBACK; + } + + setup_rfi_flush(types, enable); +} + static void __init pSeries_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); @@ -467,6 +500,8 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); + pseries_setup_rfi_flush(); + /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); -- GitLab From 6aec12e1869e31839f317c02f81a92c393222f71 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 1190/1398] powerpc/powernv: Check device-tree for RFI flush settings commit 6e032b350cd1fdb830f18f8320ef0e13b4e24094 upstream. New device-tree properties are available which tell the hypervisor settings related to the RFI flush. Use them to determine the appropriate flush instruction to use, and whether the flush is required. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/setup.c | 50 ++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index b33faa0015cc..6f8b4c19373a 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -35,13 +35,63 @@ #include #include #include +#include +#include #include "powernv.h" +static void pnv_setup_rfi_flush(void) +{ + struct device_node *np, *fw_features; + enum l1d_flush_type type; + int enable; + + /* Default to fallback in case fw-features are not available */ + type = L1D_FLUSH_FALLBACK; + enable = 1; + + np = of_find_node_by_name(NULL, "ibm,opal"); + fw_features = of_get_child_by_name(np, "fw-features"); + of_node_put(np); + + if (fw_features) { + np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); + if (np && of_property_read_bool(np, "enabled")) + type = L1D_FLUSH_MTTRIG; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); + if (np && of_property_read_bool(np, "enabled")) + type = L1D_FLUSH_ORI; + + of_node_put(np); + + /* Enable unless firmware says NOT to */ + enable = 2; + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + of_node_put(fw_features); + } + + setup_rfi_flush(type, enable > 0); +} + static void __init pnv_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + pnv_setup_rfi_flush(); + /* Initialize SMP */ pnv_smp_init(); -- GitLab From 1f0c936f431d98611fff5ef7082380f087da1578 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jan 2018 21:20:05 +1100 Subject: [PATCH 1191/1398] powerpc/64s: Wire up cpu_show_meltdown() commit fd6e440f20b1a4304553775fc55938848ff617c9 upstream. The recent commit 87590ce6e373 ("sysfs/cpu: Add vulnerability folder") added a generic folder and set of files for reporting information on CPU vulnerabilities. One of those was for meltdown: /sys/devices/system/cpu/vulnerabilities/meltdown This commit wires up that file for 64-bit Book3S powerpc. For now we default to "Vulnerable" unless the RFI flush is enabled. That may not actually be true on all hardware, further patches will refine the reporting based on the CPU/platform etc. But for now we default to being pessimists. Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/setup_64.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6eda5abbd719..0a6bb48854e3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -128,6 +128,7 @@ config PPC select ARCH_HAS_GCOV_PROFILE_ALL select GENERIC_SMP_IDLE_THREAD select GENERIC_CMOS_UPDATE + select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 select GENERIC_TIME_VSYSCALL_OLD select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST if SMP diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e728bb437fbd..79136dc981be 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -778,5 +778,13 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) if (!no_rfi_flush) rfi_flush_enable(enable); } + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (rfi_flush) + return sprintf(buf, "Mitigation: RFI Flush\n"); + + return sprintf(buf, "Vulnerable\n"); +} #endif /* CONFIG_PPC_BOOK3S_64 */ #endif -- GitLab From 9fed3978c39b27b27eeb676301b7ba960a74170c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jan 2018 22:17:18 +1100 Subject: [PATCH 1192/1398] powerpc/64s: Allow control of RFI flush via debugfs commit 236003e6b5443c45c18e613d2b0d776a9f87540e upstream. Expose the state of the RFI flush (enabled/disabled) via debugfs, and allow it to be enabled/disabled at runtime. eg: $ cat /sys/kernel/debug/powerpc/rfi_flush 1 $ echo 0 > /sys/kernel/debug/powerpc/rfi_flush $ cat /sys/kernel/debug/powerpc/rfi_flush 0 Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/setup_64.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 79136dc981be..7c30a91c1f86 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -779,6 +780,35 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) rfi_flush_enable(enable); } +#ifdef CONFIG_DEBUG_FS +static int rfi_flush_set(void *data, u64 val) +{ + if (val == 1) + rfi_flush_enable(true); + else if (val == 0) + rfi_flush_enable(false); + else + return -EINVAL; + + return 0; +} + +static int rfi_flush_get(void *data, u64 *val) +{ + *val = rfi_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); + +static __init int rfi_flush_debugfs_init(void) +{ + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + return 0; +} +device_initcall(rfi_flush_debugfs_init); +#endif + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { if (rfi_flush) -- GitLab From 781a2d683110864fc136d85df7a2fb3fc145e5e8 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Wed, 10 Jan 2018 17:41:10 +0100 Subject: [PATCH 1193/1398] auxdisplay: img-ascii-lcd: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 09c479f7f1fbfaf848e5813996793966cd50be81 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/auxdisplay/img-ascii-lcd.o see include/linux/module.h for more information This adds the license as "GPL", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/auxdisplay/img-ascii-lcd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c index 83f1439e57fd..6e8eaa7fe7a6 100644 --- a/drivers/auxdisplay/img-ascii-lcd.c +++ b/drivers/auxdisplay/img-ascii-lcd.c @@ -442,3 +442,7 @@ static struct platform_driver img_ascii_lcd_driver = { .remove = img_ascii_lcd_remove, }; module_platform_driver(img_ascii_lcd_driver); + +MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display"); +MODULE_AUTHOR("Paul Burton "); +MODULE_LICENSE("GPL"); -- GitLab From 0ee4f5e7bbffc07d98cb7626446175700ef5fcce Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:58:03 -0800 Subject: [PATCH 1194/1398] pinctrl: pxa: pxa2xx: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 0b9335cbd38e3bd2025bcc23b5758df4ac035f75 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/pinctrl/pxa/pinctrl-pxa2xx.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pxa/pinctrl-pxa2xx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c index 866aa3ce1ac9..6cf0006d4c8d 100644 --- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c +++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c @@ -436,3 +436,7 @@ int pxa2xx_pinctrl_exit(struct platform_device *pdev) return 0; } EXPORT_SYMBOL_GPL(pxa2xx_pinctrl_exit); + +MODULE_AUTHOR("Robert Jarzmik "); +MODULE_DESCRIPTION("Marvell PXA2xx pinctrl driver"); +MODULE_LICENSE("GPL v2"); -- GitLab From 374c84de94af798cad81b5e09b81e901966a4eb0 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Sun, 19 Nov 2017 23:45:49 -0800 Subject: [PATCH 1195/1398] ASoC: pcm512x: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 0cab20cec0b663b7be8e2be5998d5a4113647f86 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in sound/soc/codecs/snd-soc-pcm512x-spi.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/pcm512x-spi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c index 712ed6598c48..ebdf9bd5a64c 100644 --- a/sound/soc/codecs/pcm512x-spi.c +++ b/sound/soc/codecs/pcm512x-spi.c @@ -70,3 +70,7 @@ static struct spi_driver pcm512x_spi_driver = { }; module_spi_driver(pcm512x_spi_driver); + +MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI"); +MODULE_AUTHOR("Mark Brown "); +MODULE_LICENSE("GPL v2"); -- GitLab From 0a61cd6caed72ecac8cbb637f6498e17a33d73b1 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 29 Jan 2018 18:16:55 -0800 Subject: [PATCH 1196/1398] kaiser: fix intel_bts perf crashes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vince reported perf_fuzzer quickly locks up on 4.15-rc7 with PTI; Robert reported Bad RIP with KPTI and Intel BTS also on 4.15-rc7: honggfuzz -f /tmp/somedirectorywithatleastonefile \ --linux_perf_bts_edge -s -- /bin/true (honggfuzz from https://github.com/google/honggfuzz) crashed with BUG: unable to handle kernel paging request at ffff9d3215100000 (then narrowed it down to perf record --per-thread -e intel_bts//u -- /bin/ls). The intel_bts driver does not use the 'normal' BTS buffer which is exposed through kaiser_add_mapping(), but instead uses the memory allocated for the perf AUX buffer. This obviously comes apart when using PTI, because then the kernel mapping, which includes that AUX buffer memory, disappears while switched to user page tables. Easily fixed in old-Kaiser backports, by applying kaiser_add_mapping() to those pages; perhaps not so easy for upstream, where 4.15-rc8 commit 99a9dc98ba52 ("x86,perf: Disable intel_bts when PTI") disables for now. Slightly reorganized surrounding code in bts_buffer_setup_aux(), so it can better match bts_buffer_free_aux(): free_aux with an #ifdef to avoid the loop when PTI is off, but setup_aux needs to loop anyway (and kaiser_add_mapping() is cheap when PTI config is off or "pti=off"). Reported-by: Vince Weaver Reported-by: Robert Święcki Analyzed-by: Peter Zijlstra Analyzed-by: Stephane Eranian Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andy Lutomirski Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Vince Weaver Cc: Jiri Kosina Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/bts.c | 44 +++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 982c9e31daca..21298c173b0e 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -77,6 +78,23 @@ static size_t buf_size(struct page *page) return 1 << (PAGE_SHIFT + page_private(page)); } +static void bts_buffer_free_aux(void *data) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + struct bts_buffer *buf = data; + int nbuf; + + for (nbuf = 0; nbuf < buf->nr_bufs; nbuf++) { + struct page *page = buf->buf[nbuf].page; + void *kaddr = page_address(page); + size_t page_size = buf_size(page); + + kaiser_remove_mapping((unsigned long)kaddr, page_size); + } +#endif + kfree(data); +} + static void * bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) { @@ -113,29 +131,33 @@ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) buf->real_size = size - size % BTS_RECORD_SIZE; for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) { - unsigned int __nr_pages; + void *kaddr = pages[pg]; + size_t page_size; + + page = virt_to_page(kaddr); + page_size = buf_size(page); + + if (kaiser_add_mapping((unsigned long)kaddr, + page_size, __PAGE_KERNEL) < 0) { + buf->nr_bufs = nbuf; + bts_buffer_free_aux(buf); + return NULL; + } - page = virt_to_page(pages[pg]); - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; buf->buf[nbuf].page = page; buf->buf[nbuf].offset = offset; buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); - buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement; + buf->buf[nbuf].size = page_size - buf->buf[nbuf].displacement; pad = buf->buf[nbuf].size % BTS_RECORD_SIZE; buf->buf[nbuf].size -= pad; - pg += __nr_pages; - offset += __nr_pages << PAGE_SHIFT; + pg += page_size >> PAGE_SHIFT; + offset += page_size; } return buf; } -static void bts_buffer_free_aux(void *data) -{ - kfree(data); -} - static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx) { return buf->buf[idx].offset + buf->buf[idx].displacement; -- GitLab From ae1fc8de51b10dba40cbd54959b5ba0a311c0861 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 29 Jan 2018 18:17:26 -0800 Subject: [PATCH 1197/1398] x86/pti: Make unpoison of pgd for trusted boot work for real commit 445b69e3b75e42362a5bdc13c8b8f61599e2228a upstream The inital fix for trusted boot and PTI potentially misses the pgd clearing if pud_alloc() sets a PGD. It probably works in *practice* because for two adjacent calls to map_tboot_page() that share a PGD entry, the first will clear NX, *then* allocate and set the PGD (without NX clear). The second call will *not* allocate but will clear the NX bit. Defer the NX clearing to a point after it is known that all top-level allocations have occurred. Add a comment to clarify why. [ tglx: Massaged changelog ] [ hughd notes: I have not tested tboot, but this looks to me as necessary and as safe in old-Kaiser backports as it is upstream; I'm not submitting the commit-to-be-fixed 262b6b30087, since it was undone by 445b69e3b75e, and makes conflict trouble because of 5-level's p4d versus 4-level's pgd.] Fixes: 262b6b30087 ("x86/tboot: Unbreak tboot with PTI enabled") Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Andrea Arcangeli Cc: Jon Masters Cc: Tim Chen Cc: gnomes@lxorguk.ukuu.org.uk Cc: peterz@infradead.org Cc: ning.sun@intel.com Cc: tboot-devel@lists.sourceforge.net Cc: andi@firstfloor.org Cc: luto@kernel.org Cc: law@redhat.com Cc: pbonzini@redhat.com Cc: torvalds@linux-foundation.org Cc: gregkh@linux-foundation.org Cc: dwmw@amazon.co.uk Cc: nickc@redhat.com Link: https://lkml.kernel.org/r/20180110224939.2695CD47@viggo.jf.intel.com Cc: Jiri Kosina Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tboot.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 8402907825b0..21454e254a4c 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -134,6 +134,16 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, return -1; set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); pte_unmap(pte); + + /* + * PTI poisons low addresses in the kernel page tables in the + * name of making them unusable for userspace. To execute + * code at such a low address, the poison must be cleared. + * + * Note: 'pgd' actually gets set in pud_alloc(). + */ + pgd->pgd &= ~_PAGE_NX; + return 0; } -- GitLab From 400d3c8b0c7f6c942cd9bf201da1f32da9bdcc9f Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 29 Jan 2018 18:17:58 -0800 Subject: [PATCH 1198/1398] kaiser: allocate pgd with order 0 when pti=off The 4.9.77 version of "x86/pti/efi: broken conversion from efi to kernel page table" looked nicer than the 4.4.112 version, but was suboptimal on machines booted with "pti=off" (or on AMD machines): it allocated pgd with an order 1 page whatever the setting of kaiser_enabled. Fix that by moving the definition of PGD_ALLOCATION_ORDER from asm/pgalloc.h to asm/pgtable.h, which already defines kaiser_enabled. Fixes: 1b92c48a2eeb ("x86/pti/efi: broken conversion from efi to kernel page table") Reviewed-by: Pavel Tatashin Cc: Steven Sistare Cc: Jiri Kosina Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgalloc.h | 11 ----------- arch/x86/include/asm/pgtable.h | 6 ++++++ 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 1178a51b77f3..b6d425999f99 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -27,17 +27,6 @@ static inline void paravirt_release_pud(unsigned long pfn) {} */ extern gfp_t __userpte_alloc_gfp; -#ifdef CONFIG_PAGE_TABLE_ISOLATION -/* - * Instead of one PGD, we acquire two PGDs. Being order-1, it is - * both 8k in size and 8k-aligned. That lets us just flip bit 12 - * in a pointer to swap between the two 4k halves. - */ -#define PGD_ALLOCATION_ORDER 1 -#else -#define PGD_ALLOCATION_ORDER 0 -#endif - /* * Allocate and free page tables. */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2536f90cd30c..5af0401ccff2 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -20,9 +20,15 @@ #ifdef CONFIG_PAGE_TABLE_ISOLATION extern int kaiser_enabled; +/* + * Instead of one PGD, we acquire two PGDs. Being order-1, it is + * both 8k in size and 8k-aligned. That lets us just flip bit 12 + * in a pointer to swap between the two 4k halves. + */ #else #define kaiser_enabled 0 #endif +#define PGD_ALLOCATION_ORDER kaiser_enabled void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); void ptdump_walk_pgd_level_checkwx(void); -- GitLab From ffcf167d348ee5e8ddbe2c4116b71ebfff2258e0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 11 Jan 2018 18:57:26 +0100 Subject: [PATCH 1199/1398] serial: core: mark port as initialized after successful IRQ change commit 44117a1d1732c513875d5a163f10d9adbe866c08 upstream. setserial changes the IRQ via uart_set_info(). It invokes uart_shutdown() which free the current used IRQ and clear TTY_PORT_INITIALIZED. It will then update the IRQ number and invoke uart_startup() before returning to the caller leaving TTY_PORT_INITIALIZED cleared. The next open will crash with | list_add double add: new=ffffffff839fcc98, prev=ffffffff839fcc98, next=ffffffff839fcc98. since the close from the IOCTL won't free the IRQ (and clean the list) due to the TTY_PORT_INITIALIZED check in uart_shutdown(). There is same pattern in uart_do_autoconfig() and I *think* it also needs to set TTY_PORT_INITIALIZED there. Is there a reason why uart_startup() does not set the flag by itself after the IRQ has been acquired (since it is cleared in uart_shutdown)? Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index f2303f390345..23973a8124fc 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -965,6 +965,8 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, } } else { retval = uart_startup(tty, state, 1); + if (retval == 0) + tty_port_set_initialized(port, true); if (retval > 0) retval = 0; } -- GitLab From 7d3d60ef2256f5fcfd4929963dd67f58028ebeee Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 31 Jan 2018 16:29:30 +0200 Subject: [PATCH 1200/1398] ip6mr: fix stale iterator [ Upstream commit 4adfa79fc254efb7b0eb3cd58f62c2c3f805f1ba ] When we dump the ip6mr mfc entries via proc, we initialize an iterator with the table to dump but we don't clear the cache pointer which might be initialized from a prior read on the same descriptor that ended. This can result in lock imbalance (an unnecessary unlock) leading to other crashes and hangs. Clear the cache pointer like ipmr does to fix the issue. Thanks for the reliable reproducer. Here's syzbot's trace: WARNING: bad unlock balance detected! 4.15.0-rc3+ #128 Not tainted syzkaller971460/3195 is trying to release lock (mrt_lock) at: [<000000006898068d>] ipmr_mfc_seq_stop+0xe1/0x130 net/ipv6/ip6mr.c:553 but there are no more locks to release! other info that might help us debug this: 1 lock held by syzkaller971460/3195: #0: (&p->lock){+.+.}, at: [<00000000744a6565>] seq_read+0xd5/0x13d0 fs/seq_file.c:165 stack backtrace: CPU: 1 PID: 3195 Comm: syzkaller971460 Not tainted 4.15.0-rc3+ #128 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_unlock_imbalance_bug+0x12f/0x140 kernel/locking/lockdep.c:3561 __lock_release kernel/locking/lockdep.c:3775 [inline] lock_release+0x5f9/0xda0 kernel/locking/lockdep.c:4023 __raw_read_unlock include/linux/rwlock_api_smp.h:225 [inline] _raw_read_unlock+0x1a/0x30 kernel/locking/spinlock.c:255 ipmr_mfc_seq_stop+0xe1/0x130 net/ipv6/ip6mr.c:553 traverse+0x3bc/0xa00 fs/seq_file.c:135 seq_read+0x96a/0x13d0 fs/seq_file.c:189 proc_reg_read+0xef/0x170 fs/proc/inode.c:217 do_loop_readv_writev fs/read_write.c:673 [inline] do_iter_read+0x3db/0x5b0 fs/read_write.c:897 compat_readv+0x1bf/0x270 fs/read_write.c:1140 do_compat_preadv64+0xdc/0x100 fs/read_write.c:1189 C_SYSC_preadv fs/read_write.c:1209 [inline] compat_SyS_preadv+0x3b/0x50 fs/read_write.c:1203 do_syscall_32_irqs_on arch/x86/entry/common.c:327 [inline] do_fast_syscall_32+0x3ee/0xf9d arch/x86/entry/common.c:389 entry_SYSENTER_compat+0x51/0x60 arch/x86/entry/entry_64_compat.S:125 RIP: 0023:0xf7f73c79 RSP: 002b:00000000e574a15c EFLAGS: 00000292 ORIG_RAX: 000000000000014d RAX: ffffffffffffffda RBX: 000000000000000f RCX: 0000000020a3afb0 RDX: 0000000000000001 RSI: 0000000000000067 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 BUG: sleeping function called from invalid context at lib/usercopy.c:25 in_atomic(): 1, irqs_disabled(): 0, pid: 3195, name: syzkaller971460 INFO: lockdep is turned off. CPU: 1 PID: 3195 Comm: syzkaller971460 Not tainted 4.15.0-rc3+ #128 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6060 __might_sleep+0x95/0x190 kernel/sched/core.c:6013 __might_fault+0xab/0x1d0 mm/memory.c:4525 _copy_to_user+0x2c/0xc0 lib/usercopy.c:25 copy_to_user include/linux/uaccess.h:155 [inline] seq_read+0xcb4/0x13d0 fs/seq_file.c:279 proc_reg_read+0xef/0x170 fs/proc/inode.c:217 do_loop_readv_writev fs/read_write.c:673 [inline] do_iter_read+0x3db/0x5b0 fs/read_write.c:897 compat_readv+0x1bf/0x270 fs/read_write.c:1140 do_compat_preadv64+0xdc/0x100 fs/read_write.c:1189 C_SYSC_preadv fs/read_write.c:1209 [inline] compat_SyS_preadv+0x3b/0x50 fs/read_write.c:1203 do_syscall_32_irqs_on arch/x86/entry/common.c:327 [inline] do_fast_syscall_32+0x3ee/0xf9d arch/x86/entry/common.c:389 entry_SYSENTER_compat+0x51/0x60 arch/x86/entry/entry_64_compat.S:125 RIP: 0023:0xf7f73c79 RSP: 002b:00000000e574a15c EFLAGS: 00000292 ORIG_RAX: 000000000000014d RAX: ffffffffffffffda RBX: 000000000000000f RCX: 0000000020a3afb0 RDX: 0000000000000001 RSI: 0000000000000067 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 WARNING: CPU: 1 PID: 3195 at lib/usercopy.c:26 _copy_to_user+0xb5/0xc0 lib/usercopy.c:26 Reported-by: syzbot Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 117405dd07a3..a30e7e925c9b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -495,6 +495,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) return ERR_PTR(-ENOENT); it->mrt = mrt; + it->cache = NULL; return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) : SEQ_START_TOKEN; } -- GitLab From ce43c07fcef8cf5cdbc7a45db7a6b4a1a7473905 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Feb 2018 10:26:57 -0800 Subject: [PATCH 1201/1398] net: igmp: add a missing rcu locking section [ Upstream commit e7aadb27a5415e8125834b84a74477bfbee4eff5 ] Newly added igmpv3_get_srcaddr() needs to be called under rcu lock. Timer callbacks do not ensure this locking. ============================= WARNING: suspicious RCU usage 4.15.0+ #200 Not tainted ----------------------------- ./include/linux/inetdevice.h:216 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by syzkaller616973/4074: #0: (&mm->mmap_sem){++++}, at: [<00000000bfce669e>] __do_page_fault+0x32d/0xc90 arch/x86/mm/fault.c:1355 #1: ((&im->timer)){+.-.}, at: [<00000000619d2f71>] lockdep_copy_map include/linux/lockdep.h:178 [inline] #1: ((&im->timer)){+.-.}, at: [<00000000619d2f71>] call_timer_fn+0x1c6/0x820 kernel/time/timer.c:1316 #2: (&(&im->lock)->rlock){+.-.}, at: [<000000005f833c5c>] spin_lock_bh include/linux/spinlock.h:315 [inline] #2: (&(&im->lock)->rlock){+.-.}, at: [<000000005f833c5c>] igmpv3_send_report+0x98/0x5b0 net/ipv4/igmp.c:600 stack backtrace: CPU: 0 PID: 4074 Comm: syzkaller616973 Not tainted 4.15.0+ #200 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 lockdep_rcu_suspicious+0x123/0x170 kernel/locking/lockdep.c:4592 __in_dev_get_rcu include/linux/inetdevice.h:216 [inline] igmpv3_get_srcaddr net/ipv4/igmp.c:329 [inline] igmpv3_newpack+0xeef/0x12e0 net/ipv4/igmp.c:389 add_grhead.isra.27+0x235/0x300 net/ipv4/igmp.c:432 add_grec+0xbd3/0x1170 net/ipv4/igmp.c:565 igmpv3_send_report+0xd5/0x5b0 net/ipv4/igmp.c:605 igmp_send_report+0xc43/0x1050 net/ipv4/igmp.c:722 igmp_timer_expire+0x322/0x5c0 net/ipv4/igmp.c:831 call_timer_fn+0x228/0x820 kernel/time/timer.c:1326 expire_timers kernel/time/timer.c:1363 [inline] __run_timers+0x7ee/0xb70 kernel/time/timer.c:1666 run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692 __do_softirq+0x2d7/0xb85 kernel/softirq.c:285 invoke_softirq kernel/softirq.c:365 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:541 [inline] smp_apic_timer_interrupt+0x16b/0x700 arch/x86/kernel/apic/apic.c:1052 apic_timer_interrupt+0xa9/0xb0 arch/x86/entry/entry_64.S:938 Fixes: a46182b00290 ("net: igmp: Use correct source address on IGMPv3 reports") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/igmp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9c7a4cea1628..7f5fe07d0b13 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -386,7 +386,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) pip->frag_off = htons(IP_DF); pip->ttl = 1; pip->daddr = fl4.daddr; + + rcu_read_lock(); pip->saddr = igmpv3_get_srcaddr(dev, &fl4); + rcu_read_unlock(); + pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ ip_select_ident(net, skb, NULL); -- GitLab From 97fe899816a68e3970b877e766bcb8d66013076b Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 29 Jan 2018 17:53:42 +0800 Subject: [PATCH 1202/1398] qlcnic: fix deadlock bug [ Upstream commit 233ac3891607f501f08879134d623b303838f478 ] The following soft lockup was caught. This is a deadlock caused by recusive locking. Process kworker/u40:1:28016 was holding spin lock "mbx->queue_lock" in qlcnic_83xx_mailbox_worker(), while a softirq came in and ask the same spin lock in qlcnic_83xx_enqueue_mbx_cmd(). This lock should be hold by disable bh.. [161846.962125] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [kworker/u40:1:28016] [161846.962367] Modules linked in: tun ocfs2 xen_netback xen_blkback xen_gntalloc xen_gntdev xen_evtchn xenfs xen_privcmd autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs bnx2fc fcoe libfcoe libfc sunrpc 8021q mrp garp bridge stp llc bonding dm_round_robin dm_multipath iTCO_wdt iTCO_vendor_support pcspkr sb_edac edac_core i2c_i801 shpchp lpc_ich mfd_core ioatdma ipmi_devintf ipmi_si ipmi_msghandler sg ext4 jbd2 mbcache2 sr_mod cdrom sd_mod igb i2c_algo_bit i2c_core ahci libahci megaraid_sas ixgbe dca ptp pps_core vxlan udp_tunnel ip6_udp_tunnel qla2xxx scsi_transport_fc qlcnic crc32c_intel be2iscsi bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi ipv6 cxgb3 mdio libiscsi_tcp qla4xxx iscsi_boot_sysfs libiscsi scsi_transport_iscsi dm_mirror dm_region_hash dm_log dm_mod [161846.962454] [161846.962460] CPU: 1 PID: 28016 Comm: kworker/u40:1 Not tainted 4.1.12-94.5.9.el6uek.x86_64 #2 [161846.962463] Hardware name: Oracle Corporation SUN SERVER X4-2L /ASSY,MB,X4-2L , BIOS 26050100 09/19/2017 [161846.962489] Workqueue: qlcnic_mailbox qlcnic_83xx_mailbox_worker [qlcnic] [161846.962493] task: ffff8801f2e34600 ti: ffff88004ca5c000 task.ti: ffff88004ca5c000 [161846.962496] RIP: e030:[] [] xen_hypercall_sched_op+0xa/0x20 [161846.962506] RSP: e02b:ffff880202e43388 EFLAGS: 00000206 [161846.962509] RAX: 0000000000000000 RBX: ffff8801f6996b70 RCX: ffffffff810013aa [161846.962511] RDX: ffff880202e433cc RSI: ffff880202e433b0 RDI: 0000000000000003 [161846.962513] RBP: ffff880202e433d0 R08: 0000000000000000 R09: ffff8801fe893200 [161846.962516] R10: ffff8801fe400538 R11: 0000000000000206 R12: ffff880202e4b000 [161846.962518] R13: 0000000000000050 R14: 0000000000000001 R15: 000000000000020d [161846.962528] FS: 0000000000000000(0000) GS:ffff880202e40000(0000) knlGS:ffff880202e40000 [161846.962531] CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 [161846.962533] CR2: 0000000002612640 CR3: 00000001bb796000 CR4: 0000000000042660 [161846.962536] Stack: [161846.962538] ffff880202e43608 0000000000000000 ffffffff813f0442 ffff880202e433b0 [161846.962543] 0000000000000000 ffff880202e433cc ffffffff00000001 0000000000000000 [161846.962547] 00000009813f03d6 ffff880202e433e0 ffffffff813f0460 ffff880202e43440 [161846.962552] Call Trace: [161846.962555] [161846.962565] [] ? xen_poll_irq_timeout+0x42/0x50 [161846.962570] [] xen_poll_irq+0x10/0x20 [161846.962578] [] xen_lock_spinning+0xe2/0x110 [161846.962583] [] __raw_callee_save_xen_lock_spinning+0x11/0x20 [161846.962592] [] ? _raw_spin_lock+0x57/0x80 [161846.962609] [] qlcnic_83xx_enqueue_mbx_cmd+0x7c/0xe0 [qlcnic] [161846.962623] [] qlcnic_83xx_issue_cmd+0x58/0x210 [qlcnic] [161846.962636] [] qlcnic_83xx_sre_macaddr_change+0x162/0x1d0 [qlcnic] [161846.962649] [] qlcnic_83xx_change_l2_filter+0x2b/0x30 [qlcnic] [161846.962657] [] ? __skb_flow_dissect+0x18b/0x650 [161846.962670] [] qlcnic_send_filter+0x205/0x250 [qlcnic] [161846.962682] [] qlcnic_xmit_frame+0x547/0x7b0 [qlcnic] [161846.962691] [] xmit_one+0x82/0x1a0 [161846.962696] [] dev_hard_start_xmit+0x50/0xa0 [161846.962701] [] sch_direct_xmit+0x112/0x220 [161846.962706] [] __dev_queue_xmit+0x1df/0x5e0 [161846.962710] [] dev_queue_xmit_sk+0x13/0x20 [161846.962721] [] bond_dev_queue_xmit+0x35/0x80 [bonding] [161846.962729] [] __bond_start_xmit+0x1cb/0x210 [bonding] [161846.962736] [] bond_start_xmit+0x31/0x60 [bonding] [161846.962740] [] xmit_one+0x82/0x1a0 [161846.962745] [] dev_hard_start_xmit+0x50/0xa0 [161846.962749] [] __dev_queue_xmit+0x4ee/0x5e0 [161846.962754] [] dev_queue_xmit_sk+0x13/0x20 [161846.962760] [] vlan_dev_hard_start_xmit+0xb2/0x150 [8021q] [161846.962764] [] xmit_one+0x82/0x1a0 [161846.962769] [] dev_hard_start_xmit+0x50/0xa0 [161846.962773] [] __dev_queue_xmit+0x4ee/0x5e0 [161846.962777] [] dev_queue_xmit_sk+0x13/0x20 [161846.962789] [] br_dev_queue_push_xmit+0x54/0xa0 [bridge] [161846.962797] [] br_forward_finish+0x2f/0x90 [bridge] [161846.962807] [] ? ttwu_do_wakeup+0x1d/0x100 [161846.962811] [] ? __alloc_skb+0x8b/0x1f0 [161846.962818] [] __br_forward+0x8d/0x120 [bridge] [161846.962822] [] ? __kmalloc_reserve+0x3b/0xa0 [161846.962829] [] ? update_rq_runnable_avg+0xee/0x230 [161846.962836] [] br_forward+0x96/0xb0 [bridge] [161846.962845] [] br_handle_frame_finish+0x1ae/0x420 [bridge] [161846.962853] [] br_handle_frame+0x17f/0x260 [bridge] [161846.962862] [] ? br_handle_frame_finish+0x420/0x420 [bridge] [161846.962867] [] __netif_receive_skb_core+0x1f7/0x870 [161846.962872] [] __netif_receive_skb+0x22/0x70 [161846.962877] [] netif_receive_skb_internal+0x23/0x90 [161846.962884] [] ? xenvif_idx_release+0xea/0x100 [xen_netback] [161846.962889] [] ? _raw_spin_unlock_irqrestore+0x20/0x50 [161846.962893] [] netif_receive_skb_sk+0x24/0x90 [161846.962899] [] xenvif_tx_submit+0x2ca/0x3f0 [xen_netback] [161846.962906] [] xenvif_tx_action+0x9c/0xd0 [xen_netback] [161846.962915] [] xenvif_poll+0x35/0x70 [xen_netback] [161846.962920] [] napi_poll+0xcb/0x1e0 [161846.962925] [] net_rx_action+0x90/0x1c0 [161846.962931] [] __do_softirq+0x10a/0x350 [161846.962938] [] irq_exit+0x125/0x130 [161846.962943] [] xen_evtchn_do_upcall+0x39/0x50 [161846.962950] [] xen_do_hypervisor_callback+0x1e/0x40 [161846.962952] [161846.962959] [] ? _raw_spin_lock+0x4a/0x80 [161846.962964] [] ? _raw_spin_lock_irqsave+0x1e/0xa0 [161846.962978] [] ? qlcnic_83xx_mailbox_worker+0xb9/0x2a0 [qlcnic] [161846.962991] [] ? process_one_work+0x151/0x4b0 [161846.962995] [] ? check_events+0x12/0x20 [161846.963001] [] ? worker_thread+0x120/0x480 [161846.963005] [] ? __schedule+0x30b/0x890 [161846.963010] [] ? process_one_work+0x4b0/0x4b0 [161846.963015] [] ? process_one_work+0x4b0/0x4b0 [161846.963021] [] ? kthread+0xce/0xf0 [161846.963025] [] ? kthread_freezable_should_stop+0x70/0x70 [161846.963031] [] ? ret_from_fork+0x42/0x70 [161846.963035] [] ? kthread_freezable_should_stop+0x70/0x70 [161846.963037] Code: cc 51 41 53 b8 1c 00 00 00 0f 05 41 5b 59 c3 cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 51 41 53 b8 1d 00 00 00 0f 05 <41> 5b 59 c3 cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc Signed-off-by: Junxiao Bi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index bdbcd2b088a0..c3c28f0960e5 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -3849,7 +3849,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) struct list_head *head = &mbx->cmd_q; struct qlcnic_cmd_args *cmd = NULL; - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); while (!list_empty(head)) { cmd = list_entry(head->next, struct qlcnic_cmd_args, list); @@ -3860,7 +3860,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) qlcnic_83xx_notify_cmd_completion(adapter, cmd); } - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); } static int qlcnic_83xx_check_mbx_status(struct qlcnic_adapter *adapter) @@ -3896,12 +3896,12 @@ static void qlcnic_83xx_dequeue_mbx_cmd(struct qlcnic_adapter *adapter, { struct qlcnic_mailbox *mbx = adapter->ahw->mailbox; - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); list_del(&cmd->list); mbx->num_cmds--; - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); qlcnic_83xx_notify_cmd_completion(adapter, cmd); } @@ -3966,7 +3966,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, init_completion(&cmd->completion); cmd->rsp_opcode = QLC_83XX_MBX_RESPONSE_UNKNOWN; - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); list_add_tail(&cmd->list, &mbx->cmd_q); mbx->num_cmds++; @@ -3974,7 +3974,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, *timeout = cmd->total_cmds * QLC_83XX_MBX_TIMEOUT; queue_work(mbx->work_q, &mbx->work); - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); return 0; } @@ -4070,15 +4070,15 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work) mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT; spin_unlock_irqrestore(&mbx->aen_lock, flags); - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); if (list_empty(head)) { - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); return; } cmd = list_entry(head->next, struct qlcnic_cmd_args, list); - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); mbx_ops->encode_cmd(adapter, cmd); mbx_ops->nofity_fw(adapter, QLC_83XX_MBX_REQUEST); -- GitLab From 9f2f873d5a1c3f6b22b18db0e4c9f3a30164b8f0 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Tue, 30 Jan 2018 14:12:55 +0100 Subject: [PATCH 1203/1398] qmi_wwan: Add support for Quectel EP06 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c0b91a56a2e57a5a370655b25d677ae0ebf8a2d0 ] The Quectel EP06 is a Cat. 6 LTE modem. It uses the same interface as the EC20/EC25 for QMI, and requires the same "set DTR"-quirk to work. Signed-off-by: Kristian Evensen Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index db65d9ad4488..e1e5e8438457 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -944,6 +944,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ -- GitLab From 5db5cabbf09dd793bf42e65cea9abe66fad62c13 Mon Sep 17 00:00:00 2001 From: Chunhao Lin Date: Wed, 31 Jan 2018 01:32:36 +0800 Subject: [PATCH 1204/1398] r8169: fix RTL8168EP take too long to complete driver initialization. [ Upstream commit 086ca23d03c0d2f4088f472386778d293e15c5f6 ] Driver check the wrong register bit in rtl_ocp_tx_cond() that keep driver waiting until timeout. Fix this by waiting for the right register bit. Signed-off-by: Chunhao Lin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 298b74ebc1e9..18e68c91e651 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1387,7 +1387,7 @@ DECLARE_RTL_COND(rtl_ocp_tx_cond) { void __iomem *ioaddr = tp->mmio_addr; - return RTL_R8(IBISR0) & 0x02; + return RTL_R8(IBISR0) & 0x20; } static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) @@ -1395,7 +1395,7 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) void __iomem *ioaddr = tp->mmio_addr; RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01); - rtl_msleep_loop_wait_low(tp, &rtl_ocp_tx_cond, 50, 2000); + rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000); RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20); RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01); } -- GitLab From ee46a8614204e136e7ec6426e9c696bd21901405 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 26 Jan 2018 16:40:41 +0800 Subject: [PATCH 1205/1398] tcp: release sk_frag.page in tcp_disconnect [ Upstream commit 9b42d55a66d388e4dd5550107df051a9637564fc ] socket can be disconnected and gets transformed back to a listening socket, if sk_frag.page is not released, which will be cloned into a new socket by sk_clone_lock, but the reference count of this page is increased, lead to a use after free or double free issue Signed-off-by: Li RongQing Cc: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7efa6b062049..0d1a767db1bb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2316,6 +2316,12 @@ int tcp_disconnect(struct sock *sk, int flags) WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + sk->sk_frag.offset = 0; + } + sk->sk_error_report(sk); return err; } -- GitLab From 73adb3b74efd2809735d2063b8fdaaf762bdd071 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 25 Jan 2018 22:03:52 +0800 Subject: [PATCH 1206/1398] vhost_net: stop device during reset owner [ Upstream commit 4cd879515d686849eec5f718aeac62a70b067d82 ] We don't stop device before reset owner, this means we could try to serve any virtqueue kick before reset dev->worker. This will result a warn since the work was pending at llist during owner resetting. Fix this by stopping device during owner reset. Reported-by: syzbot+eb17c6162478cc50632c@syzkaller.appspotmail.com Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server") Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 96a0661011fd..e5b7652234fc 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1078,6 +1078,7 @@ static long vhost_net_reset_owner(struct vhost_net *n) } vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); + vhost_dev_stop(&n->dev); vhost_dev_reset_owner(&n->dev, umem); vhost_net_vq_reset(n); done: -- GitLab From b980f718f52561b58072e0e675320fd21d71b94a Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 31 Jan 2018 15:43:05 -0500 Subject: [PATCH 1207/1398] tcp_bbr: fix pacing_gain to always be unity when using lt_bw [ Upstream commit 3aff3b4b986e51bcf4ab249e5d48d39596e0df6a ] This commit fixes the pacing_gain to remain at BBR_UNIT (1.0) when using lt_bw and returning from the PROBE_RTT state to PROBE_BW. Previously, when using lt_bw, upon exiting PROBE_RTT and entering PROBE_BW the bbr_reset_probe_bw_mode() code could sometimes randomly end up with a cycle_idx of 0 and hence have bbr_advance_cycle_phase() set a pacing gain above 1.0. In such cases this would result in a pacing rate that is 1.25x higher than intended, potentially resulting in a high loss rate for a little while until we stop using the lt_bw a bit later. This commit is a stable candidate for kernels back as far as 4.9. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Reported-by: Beyers Cronje Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index e86a34fd5484..8ec60532be2b 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -452,7 +452,8 @@ static void bbr_advance_cycle_phase(struct sock *sk) bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); bbr->cycle_mstamp = tp->delivered_mstamp; - bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; + bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT : + bbr_pacing_gain[bbr->cycle_idx]; } /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ @@ -461,8 +462,7 @@ static void bbr_update_cycle_phase(struct sock *sk, { struct bbr *bbr = inet_csk_ca(sk); - if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw && - bbr_is_next_cycle_phase(sk, rs)) + if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs)) bbr_advance_cycle_phase(sk); } -- GitLab From fa46d1437fabe9da3a3c084cef6ac310ecadb9f8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 2 Feb 2018 16:02:22 +0100 Subject: [PATCH 1208/1398] cls_u32: add missing RCU annotation. [ Upstream commit 058a6c033488494a6b1477b05fe8e1a16e344462 ] In a couple of points of the control path, n->ht_down is currently accessed without the required RCU annotation. The accesses are safe, but sparse complaints. Since we already held the rtnl lock, let use rtnl_dereference(). Fixes: a1b7c5fd7fe9 ("net: sched: add cls_u32 offload hooks for netdevs") Fixes: de5df63228fc ("net: sched: cls_u32 changes to knode must appear atomic to readers") Signed-off-by: Paolo Abeni Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_u32.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index ae83c3aec308..da574a16e7b3 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -496,6 +496,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, u32 flags) { + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; struct tc_to_netdev offload; @@ -520,7 +521,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, offload.cls_u32->knode.sel = &n->sel; offload.cls_u32->knode.exts = &n->exts; if (n->ht_down) - offload.cls_u32->knode.link_handle = n->ht_down->handle; + offload.cls_u32->knode.link_handle = ht->handle; err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); @@ -788,8 +789,9 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, struct tc_u_knode *n) { - struct tc_u_knode *new; + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); struct tc_u32_sel *s = &n->sel; + struct tc_u_knode *new; new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); @@ -807,11 +809,11 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, new->fshift = n->fshift; new->res = n->res; new->flags = n->flags; - RCU_INIT_POINTER(new->ht_down, n->ht_down); + RCU_INIT_POINTER(new->ht_down, ht); /* bump reference count as long as we hold pointer to structure */ - if (new->ht_down) - new->ht_down->refcnt++; + if (ht) + ht->refcnt++; #ifdef CONFIG_CLS_U32_PERF /* Statistics may be incremented by readers during update -- GitLab From 5771415d24bf66f199a4a16d900d2c3afaaa6423 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 24 Jan 2018 23:15:27 -0800 Subject: [PATCH 1209/1398] ipv6: Fix SO_REUSEPORT UDP socket with implicit sk_ipv6only [ Upstream commit 7ece54a60ee2ba7a386308cae73c790bd580589c ] If a sk_v6_rcv_saddr is !IPV6_ADDR_ANY and !IPV6_ADDR_MAPPED, it implicitly implies it is an ipv6only socket. However, in inet6_bind(), this addr_type checking and setting sk->sk_ipv6only to 1 are only done after sk->sk_prot->get_port(sk, snum) has been completed successfully. This inconsistency between sk_v6_rcv_saddr and sk_ipv6only confuses the 'get_port()'. In particular, when binding SO_REUSEPORT UDP sockets, udp_reuseport_add_sock(sk,...) is called. udp_reuseport_add_sock() checks "ipv6_only_sock(sk2) == ipv6_only_sock(sk)" before adding sk to sk2->sk_reuseport_cb. In this case, ipv6_only_sock(sk2) could be 1 while ipv6_only_sock(sk) is still 0 here. The end result is, reuseport_alloc(sk) is called instead of adding sk to the existing sk2->sk_reuseport_cb. It can be reproduced by binding two SO_REUSEPORT UDP sockets on an IPv6 address (!ANY and !MAPPED). Only one of the socket will receive packet. The fix is to set the implicit sk_ipv6only before calling get_port(). The original sk_ipv6only has to be saved such that it can be restored in case get_port() failed. The situation is similar to the inet_reset_saddr(sk) after get_port() has failed. Thanks to Calvin Owens who created an easy reproduction which leads to a fix. Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection") Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/af_inet6.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 5cad76f87536..421379014995 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -274,6 +274,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net *net = sock_net(sk); __be32 v4addr = 0; unsigned short snum; + bool saved_ipv6only; int addr_type = 0; int err = 0; @@ -378,19 +379,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; + saved_ipv6only = sk->sk_ipv6only; + if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED) + sk->sk_ipv6only = 1; + /* Make sure we are allowed to bind here. */ if ((snum || !inet->bind_address_no_port) && sk->sk_prot->get_port(sk, snum)) { + sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); err = -EADDRINUSE; goto out; } - if (addr_type != IPV6_ADDR_ANY) { + if (addr_type != IPV6_ADDR_ANY) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; - if (addr_type != IPV6_ADDR_MAPPED) - sk->sk_ipv6only = 1; - } if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; inet->inet_sport = htons(inet->inet_num); -- GitLab From b671f40419bb0d59c5af69c17af5a86bc467a273 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Feb 2018 10:27:27 -0800 Subject: [PATCH 1210/1398] soreuseport: fix mem leak in reuseport_add_sock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4db428a7c9ab07e08783e0fcdc4ca0f555da0567 ] reuseport_add_sock() needs to deal with attaching a socket having its own sk_reuseport_cb, after a prior setsockopt(SO_ATTACH_REUSEPORT_?BPF) Without this fix, not only a WARN_ONCE() was issued, but we were also leaking memory. Thanks to sysbot and Eric Biggers for providing us nice C repros. ------------[ cut here ]------------ socket already in reuseport group WARNING: CPU: 0 PID: 3496 at net/core/sock_reuseport.c:119   reuseport_add_sock+0x742/0x9b0 net/core/sock_reuseport.c:117 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 3496 Comm: syzkaller869503 Not tainted 4.15.0-rc6+ #245 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS   Google 01/01/2011 Call Trace:   __dump_stack lib/dump_stack.c:17 [inline]   dump_stack+0x194/0x257 lib/dump_stack.c:53   panic+0x1e4/0x41c kernel/panic.c:183   __warn+0x1dc/0x200 kernel/panic.c:547   report_bug+0x211/0x2d0 lib/bug.c:184   fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178   fixup_bug arch/x86/kernel/traps.c:247 [inline]   do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296   do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315   invalid_op+0x22/0x40 arch/x86/entry/entry_64.S:1079 Fixes: ef456144da8e ("soreuseport: define reuseport groups") Signed-off-by: Eric Dumazet Reported-by: syzbot+c0ea2226f77a42936bf7@syzkaller.appspotmail.com Acked-by: Craig Gallek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock_reuseport.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 77f396b679ce..5dce4291f0ed 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -93,6 +93,16 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) return more_reuse; } +static void reuseport_free_rcu(struct rcu_head *head) +{ + struct sock_reuseport *reuse; + + reuse = container_of(head, struct sock_reuseport, rcu); + if (reuse->prog) + bpf_prog_destroy(reuse->prog); + kfree(reuse); +} + /** * reuseport_add_sock - Add a socket to the reuseport group of another. * @sk: New socket to add to the group. @@ -101,7 +111,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) */ int reuseport_add_sock(struct sock *sk, struct sock *sk2) { - struct sock_reuseport *reuse; + struct sock_reuseport *old_reuse, *reuse; if (!rcu_access_pointer(sk2->sk_reuseport_cb)) { int err = reuseport_alloc(sk2); @@ -112,10 +122,13 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) spin_lock_bh(&reuseport_lock); reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, - lockdep_is_held(&reuseport_lock)), - WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, - lockdep_is_held(&reuseport_lock)), - "socket already in reuseport group"); + lockdep_is_held(&reuseport_lock)); + old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + if (old_reuse && old_reuse->num_socks != 1) { + spin_unlock_bh(&reuseport_lock); + return -EBUSY; + } if (reuse->num_socks == reuse->max_socks) { reuse = reuseport_grow(reuse); @@ -133,19 +146,11 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) spin_unlock_bh(&reuseport_lock); + if (old_reuse) + call_rcu(&old_reuse->rcu, reuseport_free_rcu); return 0; } -static void reuseport_free_rcu(struct rcu_head *head) -{ - struct sock_reuseport *reuse; - - reuse = container_of(head, struct sock_reuseport, rcu); - if (reuse->prog) - bpf_prog_destroy(reuse->prog); - kfree(reuse); -} - void reuseport_detach_sock(struct sock *sk) { struct sock_reuseport *reuse; -- GitLab From 0a9b2dec6c12e30895ab7478000420ff2e811dce Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 28 Sep 2017 16:58:26 -0500 Subject: [PATCH 1211/1398] x86/asm: Fix inline asm call constraints for GCC 4.4 commit 520a13c530aeb5f63e011d668c42db1af19ed349 upstream. The kernel test bot (run by Xiaolong Ye) reported that the following commit: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") is causing double faults in a kernel compiled with GCC 4.4. Linus subsequently diagnosed the crash pattern and the buggy commit and found that the issue is with this code: register unsigned int __asm_call_sp asm("esp"); #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp) Even on a 64-bit kernel, it's using ESP instead of RSP. That causes GCC to produce the following bogus code: ffffffff8147461d: 89 e0 mov %esp,%eax ffffffff8147461f: 4c 89 f7 mov %r14,%rdi ffffffff81474622: 4c 89 fe mov %r15,%rsi ffffffff81474625: ba 20 00 00 00 mov $0x20,%edx ffffffff8147462a: 89 c4 mov %eax,%esp ffffffff8147462c: e8 bf 52 05 00 callq ffffffff814c98f0 Despite the absurdity of it backing up and restoring the stack pointer for no reason, the bug is actually the fact that it's only backing up and restoring the lower 32 bits of the stack pointer. The upper 32 bits are getting cleared out, corrupting the stack pointer. So change the '__asm_call_sp' register variable to be associated with the actual full-size stack pointer. This also requires changing the __ASM_SEL() macro to be based on the actual compiled arch size, rather than the CONFIG value, because CONFIG_X86_64 compiles some files with '-m32' (e.g., realmode and vdso). Otherwise Clang fails to build the kernel because it complains about the use of a 64-bit register (RSP) in a 32-bit file. Reported-and-Bisected-and-Tested-by: kernel test robot Diagnosed-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dmitriy Vyukov Cc: LKP Cc: Linus Torvalds Cc: Matthias Kaehlcke Cc: Miguel Bernal Marin Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") Link: http://lkml.kernel.org/r/20170928215826.6sdpmwtkiydiytim@treble Signed-off-by: Ingo Molnar Cc: Matthias Kaehlcke Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 00523524edbf..7bb29a416b77 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -11,10 +11,12 @@ # define __ASM_FORM_COMMA(x) " " #x "," #endif -#ifdef CONFIG_X86_32 +#ifndef __x86_64__ +/* 32 bit */ # define __ASM_SEL(a,b) __ASM_FORM(a) # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) #else +/* 64 bit */ # define __ASM_SEL(a,b) __ASM_FORM(b) # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) #endif -- GitLab From dd7b14c3e05ef49bc2062ccc18a703604063fe1d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 18 Dec 2016 17:44:13 +0100 Subject: [PATCH 1212/1398] x86/microcode/AMD: Do not load when running on a hypervisor commit a15a753539eca8ba243d576f02e7ca9c4b7d7042 upstream. Doing so is completely void of sense for multiple reasons so prevent it. Set dis_ucode_ldr to true and thus disable the microcode loader by default to address xen pv guests which execute the AP path but not the BSP path. By having it turned off by default, the APs won't run into the loader either. Also, check CPUID(1).ECX[31] which hypervisors set. Well almost, not the xen pv one. That one gets the aforementioned "fix". Also, improve the detection method by caching the final decision whether to continue loading in dis_ucode_ldr and do it once on the BSP. The APs then simply test that value. Signed-off-by: Borislav Petkov Tested-by: Juergen Gross Tested-by: Boris Ostrovsky Acked-by: Juergen Gross Link: http://lkml.kernel.org/r/20161218164414.9649-4-bp@alien8.de Signed-off-by: Thomas Gleixner Cc: Rolf Neugebauer Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/core.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 5ce5155f0695..dc0b9f8763ad 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -43,7 +43,7 @@ #define MICROCODE_VERSION "2.01" static struct microcode_ops *microcode_ops; -static bool dis_ucode_ldr; +static bool dis_ucode_ldr = true; /* * Synchronization. @@ -73,6 +73,7 @@ struct cpu_info_ctx { static bool __init check_loader_disabled_bsp(void) { static const char *__dis_opt_str = "dis_ucode_ldr"; + u32 a, b, c, d; #ifdef CONFIG_X86_32 const char *cmdline = (const char *)__pa_nodebug(boot_command_line); @@ -85,8 +86,23 @@ static bool __init check_loader_disabled_bsp(void) bool *res = &dis_ucode_ldr; #endif - if (cmdline_find_option_bool(cmdline, option)) - *res = true; + if (!have_cpuid_p()) + return *res; + + a = 1; + c = 0; + native_cpuid(&a, &b, &c, &d); + + /* + * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not + * completely accurate as xen pv guests don't see that CPUID bit set but + * that's good enough as they don't land on the BSP path anyway. + */ + if (c & BIT(31)) + return *res; + + if (cmdline_find_option_bool(cmdline, option) <= 0) + *res = false; return *res; } @@ -118,9 +134,6 @@ void __init load_ucode_bsp(void) if (check_loader_disabled_bsp()) return; - if (!have_cpuid_p()) - return; - vendor = x86_cpuid_vendor(); family = x86_cpuid_family(); @@ -154,9 +167,6 @@ void load_ucode_ap(void) if (check_loader_disabled_ap()) return; - if (!have_cpuid_p()) - return; - vendor = x86_cpuid_vendor(); family = x86_cpuid_family(); -- GitLab From 113d22965c78a79210d4da2d455dc9bcff5e2fb6 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 15:56:28 -0500 Subject: [PATCH 1213/1398] media: soc_camera: soc_scale_crop: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 5331aec1bf9c9da557668174e0a4bfcee39f1121 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/media/platform/soc_camera/soc_scale_crop.o see include/linux/module.h for more information This adds the license as "GPL", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/soc_camera/soc_scale_crop.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/media/platform/soc_camera/soc_scale_crop.c b/drivers/media/platform/soc_camera/soc_scale_crop.c index f77252d6ccd3..d29c24854c2c 100644 --- a/drivers/media/platform/soc_camera/soc_scale_crop.c +++ b/drivers/media/platform/soc_camera/soc_scale_crop.c @@ -418,3 +418,7 @@ void soc_camera_calc_client_output(struct soc_camera_device *icd, mf->height = soc_camera_shift_scale(rect->height, shift, scale_v); } EXPORT_SYMBOL(soc_camera_calc_client_output); + +MODULE_DESCRIPTION("soc-camera scaling-cropping functions"); +MODULE_AUTHOR("Guennadi Liakhovetski "); +MODULE_LICENSE("GPL"); -- GitLab From 0a01ecbd23a9547b60b1a1d7e83f60704b176925 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 May 2017 11:27:52 +0200 Subject: [PATCH 1214/1398] b43: Add missing MODULE_FIRMWARE() commit 3c89a72ad80c64bdbd5ff851ee9c328a191f7e01 upstream. Some firmware entries were forgotten to be added via MODULE_FIRMWARE(), which may result in the non-functional state when the driver is loaded in initrd. Link: http://bugzilla.opensuse.org/show_bug.cgi?id=1037344 Fixes: 15be8e89cdd9 ("b43: add more bcma cores") Signed-off-by: Takashi Iwai Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/b43/main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index 6e5d9095b195..a635fc6b1722 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -71,8 +71,18 @@ MODULE_FIRMWARE("b43/ucode11.fw"); MODULE_FIRMWARE("b43/ucode13.fw"); MODULE_FIRMWARE("b43/ucode14.fw"); MODULE_FIRMWARE("b43/ucode15.fw"); +MODULE_FIRMWARE("b43/ucode16_lp.fw"); MODULE_FIRMWARE("b43/ucode16_mimo.fw"); +MODULE_FIRMWARE("b43/ucode24_lcn.fw"); +MODULE_FIRMWARE("b43/ucode25_lcn.fw"); +MODULE_FIRMWARE("b43/ucode25_mimo.fw"); +MODULE_FIRMWARE("b43/ucode26_mimo.fw"); +MODULE_FIRMWARE("b43/ucode29_mimo.fw"); +MODULE_FIRMWARE("b43/ucode33_lcn40.fw"); +MODULE_FIRMWARE("b43/ucode30_mimo.fw"); MODULE_FIRMWARE("b43/ucode5.fw"); +MODULE_FIRMWARE("b43/ucode40.fw"); +MODULE_FIRMWARE("b43/ucode42.fw"); MODULE_FIRMWARE("b43/ucode9.fw"); static int modparam_bad_frames_preempt; -- GitLab From 9692602ab850eec484d8cc5a740803d34f00016c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 8 Jun 2017 14:48:18 +0100 Subject: [PATCH 1215/1398] KEYS: encrypted: fix buffer overread in valid_master_desc() commit 794b4bc292f5d31739d89c0202c54e7dc9bc3add upstream. With the 'encrypted' key type it was possible for userspace to provide a data blob ending with a master key description shorter than expected, e.g. 'keyctl add encrypted desc "new x" @s'. When validating such a master key description, validate_master_desc() could read beyond the end of the buffer. Fix this by using strncmp() instead of memcmp(). [Also clean up the code to deduplicate some logic.] Cc: Mimi Zohar Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: James Morris Signed-off-by: Jin Qian Signed-off-by: Greg Kroah-Hartman --- security/keys/encrypted-keys/encrypted.c | 31 ++++++++++++------------ 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index a871159bf03c..ead2fd60244d 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -141,23 +141,22 @@ static int valid_ecryptfs_desc(const char *ecryptfs_desc) */ static int valid_master_desc(const char *new_desc, const char *orig_desc) { - if (!memcmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) { - if (strlen(new_desc) == KEY_TRUSTED_PREFIX_LEN) - goto out; - if (orig_desc) - if (memcmp(new_desc, orig_desc, KEY_TRUSTED_PREFIX_LEN)) - goto out; - } else if (!memcmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) { - if (strlen(new_desc) == KEY_USER_PREFIX_LEN) - goto out; - if (orig_desc) - if (memcmp(new_desc, orig_desc, KEY_USER_PREFIX_LEN)) - goto out; - } else - goto out; + int prefix_len; + + if (!strncmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) + prefix_len = KEY_TRUSTED_PREFIX_LEN; + else if (!strncmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) + prefix_len = KEY_USER_PREFIX_LEN; + else + return -EINVAL; + + if (!new_desc[prefix_len]) + return -EINVAL; + + if (orig_desc && strncmp(new_desc, orig_desc, prefix_len)) + return -EINVAL; + return 0; -out: - return -EINVAL; } /* -- GitLab From 734e687d1d7bcddf2548eeb5a4935a186f452b69 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 22 Jan 2018 17:09:34 -0500 Subject: [PATCH 1216/1398] x86/retpoline: Remove the esp/rsp thunk (cherry picked from commit 1df37383a8aeabb9b418698f0bcdffea01f4b1b2) It doesn't make sense to have an indirect call thunk with esp/rsp as retpoline code won't work correctly with the stack pointer register. Removing it will help compiler writers to catch error in case such a thunk call is emitted incorrectly. Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support") Suggested-by: Jeff Law Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Tom Lendacky Cc: Kees Cook Cc: Andi Kleen Cc: Tim Chen Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Josh Poimboeuf Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1516658974-27852-1-git-send-email-longman@redhat.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm-prototypes.h | 1 - arch/x86/lib/retpoline.S | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index b15aa4083dfd..5a25ada75aeb 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -37,5 +37,4 @@ INDIRECT_THUNK(dx) INDIRECT_THUNK(si) INDIRECT_THUNK(di) INDIRECT_THUNK(bp) -INDIRECT_THUNK(sp) #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index dfb2ba91b670..c909961e678a 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -36,7 +36,6 @@ GENERATE_THUNK(_ASM_DX) GENERATE_THUNK(_ASM_SI) GENERATE_THUNK(_ASM_DI) GENERATE_THUNK(_ASM_BP) -GENERATE_THUNK(_ASM_SP) #ifdef CONFIG_64BIT GENERATE_THUNK(r8) GENERATE_THUNK(r9) -- GitLab From fea3c9a54012227b30e73f5fb3d132c103c7aa84 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jan 2018 10:58:13 +0100 Subject: [PATCH 1217/1398] KVM: x86: Make indirect calls in emulator speculation safe (cherry picked from commit 1a29b5b7f347a1a9230c1e0af5b37e3e571588ab) Replace the indirect calls with CALL_NOSPEC. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: David Woodhouse Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Ashok Raj Cc: Greg KH Cc: Jun Nakajima Cc: David Woodhouse Cc: Linus Torvalds Cc: rga@amazon.de Cc: Dave Hansen Cc: Asit Mallick Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Jason Baron Cc: Paolo Bonzini Cc: Dan Williams Cc: Arjan Van De Ven Cc: Tim Chen Link: https://lkml.kernel.org/r/20180125095843.595615683@infradead.org [dwmw2: Use ASM_CALL_CONSTRAINT like upstream, now we have it] Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 6f5a3b076341..c8d573822e60 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "x86.h" #include "tss.h" @@ -1012,8 +1013,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; - asm("push %[flags]; popf; call *%[fastop]" - : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); + asm("push %[flags]; popf; " CALL_NOSPEC + : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); return rc; } @@ -5306,15 +5307,14 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) { - register void *__sp asm(_ASM_SP); ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; if (!(ctxt->d & ByteOp)) fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; - asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" + asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), - [fastop]"+S"(fop), "+r"(__sp) + [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT : "c"(ctxt->src2.val)); ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); -- GitLab From ec86a1dad0c00757ceb49e86b4c10dd7fbc380cb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jan 2018 10:58:14 +0100 Subject: [PATCH 1218/1398] KVM: VMX: Make indirect call speculation safe (cherry picked from commit c940a3fb1e2e9b7d03228ab28f375fb5a47ff699) Replace indirect call with CALL_NOSPEC. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: David Woodhouse Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Ashok Raj Cc: Greg KH Cc: Jun Nakajima Cc: David Woodhouse Cc: Linus Torvalds Cc: rga@amazon.de Cc: Dave Hansen Cc: Asit Mallick Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Jason Baron Cc: Paolo Bonzini Cc: Dan Williams Cc: Arjan Van De Ven Cc: Tim Chen Link: https://lkml.kernel.org/r/20180125095843.645776917@infradead.org Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 178a344f55f8..fcdcc8802677 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8676,14 +8676,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) #endif "pushf\n\t" __ASM_SIZE(push) " $%c[cs]\n\t" - "call *%[entry]\n\t" + CALL_NOSPEC : #ifdef CONFIG_X86_64 [sp]"=&r"(tmp), #endif "+r"(__sp) : - [entry]"r"(entry), + THUNK_TARGET(entry), [ss]"i"(__KERNEL_DS), [cs]"i"(__KERNEL_CS) ); -- GitLab From a1745ad92f50e95b6c2d101cd9b77253969ddccc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 25 Jan 2018 15:50:28 -0800 Subject: [PATCH 1219/1398] module/retpoline: Warn about missing retpoline in module (cherry picked from commit caf7501a1b4ec964190f31f9c3f163de252273b8) There's a risk that a kernel which has full retpoline mitigations becomes vulnerable when a module gets loaded that hasn't been compiled with the right compiler or the right option. To enable detection of that mismatch at module load time, add a module info string "retpoline" at build time when the module was compiled with retpoline support. This only covers compiled C source, but assembler source or prebuilt object files are not checked. If a retpoline enabled kernel detects a non retpoline protected module at load time, print a warning and report it in the sysfs vulnerability file. [ tglx: Massaged changelog ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: jeyu@kernel.org Cc: arjan@linux.intel.com Link: https://lkml.kernel.org/r/20180125235028.31211-1-andi@firstfloor.org Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 17 ++++++++++++++++- include/linux/module.h | 9 +++++++++ kernel/module.c | 11 +++++++++++ scripts/mod/modpost.c | 9 +++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8cacf62ec458..4cea7d49b363 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -92,6 +93,19 @@ static const char *spectre_v2_strings[] = { #define pr_fmt(fmt) "Spectre V2 mitigation: " fmt static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; +static bool spectre_v2_bad_module; + +#ifdef RETPOLINE +bool retpoline_module_ok(bool has_retpoline) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) + return true; + + pr_err("System may be vunerable to spectre v2\n"); + spectre_v2_bad_module = true; + return false; +} +#endif static void __init spec2_print_if_insecure(const char *reason) { @@ -277,6 +291,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); + return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], + spectre_v2_bad_module ? " - vulnerable module loaded" : ""); } #endif diff --git a/include/linux/module.h b/include/linux/module.h index 0c3207d26ac0..d2224a09b4b5 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -791,6 +791,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ +#ifdef RETPOLINE +extern bool retpoline_module_ok(bool has_retpoline); +#else +static inline bool retpoline_module_ok(bool has_retpoline) +{ + return true; +} +#endif + #ifdef CONFIG_MODULE_SIG static inline bool module_sig_ok(struct module *module) { diff --git a/kernel/module.c b/kernel/module.c index 0e54d5bf0097..07bfb9971f2f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2817,6 +2817,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) } #endif /* CONFIG_LIVEPATCH */ +static void check_modinfo_retpoline(struct module *mod, struct load_info *info) +{ + if (retpoline_module_ok(get_modinfo(info, "retpoline"))) + return; + + pr_warn("%s: loading module not compiled with retpoline compiler.\n", + mod->name); +} + /* Sets info->hdr and info->len. */ static int copy_module_from_user(const void __user *umod, unsigned long len, struct load_info *info) @@ -2969,6 +2978,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); } + check_modinfo_retpoline(mod, info); + if (get_modinfo(info, "staging")) { add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); pr_warn("%s: module is from the staging directory, the quality " diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 845eb9b800f3..238db4ffd30c 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2130,6 +2130,14 @@ static void add_intree_flag(struct buffer *b, int is_intree) buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); } +/* Cannot check for assembler */ +static void add_retpoline(struct buffer *b) +{ + buf_printf(b, "\n#ifdef RETPOLINE\n"); + buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); + buf_printf(b, "#endif\n"); +} + static void add_staging_flag(struct buffer *b, const char *name) { static const char *staging_dir = "drivers/staging"; @@ -2474,6 +2482,7 @@ int main(int argc, char **argv) add_header(&buf, mod); add_intree_flag(&buf, !external_module); + add_retpoline(&buf); add_staging_flag(&buf, mod->name); err |= add_versions(&buf, mod); add_depends(&buf, mod, modules); -- GitLab From d3eba7744075dc55f367364b5ed2055b5e3d5687 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:09 +0000 Subject: [PATCH 1220/1398] x86/cpufeatures: Add CPUID_7_EDX CPUID leaf (cherry picked from commit 95ca0ee8636059ea2800dfbac9ecac6212d6b38f) This is a pure feature bits leaf. There are two AVX512 feature bits in it already which were handled as scattered bits, and three more from this leaf are going to be added for speculation control features. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeature.h | 7 +++++-- arch/x86/include/asm/cpufeatures.h | 10 ++++++---- arch/x86/include/asm/disabled-features.h | 3 ++- arch/x86/include/asm/required-features.h | 3 ++- arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/cpu/scattered.c | 2 -- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 9ea67a04ff4f..8c101579f535 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -28,6 +28,7 @@ enum cpuid_leafs CPUID_8000_000A_EDX, CPUID_7_ECX, CPUID_8000_0007_EBX, + CPUID_7_EDX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8537a21acd8b..9d4a422d5598 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -12,7 +12,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 18 /* N 32-bit words worth of info */ +#define NCAPINTS 19 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -197,9 +197,7 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ @@ -295,6 +293,10 @@ #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 21c5ac15657b..1f8cca459c6c 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -59,6 +59,7 @@ #define DISABLED_MASK15 0 #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE) #define DISABLED_MASK17 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define DISABLED_MASK18 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index fac9a5c0abe9..6847d85400a8 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -100,6 +100,7 @@ #define REQUIRED_MASK15 0 #define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define REQUIRED_MASK18 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d198ae02f2b7..426727318ea2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -737,6 +737,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); c->x86_capability[CPUID_7_0_EBX] = ebx; c->x86_capability[CPUID_7_ECX] = ecx; + c->x86_capability[CPUID_7_EDX] = edx; } /* Extended state features: level 0x0000000d */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index b0dd9aec183d..afbb52532791 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,8 +31,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, -- GitLab From 40532f65cccc5056b50cf1ab07a9a41445b24aa8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:10 +0000 Subject: [PATCH 1221/1398] x86/cpufeatures: Add Intel feature bits for Speculation Control (cherry picked from commit fc67dd70adb711a45d2ef34e12d1a8be75edde61) Add three feature bits exposed by new microcode on Intel CPUs for speculation control. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-3-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 9d4a422d5598..1f038882bf32 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -296,6 +296,9 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ /* * BUG word(s) -- GitLab From c26a6bea26b356fb3539cdf5b2e348a5e528aa7b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:11 +0000 Subject: [PATCH 1222/1398] x86/cpufeatures: Add AMD feature bits for Speculation Control (cherry picked from commit 5d10cbc91d9eb5537998b65608441b592eec65e7) AMD exposes the PRED_CMD/SPEC_CTRL MSRs slightly differently to Intel. See http://lkml.kernel.org/r/2b3e25cc-286d-8bd0-aeaf-9ac4aae39de8@amd.com Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: Tom Lendacky Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-4-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1f038882bf32..c4d03e70086b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -258,6 +258,9 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ +#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ +#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -- GitLab From af57d43c908fe9b06dc555b400eca68562262eca Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:12 +0000 Subject: [PATCH 1223/1398] x86/msr: Add definitions for new speculation control MSRs (cherry picked from commit 1e340c60d0dd3ae07b5bedc16a0469c14b9f3410) Add MSR and bit definitions for SPEC_CTRL, PRED_CMD and ARCH_CAPABILITIES. See Intel's 336996-Speculative-Execution-Side-Channel-Mitigations.pdf Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-5-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b11c4c072df8..c768bc1550a1 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -37,6 +37,13 @@ #define EFER_FFXSR (1<<_EFER_FFXSR) /* Intel MSRs. Some also available on other CPUs */ +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ +#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ + +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ +#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ + #define MSR_IA32_PERFCTR0 0x000000c1 #define MSR_IA32_PERFCTR1 0x000000c2 #define MSR_FSB_FREQ 0x000000cd @@ -50,6 +57,11 @@ #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) #define MSR_MTRRcap 0x000000fe + +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a +#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ + #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e -- GitLab From a8799fd14d9f7f385a5a5c86cde247caf4bb0320 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:13 +0000 Subject: [PATCH 1224/1398] x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown (cherry picked from commit fec9434a12f38d3aeafeb75711b71d8a1fdef621) Also, for CPUs which don't speculate at all, don't report that they're vulnerable to the Spectre variants either. Leave the cpu_no_meltdown[] match table with just X86_VENDOR_AMD in it for now, even though that could be done with a simple comparison, on the assumption that we'll have more to add. Based on suggestions from Dave Hansen and Alan Cox. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Acked-by: Dave Hansen Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-6-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 48 ++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 426727318ea2..cfa026f323cf 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -44,6 +44,8 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -838,6 +840,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } +static const __initdata struct x86_cpu_id cpu_no_speculation[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_CENTAUR, 5 }, + { X86_VENDOR_INTEL, 5 }, + { X86_VENDOR_NSC, 5 }, + { X86_VENDOR_ANY, 4 }, + {} +}; + +static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { + { X86_VENDOR_AMD }, + {} +}; + +static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) +{ + u64 ia32_cap = 0; + + if (x86_match_cpu(cpu_no_meltdown)) + return false; + + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + /* Rogue Data Cache Load? No! */ + if (ia32_cap & ARCH_CAP_RDCL_NO) + return false; + + return true; +} + /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -884,11 +921,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - if (c->x86_vendor != X86_VENDOR_AMD) - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + if (!x86_match_cpu(cpu_no_speculation)) { + if (cpu_vulnerable_to_meltdown(c)) + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + } fpu__init_system(c); -- GitLab From 6c5e49150a51b0210710246b61a972300a274dcd Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:14 +0000 Subject: [PATCH 1225/1398] x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes (cherry picked from commit a5b2966364538a0e68c9fa29bc0a3a1651799035) This doesn't refuse to load the affected microcodes; it just refuses to use the Spectre v2 mitigation features if they're detected, by clearing the appropriate feature bits. The AMD CPUID bits are handled here too, because hypervisors *may* have been exposing those bits even on Intel chips, for fine-grained control of what's available. It is non-trivial to use x86_match_cpu() for this table because that doesn't handle steppings. And the approach taken in commit bd9240a18 almost made me lose my lunch. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-7-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/intel-family.h | 7 ++- arch/x86/kernel/cpu/intel.c | 66 +++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 34a46dc076d3..75b748a1deb8 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -12,6 +12,7 @@ */ #define INTEL_FAM6_CORE_YONAH 0x0E + #define INTEL_FAM6_CORE2_MEROM 0x0F #define INTEL_FAM6_CORE2_MEROM_L 0x16 #define INTEL_FAM6_CORE2_PENRYN 0x17 @@ -21,6 +22,7 @@ #define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ #define INTEL_FAM6_NEHALEM_EP 0x1A #define INTEL_FAM6_NEHALEM_EX 0x2E + #define INTEL_FAM6_WESTMERE 0x25 #define INTEL_FAM6_WESTMERE_EP 0x2C #define INTEL_FAM6_WESTMERE_EX 0x2F @@ -36,9 +38,9 @@ #define INTEL_FAM6_HASWELL_GT3E 0x46 #define INTEL_FAM6_BROADWELL_CORE 0x3D -#define INTEL_FAM6_BROADWELL_XEON_D 0x56 #define INTEL_FAM6_BROADWELL_GT3E 0x47 #define INTEL_FAM6_BROADWELL_X 0x4F +#define INTEL_FAM6_BROADWELL_XEON_D 0x56 #define INTEL_FAM6_SKYLAKE_MOBILE 0x4E #define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E @@ -57,9 +59,10 @@ #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */ +#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ +#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A /* Xeon Phi */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fcd484d2bb03..4d23d787a217 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -61,6 +61,59 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) } } +/* + * Early microcode releases for the Spectre v2 mitigation were broken. + * Information taken from; + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf + * - https://kb.vmware.com/s/article/52345 + * - Microcode revisions observed in the wild + * - Release note from 20180108 microcode release + */ +struct sku_microcode { + u8 model; + u8 stepping; + u32 microcode; +}; +static const struct sku_microcode spectre_bad_microcodes[] = { + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, + { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, + { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, + { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, + { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, + { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, + { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, + { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, + { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, + /* Updated in the 20180108 release; blacklist until we know otherwise */ + { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, + /* Observed in the wild */ + { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, + { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, +}; + +static bool bad_spectre_microcode(struct cpuinfo_x86 *c) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { + if (c->x86_model == spectre_bad_microcodes[i].model && + c->x86_mask == spectre_bad_microcodes[i].stepping) + return (c->microcode <= spectre_bad_microcodes[i].microcode); + } + return false; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -87,6 +140,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); } + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_STIBP) || + cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || + cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { + pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); + clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_STIBP); + clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); + clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); + } + /* * Atom erratum AAE44/AAF40/AAG38/AAH41: * -- GitLab From 31fd9eda7f69e0be0d0410682fc0d4cd76fe3699 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:15 +0000 Subject: [PATCH 1226/1398] x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support (cherry picked from commit 20ffa1caecca4db8f79fe665acdeaa5af815a24d) Expose indirect_branch_prediction_barrier() for use in subsequent patches. [ tglx: Add IBPB status to spectre_v2 sysfs file ] Co-developed-by: KarimAllah Ahmed Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-8-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++ arch/x86/kernel/cpu/bugs.c | 10 +++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c4d03e70086b..390154576dad 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -202,6 +202,8 @@ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ +#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 4ad41087ce0e..34e384c7208f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -218,5 +218,18 @@ static inline void vmexit_fill_RSB(void) #endif } +static inline void indirect_branch_prediction_barrier(void) +{ + asm volatile(ALTERNATIVE("", + "movl %[msr], %%ecx\n\t" + "movl %[val], %%eax\n\t" + "movl $0, %%edx\n\t" + "wrmsr", + X86_FEATURE_IBPB) + : : [msr] "i" (MSR_IA32_PRED_CMD), + [val] "i" (PRED_CMD_IBPB) + : "eax", "ecx", "edx", "memory"); +} + #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4cea7d49b363..1c4b39de49bb 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -262,6 +262,13 @@ static void __init spectre_v2_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Filling RSB on context switch\n"); } + + /* Initialize Indirect Branch Prediction Barrier if supported */ + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || + boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { + setup_force_cpu_cap(X86_FEATURE_IBPB); + pr_info("Enabling Indirect Branch Prediction Barrier\n"); + } } #undef pr_fmt @@ -291,7 +298,8 @@ ssize_t cpu_show_spectre_v2(struct device *dev, if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "", spectre_v2_bad_module ? " - vulnerable module loaded" : ""); } #endif -- GitLab From 18bc71dff630283333ccea760efb398dbe282a72 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 26 Jan 2018 13:11:37 +0100 Subject: [PATCH 1227/1398] x86/nospec: Fix header guards names (cherry picked from commit 7a32fc51ca938e67974cbb9db31e1a43f98345a9) ... to adhere to the _ASM_X86_ naming scheme. No functional change. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: riel@redhat.com Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: David Woodhouse Cc: jikos@kernel.org Cc: luto@amacapital.net Cc: dave.hansen@intel.com Cc: torvalds@linux-foundation.org Cc: keescook@google.com Cc: Josh Poimboeuf Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Cc: pjt@google.com Link: https://lkml.kernel.org/r/20180126121139.31959-3-bp@alien8.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 34e384c7208f..865192a2cc31 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NOSPEC_BRANCH_H__ -#define __NOSPEC_BRANCH_H__ +#ifndef _ASM_X86_NOSPEC_BRANCH_H_ +#define _ASM_X86_NOSPEC_BRANCH_H_ #include #include @@ -232,4 +232,4 @@ static inline void indirect_branch_prediction_barrier(void) } #endif /* __ASSEMBLY__ */ -#endif /* __NOSPEC_BRANCH_H__ */ +#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ -- GitLab From 557cbfa2221110761d6c26085c6df43d48425598 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 26 Jan 2018 13:11:39 +0100 Subject: [PATCH 1228/1398] x86/bugs: Drop one "mitigation" from dmesg (cherry picked from commit 55fa19d3e51f33d9cd4056d25836d93abf9438db) Make [ 0.031118] Spectre V2 mitigation: Mitigation: Full generic retpoline into [ 0.031118] Spectre V2: Mitigation: Full generic retpoline to reduce the mitigation mitigations strings. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: riel@redhat.com Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: David Woodhouse Cc: jikos@kernel.org Cc: luto@amacapital.net Cc: dave.hansen@intel.com Cc: torvalds@linux-foundation.org Cc: keescook@google.com Cc: Josh Poimboeuf Cc: tim.c.chen@linux.intel.com Cc: pjt@google.com Link: https://lkml.kernel.org/r/20180126121139.31959-5-bp@alien8.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1c4b39de49bb..674ad4666f85 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -90,7 +90,7 @@ static const char *spectre_v2_strings[] = { }; #undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; static bool spectre_v2_bad_module; -- GitLab From 98911226d51ea50dd8cb33cfb5a90be39872401b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 27 Jan 2018 15:45:14 +0100 Subject: [PATCH 1229/1398] x86/cpu/bugs: Make retpoline module warning conditional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry picked from commit e383095c7fe8d218e00ec0f83e4b95ed4e627b02) If sysfs is disabled and RETPOLINE not defined: arch/x86/kernel/cpu/bugs.c:97:13: warning: ‘spectre_v2_bad_module’ defined but not used [-Wunused-variable] static bool spectre_v2_bad_module; Hide it. Fixes: caf7501a1b4e ("module/retpoline: Warn about missing retpoline in module") Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Andi Kleen Cc: David Woodhouse Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 674ad4666f85..efe55c58a921 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -93,9 +93,10 @@ static const char *spectre_v2_strings[] = { #define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; -static bool spectre_v2_bad_module; #ifdef RETPOLINE +static bool spectre_v2_bad_module; + bool retpoline_module_ok(bool has_retpoline) { if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) @@ -105,6 +106,13 @@ bool retpoline_module_ok(bool has_retpoline) spectre_v2_bad_module = true; return false; } + +static inline const char *spectre_v2_module_string(void) +{ + return spectre_v2_bad_module ? " - vulnerable module loaded" : ""; +} +#else +static inline const char *spectre_v2_module_string(void) { return ""; } #endif static void __init spec2_print_if_insecure(const char *reason) @@ -299,7 +307,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "", - spectre_v2_bad_module ? " - vulnerable module loaded" : ""); + boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "", + spectre_v2_module_string()); } #endif -- GitLab From 77b3b3ee238664df176ed44dd4658e578186c6d3 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jan 2018 16:24:32 +0000 Subject: [PATCH 1230/1398] x86/cpufeatures: Clean up Spectre v2 related CPUID flags (cherry picked from commit 2961298efe1ea1b6fc0d7ee8b76018fa6c0bcef2) We want to expose the hardware features simply in /proc/cpuinfo as "ibrs", "ibpb" and "stibp". Since AMD has separate CPUID bits for those, use them as the user-visible bits. When the Intel SPEC_CTRL bit is set which indicates both IBRS and IBPB capability, set those (AMD) bits accordingly. Likewise if the Intel STIBP bit is set, set the AMD STIBP that's used for the generic hardware capability. Hide the rest from /proc/cpuinfo by putting "" in the comments. Including RETPOLINE and RETPOLINE_AMD which shouldn't be visible there. There are patches to make the sysfs vulnerabilities information non-readable by non-root, and the same should apply to all information about which mitigations are actually in use. Those *shouldn't* appear in /proc/cpuinfo. The feature bit for whether IBPB is actually used, which is needed for ALTERNATIVEs, is renamed to X86_FEATURE_USE_IBPB. Originally-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1517070274-12128-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 18 ++++++++-------- arch/x86/include/asm/nospec-branch.h | 2 +- arch/x86/kernel/cpu/bugs.c | 7 +++---- arch/x86/kernel/cpu/intel.c | 31 +++++++++++++++++++--------- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 390154576dad..8eb23f5cf7f4 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -194,15 +194,15 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ -#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -260,9 +260,9 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ -#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ -#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ +#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -301,8 +301,8 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ /* diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 865192a2cc31..19ecb5446b30 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -225,7 +225,7 @@ static inline void indirect_branch_prediction_barrier(void) "movl %[val], %%eax\n\t" "movl $0, %%edx\n\t" "wrmsr", - X86_FEATURE_IBPB) + X86_FEATURE_USE_IBPB) : : [msr] "i" (MSR_IA32_PRED_CMD), [val] "i" (PRED_CMD_IBPB) : "eax", "ecx", "edx", "memory"); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index efe55c58a921..3a06718257bf 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -272,9 +272,8 @@ static void __init spectre_v2_select_mitigation(void) } /* Initialize Indirect Branch Prediction Barrier if supported */ - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || - boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { - setup_force_cpu_cap(X86_FEATURE_IBPB); + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); pr_info("Enabling Indirect Branch Prediction Barrier\n"); } } @@ -307,7 +306,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", spectre_v2_module_string()); } #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4d23d787a217..2e257f87c527 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -140,17 +140,28 @@ static void early_init_intel(struct cpuinfo_x86 *c) rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); } - if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || - cpu_has(c, X86_FEATURE_STIBP) || - cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || - cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || - cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { - pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); - clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + /* + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, + * and they also have a different bit for STIBP support. Also, + * a hypervisor might have set the individual AMD bits even on + * Intel CPUs, for finer-grained selection of what's available. + */ + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); + } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); + + /* Now if any of them are set, check the blacklist and clear the lot */ + if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || + cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { + pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); + clear_cpu_cap(c, X86_FEATURE_IBRS); + clear_cpu_cap(c, X86_FEATURE_IBPB); clear_cpu_cap(c, X86_FEATURE_STIBP); - clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); - clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); - clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); + clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); } /* -- GitLab From 77d1424d2fb84ba2d3ce60a0ec4c3913e2e59abd Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 27 Jan 2018 16:24:33 +0000 Subject: [PATCH 1231/1398] x86/retpoline: Simplify vmexit_fill_RSB() (cherry picked from commit 1dde7415e99933bb7293d6b2843752cbdb43ec11) Simplify it to call an asm-function instead of pasting 41 insn bytes at every call site. Also, add alignment to the macro as suggested here: https://support.google.com/faqs/answer/7625886 [dwmw2: Clean up comments, let it clobber %ebx and just tell the compiler] Signed-off-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1517070274-12128-3-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 3 +- arch/x86/entry/entry_64.S | 3 +- arch/x86/include/asm/asm-prototypes.h | 3 ++ arch/x86/include/asm/nospec-branch.h | 70 +++------------------------ arch/x86/lib/Makefile | 1 + arch/x86/lib/retpoline.S | 56 +++++++++++++++++++++ 6 files changed, 71 insertions(+), 65 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index a76dc738ec61..f5434b4670c1 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -237,7 +237,8 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + /* Clobbers %ebx */ + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e729e1528584..d8bc3e001df2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -435,7 +435,8 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + /* Clobbers %rbx */ + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 5a25ada75aeb..166654218329 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -37,4 +37,7 @@ INDIRECT_THUNK(dx) INDIRECT_THUNK(si) INDIRECT_THUNK(di) INDIRECT_THUNK(bp) +asmlinkage void __fill_rsb(void); +asmlinkage void __clear_rsb(void); + #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 19ecb5446b30..df4ececa6ebf 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -7,50 +7,6 @@ #include #include -/* - * Fill the CPU return stack buffer. - * - * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; lfence; jmp' loop to capture speculative execution. - * - * This is required in various cases for retpoline and IBRS-based - * mitigations for the Spectre variant 2 vulnerability. Sometimes to - * eliminate potentially bogus entries from the RSB, and sometimes - * purely to ensure that it doesn't get empty, which on some CPUs would - * allow predictions from other (unwanted!) sources to be used. - * - * We define a CPP macro such that it can be used from both .S files and - * inline assembly. It's possible to do a .macro and then include that - * from C via asm(".include ") but let's not go there. - */ - -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ - -/* - * Google experimented with loop-unrolling and this turned out to be - * the optimal version — two calls, each with their own speculation - * trap should their return address end up getting used, in a loop. - */ -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ - mov $(nr/2), reg; \ -771: \ - call 772f; \ -773: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 773b; \ -772: \ - call 774f; \ -775: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 775b; \ -774: \ - dec reg; \ - jnz 771b; \ - add $(BITS_PER_LONG/8) * nr, sp; - #ifdef __ASSEMBLY__ /* @@ -121,17 +77,10 @@ #endif .endm - /* - * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP - * monstrosity above, manually. - */ -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +/* This clobbers the BX register */ +.macro FILL_RETURN_BUFFER nr:req ftr:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE "jmp .Lskip_rsb_\@", \ - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ - \ftr -.Lskip_rsb_\@: + ALTERNATIVE "", "call __clear_rsb", \ftr #endif .endm @@ -206,15 +155,10 @@ extern char __indirect_thunk_end[]; static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - unsigned long loops; - - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE("jmp 910f", - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), - X86_FEATURE_RETPOLINE) - "910:" - : "=r" (loops), ASM_CALL_CONSTRAINT - : : "memory" ); + alternative_input("", + "call __fill_rsb", + X86_FEATURE_RETPOLINE, + ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); #endif } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 6bf1898ddf49..4ad7c4dd311c 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -26,6 +26,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_RETPOLINE) += retpoline.o +OBJECT_FILES_NON_STANDARD_retpoline.o :=y obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index c909961e678a..480edc3a5e03 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -7,6 +7,7 @@ #include #include #include +#include .macro THUNK reg .section .text.__x86.indirect_thunk @@ -46,3 +47,58 @@ GENERATE_THUNK(r13) GENERATE_THUNK(r14) GENERATE_THUNK(r15) #endif + +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; lfence; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * Google experimented with loop-unrolling and this turned out to be + * the optimal version - two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +.macro STUFF_RSB nr:req sp:req + mov $(\nr / 2), %_ASM_BX + .align 16 +771: + call 772f +773: /* speculation trap */ + pause + lfence + jmp 773b + .align 16 +772: + call 774f +775: /* speculation trap */ + pause + lfence + jmp 775b + .align 16 +774: + dec %_ASM_BX + jnz 771b + add $((BITS_PER_LONG/8) * \nr), \sp +.endm + +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +ENTRY(__fill_rsb) + STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP + ret +END(__fill_rsb) +EXPORT_SYMBOL_GPL(__fill_rsb) + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ + +ENTRY(__clear_rsb) + STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP + ret +END(__clear_rsb) +EXPORT_SYMBOL_GPL(__clear_rsb) -- GitLab From 9eedeb72c4127726b2f3d62b4c7a163f67d721ab Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Tue, 30 Jan 2018 14:13:50 +0800 Subject: [PATCH 1232/1398] x86/spectre: Check CONFIG_RETPOLINE in command line parser (cherry picked from commit 9471eee9186a46893726e22ebb54cade3f9bc043) The spectre_v2 option 'auto' does not check whether CONFIG_RETPOLINE is enabled. As a consequence it fails to emit the appropriate warning and sets feature flags which have no effect at all. Add the missing IS_ENABLED() check. Fixes: da285121560e ("x86/spectre: Add boot time option to select Spectre v2 mitigation") Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: Tomohiro Cc: dave.hansen@intel.com Cc: bp@alien8.de Cc: arjan@linux.intel.com Cc: dwmw@amazon.co.uk Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/f5892721-7528-3647-08fb-f8d10e65ad87@cn.fujitsu.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3a06718257bf..51624c6f2a3b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -212,10 +212,10 @@ static void __init spectre_v2_select_mitigation(void) return; case SPECTRE_V2_CMD_FORCE: - /* FALLTRHU */ case SPECTRE_V2_CMD_AUTO: - goto retpoline_auto; - + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_auto; + break; case SPECTRE_V2_CMD_RETPOLINE_AMD: if (IS_ENABLED(CONFIG_RETPOLINE)) goto retpoline_amd; -- GitLab From d7f8d17406d62f0c8b20a9100d34d0e203557fe1 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 28 Jan 2018 10:38:49 -0800 Subject: [PATCH 1233/1398] x86/entry/64: Remove the SYSCALL64 fast path (cherry picked from commit 21d375b6b34ff511a507de27bf316b3dde6938d9) The SYCALLL64 fast path was a nice, if small, optimization back in the good old days when syscalls were actually reasonably fast. Now there is PTI to slow everything down, and indirect branches are verboten, making everything messier. The retpoline code in the fast path is particularly nasty. Just get rid of the fast path. The slow path is barely slower. [ tglx: Split out the 'push all extra regs' part ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Borislav Petkov Cc: Linus Torvalds Cc: Kernel Hardening Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 123 +----------------------------------- arch/x86/entry/syscall_64.c | 7 +- 2 files changed, 3 insertions(+), 127 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d8bc3e001df2..2c154c980543 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -179,94 +179,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) pushq %r11 /* pt_regs->r11 */ sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ - /* - * If we need to do entry work or if we guess we'll need to do - * exit work, go straight to the slow path. - */ - movq PER_CPU_VAR(current_task), %r11 - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) - jnz entry_SYSCALL64_slow_path - -entry_SYSCALL_64_fastpath: - /* - * Easy case: enable interrupts and issue the syscall. If the syscall - * needs pt_regs, we'll call a stub that disables interrupts again - * and jumps to the slow path. - */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) -#if __SYSCALL_MASK == ~0 - cmpq $__NR_syscall_max, %rax -#else - andl $__SYSCALL_MASK, %eax - cmpl $__NR_syscall_max, %eax -#endif - ja 1f /* return -ENOSYS (already in pt_regs->ax) */ - movq %r10, %rcx - - /* - * This call instruction is handled specially in stub_ptregs_64. - * It might end up jumping to the slow path. If it jumps, RAX - * and all argument registers are clobbered. - */ -#ifdef CONFIG_RETPOLINE - movq sys_call_table(, %rax, 8), %rax - call __x86_indirect_thunk_rax -#else - call *sys_call_table(, %rax, 8) -#endif -.Lentry_SYSCALL_64_after_fastpath_call: - - movq %rax, RAX(%rsp) -1: - - /* - * If we get here, then we know that pt_regs is clean for SYSRET64. - * If we see that no exit work is required (which we are required - * to check with IRQs off), then we can go straight to SYSRET64. - */ - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - movq PER_CPU_VAR(current_task), %r11 - testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) - jnz 1f - - LOCKDEP_SYS_EXIT - TRACE_IRQS_ON /* user mode is traced as IRQs on */ - movq RIP(%rsp), %rcx - movq EFLAGS(%rsp), %r11 - RESTORE_C_REGS_EXCEPT_RCX_R11 - /* - * This opens a window where we have a user CR3, but are - * running in the kernel. This makes using the CS - * register useless for telling whether or not we need to - * switch CR3 in NMIs. Normal interrupts are OK because - * they are off here. - */ - SWITCH_USER_CR3 - movq RSP(%rsp), %rsp - USERGS_SYSRET64 - -1: - /* - * The fast path looked good when we started, but something changed - * along the way and we need to switch to the slow path. Calling - * raise(3) will trigger this, for example. IRQs are off. - */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_EXTRA_REGS - movq %rsp, %rdi - call syscall_return_slowpath /* returns with IRQs disabled */ - jmp return_from_SYSCALL_64 - -entry_SYSCALL64_slow_path: /* IRQs are off. */ SAVE_EXTRA_REGS movq %rsp, %rdi call do_syscall_64 /* returns with IRQs disabled */ -return_from_SYSCALL_64: RESTORE_EXTRA_REGS TRACE_IRQS_IRETQ /* we're about to change IF */ @@ -339,6 +256,7 @@ return_from_SYSCALL_64: syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ RESTORE_C_REGS_EXCEPT_RCX_R11 + /* * This opens a window where we have a user CR3, but are * running in the kernel. This makes using the CS @@ -363,45 +281,6 @@ opportunistic_sysret_failed: jmp restore_c_regs_and_iret END(entry_SYSCALL_64) -ENTRY(stub_ptregs_64) - /* - * Syscalls marked as needing ptregs land here. - * If we are on the fast path, we need to save the extra regs, - * which we achieve by trying again on the slow path. If we are on - * the slow path, the extra regs are already saved. - * - * RAX stores a pointer to the C function implementing the syscall. - * IRQs are on. - */ - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) - jne 1f - - /* - * Called from fast path -- disable IRQs again, pop return address - * and jump to slow path - */ - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - popq %rax - jmp entry_SYSCALL64_slow_path - -1: - JMP_NOSPEC %rax /* Called from C */ -END(stub_ptregs_64) - -.macro ptregs_stub func -ENTRY(ptregs_\func) - leaq \func(%rip), %rax - jmp stub_ptregs_64 -END(ptregs_\func) -.endm - -/* Instantiate ptregs_stub for each ptregs-using syscall */ -#define __SYSCALL_64_QUAL_(sym) -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) -#include - /* * %rdi: prev task * %rsi: next task diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index 9dbc5abb6162..6705edda4ac3 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -6,14 +6,11 @@ #include #include -#define __SYSCALL_64_QUAL_(sym) sym -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym - -#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); #include #undef __SYSCALL_64 -#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), +#define __SYSCALL_64(nr, sym, qual) [nr] = sym, extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -- GitLab From 572e509178112895f6162ead5d4bf39d2b981729 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 28 Jan 2018 10:38:49 -0800 Subject: [PATCH 1234/1398] x86/entry/64: Push extra regs right away (cherry picked from commit d1f7732009e0549eedf8ea1db948dc37be77fd46) With the fast path removed there is no point in splitting the push of the normal and the extra register set. Just push the extra regs right away. [ tglx: Split out from 'x86/entry/64: Remove the SYSCALL64 fast path' ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Borislav Petkov Cc: Linus Torvalds Cc: Kernel Hardening Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2c154c980543..db5009ce065a 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -177,10 +177,14 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) pushq %r9 /* pt_regs->r9 */ pushq %r10 /* pt_regs->r10 */ pushq %r11 /* pt_regs->r11 */ - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ /* IRQs are off. */ - SAVE_EXTRA_REGS movq %rsp, %rdi call do_syscall_64 /* returns with IRQs disabled */ -- GitLab From f03d00ba0b478963fc96c975d3b27fc1b9bc3a43 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 28 Jan 2018 10:38:50 -0800 Subject: [PATCH 1235/1398] x86/asm: Move 'status' from thread_struct to thread_info (cherry picked from commit 37a8f7c38339b22b69876d6f5a0ab851565284e3) The TS_COMPAT bit is very hot and is accessed from code paths that mostly also touch thread_info::flags. Move it into struct thread_info to improve cache locality. The only reason it was in thread_struct is that there was a brief period during which arch-specific fields were not allowed in struct thread_info. Linus suggested further changing: ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); to: if (unlikely(ti->status & (TS_COMPAT|TS_I386_REGS_POKED))) ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); on the theory that frequently dirtying the cacheline even in pure 64-bit code that never needs to modify status hurts performance. That could be a reasonable followup patch, but I suspect it matters less on top of this patch. Suggested-by: Linus Torvalds Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: Kernel Hardening Link: https://lkml.kernel.org/r/03148bcc1b217100e6e8ecf6a5468c45cf4304b6.1517164461.git.luto@kernel.org Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 4 ++-- arch/x86/include/asm/processor.h | 2 -- arch/x86/include/asm/syscall.h | 6 +++--- arch/x86/include/asm/thread_info.h | 3 ++- arch/x86/kernel/process_64.c | 4 ++-- arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/signal.c | 2 +- 7 files changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index bdd9cc59d20f..bd1d1027384f 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -201,7 +201,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) * special case only applies after poking regs and before the * very next return to user mode. */ - current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); #endif user_enter_irqoff(); @@ -299,7 +299,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) unsigned int nr = (unsigned int)regs->orig_ax; #ifdef CONFIG_IA32_EMULATION - current->thread.status |= TS_COMPAT; + ti->status |= TS_COMPAT; #endif if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 353f038ec645..cb866ae1bc5d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -391,8 +391,6 @@ struct thread_struct { unsigned short gsindex; #endif - u32 status; /* thread synchronous flags */ - #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index e3c95e8e61c5..03eedc21246d 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, * TS_COMPAT is set for 32-bit syscall entries and then * remains set until we return to user mode. */ - if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) /* * Sign-extend the value so (int)-EFOO becomes (long)-EFOO * and will match correctly in comparisons. @@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread.status & TS_COMPAT) + if (task->thread_info.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; @@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread.status & TS_COMPAT) + if (task->thread_info.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index bdf9c4c91572..89978b9c667a 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -54,6 +54,7 @@ struct task_struct; struct thread_info { unsigned long flags; /* low level flags */ + u32 status; /* thread synchronous flags */ }; #define INIT_THREAD_INFO(tsk) \ @@ -213,7 +214,7 @@ static inline int arch_within_stack_frames(const void * const stack, #define in_ia32_syscall() true #else #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ - current->thread.status & TS_COMPAT) + current_thread_info()->status & TS_COMPAT) #endif /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0887d2ae3797..dffe81d3c261 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -538,7 +538,7 @@ void set_personality_ia32(bool x32) current->personality &= ~READ_IMPLIES_EXEC; /* in_compat_syscall() uses the presence of the x32 syscall bit flag to determine compat status */ - current->thread.status &= ~TS_COMPAT; + current_thread_info()->status &= ~TS_COMPAT; } else { set_thread_flag(TIF_IA32); clear_thread_flag(TIF_X32); @@ -546,7 +546,7 @@ void set_personality_ia32(bool x32) current->mm->context.ia32_compat = TIF_IA32; current->personality |= force_personality32; /* Prepare the first "return" to user space */ - current->thread.status |= TS_COMPAT; + current_thread_info()->status |= TS_COMPAT; } } EXPORT_SYMBOL_GPL(set_personality_ia32); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0e63c0267f99..e497d374412a 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) */ regs->orig_ax = value; if (syscall_get_nr(child, regs) >= 0) - child->thread.status |= TS_I386_REGS_POKED; + child->thread_info.status |= TS_I386_REGS_POKED; break; case offsetof(struct user32, regs.eflags): diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 763af1d0de64..b1a5d252d482 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -785,7 +785,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) * than the tracee. */ #ifdef CONFIG_IA32_EMULATION - if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI -- GitLab From 899ab2cf91389aa3e27c341a9858dc22985dae2f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 29 Jan 2018 17:02:16 -0800 Subject: [PATCH 1236/1398] Documentation: Document array_index_nospec (cherry picked from commit f84a56f73dddaeac1dba8045b007f742f61cd2da) Document the rationale and usage of the new array_index_nospec() helper. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Cc: linux-arch@vger.kernel.org Cc: Jonathan Corbet Cc: Peter Zijlstra Cc: gregkh@linuxfoundation.org Cc: kernel-hardening@lists.openwall.com Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727413645.33451.15878817161436755393.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- Documentation/speculation.txt | 90 +++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 Documentation/speculation.txt diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt new file mode 100644 index 000000000000..e9e6cbae2841 --- /dev/null +++ b/Documentation/speculation.txt @@ -0,0 +1,90 @@ +This document explains potential effects of speculation, and how undesirable +effects can be mitigated portably using common APIs. + +=========== +Speculation +=========== + +To improve performance and minimize average latencies, many contemporary CPUs +employ speculative execution techniques such as branch prediction, performing +work which may be discarded at a later stage. + +Typically speculative execution cannot be observed from architectural state, +such as the contents of registers. However, in some cases it is possible to +observe its impact on microarchitectural state, such as the presence or +absence of data in caches. Such state may form side-channels which can be +observed to extract secret information. + +For example, in the presence of branch prediction, it is possible for bounds +checks to be ignored by code which is speculatively executed. Consider the +following code: + + int load_array(int *array, unsigned int index) + { + if (index >= MAX_ARRAY_ELEMS) + return 0; + else + return array[index]; + } + +Which, on arm64, may be compiled to an assembly sequence such as: + + CMP , #MAX_ARRAY_ELEMS + B.LT less + MOV , #0 + RET + less: + LDR , [, ] + RET + +It is possible that a CPU mis-predicts the conditional branch, and +speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This +value will subsequently be discarded, but the speculated load may affect +microarchitectural state which can be subsequently measured. + +More complex sequences involving multiple dependent memory accesses may +result in sensitive information being leaked. Consider the following +code, building on the prior example: + + int load_dependent_arrays(int *arr1, int *arr2, int index) + { + int val1, val2, + + val1 = load_array(arr1, index); + val2 = load_array(arr2, val1); + + return val2; + } + +Under speculation, the first call to load_array() may return the value +of an out-of-bounds address, while the second call will influence +microarchitectural state dependent on this value. This may provide an +arbitrary read primitive. + +==================================== +Mitigating speculation side-channels +==================================== + +The kernel provides a generic API to ensure that bounds checks are +respected even under speculation. Architectures which are affected by +speculation-based side-channels are expected to implement these +primitives. + +The array_index_nospec() helper in can be used to +prevent information from being leaked via side-channels. + +A call to array_index_nospec(index, size) returns a sanitized index +value that is bounded to [0, size) even under cpu speculation +conditions. + +This can be used to protect the earlier load_array() example: + + int load_array(int *array, unsigned int index) + { + if (index >= MAX_ARRAY_ELEMS) + return 0; + else { + index = array_index_nospec(index, MAX_ARRAY_ELEMS); + return array[index]; + } + } -- GitLab From 579ef9ea20d67a80a0bc5d093259ff6e97087d59 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:22 -0800 Subject: [PATCH 1237/1398] array_index_nospec: Sanitize speculative array de-references (cherry picked from commit f3804203306e098dae9ca51540fcd5eb700d7f40) array_index_nospec() is proposed as a generic mechanism to mitigate against Spectre-variant-1 attacks, i.e. an attack that bypasses boundary checks via speculative execution. The array_index_nospec() implementation is expected to be safe for current generation CPUs across multiple architectures (ARM, x86). Based on an original implementation by Linus Torvalds, tweaked to remove speculative flows by Alexei Starovoitov, and tweaked again by Linus to introduce an x86 assembly implementation for the mask generation. Co-developed-by: Linus Torvalds Co-developed-by: Alexei Starovoitov Suggested-by: Cyril Novikov Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: Peter Zijlstra Cc: Catalin Marinas Cc: Will Deacon Cc: Russell King Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727414229.33451.18411580953862676575.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/nospec.h | 72 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 include/linux/nospec.h diff --git a/include/linux/nospec.h b/include/linux/nospec.h new file mode 100644 index 000000000000..b99bced39ac2 --- /dev/null +++ b/include/linux/nospec.h @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Linus Torvalds. All rights reserved. +// Copyright(c) 2018 Alexei Starovoitov. All rights reserved. +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#ifndef _LINUX_NOSPEC_H +#define _LINUX_NOSPEC_H + +/** + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * When @index is out of bounds (@index >= @size), the sign bit will be + * set. Extend the sign bit to all bits and invert, giving a result of + * zero for an out of bounds index, or ~0 if within bounds [0, @size). + */ +#ifndef array_index_mask_nospec +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + /* + * Warn developers about inappropriate array_index_nospec() usage. + * + * Even if the CPU speculates past the WARN_ONCE branch, the + * sign bit of @index is taken into account when generating the + * mask. + * + * This warning is compiled out when the compiler can infer that + * @index and @size are less than LONG_MAX. + */ + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, + "array_index_nospec() limited to range of [0, LONG_MAX]\n")) + return 0; + + /* + * Always calculate and emit the mask even if the compiler + * thinks the mask is not needed. The compiler does not take + * into account the value of @index under speculation. + */ + OPTIMIZER_HIDE_VAR(index); + return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1); +} +#endif + +/* + * array_index_nospec - sanitize an array index after a bounds check + * + * For a code sequence like: + * + * if (index < size) { + * index = array_index_nospec(index, size); + * val = array[index]; + * } + * + * ...if the CPU speculates past the bounds check then + * array_index_nospec() will clamp the index within the range of [0, + * size). + */ +#define array_index_nospec(index, size) \ +({ \ + typeof(index) _i = (index); \ + typeof(size) _s = (size); \ + unsigned long _mask = array_index_mask_nospec(_i, _s); \ + \ + BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ + BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ + \ + _i &= _mask; \ + _i; \ +}) +#endif /* _LINUX_NOSPEC_H */ -- GitLab From 8c33e2d23a6821cb7d608011c3d2f54accf4212c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:28 -0800 Subject: [PATCH 1238/1398] x86: Implement array_index_mask_nospec (cherry picked from commit babdde2698d482b6c0de1eab4f697cf5856c5859) array_index_nospec() uses a mask to sanitize user controllable array indexes, i.e. generate a 0 mask if 'index' >= 'size', and a ~0 mask otherwise. While the default array_index_mask_nospec() handles the carry-bit from the (index - size) result in software. The x86 array_index_mask_nospec() does the same, but the carry-bit is handled in the processor CF flag without conditional instructions in the control flow. Suggested-by: Linus Torvalds Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727414808.33451.1873237130672785331.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/barrier.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index bfb28caf97b1..ca22173e42dc 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -23,6 +23,30 @@ #define wmb() asm volatile("sfence" ::: "memory") #endif +/** + * array_index_mask_nospec() - generate a mask that is ~0UL when the + * bounds check succeeds and 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * Returns: + * 0 - (index < size) + */ +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + unsigned long mask; + + asm ("cmp %1,%2; sbb %0,%0;" + :"=r" (mask) + :"r"(size),"r" (index) + :"cc"); + return mask; +} + +/* Override the default implementation from linux/nospec.h. */ +#define array_index_mask_nospec array_index_mask_nospec + #ifdef CONFIG_X86_PPRO_FENCE #define dma_rmb() rmb() #else -- GitLab From 1f03d140e2f509ae27f21d229930e707436a07ac Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:33 -0800 Subject: [PATCH 1239/1398] x86: Introduce barrier_nospec (cherry picked from commit b3d7ad85b80bbc404635dca80f5b129f6242bc7a) Rename the open coded form of this instruction sequence from rdtsc_ordered() into a generic barrier primitive, barrier_nospec(). One of the mitigations for Spectre variant1 vulnerabilities is to fence speculative execution after successfully validating a bounds check. I.e. force the result of a bounds check to resolve in the instruction pipeline to ensure speculative execution honors that result before potentially operating on out-of-bounds data. No functional changes. Suggested-by: Linus Torvalds Suggested-by: Andi Kleen Suggested-by: Ingo Molnar Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Tom Lendacky Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727415361.33451.9049453007262764675.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/barrier.h | 4 ++++ arch/x86/include/asm/msr.h | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index ca22173e42dc..857590390397 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -47,6 +47,10 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, /* Override the default implementation from linux/nospec.h. */ #define array_index_mask_nospec array_index_mask_nospec +/* Prevent speculative execution past this barrier. */ +#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ + "lfence", X86_FEATURE_LFENCE_RDTSC) + #ifdef CONFIG_X86_PPRO_FENCE #define dma_rmb() rmb() #else diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index b5fee97813cd..ed35b915b5c9 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -188,8 +188,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) * that some other imaginary CPU is updating continuously with a * time stamp. */ - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, - "lfence", X86_FEATURE_LFENCE_RDTSC); + barrier_nospec(); return rdtsc(); } -- GitLab From e06d7bfb223e6babd7a7f8586e9c25b87272b841 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:39 -0800 Subject: [PATCH 1240/1398] x86: Introduce __uaccess_begin_nospec() and uaccess_try_nospec (cherry picked from commit b3bbfb3fb5d25776b8e3f361d2eedaabb0b496cd) For __get_user() paths, do not allow the kernel to speculate on the value of a user controlled pointer. In addition to the 'stac' instruction for Supervisor Mode Access Protection (SMAP), a barrier_nospec() causes the access_ok() result to resolve in the pipeline before the CPU might take any speculative action on the pointer value. Given the cost of 'stac' the speculation barrier is placed after 'stac' to hopefully overlap the cost of disabling SMAP with the cost of flushing the instruction pipeline. Since __get_user is a major kernel interface that deals with user controlled pointers, the __uaccess_begin_nospec() mechanism will prevent speculative execution past an access_ok() permission check. While speculative execution past access_ok() is not enough to lead to a kernel memory leak, it is a necessary precondition. To be clear, __uaccess_begin_nospec() is addressing a class of potential problems near __get_user() usages. Note, that while the barrier_nospec() in __uaccess_begin_nospec() is used to protect __get_user(), pointer masking similar to array_index_nospec() will be used for get_user() since it incorporates a bounds check near the usage. uaccess_try_nospec provides the same mechanism for get_user_try. No functional changes. Suggested-by: Linus Torvalds Suggested-by: Andi Kleen Suggested-by: Ingo Molnar Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Tom Lendacky Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727415922.33451.5796614273104346583.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/uaccess.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index dead0f3921f3..43148457d255 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -123,6 +123,11 @@ extern int __get_user_bad(void); #define __uaccess_begin() stac() #define __uaccess_end() clac() +#define __uaccess_begin_nospec() \ +({ \ + stac(); \ + barrier_nospec(); \ +}) /* * This is a type: either unsigned long, if the argument fits into @@ -474,6 +479,10 @@ struct __large_struct { unsigned long buf[100]; }; __uaccess_begin(); \ barrier(); +#define uaccess_try_nospec do { \ + current->thread.uaccess_err = 0; \ + __uaccess_begin_nospec(); \ + #define uaccess_catch(err) \ __uaccess_end(); \ (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ -- GitLab From ae75f83e79e4b2de7981d34ddcceb54178098bea Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:44 -0800 Subject: [PATCH 1241/1398] x86/usercopy: Replace open coded stac/clac with __uaccess_{begin, end} (cherry picked from commit b5c4ae4f35325d520b230bab6eb3310613b72ac1) In preparation for converting some __uaccess_begin() instances to __uacess_begin_nospec(), make sure all 'from user' uaccess paths are using the _begin(), _end() helpers rather than open-coded stac() and clac(). No functional changes. Suggested-by: Ingo Molnar Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Tom Lendacky Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727416438.33451.17309465232057176966.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/usercopy_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 3bc7baf2a711..9b5fa0f6964e 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -570,12 +570,12 @@ do { \ unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n) { - stac(); + __uaccess_begin(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel(to, from, n); - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_to_user_ll); @@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { - stac(); + __uaccess_begin(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) n = __copy_user_intel_nocache(to, from, n); @@ -636,7 +636,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr #else __copy_user(to, from, n); #endif - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); -- GitLab From 065eae4be83d8d2bea325c11610ebaf836e1cebb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:49 -0800 Subject: [PATCH 1242/1398] x86/uaccess: Use __uaccess_begin_nospec() and uaccess_try_nospec (cherry picked from commit 304ec1b050310548db33063e567123fae8fd0301) Quoting Linus: I do think that it would be a good idea to very expressly document the fact that it's not that the user access itself is unsafe. I do agree that things like "get_user()" want to be protected, but not because of any direct bugs or problems with get_user() and friends, but simply because get_user() is an excellent source of a pointer that is obviously controlled from a potentially attacking user space. So it's a prime candidate for then finding _subsequent_ accesses that can then be used to perturb the cache. __uaccess_begin_nospec() covers __get_user() and copy_from_iter() where the limit check is far away from the user pointer de-reference. In those cases a barrier_nospec() prevents speculation with a potential pointer to privileged memory. uaccess_try_nospec covers get_user_try. Suggested-by: Linus Torvalds Suggested-by: Andi Kleen Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727416953.33451.10508284228526170604.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/uaccess.h | 6 +++--- arch/x86/include/asm/uaccess_32.h | 12 ++++++------ arch/x86/include/asm/uaccess_64.h | 12 ++++++------ arch/x86/lib/usercopy_32.c | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 43148457d255..a8d85a687cf4 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -437,7 +437,7 @@ do { \ ({ \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ @@ -547,7 +547,7 @@ struct __large_struct { unsigned long buf[100]; }; * get_user_ex(...); * } get_user_catch(err) */ -#define get_user_try uaccess_try +#define get_user_try uaccess_try_nospec #define get_user_catch(err) uaccess_catch(err) #define get_user_ex(x, ptr) do { \ @@ -582,7 +582,7 @@ extern void __cmpxchg_wrong_size(void) __typeof__(ptr) __uval = (uval); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ switch (size) { \ case 1: \ { \ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 7d3bdd1ed697..d6d245088dd5 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -102,17 +102,17 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) switch (n) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u8 *)to, from, 1, ret, 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u16 *)to, from, 2, ret, 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u32 *)to, from, 4, ret, 4); __uaccess_end(); return ret; @@ -130,17 +130,17 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, switch (n) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u8 *)to, from, 1, ret, 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u16 *)to, from, 2, ret, 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u32 *)to, from, 4, ret, 4); __uaccess_end(); return ret; diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 673059a109fe..6e5cc08134ba 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -59,31 +59,31 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) return copy_user_generic(dst, (__force void *)src, size); switch (size) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u8 *)dst, (u8 __user *)src, ret, "b", "b", "=q", 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u16 *)dst, (u16 __user *)src, ret, "w", "w", "=r", 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u32 *)dst, (u32 __user *)src, ret, "l", "k", "=r", 4); __uaccess_end(); return ret; case 8: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 8); __uaccess_end(); return ret; case 10: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 10); if (likely(!ret)) @@ -93,7 +93,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) __uaccess_end(); return ret; case 16: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 16); if (likely(!ret)) diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 9b5fa0f6964e..5c06dbffc52f 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -570,7 +570,7 @@ do { \ unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n) { - __uaccess_begin(); + __uaccess_begin_nospec(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else @@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { - __uaccess_begin(); + __uaccess_begin_nospec(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) n = __copy_user_intel_nocache(to, from, n); -- GitLab From 398a39311c0b67ff1d886f9861ae5251f8a7cad4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:54 -0800 Subject: [PATCH 1243/1398] x86/get_user: Use pointer masking to limit speculation (cherry picked from commit c7f631cb07e7da06ac1d231ca178452339e32a94) Quoting Linus: I do think that it would be a good idea to very expressly document the fact that it's not that the user access itself is unsafe. I do agree that things like "get_user()" want to be protected, but not because of any direct bugs or problems with get_user() and friends, but simply because get_user() is an excellent source of a pointer that is obviously controlled from a potentially attacking user space. So it's a prime candidate for then finding _subsequent_ accesses that can then be used to perturb the cache. Unlike the __get_user() case get_user() includes the address limit check near the pointer de-reference. With that locality the speculation can be mitigated with pointer narrowing rather than a barrier, i.e. array_index_nospec(). Where the narrowing is performed by: cmp %limit, %ptr sbb %mask, %mask and %mask, %ptr With respect to speculation the value of %ptr is either less than %limit or NULL. Co-developed-by: Linus Torvalds Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: Andy Lutomirski Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727417469.33451.11804043010080838495.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/getuser.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 37b62d412148..b12b214713a6 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -39,6 +39,8 @@ ENTRY(__get_user_1) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 1: movzbl (%_ASM_AX),%edx xor %eax,%eax @@ -53,6 +55,8 @@ ENTRY(__get_user_2) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 2: movzwl -1(%_ASM_AX),%edx xor %eax,%eax @@ -67,6 +71,8 @@ ENTRY(__get_user_4) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 3: movl -3(%_ASM_AX),%edx xor %eax,%eax @@ -82,6 +88,8 @@ ENTRY(__get_user_8) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 4: movq -7(%_ASM_AX),%rdx xor %eax,%eax @@ -93,6 +101,8 @@ ENTRY(__get_user_8) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user_8 + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 4: movl -7(%_ASM_AX),%edx 5: movl -3(%_ASM_AX),%ecx -- GitLab From c3193fd49f6f0a9b378379f8fd3a95f01172d477 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:59 -0800 Subject: [PATCH 1244/1398] x86/syscall: Sanitize syscall table de-references under speculation (cherry picked from commit 2fbd7af5af8665d18bcefae3e9700be07e22b681) The syscall table base is a user controlled function pointer in kernel space. Use array_index_nospec() to prevent any out of bounds speculation. While retpoline prevents speculating into a userspace directed target it does not stop the pointer de-reference, the concern is leaking memory relative to the syscall table base, by observing instruction cache behavior. Reported-by: Linus Torvalds Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Andy Lutomirski Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727417984.33451.1216731042505722161.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index bd1d1027384f..b0cd306dc527 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -277,7 +278,8 @@ __visible void do_syscall_64(struct pt_regs *regs) * regs->orig_ax, which changes the behavior of some syscalls. */ if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { - regs->ax = sys_call_table[nr & __SYSCALL_MASK]( + nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls); + regs->ax = sys_call_table[nr]( regs->di, regs->si, regs->dx, regs->r10, regs->r8, regs->r9); } @@ -313,6 +315,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) } if (likely(nr < IA32_NR_syscalls)) { + nr = array_index_nospec(nr, IA32_NR_syscalls); /* * It's possible that a 32-bit syscall implementation * takes a 64-bit parameter but nonetheless assumes that -- GitLab From c26ceec69576cb61157d2487812fb2776e125260 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:03:05 -0800 Subject: [PATCH 1245/1398] vfs, fdtable: Prevent bounds-check bypass via speculative execution (cherry picked from commit 56c30ba7b348b90484969054d561f711ba196507) 'fd' is a user controlled value that is used as a data dependency to read from the 'fdt->fd' array. In order to avoid potential leaks of kernel memory values, block speculative execution of the instruction stream that could issue reads based on an invalid 'file *' returned from __fcheck_files. Co-developed-by: Elena Reshetova Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727418500.33451.17392199002892248656.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/fdtable.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 6e84b2cae6ad..442b54a14cbc 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -81,8 +82,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i { struct fdtable *fdt = rcu_dereference_raw(files->fdt); - if (fd < fdt->max_fds) + if (fd < fdt->max_fds) { + fd = array_index_nospec(fd, fdt->max_fds); return rcu_dereference_raw(fdt->fd[fd]); + } return NULL; } -- GitLab From 0781a50a30d37230147954a3dd15bbfcebfc2398 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:03:15 -0800 Subject: [PATCH 1246/1398] nl80211: Sanitize array index in parse_txq_params (cherry picked from commit 259d8c1e984318497c84eef547bbb6b1d9f4eb05) Wireless drivers rely on parse_txq_params to validate that txq_params->ac is less than NL80211_NUM_ACS by the time the low-level driver's ->conf_tx() handler is called. Use a new helper, array_index_nospec(), to sanitize txq_params->ac with respect to speculation. I.e. ensure that any speculation into ->conf_tx() handlers is done with a value of txq_params->ac that is within the bounds of [0, NL80211_NUM_ACS). Reported-by: Christian Lamparter Reported-by: Elena Reshetova Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Acked-by: Johannes Berg Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: linux-wireless@vger.kernel.org Cc: torvalds@linux-foundation.org Cc: "David S. Miller" Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727419584.33451.7700736761686184303.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c626f679e1c8..91722e97cdd5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -2014,20 +2015,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { static int parse_txq_params(struct nlattr *tb[], struct ieee80211_txq_params *txq_params) { + u8 ac; + if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] || !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || !tb[NL80211_TXQ_ATTR_AIFS]) return -EINVAL; - txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); + ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); - if (txq_params->ac >= NL80211_NUM_ACS) + if (ac >= NL80211_NUM_ACS) return -EINVAL; - + txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS); return 0; } -- GitLab From 359fde6bd0ec91c284641965e5c5c45a1fab6d51 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:03:21 -0800 Subject: [PATCH 1247/1398] x86/spectre: Report get_user mitigation for spectre_v1 (cherry picked from commit edfbae53dab8348fca778531be9f4855d2ca0360) Reflect the presence of get_user(), __get_user(), and 'syscall' protections in sysfs. The expectation is that new and better tooling will allow the kernel to grow more usages of array_index_nospec(), for now, only claim mitigation for __user pointer de-references. Reported-by: Jiri Slaby Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727420158.33451.11658324346540434635.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 51624c6f2a3b..d4658e02ff93 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -296,7 +296,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev, { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Vulnerable\n"); + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } ssize_t cpu_show_spectre_v2(struct device *dev, -- GitLab From f67e05d1506a24f329589c27952d776f977161b7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 30 Jan 2018 19:32:18 +0000 Subject: [PATCH 1248/1398] x86/spectre: Fix spelling mistake: "vunerable"-> "vulnerable" (cherry picked from commit e698dcdfcda41efd0984de539767b4cddd235f1e) Trivial fix to spelling mistake in pr_err error message text. Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Cc: Andi Kleen Cc: Greg Kroah-Hartman Cc: kernel-janitors@vger.kernel.org Cc: Andy Lutomirski Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180130193218.9271-1-colin.king@canonical.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d4658e02ff93..aec7dafc1f13 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -102,7 +102,7 @@ bool retpoline_module_ok(bool has_retpoline) if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) return true; - pr_err("System may be vunerable to spectre v2\n"); + pr_err("System may be vulnerable to spectre v2\n"); spectre_v2_bad_module = true; return false; } -- GitLab From cda6b6074cc6f94ba4cb69a6c8928c1cd19fe291 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 30 Jan 2018 14:30:23 +0000 Subject: [PATCH 1249/1398] x86/cpuid: Fix up "virtual" IBRS/IBPB/STIBP feature bits on Intel (cherry picked from commit 7fcae1118f5fd44a862aa5c3525248e35ee67c3b) Despite the fact that all the other code there seems to be doing it, just using set_cpu_cap() in early_intel_init() doesn't actually work. For CPUs with PKU support, setup_pku() calls get_cpu_cap() after c->c_init() has set those feature bits. That resets those bits back to what was queried from the hardware. Turning the bits off for bad microcode is easy to fix. That can just use setup_clear_cpu_cap() to force them off for all CPUs. I was less keen on forcing the feature bits *on* that way, just in case of inconsistencies. I appreciate that the kernel is going to get this utterly wrong if CPU features are not consistent, because it has already applied alternatives by the time secondary CPUs are brought up. But at least if setup_force_cpu_cap() isn't being used, we might have a chance of *detecting* the lack of the corresponding bit and either panicking or refusing to bring the offending CPU online. So ensure that the appropriate feature bits are set within get_cpu_cap() regardless of how many extra times it's called. Fixes: 2961298e ("x86/cpufeatures: Clean up Spectre v2 related CPUID flags") Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: karahmed@amazon.de Cc: peterz@infradead.org Cc: bp@alien8.de Link: https://lkml.kernel.org/r/1517322623-15261-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 21 +++++++++++++++++++++ arch/x86/kernel/cpu/intel.c | 27 ++++++++------------------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cfa026f323cf..60e537dac718 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -718,6 +718,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) } } +static void init_speculation_control(struct cpuinfo_x86 *c) +{ + /* + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, + * and they also have a different bit for STIBP support. Also, + * a hypervisor might have set the individual AMD bits even on + * Intel CPUs, for finer-grained selection of what's available. + * + * We use the AMD bits in 0x8000_0008 EBX as the generic hardware + * features, which are visible in /proc/cpuinfo and used by the + * kernel. So set those accordingly from the Intel bits. + */ + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); + } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -812,6 +832,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); init_scattered_cpuid_features(c); + init_speculation_control(c); } static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 2e257f87c527..4097b43cba2d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -140,28 +140,17 @@ static void early_init_intel(struct cpuinfo_x86 *c) rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); } - /* - * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, - * and they also have a different bit for STIBP support. Also, - * a hypervisor might have set the individual AMD bits even on - * Intel CPUs, for finer-grained selection of what's available. - */ - if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { - set_cpu_cap(c, X86_FEATURE_IBRS); - set_cpu_cap(c, X86_FEATURE_IBPB); - } - if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) - set_cpu_cap(c, X86_FEATURE_STIBP); - /* Now if any of them are set, check the blacklist and clear the lot */ - if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_INTEL_STIBP) || + cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); - clear_cpu_cap(c, X86_FEATURE_IBRS); - clear_cpu_cap(c, X86_FEATURE_IBPB); - clear_cpu_cap(c, X86_FEATURE_STIBP); - clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); - clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_IBRS); + setup_clear_cpu_cap(X86_FEATURE_IBPB); + setup_clear_cpu_cap(X86_FEATURE_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); } /* -- GitLab From 7552556f65af7db6b2bad828beb4c633cb7d8533 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 30 Jan 2018 22:13:33 -0600 Subject: [PATCH 1250/1398] x86/paravirt: Remove 'noreplace-paravirt' cmdline option (cherry picked from commit 12c69f1e94c89d40696e83804dd2f0965b5250cd) The 'noreplace-paravirt' option disables paravirt patching, leaving the original pv indirect calls in place. That's highly incompatible with retpolines, unless we want to uglify paravirt even further and convert the paravirt calls to retpolines. As far as I can tell, the option doesn't seem to be useful for much other than introducing surprising corner cases and making the kernel vulnerable to Spectre v2. It was probably a debug option from the early paravirt days. So just remove it. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: Andrea Arcangeli Cc: Peter Zijlstra Cc: Andi Kleen Cc: Ashok Raj Cc: Greg KH Cc: Jun Nakajima Cc: Tim Chen Cc: Rusty Russell Cc: Dave Hansen Cc: Asit Mallick Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jason Baron Cc: Paolo Bonzini Cc: Alok Kataria Cc: Arjan Van De Ven Cc: David Woodhouse Cc: Dan Williams Link: https://lkml.kernel.org/r/20180131041333.2x6blhxirc2kclrq@treble Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 2 -- arch/x86/kernel/alternative.c | 14 -------------- 2 files changed, 16 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4c2667aa4634..466c039c622b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2805,8 +2805,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. norandmaps Don't use address space randomization. Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space - noreplace-paravirt [X86,IA-64,PV_OPS] Don't patch paravirt_ops - noreplace-smp [X86-32,SMP] Don't replace SMP instructions with UP alternatives diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 10d5a3d6affc..03b6e5c6cf23 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str) } __setup("noreplace-smp", setup_noreplace_smp); -#ifdef CONFIG_PARAVIRT -static int __initdata_or_module noreplace_paravirt = 0; - -static int __init setup_noreplace_paravirt(char *str) -{ - noreplace_paravirt = 1; - return 1; -} -__setup("noreplace-paravirt", setup_noreplace_paravirt); -#endif - #define DPRINTK(fmt, args...) \ do { \ if (debug_alternative) \ @@ -588,9 +577,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, struct paravirt_patch_site *p; char insnbuf[MAX_PATCH_LEN]; - if (noreplace_paravirt) - return; - for (p = start; p < end; p++) { unsigned int used; -- GitLab From eb99bd6341cb3039d0b9d2b7f89d5f2ff98e9676 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 31 Jan 2018 17:47:03 -0800 Subject: [PATCH 1251/1398] x86/kvm: Update spectre-v1 mitigation (cherry picked from commit 085331dfc6bbe3501fb936e657331ca943827600) Commit 75f139aaf896 "KVM: x86: Add memory barrier on vmcs field lookup" added a raw 'asm("lfence");' to prevent a bounds check bypass of 'vmcs_field_to_offset_table'. The lfence can be avoided in this path by using the array_index_nospec() helper designed for these types of fixes. Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Acked-by: Paolo Bonzini Cc: Andrew Honig Cc: kvm@vger.kernel.org Cc: Jim Mattson Link: https://lkml.kernel.org/r/151744959670.6342.3001723920950249067.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fcdcc8802677..feadff312131 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "kvm_cache_regs.h" #include "x86.h" @@ -856,21 +857,18 @@ static const unsigned short vmcs_field_to_offset_table[] = { static inline short vmcs_field_to_offset(unsigned long field) { - BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); + const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); + unsigned short offset; - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) + BUILD_BUG_ON(size > SHRT_MAX); + if (field >= size) return -ENOENT; - /* - * FIXME: Mitigation for CVE-2017-5753. To be replaced with a - * generic mechanism. - */ - asm("lfence"); - - if (vmcs_field_to_offset_table[field] == 0) + field = array_index_nospec(field, size); + offset = vmcs_field_to_offset_table[field]; + if (offset == 0) return -ENOENT; - - return vmcs_field_to_offset_table[field]; + return offset; } static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) -- GitLab From fe4333893936daf7c02c3b8afc379bc600b5e286 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 1 Feb 2018 11:27:20 +0000 Subject: [PATCH 1252/1398] x86/retpoline: Avoid retpolines for built-in __init functions (cherry picked from commit 66f793099a636862a71c59d4a6ba91387b155e0c) There's no point in building init code with retpolines, since it runs before any potentially hostile userspace does. And before the retpoline is actually ALTERNATIVEd into place, for much of it. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: karahmed@amazon.de Cc: peterz@infradead.org Cc: bp@alien8.de Link: https://lkml.kernel.org/r/1517484441-1420-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/init.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/linux/init.h b/include/linux/init.h index e30104ceb86d..8e346d1bd837 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -4,6 +4,13 @@ #include #include +/* Built-in __init functions needn't be compiled with retpoline */ +#if defined(RETPOLINE) && !defined(MODULE) +#define __noretpoline __attribute__((indirect_branch("keep"))) +#else +#define __noretpoline +#endif + /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) * as `initialization' functions. The kernel can take this @@ -39,7 +46,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold notrace __latent_entropy +#define __init __section(.init.text) __cold notrace __latent_entropy __noretpoline #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) -- GitLab From 961cb14c615d13a823963585dafdc19ee9e9ba85 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 1 Feb 2018 11:27:21 +0000 Subject: [PATCH 1253/1398] x86/spectre: Simplify spectre_v2 command line parsing (cherry picked from commit 9005c6834c0ffdfe46afa76656bd9276cca864f6) [dwmw2: Use ARRAY_SIZE] Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: bp@alien8.de Link: https://lkml.kernel.org/r/1517484441-1420-3-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 86 +++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 30 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index aec7dafc1f13..957ad443b786 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -118,13 +118,13 @@ static inline const char *spectre_v2_module_string(void) { return ""; } static void __init spec2_print_if_insecure(const char *reason) { if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s\n", reason); + pr_info("%s selected on command line.\n", reason); } static void __init spec2_print_if_secure(const char *reason) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s\n", reason); + pr_info("%s selected on command line.\n", reason); } static inline bool retp_compiler(void) @@ -139,42 +139,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) return len == arglen && !strncmp(arg, opt, len); } +static const struct { + const char *option; + enum spectre_v2_mitigation_cmd cmd; + bool secure; +} mitigation_options[] = { + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, +}; + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) { char arg[20]; - int ret; - - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, - sizeof(arg)); - if (ret > 0) { - if (match_option(arg, ret, "off")) { - goto disable; - } else if (match_option(arg, ret, "on")) { - spec2_print_if_secure("force enabled on command line."); - return SPECTRE_V2_CMD_FORCE; - } else if (match_option(arg, ret, "retpoline")) { - spec2_print_if_insecure("retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE; - } else if (match_option(arg, ret, "retpoline,amd")) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); - return SPECTRE_V2_CMD_AUTO; - } - spec2_print_if_insecure("AMD retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE_AMD; - } else if (match_option(arg, ret, "retpoline,generic")) { - spec2_print_if_insecure("generic retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE_GENERIC; - } else if (match_option(arg, ret, "auto")) { + int ret, i; + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; + + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_NONE; + else { + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, + sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } } - if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + if ((cmd == SPECTRE_V2_CMD_RETPOLINE || + cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + !IS_ENABLED(CONFIG_RETPOLINE)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; -disable: - spec2_print_if_insecure("disabled on command line."); - return SPECTRE_V2_CMD_NONE; + } + + if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); + return SPECTRE_V2_CMD_AUTO; + } + + if (mitigation_options[i].secure) + spec2_print_if_secure(mitigation_options[i].option); + else + spec2_print_if_insecure(mitigation_options[i].option); + + return cmd; } /* Check for Skylake-like CPUs (for RSB handling) */ -- GitLab From 4b234a253e52b1ae98449d390cd00f07b96ca7e7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Feb 2018 22:39:23 +0100 Subject: [PATCH 1254/1398] x86/pti: Mark constant arrays as __initconst (cherry picked from commit 4bf5d56d429cbc96c23d809a08f63cd29e1a702e) I'm seeing build failures from the two newly introduced arrays that are marked 'const' and '__initdata', which are mutually exclusive: arch/x86/kernel/cpu/common.c:882:43: error: 'cpu_no_speculation' causes a section type conflict with 'e820_table_firmware_init' arch/x86/kernel/cpu/common.c:895:43: error: 'cpu_no_meltdown' causes a section type conflict with 'e820_table_firmware_init' The correct annotation is __initconst. Fixes: fec9434a12f3 ("x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Cc: Ricardo Neri Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Thomas Garnier Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180202213959.611210-1-arnd@arndb.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 60e537dac718..08e89ed6aa87 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -861,7 +861,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } -static const __initdata struct x86_cpu_id cpu_no_speculation[] = { +static const __initconst struct x86_cpu_id cpu_no_speculation[] = { { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, @@ -874,7 +874,7 @@ static const __initdata struct x86_cpu_id cpu_no_speculation[] = { {} }; -static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { +static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { { X86_VENDOR_AMD }, {} }; -- GitLab From 34900390e96663cfe5c23e254baaf49664be8836 Mon Sep 17 00:00:00 2001 From: Darren Kenny Date: Fri, 2 Feb 2018 19:12:20 +0000 Subject: [PATCH 1255/1398] x86/speculation: Fix typo IBRS_ATT, which should be IBRS_ALL (cherry picked from commit af189c95a371b59f493dbe0f50c0a09724868881) Fixes: 117cc7a908c83 ("x86/retpoline: Fill return stack buffer on vmexit") Signed-off-by: Darren Kenny Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Cc: Tom Lendacky Cc: Andi Kleen Cc: Borislav Petkov Cc: Masami Hiramatsu Cc: Arjan van de Ven Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180202191220.blvgkgutojecxr3b@starbug-vm.ie.oracle.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index df4ececa6ebf..300cc159b4a0 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -150,7 +150,7 @@ extern char __indirect_thunk_end[]; * On VMEXIT we must ensure that no RSB predictions learned in the guest * can be followed in the host, by overwriting the RSB completely. Both * retpoline and IBRS mitigations for Spectre v2 need this; only on future - * CPUs with IBRS_ATT *might* it be avoided. + * CPUs with IBRS_ALL *might* it be avoided. */ static inline void vmexit_fill_RSB(void) { -- GitLab From 19b1d4bdfe5ce543e54292fb1a45c7300398e6aa Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 25 Jan 2017 11:58:57 +0100 Subject: [PATCH 1256/1398] KVM: nVMX: kmap() can't fail commit 42cf014d38d8822cce63703a467e00f65d000952 upstream. kmap() can't fail, therefore it will always return a valid pointer. Let's just get rid of the unnecessary checks. Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index feadff312131..0726a66c07ab 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4756,10 +4756,6 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) return 0; vapic_page = kmap(vmx->nested.virtual_apic_page); - if (!vapic_page) { - WARN_ON(1); - return -ENOMEM; - } __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); kunmap(vmx->nested.virtual_apic_page); @@ -9584,11 +9580,6 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, if (!page) return false; msr_bitmap_l1 = (unsigned long *)kmap(page); - if (!msr_bitmap_l1) { - nested_release_page_clean(page); - WARN_ON(1); - return false; - } memset(msr_bitmap_l0, 0xff, PAGE_SIZE); -- GitLab From 1edccf20b9d82d318f0003ad67b8afed299ae93e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 25 Jan 2017 11:58:58 +0100 Subject: [PATCH 1257/1398] KVM: nVMX: vmx_complete_nested_posted_interrupt() can't fail (cherry picked from commit 6342c50ad12e8ce0736e722184a7dbdea4a3477f) vmx_complete_nested_posted_interrupt() can't fail, let's turn it into a void function. Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0726a66c07ab..91d17ab53e2b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4736,7 +4736,7 @@ static bool vmx_get_enable_apicv(void) return enable_apicv; } -static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) +static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); int max_irr; @@ -4747,13 +4747,13 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmx->nested.pi_pending) { vmx->nested.pi_pending = false; if (!pi_test_and_clear_on(vmx->nested.pi_desc)) - return 0; + return; max_irr = find_last_bit( (unsigned long *)vmx->nested.pi_desc->pir, 256); if (max_irr == 256) - return 0; + return; vapic_page = kmap(vmx->nested.virtual_apic_page); __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); @@ -4766,7 +4766,6 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmcs_write16(GUEST_INTR_STATUS, status); } } - return 0; } static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) @@ -10482,7 +10481,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) return 0; } - return vmx_complete_nested_posted_interrupt(vcpu); + vmx_complete_nested_posted_interrupt(vcpu); + return 0; } static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) -- GitLab From b7649e1776706c28d837d423412c1763900e521e Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 1 Aug 2017 14:00:40 -0700 Subject: [PATCH 1258/1398] KVM: nVMX: mark vmcs12 pages dirty on L2 exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry picked from commit c9f04407f2e0b3fc9ff7913c65fcfcb0a4b61570) The host physical addresses of L1's Virtual APIC Page and Posted Interrupt descriptor are loaded into the VMCS02. The CPU may write to these pages via their host physical address while L2 is running, bypassing address-translation-based dirty tracking (e.g. EPT write protection). Mark them dirty on every exit from L2 to prevent them from getting out of sync with dirty tracking. Also mark the virtual APIC page and the posted interrupt descriptor dirty when KVM is virtualizing posted interrupt processing. Signed-off-by: David Matlack Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 91d17ab53e2b..e91deda4864e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4736,6 +4736,28 @@ static bool vmx_get_enable_apicv(void) return enable_apicv; } +static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + gfn_t gfn; + + /* + * Don't need to mark the APIC access page dirty; it is never + * written to by the CPU during APIC virtualization. + */ + + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { + gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } + + if (nested_cpu_has_posted_intr(vmcs12)) { + gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } +} + + static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4743,18 +4765,15 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) void *vapic_page; u16 status; - if (vmx->nested.pi_desc && - vmx->nested.pi_pending) { - vmx->nested.pi_pending = false; - if (!pi_test_and_clear_on(vmx->nested.pi_desc)) - return; - - max_irr = find_last_bit( - (unsigned long *)vmx->nested.pi_desc->pir, 256); + if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) + return; - if (max_irr == 256) - return; + vmx->nested.pi_pending = false; + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) + return; + max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); + if (max_irr != 256) { vapic_page = kmap(vmx->nested.virtual_apic_page); __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); kunmap(vmx->nested.virtual_apic_page); @@ -4766,6 +4785,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmcs_write16(GUEST_INTR_STATUS, status); } } + + nested_mark_vmcs12_pages_dirty(vcpu); } static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) @@ -8022,6 +8043,18 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) vmcs_read32(VM_EXIT_INTR_ERROR_CODE), KVM_ISA_VMX); + /* + * The host physical addresses of some pages of guest memory + * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU + * may write to these pages via their host physical address while + * L2 is running, bypassing any address-translation-based dirty + * tracking (e.g. EPT write protection). + * + * Mark them dirty on every exit from L2 to prevent them from + * getting out of sync with dirty tracking. + */ + nested_mark_vmcs12_pages_dirty(vcpu); + if (vmx->nested.nested_run_pending) return false; -- GitLab From 46e24dfc2dfe0062a77538698c84f13b24ce9b2c Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 27 Nov 2017 17:22:25 -0600 Subject: [PATCH 1259/1398] KVM: nVMX: Eliminate vmcs02 pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry picked from commit de3a0021a60635de96aa92713c1a31a96747d72c) The potential performance advantages of a vmcs02 pool have never been realized. To simplify the code, eliminate the pool. Instead, a single vmcs02 is allocated per VCPU when the VCPU enters VMX operation. Cc: stable@vger.kernel.org # prereq for Spectre mitigation Signed-off-by: Jim Mattson Signed-off-by: Mark Kanda Reviewed-by: Ameya More Reviewed-by: David Hildenbrand Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 146 +++++++-------------------------------------- 1 file changed, 23 insertions(+), 123 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e91deda4864e..44e40952c7fc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -174,7 +174,6 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; #define NR_AUTOLOAD_MSRS 8 -#define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; @@ -208,7 +207,7 @@ struct shared_msr_entry { * stored in guest memory specified by VMPTRLD, but is opaque to the guest, * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. * More than one of these structures may exist, if L1 runs multiple L2 guests. - * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the + * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the * underlying hardware which will be used to run L2. * This structure is packed to ensure that its layout is identical across * machines (necessary for live migration). @@ -387,13 +386,6 @@ struct __packed vmcs12 { */ #define VMCS12_SIZE 0x1000 -/* Used to remember the last vmcs02 used for some recently used vmcs12s */ -struct vmcs02_list { - struct list_head list; - gpa_t vmptr; - struct loaded_vmcs vmcs02; -}; - /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -420,15 +412,15 @@ struct nested_vmx { */ bool sync_shadow_vmcs; - /* vmcs02_list cache of VMCSs recently used to run L2 guests */ - struct list_head vmcs02_pool; - int vmcs02_num; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; + + struct loaded_vmcs vmcs02; + /* - * Guest pages referred to in vmcs02 with host-physical pointers, so - * we must keep them pinned while L2 runs. + * Guest pages referred to in the vmcs02 with host-physical + * pointers, so we must keep them pinned while L2 runs. */ struct page *apic_access_page; struct page *virtual_apic_page; @@ -6677,94 +6669,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) return handle_nop(vcpu); } -/* - * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. - * We could reuse a single VMCS for all the L2 guests, but we also want the - * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this - * allows keeping them loaded on the processor, and in the future will allow - * optimizations where prepare_vmcs02 doesn't need to set all the fields on - * every entry if they never change. - * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE - * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. - * - * The following functions allocate and free a vmcs02 in this pool. - */ - -/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ -static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmx->nested.current_vmptr) { - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { - /* Recycle the least recently used VMCS. */ - item = list_last_entry(&vmx->nested.vmcs02_pool, - struct vmcs02_list, list); - item->vmptr = vmx->nested.current_vmptr; - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - /* Create a new VMCS */ - item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); - if (!item) - return NULL; - item->vmcs02.vmcs = alloc_vmcs(); - item->vmcs02.shadow_vmcs = NULL; - if (!item->vmcs02.vmcs) { - kfree(item); - return NULL; - } - loaded_vmcs_init(&item->vmcs02); - item->vmptr = vmx->nested.current_vmptr; - list_add(&(item->list), &(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num++; - return &item->vmcs02; -} - -/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ -static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmptr) { - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - return; - } -} - -/* - * Free all VMCSs saved for this vcpu, except the one pointed by - * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs - * must be &vmx->vmcs01. - */ -static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item, *n; - - WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); - list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { - /* - * Something will leak if the above WARN triggers. Better than - * a use-after-free. - */ - if (vmx->loaded_vmcs == &item->vmcs02) - continue; - - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - } -} - /* * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), * set the success or error code of an emulated VMX instruction, as specified @@ -7078,6 +6982,12 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } + vmx->nested.vmcs02.vmcs = alloc_vmcs(); + vmx->nested.vmcs02.shadow_vmcs = NULL; + if (!vmx->nested.vmcs02.vmcs) + goto out_vmcs02; + loaded_vmcs_init(&vmx->nested.vmcs02); + if (cpu_has_vmx_msr_bitmap()) { vmx->nested.msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); @@ -7100,9 +7010,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) vmx->vmcs01.shadow_vmcs = shadow_vmcs; } - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num = 0; - hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; @@ -7120,6 +7027,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) free_page((unsigned long)vmx->nested.msr_bitmap); out_msr_bitmap: + free_loaded_vmcs(&vmx->nested.vmcs02); + +out_vmcs02: return -ENOMEM; } @@ -7205,7 +7115,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->vmcs01.shadow_vmcs = NULL; } kfree(vmx->nested.cached_vmcs12); - /* Unpin physical memory we referred to in current vmcs02 */ + /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { nested_release_page(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; @@ -7221,7 +7131,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.pi_desc = NULL; } - nested_free_all_saved_vmcss(vmx); + free_loaded_vmcs(&vmx->nested.vmcs02); } /* Emulate the VMXOFF instruction */ @@ -7255,8 +7165,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) vmptr + offsetof(struct vmcs12, launch_state), &zero, sizeof(zero)); - nested_free_vmcs02(vmx, vmptr); - skip_emulated_instruction(vcpu); nested_vmx_succeed(vcpu); return 1; @@ -8045,10 +7953,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) /* * The host physical addresses of some pages of guest memory - * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU - * may write to these pages via their host physical address while - * L2 is running, bypassing any address-translation-based dirty - * tracking (e.g. EPT write protection). + * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC + * Page). The CPU may write to these pages via their host + * physical address while L2 is running, bypassing any + * address-translation-based dirty tracking (e.g. EPT write + * protection). * * Mark them dirty on every exit from L2 to prevent them from * getting out of sync with dirty tracking. @@ -10212,7 +10121,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); int cpu; - struct loaded_vmcs *vmcs02; bool ia32e; u32 msr_entry_idx; @@ -10352,17 +10260,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * the nested entry. */ - vmcs02 = nested_get_current_vmcs02(vmx); - if (!vmcs02) - return -ENOMEM; - enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); cpu = get_cpu(); - vmx->loaded_vmcs = vmcs02; + vmx->loaded_vmcs = &vmx->nested.vmcs02; vmx_vcpu_put(vcpu); vmx_vcpu_load(vcpu, cpu); vcpu->cpu = cpu; @@ -10877,10 +10781,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vm_exit_controls_reset_shadow(vmx); vmx_segment_cache_clear(vmx); - /* if no vmcs02 cache requested, remove the one we used */ - if (VMCS02_POOL_SIZE == 0) - nested_free_vmcs02(vmx, vmx->nested.current_vmptr); - load_vmcs12_host_state(vcpu, vmcs12); /* Update any VMCS fields that might have changed while L2 ran */ -- GitLab From ff546f9d83d320bc81b72bd2ccd4ebae45d3d714 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jan 2018 12:16:15 +0100 Subject: [PATCH 1260/1398] KVM: VMX: introduce alloc_loaded_vmcs (cherry picked from commit f21f165ef922c2146cc5bdc620f542953c41714b) Group together the calls to alloc_vmcs and loaded_vmcs_init. Soon we'll also allocate an MSR bitmap there. Cc: stable@vger.kernel.org # prereq for Spectre mitigation Signed-off-by: Paolo Bonzini Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 44e40952c7fc..da3cc7975556 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3522,11 +3522,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) return vmcs; } -static struct vmcs *alloc_vmcs(void) -{ - return alloc_vmcs_cpu(raw_smp_processor_id()); -} - static void free_vmcs(struct vmcs *vmcs) { free_pages((unsigned long)vmcs, vmcs_config.order); @@ -3545,6 +3540,22 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } +static struct vmcs *alloc_vmcs(void) +{ + return alloc_vmcs_cpu(raw_smp_processor_id()); +} + +static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) +{ + loaded_vmcs->vmcs = alloc_vmcs(); + if (!loaded_vmcs->vmcs) + return -ENOMEM; + + loaded_vmcs->shadow_vmcs = NULL; + loaded_vmcs_init(loaded_vmcs); + return 0; +} + static void free_kvm_area(void) { int cpu; @@ -6943,6 +6954,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) struct vmcs *shadow_vmcs; const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + int r; /* The Intel VMX Instruction Reference lists a bunch of bits that * are prerequisite to running VMXON, most notably cr4.VMXE must be @@ -6982,11 +6994,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } - vmx->nested.vmcs02.vmcs = alloc_vmcs(); - vmx->nested.vmcs02.shadow_vmcs = NULL; - if (!vmx->nested.vmcs02.vmcs) + r = alloc_loaded_vmcs(&vmx->nested.vmcs02); + if (r < 0) goto out_vmcs02; - loaded_vmcs_init(&vmx->nested.vmcs02); if (cpu_has_vmx_msr_bitmap()) { vmx->nested.msr_bitmap = @@ -9107,17 +9117,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx->guest_msrs) goto free_pml; - vmx->loaded_vmcs = &vmx->vmcs01; - vmx->loaded_vmcs->vmcs = alloc_vmcs(); - vmx->loaded_vmcs->shadow_vmcs = NULL; - if (!vmx->loaded_vmcs->vmcs) - goto free_msrs; if (!vmm_exclusive) kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id()))); - loaded_vmcs_init(vmx->loaded_vmcs); + err = alloc_loaded_vmcs(&vmx->vmcs01); if (!vmm_exclusive) kvm_cpu_vmxoff(); + if (err < 0) + goto free_msrs; + vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); vmx->vcpu.cpu = cpu; -- GitLab From 6236b782eba37a028972bdfd654773ff2e283a22 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 16 Jan 2018 16:51:18 +0100 Subject: [PATCH 1261/1398] KVM: VMX: make MSR bitmaps per-VCPU (cherry picked from commit 904e14fb7cb96401a7dc803ca2863fd5ba32ffe6) Place the MSR bitmap in struct loaded_vmcs, and update it in place every time the x2apic or APICv state can change. This is rare and the loop can handle 64 MSRs per iteration, in a similar fashion as nested_vmx_prepare_msr_bitmap. This prepares for choosing, on a per-VM basis, whether to intercept the SPEC_CTRL and PRED_CMD MSRs. Cc: stable@vger.kernel.org # prereq for Spectre mitigation Suggested-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 314 ++++++++++++++++----------------------------- 1 file changed, 114 insertions(+), 200 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index da3cc7975556..d772e88ae8f3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -110,6 +110,14 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +#define MSR_TYPE_R 1 +#define MSR_TYPE_W 2 +#define MSR_TYPE_RW 3 + +#define MSR_BITMAP_MODE_X2APIC 1 +#define MSR_BITMAP_MODE_X2APIC_APICV 2 +#define MSR_BITMAP_MODE_LM 4 + #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ @@ -191,6 +199,7 @@ struct loaded_vmcs { struct vmcs *shadow_vmcs; int cpu; int launched; + unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; }; @@ -429,8 +438,6 @@ struct nested_vmx { bool pi_pending; u16 posted_intr_nv; - unsigned long *msr_bitmap; - struct hrtimer preemption_timer; bool preemption_timer_expired; @@ -531,6 +538,7 @@ struct vcpu_vmx { unsigned long host_rsp; u8 fail; bool nmi_known_unmasked; + u8 msr_bitmap_mode; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -902,6 +910,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -921,12 +930,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); static unsigned long *vmx_io_bitmap_a; static unsigned long *vmx_io_bitmap_b; -static unsigned long *vmx_msr_bitmap_legacy; -static unsigned long *vmx_msr_bitmap_longmode; -static unsigned long *vmx_msr_bitmap_legacy_x2apic; -static unsigned long *vmx_msr_bitmap_longmode_x2apic; -static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive; -static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive; static unsigned long *vmx_vmread_bitmap; static unsigned long *vmx_vmwrite_bitmap; @@ -2520,36 +2523,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) vmx->guest_msrs[from] = tmp; } -static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) -{ - unsigned long *msr_bitmap; - - if (is_guest_mode(vcpu)) - msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; - else if (cpu_has_secondary_exec_ctrls() && - (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic; - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive; - } - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode; - else - msr_bitmap = vmx_msr_bitmap_legacy; - } - - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); -} - /* * Set up the vmcs to automatically save and restore system * msrs. Don't touch the 64-bit msrs if the guest is in legacy @@ -2590,7 +2563,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx->save_nmsrs = save_nmsrs; if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(&vmx->vcpu); + vmx_update_msr_bitmap(&vmx->vcpu); } /* @@ -3537,6 +3510,8 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; + if (loaded_vmcs->msr_bitmap) + free_page((unsigned long)loaded_vmcs->msr_bitmap); WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } @@ -3553,7 +3528,18 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs->shadow_vmcs = NULL; loaded_vmcs_init(loaded_vmcs); + + if (cpu_has_vmx_msr_bitmap()) { + loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!loaded_vmcs->msr_bitmap) + goto out_vmcs; + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); + } return 0; + +out_vmcs: + free_loaded_vmcs(loaded_vmcs); + return -ENOMEM; } static void free_kvm_area(void) @@ -4562,10 +4548,8 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } -#define MSR_TYPE_R 1 -#define MSR_TYPE_W 2 -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4599,8 +4583,8 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, } } -static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4634,6 +4618,15 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, } } +static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type, bool value) +{ + if (value) + vmx_enable_intercept_for_msr(msr_bitmap, msr, type); + else + vmx_disable_intercept_for_msr(msr_bitmap, msr, type); +} + /* * If a msr is allowed by L0, we should check whether it is allowed by L1. * The corresponding bit will be cleared unless both of L0 and L1 allow it. @@ -4680,58 +4673,68 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) { - if (!longmode_only) - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, - msr, MSR_TYPE_R | MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, - msr, MSR_TYPE_R | MSR_TYPE_W); -} + u8 mode = 0; -static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) -{ - if (apicv_active) { - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_R); - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_R); - } else { - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, - msr, MSR_TYPE_R); - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, - msr, MSR_TYPE_R); + if (cpu_has_secondary_exec_ctrls() && + (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { + mode |= MSR_BITMAP_MODE_X2APIC; + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) + mode |= MSR_BITMAP_MODE_X2APIC_APICV; } + + if (is_long_mode(vcpu)) + mode |= MSR_BITMAP_MODE_LM; + + return mode; } -static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) +#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) + +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, + u8 mode) { - if (apicv_active) { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_R); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_R); - } else { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, - msr, MSR_TYPE_R); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, - msr, MSR_TYPE_R); + int msr; + + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; + } + + if (mode & MSR_BITMAP_MODE_X2APIC) { + /* + * TPR reads and writes can be virtualized even if virtual interrupt + * delivery is not in use. + */ + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); + } } } -static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active) +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) { - if (apicv_active) { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_W); - } else { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, - msr, MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, - msr, MSR_TYPE_W); - } + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; + u8 mode = vmx_msr_bitmap_mode(vcpu); + u8 changed = mode ^ vmx->msr_bitmap_mode; + + if (!changed) + return; + + vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, + !(mode & MSR_BITMAP_MODE_LM)); + + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); + + vmx->msr_bitmap_mode = mode; } static bool vmx_get_enable_apicv(void) @@ -4976,7 +4979,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) } if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static u32 vmx_exec_control(struct vcpu_vmx *vmx) @@ -5065,7 +5068,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); } if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ @@ -6396,7 +6399,7 @@ static void wakeup_handler(void) static __init int hardware_setup(void) { - int r = -ENOMEM, i, msr; + int r = -ENOMEM, i; rdmsrl_safe(MSR_EFER, &host_efer); @@ -6411,41 +6414,13 @@ static __init int hardware_setup(void) if (!vmx_io_bitmap_b) goto out; - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy) - goto out1; - - vmx_msr_bitmap_legacy_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy_x2apic) - goto out2; - - vmx_msr_bitmap_legacy_x2apic_apicv_inactive = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive) - goto out3; - - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode) - goto out4; - - vmx_msr_bitmap_longmode_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode_x2apic) - goto out5; - - vmx_msr_bitmap_longmode_x2apic_apicv_inactive = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive) - goto out6; - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmread_bitmap) - goto out7; + goto out1; vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmwrite_bitmap) - goto out8; + goto out2; memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); @@ -6454,12 +6429,9 @@ static __init int hardware_setup(void) memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; - goto out9; + goto out3; } if (boot_cpu_has(X86_FEATURE_NX)) @@ -6516,47 +6488,8 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - - memcpy(vmx_msr_bitmap_legacy_x2apic, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic, - vmx_msr_bitmap_longmode, PAGE_SIZE); - memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, - vmx_msr_bitmap_longmode, PAGE_SIZE); - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - /* - * enable_apicv && kvm_vcpu_apicv_active() - */ - for (msr = 0x800; msr <= 0x8ff; msr++) - vmx_disable_intercept_msr_read_x2apic(msr, true); - - /* TMCCT */ - vmx_enable_intercept_msr_read_x2apic(0x839, true); - /* TPR */ - vmx_disable_intercept_msr_write_x2apic(0x808, true); - /* EOI */ - vmx_disable_intercept_msr_write_x2apic(0x80b, true); - /* SELF-IPI */ - vmx_disable_intercept_msr_write_x2apic(0x83f, true); - - /* - * (enable_apicv && !kvm_vcpu_apicv_active()) || - * !enable_apicv - */ - /* TPR */ - vmx_disable_intercept_msr_read_x2apic(0x808, false); - vmx_disable_intercept_msr_write_x2apic(0x808, false); - if (enable_ept) { kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, @@ -6602,22 +6535,10 @@ static __init int hardware_setup(void) return alloc_kvm_area(); -out9: - free_page((unsigned long)vmx_vmwrite_bitmap); -out8: - free_page((unsigned long)vmx_vmread_bitmap); -out7: - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); -out6: - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); -out5: - free_page((unsigned long)vmx_msr_bitmap_longmode); -out4: - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); out3: - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); + free_page((unsigned long)vmx_vmwrite_bitmap); out2: - free_page((unsigned long)vmx_msr_bitmap_legacy); + free_page((unsigned long)vmx_vmread_bitmap); out1: free_page((unsigned long)vmx_io_bitmap_b); out: @@ -6628,12 +6549,6 @@ static __init int hardware_setup(void) static __exit void hardware_unsetup(void) { - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); - free_page((unsigned long)vmx_msr_bitmap_legacy); - free_page((unsigned long)vmx_msr_bitmap_longmode); free_page((unsigned long)vmx_io_bitmap_b); free_page((unsigned long)vmx_io_bitmap_a); free_page((unsigned long)vmx_vmwrite_bitmap); @@ -6998,13 +6913,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) if (r < 0) goto out_vmcs02; - if (cpu_has_vmx_msr_bitmap()) { - vmx->nested.msr_bitmap = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx->nested.msr_bitmap) - goto out_msr_bitmap; - } - vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_vmcs12) goto out_cached_vmcs12; @@ -7034,9 +6942,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) kfree(vmx->nested.cached_vmcs12); out_cached_vmcs12: - free_page((unsigned long)vmx->nested.msr_bitmap); - -out_msr_bitmap: free_loaded_vmcs(&vmx->nested.vmcs02); out_vmcs02: @@ -7115,10 +7020,6 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.vmxon = false; free_vpid(vmx->nested.vpid02); nested_release_vmcs12(vmx); - if (vmx->nested.msr_bitmap) { - free_page((unsigned long)vmx->nested.msr_bitmap); - vmx->nested.msr_bitmap = NULL; - } if (enable_shadow_vmcs) { vmcs_clear(vmx->vmcs01.shadow_vmcs); free_vmcs(vmx->vmcs01.shadow_vmcs); @@ -8465,7 +8366,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) @@ -9085,6 +8986,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + unsigned long *msr_bitmap; int cpu; if (!vmx) @@ -9125,6 +9027,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (err < 0) goto free_msrs; + msr_bitmap = vmx->vmcs01.msr_bitmap; + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); + vmx->msr_bitmap_mode = 0; + vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); @@ -9519,7 +9430,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, int msr; struct page *page; unsigned long *msr_bitmap_l1; - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; /* This shortcut is ok because we support only x2APIC MSRs so far. */ if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) @@ -10034,6 +9945,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); + if (cpu_has_vmx_msr_bitmap()) + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); + if (enable_vpid) { /* * There is no direct mapping between vpid02 and vpid12, the @@ -10738,7 +10652,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_write64(GUEST_IA32_DEBUGCTL, 0); if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) -- GitLab From 7013129a4034ea2168a0ccb32d7ddfefe9123333 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Thu, 1 Feb 2018 22:59:43 +0100 Subject: [PATCH 1262/1398] KVM/x86: Add IBPB support (cherry picked from commit 15d45071523d89b3fb7372e2135fbd72f6af9506) The Indirect Branch Predictor Barrier (IBPB) is an indirect branch control mechanism. It keeps earlier branches from influencing later ones. Unlike IBRS and STIBP, IBPB does not define a new mode of operation. It's a command that ensures predicted branch targets aren't used after the barrier. Although IBRS and IBPB are enumerated by the same CPUID enumeration, IBPB is very different. IBPB helps mitigate against three potential attacks: * Mitigate guests from being attacked by other guests. - This is addressed by issing IBPB when we do a guest switch. * Mitigate attacks from guest/ring3->host/ring3. These would require a IBPB during context switch in host, or after VMEXIT. The host process has two ways to mitigate - Either it can be compiled with retpoline - If its going through context switch, and has set !dumpable then there is a IBPB in that path. (Tim's patch: https://patchwork.kernel.org/patch/10192871) - The case where after a VMEXIT you return back to Qemu might make Qemu attackable from guest when Qemu isn't compiled with retpoline. There are issues reported when doing IBPB on every VMEXIT that resulted in some tsc calibration woes in guest. * Mitigate guest/ring0->host/ring0 attacks. When host kernel is using retpoline it is safe against these attacks. If host kernel isn't using retpoline we might need to do a IBPB flush on every VMEXIT. Even when using retpoline for indirect calls, in certain conditions 'ret' can use the BTB on Skylake-era CPUs. There are other mitigations available like RSB stuffing/clearing. * IBPB is issued only for SVM during svm_free_vcpu(). VMX has a vmclear and SVM doesn't. Follow discussion here: https://lkml.org/lkml/2018/1/15/146 Please refer to the following spec for more details on the enumeration and control. Refer here to get documentation about mitigations. https://software.intel.com/en-us/side-channel-security-support [peterz: rebase and changelog rewrite] [karahmed: - rebase - vmx: expose PRED_CMD if guest has it in CPUID - svm: only pass through IBPB if guest has it in CPUID - vmx: support !cpu_has_vmx_msr_bitmap()] - vmx: support nested] [dwmw2: Expose CPUID bit too (AMD IBPB only for now as we lack IBRS) PRED_CMD is a write-only MSR] Signed-off-by: Ashok Raj Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: David Woodhouse Signed-off-by: KarimAllah Ahmed Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Cc: Andrea Arcangeli Cc: Andi Kleen Cc: kvm@vger.kernel.org Cc: Asit Mallick Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Dave Hansen Cc: Arjan Van De Ven Cc: Greg KH Cc: Jun Nakajima Cc: Paolo Bonzini Cc: Dan Williams Cc: Tim Chen Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@intel.com Link: https://lkml.kernel.org/r/1517522386-18410-3-git-send-email-karahmed@amazon.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 11 +++++- arch/x86/kvm/cpuid.h | 12 +++++++ arch/x86/kvm/svm.c | 28 ++++++++++++++++ arch/x86/kvm/vmx.c | 79 ++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 127 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 91af75e37306..6f24483b2b84 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -355,6 +355,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + /* cpuid 0x80000008.ebx */ + const u32 kvm_cpuid_8000_0008_ebx_x86_features = + F(IBPB); + /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | @@ -607,7 +611,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, if (!g_phys_as) g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); - entry->ebx = entry->edx = 0; + entry->edx = 0; + /* IBPB isn't necessarily present in hardware cpuid */ + if (boot_cpu_has(X86_FEATURE_IBPB)) + entry->ebx |= F(IBPB); + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); break; } case 0x80000019: diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 9368fecca3ee..ec4f9dc54c70 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -160,6 +160,18 @@ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) return best && (best->edx & bit(X86_FEATURE_RDTSCP)); } +static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best && (best->ebx & bit(X86_FEATURE_IBPB))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); +} + + /* * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 24af898fb3a6..6de8d6510dbe 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -248,6 +248,7 @@ static const struct svm_direct_access_msrs { { .index = MSR_CSTAR, .always = true }, { .index = MSR_SYSCALL_MASK, .always = true }, #endif + { .index = MSR_IA32_PRED_CMD, .always = false }, { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, @@ -510,6 +511,7 @@ struct svm_cpu_data { struct kvm_ldttss_desc *tss_desc; struct page *save_area; + struct vmcb *current_vmcb; }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); @@ -1644,11 +1646,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); + /* + * The vmcb page can be recycled, causing a false negative in + * svm_vcpu_load(). So do a full IBPB now. + */ + indirect_branch_prediction_barrier(); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); int i; if (unlikely(cpu != vcpu->cpu)) { @@ -1677,6 +1685,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (static_cpu_has(X86_FEATURE_RDTSCP)) wrmsrl(MSR_TSC_AUX, svm->tsc_aux); + if (sd->current_vmcb != svm->vmcb) { + sd->current_vmcb = svm->vmcb; + indirect_branch_prediction_barrier(); + } avic_vcpu_load(vcpu, cpu); } @@ -3599,6 +3611,22 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr); break; + case MSR_IA32_PRED_CMD: + if (!msr->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + if (is_guest_mode(vcpu)) + break; + set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); + break; case MSR_STAR: svm->vmcb->save.star = data; break; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d772e88ae8f3..b58b7819e890 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -550,6 +550,7 @@ struct vcpu_vmx { u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; #endif + u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; /* @@ -911,6 +912,8 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -1846,6 +1849,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(EXCEPTION_BITMAP, eb); } +/* + * Check if MSR is intercepted for L01 MSR bitmap. + */ +static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { @@ -2255,6 +2281,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; vmcs_load(vmx->loaded_vmcs->vmcs); + indirect_branch_prediction_barrier(); } if (!already_loaded) { @@ -3056,6 +3083,33 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; + case MSR_IA32_PRED_CMD: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, + MSR_TYPE_W); + break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -9431,9 +9485,23 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, struct page *page; unsigned long *msr_bitmap_l1; unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; + /* + * pred_cmd is trying to verify two things: + * + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This + * ensures that we do not accidentally generate an L02 MSR bitmap + * from the L12 MSR bitmap that is too permissive. + * 2. That L1 or L2s have actually used the MSR. This avoids + * unnecessarily merging of the bitmap if the MSR is unused. This + * works properly because we only update the L01 MSR bitmap lazily. + * So even if L0 should pass L1 these MSRs, the L01 bitmap is only + * updated to reflect this when L1 (or its L2s) actually write to + * the MSR. + */ + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); - /* This shortcut is ok because we support only x2APIC MSRs so far. */ - if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && + !pred_cmd) return false; page = nested_get_page(vcpu, vmcs12->msr_bitmap); @@ -9466,6 +9534,13 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, MSR_TYPE_W); } } + + if (pred_cmd) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PRED_CMD, + MSR_TYPE_W); + kunmap(page); nested_release_page_clean(page); -- GitLab From 755502f810c646a1d828fbc66364c252430b6064 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 1 Feb 2018 22:59:44 +0100 Subject: [PATCH 1263/1398] KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES (cherry picked from commit 28c1c9fabf48d6ad596273a11c46e0d0da3e14cd) Intel processors use MSR_IA32_ARCH_CAPABILITIES MSR to indicate RDCL_NO (bit 0) and IBRS_ALL (bit 1). This is a read-only MSR. By default the contents will come directly from the hardware, but user-space can still override it. [dwmw2: The bit in kvm_cpuid_7_0_edx_x86_features can be unconditional] Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Paolo Bonzini Reviewed-by: Darren Kenny Reviewed-by: Jim Mattson Reviewed-by: Konrad Rzeszutek Wilk Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Jun Nakajima Cc: kvm@vger.kernel.org Cc: Dave Hansen Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Greg KH Cc: Dan Williams Cc: Tim Chen Cc: Ashok Raj Link: https://lkml.kernel.org/r/1517522386-18410-4-git-send-email-karahmed@amazon.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 8 +++++++- arch/x86/kvm/cpuid.h | 8 ++++++++ arch/x86/kvm/vmx.c | 15 +++++++++++++++ arch/x86/kvm/x86.c | 1 + 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 6f24483b2b84..9c6493f82094 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -380,6 +380,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/; + /* cpuid 7.0.edx*/ + const u32 kvm_cpuid_7_0_edx_x86_features = + F(ARCH_CAPABILITIES); + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -462,12 +466,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* PKU is not yet implemented for shadow paging. */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); + entry->edx &= kvm_cpuid_7_0_edx_x86_features; + cpuid_mask(&entry->edx, CPUID_7_EDX); } else { entry->ebx = 0; entry->ecx = 0; + entry->edx = 0; } entry->eax = 0; - entry->edx = 0; break; } case 9: diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index ec4f9dc54c70..8719997dd2e4 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -171,6 +171,14 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); } +static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); +} + /* * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b58b7819e890..1bd0caf87c94 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -551,6 +551,8 @@ struct vcpu_vmx { u64 msr_guest_kernel_gs_base; #endif + u64 arch_capabilities; + u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; /* @@ -2979,6 +2981,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has_arch_capabilities(vcpu)) + return 1; + msr_info->data = to_vmx(vcpu)->arch_capabilities; + break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; @@ -3110,6 +3118,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W); break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + vmx->arch_capabilities = data; + break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -5196,6 +5209,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e023ef981feb..94d1573a717b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -975,6 +975,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_ARCH_CAPABILITIES }; static unsigned num_msrs_to_save; -- GitLab From e5a83419c957edff9290a7e09b951f44af7fa2e2 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 1 Feb 2018 22:59:45 +0100 Subject: [PATCH 1264/1398] KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL (cherry picked from commit d28b387fb74da95d69d2615732f50cceb38e9a4d) [ Based on a patch from Ashok Raj ] Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a retpoline+IBPB based approach. To avoid the overhead of saving and restoring the MSR_IA32_SPEC_CTRL for guests that do not actually use the MSR, only start saving and restoring when a non-zero is written to it. No attempt is made to handle STIBP here, intentionally. Filtering STIBP may be added in a future patch, which may require trapping all writes if we don't want to pass it through directly to the guest. [dwmw2: Clean up CPUID bits, save/restore manually, handle reset] Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Darren Kenny Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Jim Mattson Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Jun Nakajima Cc: kvm@vger.kernel.org Cc: Dave Hansen Cc: Tim Chen Cc: Andy Lutomirski Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Greg KH Cc: Paolo Bonzini Cc: Dan Williams Cc: Linus Torvalds Cc: Ashok Raj Link: https://lkml.kernel.org/r/1517522386-18410-5-git-send-email-karahmed@amazon.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 8 ++-- arch/x86/kvm/cpuid.h | 11 +++++ arch/x86/kvm/vmx.c | 103 ++++++++++++++++++++++++++++++++++++++++++- arch/x86/kvm/x86.c | 2 +- 4 files changed, 118 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9c6493f82094..93f924de06cf 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -357,7 +357,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(IBPB); + F(IBPB) | F(IBRS); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - F(ARCH_CAPABILITIES); + F(SPEC_CTRL) | F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -618,9 +618,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; - /* IBPB isn't necessarily present in hardware cpuid */ + /* IBRS and IBPB aren't necessarily present in hardware cpuid */ if (boot_cpu_has(X86_FEATURE_IBPB)) entry->ebx |= F(IBPB); + if (boot_cpu_has(X86_FEATURE_IBRS)) + entry->ebx |= F(IBRS); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); break; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 8719997dd2e4..d1beb7156704 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -171,6 +171,17 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); } +static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best && (best->ebx & bit(X86_FEATURE_IBRS))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); +} + static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1bd0caf87c94..d49da86e3099 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -552,6 +552,7 @@ struct vcpu_vmx { #endif u64 arch_capabilities; + u64 spec_ctrl; u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; @@ -1851,6 +1852,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(EXCEPTION_BITMAP, eb); } +/* + * Check if MSR is intercepted for currently loaded MSR bitmap. + */ +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + /* * Check if MSR is intercepted for L01 MSR bitmap. */ @@ -2981,6 +3005,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibrs(vcpu)) + return 1; + + msr_info->data = to_vmx(vcpu)->spec_ctrl; + break; case MSR_IA32_ARCH_CAPABILITIES: if (!msr_info->host_initiated && !guest_cpuid_has_arch_capabilities(vcpu)) @@ -3091,6 +3122,36 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibrs(vcpu)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + vmx->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. We update the vmcs01 here for L1 as well + * since it will end up touching the MSR anyway now. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_RW); + break; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && !guest_cpuid_has_ibpb(vcpu)) @@ -5239,6 +5300,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u64 cr0; vmx->rmode.vm86_active = 0; + vmx->spec_ctrl = 0; vmx->soft_vnmi_blocked = 0; @@ -8824,6 +8886,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_arm_hv_timer(vcpu); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + if (vmx->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + vmx->__launched = vmx->loaded_vmcs->launched; asm( /* Store host registers */ @@ -8942,6 +9013,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + + if (vmx->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -9501,7 +9593,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, unsigned long *msr_bitmap_l1; unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; /* - * pred_cmd is trying to verify two things: + * pred_cmd & spec_ctrl are trying to verify two things: * * 1. L0 gave a permission to L1 to actually passthrough the MSR. This * ensures that we do not accidentally generate an L02 MSR bitmap @@ -9514,9 +9606,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, * the MSR. */ bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); + bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && - !pred_cmd) + !pred_cmd && !spec_ctrl) return false; page = nested_get_page(vcpu, vmcs12->msr_bitmap); @@ -9550,6 +9643,12 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, } } + if (spec_ctrl) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_R | MSR_TYPE_W); + if (pred_cmd) nested_vmx_disable_intercept_for_msr( msr_bitmap_l1, msr_bitmap_l0, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 94d1573a717b..75f756eac979 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -975,7 +975,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, - MSR_IA32_ARCH_CAPABILITIES + MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES }; static unsigned num_msrs_to_save; -- GitLab From fc00dde96099a1a331a683a87b23ccb4626d816a Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Sat, 3 Feb 2018 15:56:23 +0100 Subject: [PATCH 1265/1398] KVM/SVM: Allow direct access to MSR_IA32_SPEC_CTRL (cherry picked from commit b2ac58f90540e39324e7a29a7ad471407ae0bf48) [ Based on a patch from Paolo Bonzini ] ... basically doing exactly what we do for VMX: - Passthrough SPEC_CTRL to guests (if enabled in guest CPUID) - Save and restore SPEC_CTRL around VMExit and VMEntry only if the guest actually used it. Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Darren Kenny Reviewed-by: Konrad Rzeszutek Wilk Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Jun Nakajima Cc: kvm@vger.kernel.org Cc: Dave Hansen Cc: Tim Chen Cc: Andy Lutomirski Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Greg KH Cc: Paolo Bonzini Cc: Dan Williams Cc: Linus Torvalds Cc: Ashok Raj Link: https://lkml.kernel.org/r/1517669783-20732-1-git-send-email-karahmed@amazon.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6de8d6510dbe..be644afab1bb 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -183,6 +183,8 @@ struct vcpu_svm { u64 gs_base; } host; + u64 spec_ctrl; + u32 *msrpm; ulong nmi_iret_rip; @@ -248,6 +250,7 @@ static const struct svm_direct_access_msrs { { .index = MSR_CSTAR, .always = true }, { .index = MSR_SYSCALL_MASK, .always = true }, #endif + { .index = MSR_IA32_SPEC_CTRL, .always = false }, { .index = MSR_IA32_PRED_CMD, .always = false }, { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, @@ -863,6 +866,25 @@ static bool valid_msr_intercept(u32 index) return false; } +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) +{ + u8 bit_write; + unsigned long tmp; + u32 offset; + u32 *msrpm; + + msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: + to_svm(vcpu)->msrpm; + + offset = svm_msrpm_offset(msr); + bit_write = 2 * (msr & 0x0f) + 1; + tmp = msrpm[offset]; + + BUG_ON(offset == MSR_INVALID); + + return !!test_bit(bit_write, &tmp); +} + static void set_msr_interception(u32 *msrpm, unsigned msr, int read, int write) { @@ -1537,6 +1559,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; + svm->spec_ctrl = 0; + if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; @@ -3520,6 +3544,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_VM_CR: msr_info->data = svm->nested.vm_cr_msr; break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibrs(vcpu)) + return 1; + + msr_info->data = svm->spec_ctrl; + break; case MSR_IA32_UCODE_REV: msr_info->data = 0x01000065; break; @@ -3611,6 +3642,33 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr); break; + case MSR_IA32_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has_ibrs(vcpu)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + svm->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_svm_vmrun_msrpm. + * We update the L1 MSR bit as well since it will end up + * touching the MSR anyway now. + */ + set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); + break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && !guest_cpuid_has_ibpb(vcpu)) @@ -4854,6 +4912,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + if (svm->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + asm volatile ( "push %%" _ASM_BP "; \n\t" "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" @@ -4946,6 +5013,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + + if (svm->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); -- GitLab From 7c17a1e5852a0c1c67a2ac65569270619fe1d576 Mon Sep 17 00:00:00 2001 From: Robert Baronescu Date: Tue, 10 Oct 2017 13:21:59 +0300 Subject: [PATCH 1266/1398] crypto: tcrypt - fix S/G table for test_aead_speed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5c6ac1d4f8fbdbed65dbeb8cf149d736409d16a1 upstream. In case buffer length is a multiple of PAGE_SIZE, the S/G table is incorrectly generated. Fix this by handling buflen = k * PAGE_SIZE separately. Signed-off-by: Robert Baronescu Signed-off-by: Herbert Xu Signed-off-by: Horia Geantă Signed-off-by: Greg Kroah-Hartman --- crypto/tcrypt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index e3af318af2db..2a07341aca46 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -223,11 +223,13 @@ static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE], } sg_init_table(sg, np + 1); - np--; + if (rem) + np--; for (k = 0; k < np; k++) sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE); - sg_set_buf(&sg[k + 1], xbuf[k], rem); + if (rem) + sg_set_buf(&sg[k + 1], xbuf[k], rem); } static void test_aead_speed(const char *algo, int enc, unsigned int secs, -- GitLab From a7de0e9718c3062ed5fad916ceb65f387a29447b Mon Sep 17 00:00:00 2001 From: Julian Scheel Date: Wed, 24 May 2017 12:28:23 +0200 Subject: [PATCH 1267/1398] ASoC: simple-card: Fix misleading error message commit 7ac45d1635a4cd2e99a4b11903d4a2815ca1b27b upstream. In case cpu could not be found the error message would always refer to /codec/ not being found in DT. Fix this by catching the cpu node not found case explicitly. Signed-off-by: Julian Scheel Signed-off-by: Mark Brown Signed-off-by: thongsyho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- sound/soc/generic/simple-card.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index f608f8d23f3d..dd88c2cb6470 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -232,13 +232,19 @@ static int asoc_simple_card_dai_link_of(struct device_node *node, snprintf(prop, sizeof(prop), "%scpu", prefix); cpu = of_get_child_by_name(node, prop); + if (!cpu) { + ret = -EINVAL; + dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop); + goto dai_link_of_err; + } + snprintf(prop, sizeof(prop), "%splat", prefix); plat = of_get_child_by_name(node, prop); snprintf(prop, sizeof(prop), "%scodec", prefix); codec = of_get_child_by_name(node, prop); - if (!cpu || !codec) { + if (!codec) { ret = -EINVAL; dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop); goto dai_link_of_err; -- GitLab From 24978c21f7ed183e7bb9745a295d066483b6bd48 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 16 May 2017 01:48:24 +0000 Subject: [PATCH 1268/1398] ASoC: rsnd: don't call free_irq() on Parent SSI commit 1f8754d4daea5f257370a52a30fcb22798c54516 upstream. If SSI uses shared pin, some SSI will be used as parent SSI. Then, normal SSI's remove and Parent SSI's remove (these are same SSI) will be called when unbind or remove timing. In this case, free_irq() will be called twice. This patch solve this issue. Signed-off-by: Kuninori Morimoto Tested-by: Hiroyuki Yokoyama Reported-by: Hiroyuki Yokoyama Signed-off-by: Mark Brown Signed-off-by: thongsyho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/ssi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 560cf4b51a99..071fa850fed5 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -699,9 +699,14 @@ static int rsnd_ssi_dma_remove(struct rsnd_mod *mod, struct rsnd_priv *priv) { struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); + struct rsnd_mod *ssi_parent_mod = rsnd_io_to_mod_ssip(io); struct device *dev = rsnd_priv_to_dev(priv); int irq = ssi->irq; + /* Do nothing for SSI parent mod */ + if (ssi_parent_mod == mod) + return 0; + /* PIO will request IRQ again */ devm_free_irq(dev, irq, mod); -- GitLab From 1cb145c67260edf54f106f031756a6ae780b7a32 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 9 Aug 2017 02:16:20 +0000 Subject: [PATCH 1269/1398] ASoC: rsnd: avoid duplicate free_irq() commit e0936c3471a8411a5df327641fa3ffe12a2fb07b upstream. commit 1f8754d4daea5f ("ASoC: rsnd: don't call free_irq() on Parent SSI") fixed Parent SSI duplicate free_irq(). But on Renesas Sound, not only Parent SSI but also Multi SSI have same issue. This patch avoid duplicate free_irq() if it was not pure SSI. Fixes: 1f8754d4daea5f ("ASoC: rsnd: don't call free_irq() on Parent SSI") Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown Signed-off-by: thongsyho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/ssi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 071fa850fed5..a9a43acce30e 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -699,12 +699,12 @@ static int rsnd_ssi_dma_remove(struct rsnd_mod *mod, struct rsnd_priv *priv) { struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); - struct rsnd_mod *ssi_parent_mod = rsnd_io_to_mod_ssip(io); + struct rsnd_mod *pure_ssi_mod = rsnd_io_to_mod_ssi(io); struct device *dev = rsnd_priv_to_dev(priv); int irq = ssi->irq; - /* Do nothing for SSI parent mod */ - if (ssi_parent_mod == mod) + /* Do nothing if non SSI (= SSI parent, multi SSI) mod */ + if (pure_ssi_mod != mod) return 0; /* PIO will request IRQ again */ -- GitLab From 758e22acf4fd9aaf51c7bae93a47401b8f792d56 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 10 Jul 2017 23:46:39 +0300 Subject: [PATCH 1270/1398] drm: rcar-du: Use the VBK interrupt for vblank events commit cbbb90b0c084d7dfb2ed8e3fecf8df200fbdd2a0 upstream. When implementing support for interlaced modes, the driver switched from reporting vblank events on the vertical blanking (VBK) interrupt to the frame end interrupt (FRM). This incorrectly divided the reported refresh rate by two. Fix it by moving back to the VBK interrupt. Fixes: 906eff7fcada ("drm: rcar-du: Implement support for interlaced modes") Signed-off-by: Laurent Pinchart Reviewed-by: Kieran Bingham Signed-off-by: thongsyho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index a2ec6d8796a0..848f7f2152ee 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -551,7 +551,7 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg) status = rcar_du_crtc_read(rcrtc, DSSR); rcar_du_crtc_write(rcrtc, DSRCR, status & DSRCR_MASK); - if (status & DSSR_FRM) { + if (status & DSSR_VBK) { drm_crtc_handle_vblank(&rcrtc->crtc); rcar_du_crtc_finish_page_flip(rcrtc); ret = IRQ_HANDLED; -- GitLab From 230ca8fb951528f298e06a9c15257df5df85ecbd Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 29 Jul 2017 02:31:33 +0300 Subject: [PATCH 1271/1398] drm: rcar-du: Fix race condition when disabling planes at CRTC stop commit 641307df71fe77d7b38a477067495ede05d47295 upstream. When stopping the CRTC the driver must disable all planes and wait for the change to take effect at the next vblank. Merely calling drm_crtc_wait_one_vblank() is not enough, as the function doesn't include any mechanism to handle the race with vblank interrupts. Replace the drm_crtc_wait_one_vblank() call with a manual mechanism that handles the vblank interrupt race. Signed-off-by: Laurent Pinchart Reviewed-by: Kieran Bingham Signed-off-by: thongsyho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 53 +++++++++++++++++++++++--- drivers/gpu/drm/rcar-du/rcar_du_crtc.h | 8 ++++ 2 files changed, 55 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index 848f7f2152ee..3322b157106d 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -392,6 +392,31 @@ static void rcar_du_crtc_start(struct rcar_du_crtc *rcrtc) rcrtc->started = true; } +static void rcar_du_crtc_disable_planes(struct rcar_du_crtc *rcrtc) +{ + struct rcar_du_device *rcdu = rcrtc->group->dev; + struct drm_crtc *crtc = &rcrtc->crtc; + u32 status; + /* Make sure vblank interrupts are enabled. */ + drm_crtc_vblank_get(crtc); + /* + * Disable planes and calculate how many vertical blanking interrupts we + * have to wait for. If a vertical blanking interrupt has been triggered + * but not processed yet, we don't know whether it occurred before or + * after the planes got disabled. We thus have to wait for two vblank + * interrupts in that case. + */ + spin_lock_irq(&rcrtc->vblank_lock); + rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0); + status = rcar_du_crtc_read(rcrtc, DSSR); + rcrtc->vblank_count = status & DSSR_VBK ? 2 : 1; + spin_unlock_irq(&rcrtc->vblank_lock); + if (!wait_event_timeout(rcrtc->vblank_wait, rcrtc->vblank_count == 0, + msecs_to_jiffies(100))) + dev_warn(rcdu->dev, "vertical blanking timeout\n"); + drm_crtc_vblank_put(crtc); +} + static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc) { struct drm_crtc *crtc = &rcrtc->crtc; @@ -400,17 +425,16 @@ static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc) return; /* Disable all planes and wait for the change to take effect. This is - * required as the DSnPR registers are updated on vblank, and no vblank - * will occur once the CRTC is stopped. Disabling planes when starting - * the CRTC thus wouldn't be enough as it would start scanning out - * immediately from old frame buffers until the next vblank. + * required as the plane enable registers are updated on vblank, and no + * vblank will occur once the CRTC is stopped. Disabling planes when + * starting the CRTC thus wouldn't be enough as it would start scanning + * out immediately from old frame buffers until the next vblank. * * This increases the CRTC stop delay, especially when multiple CRTCs * are stopped in one operation as we now wait for one vblank per CRTC. * Whether this can be improved needs to be researched. */ - rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0); - drm_crtc_wait_one_vblank(crtc); + rcar_du_crtc_disable_planes(rcrtc); /* Disable vertical blanking interrupt reporting. We first need to wait * for page flip completion before stopping the CRTC as userspace @@ -548,9 +572,24 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg) irqreturn_t ret = IRQ_NONE; u32 status; + spin_lock(&rcrtc->vblank_lock); + status = rcar_du_crtc_read(rcrtc, DSSR); rcar_du_crtc_write(rcrtc, DSRCR, status & DSRCR_MASK); + if (status & DSSR_VBK) { + /* + * Wake up the vblank wait if the counter reaches 0. This must + * be protected by the vblank_lock to avoid races in + * rcar_du_crtc_disable_planes(). + */ + if (rcrtc->vblank_count) { + if (--rcrtc->vblank_count == 0) + wake_up(&rcrtc->vblank_wait); + } + } + spin_unlock(&rcrtc->vblank_lock); + if (status & DSSR_VBK) { drm_crtc_handle_vblank(&rcrtc->crtc); rcar_du_crtc_finish_page_flip(rcrtc); @@ -606,6 +645,8 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) } init_waitqueue_head(&rcrtc->flip_wait); + init_waitqueue_head(&rcrtc->vblank_wait); + spin_lock_init(&rcrtc->vblank_lock); rcrtc->group = rgrp; rcrtc->mmio_offset = mmio_offsets[index]; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h index 6f08b7e7db06..48bef05b4c62 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h @@ -15,6 +15,7 @@ #define __RCAR_DU_CRTC_H__ #include +#include #include #include @@ -33,6 +34,9 @@ struct rcar_du_vsp; * @started: whether the CRTC has been started and is running * @event: event to post when the pending page flip completes * @flip_wait: wait queue used to signal page flip completion + * @vblank_lock: protects vblank_wait and vblank_count + * @vblank_wait: wait queue used to signal vertical blanking + * @vblank_count: number of vertical blanking interrupts to wait for * @outputs: bitmask of the outputs (enum rcar_du_output) driven by this CRTC * @group: CRTC group this CRTC belongs to */ @@ -48,6 +52,10 @@ struct rcar_du_crtc { struct drm_pending_vblank_event *event; wait_queue_head_t flip_wait; + spinlock_t vblank_lock; + wait_queue_head_t vblank_wait; + unsigned int vblank_count; + unsigned int outputs; struct rcar_du_group *group; -- GitLab From 2760f452a71899af860355abe818a8b2bd32b2cb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 12 Oct 2017 13:23:16 +0200 Subject: [PATCH 1272/1398] x86/microcode: Do the family check first commit 1f161f67a272cc4f29f27934dd3f74cb657eb5c4 upstream with adjustments. On CPUs like AMD's Geode, for example, we shouldn't even try to load microcode because they do not support the modern microcode loading interface. However, we do the family check *after* the other checks whether the loader has been disabled on the command line or whether we're running in a guest. So move the family checks first in order to exit early if we're being loaded on an unsupported family. Reported-and-tested-by: Sven Glodowski Signed-off-by: Borislav Petkov Cc: # 4.11.. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://bugzilla.suse.com/show_bug.cgi?id=1061396 Link: http://lkml.kernel.org/r/20171012112316.977-1-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Rolf Neugebauer Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/core.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index dc0b9f8763ad..0afaf00b029b 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -86,9 +86,6 @@ static bool __init check_loader_disabled_bsp(void) bool *res = &dis_ucode_ldr; #endif - if (!have_cpuid_p()) - return *res; - a = 1; c = 0; native_cpuid(&a, &b, &c, &d); @@ -130,8 +127,9 @@ void __init load_ucode_bsp(void) { int vendor; unsigned int family; + bool intel = true; - if (check_loader_disabled_bsp()) + if (!have_cpuid_p()) return; vendor = x86_cpuid_vendor(); @@ -139,16 +137,27 @@ void __init load_ucode_bsp(void) switch (vendor) { case X86_VENDOR_INTEL: - if (family >= 6) - load_ucode_intel_bsp(); + if (family < 6) + return; break; + case X86_VENDOR_AMD: - if (family >= 0x10) - load_ucode_amd_bsp(family); + if (family < 0x10) + return; + intel = false; break; + default: - break; + return; } + + if (check_loader_disabled_bsp()) + return; + + if (intel) + load_ucode_intel_bsp(); + else + load_ucode_amd_bsp(family); } static bool check_loader_disabled_ap(void) -- GitLab From 7f3bd8db99746a60bcae1ec4059a4756d19b63c2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 13 Feb 2018 12:36:03 +0100 Subject: [PATCH 1273/1398] Linux 4.9.81 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9550b6939076..4d5753f1c37b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 80 +SUBLEVEL = 81 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From ebb45d7f7c3e13911493acd4e4444b73fd137931 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 29 Nov 2016 09:47:14 -0700 Subject: [PATCH 1274/1398] UPSTREAM: coresight: reset "enable_sink" flag when need be When using coresight from the perf interface sinks are specified as part of the perf command line. As such the sink needs to be disabled once it has been acknowledged by the coresight framework. Otherwise the sink stays enabled, which may interfere with other sessions. This patch removes the sink selection check from the build path process and make it a function on it's own. The function is then used when operating from sysFS or perf to determine what sink has been selected. If operated from perf the status of the "enable_sink" flag is reset so that concurrent session can use a different sink. When used from sysFS the status of the flag is left untouched since users have full control. The implementation doesn't handle a scenario where a sink has been enabled from sysFS and another sink is selected from the perf command line as both modes of operation are mutually exclusive. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman (cherry picked from commit d52c9750f150111dc7f73e4036f6948b20c9f8c3) Change-Id: Ia099d121f0736a5207a3410b524efaf953566548 --- .../hwtracing/coresight/coresight-etm-perf.c | 31 ++++---- drivers/hwtracing/coresight/coresight-priv.h | 4 +- drivers/hwtracing/coresight/coresight.c | 74 +++++++++++++++++-- 3 files changed, 87 insertions(+), 22 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 2cd7c718198a..5a346fc8ce06 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -202,6 +202,21 @@ static void *etm_setup_aux(int event_cpu, void **pages, if (!event_data) return NULL; + /* + * In theory nothing prevent tracers in a trace session from being + * associated with different sinks, nor having a sink per tracer. But + * until we have HW with this kind of topology we need to assume tracers + * in a trace session are using the same sink. Therefore go through + * the coresight bus and pick the first enabled sink. + * + * When operated from sysFS users are responsible to enable the sink + * while from perf, the perf tools will do it based on the choice made + * on the cmd line. As such the "enable_sink" flag in sysFS is reset. + */ + sink = coresight_get_enabled_sink(true); + if (!sink) + return NULL; + INIT_WORK(&event_data->work, free_event_data); mask = &event_data->mask; @@ -219,25 +234,11 @@ static void *etm_setup_aux(int event_cpu, void **pages, * list of devices from source to sink that can be * referenced later when the path is actually needed. */ - event_data->path[cpu] = coresight_build_path(csdev); + event_data->path[cpu] = coresight_build_path(csdev, sink); if (IS_ERR(event_data->path[cpu])) goto err; } - /* - * In theory nothing prevent tracers in a trace session from being - * associated with different sinks, nor having a sink per tracer. But - * until we have HW with this kind of topology and a way to convey - * sink assignement from the perf cmd line we need to assume tracers - * in a trace session are using the same sink. Therefore pick the sink - * found at the end of the first available path. - */ - cpu = cpumask_first(mask); - /* Grab the sink at the end of the path */ - sink = coresight_get_sink(event_data->path[cpu]); - if (!sink) - goto err; - if (!sink_ops(sink)->alloc_buffer) goto err; diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index 196a14be4b3d..ef9d8e93e3b2 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -111,7 +111,9 @@ static inline void CS_UNLOCK(void __iomem *addr) void coresight_disable_path(struct list_head *path); int coresight_enable_path(struct list_head *path, u32 mode); struct coresight_device *coresight_get_sink(struct list_head *path); -struct list_head *coresight_build_path(struct coresight_device *csdev); +struct coresight_device *coresight_get_enabled_sink(bool reset); +struct list_head *coresight_build_path(struct coresight_device *csdev, + struct coresight_device *sink); void coresight_release_path(struct list_head *path); #ifdef CONFIG_CORESIGHT_SOURCE_ETM3X diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 7bf00a0beb6f..0c37356e417c 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -368,6 +368,52 @@ struct coresight_device *coresight_get_sink(struct list_head *path) return csdev; } +static int coresight_enabled_sink(struct device *dev, void *data) +{ + bool *reset = data; + struct coresight_device *csdev = to_coresight_device(dev); + + if ((csdev->type == CORESIGHT_DEV_TYPE_SINK || + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) && + csdev->activated) { + /* + * Now that we have a handle on the sink for this session, + * disable the sysFS "enable_sink" flag so that possible + * concurrent perf session that wish to use another sink don't + * trip on it. Doing so has no ramification for the current + * session. + */ + if (*reset) + csdev->activated = false; + + return 1; + } + + return 0; +} + +/** + * coresight_get_enabled_sink - returns the first enabled sink found on the bus + * @deactivate: Whether the 'enable_sink' flag should be reset + * + * When operated from perf the deactivate parameter should be set to 'true'. + * That way the "enabled_sink" flag of the sink that was selected can be reset, + * allowing for other concurrent perf sessions to choose a different sink. + * + * When operated from sysFS users have full control and as such the deactivate + * parameter should be set to 'false', hence mandating users to explicitly + * clear the flag. + */ +struct coresight_device *coresight_get_enabled_sink(bool deactivate) +{ + struct device *dev = NULL; + + dev = bus_find_device(&coresight_bustype, NULL, &deactivate, + coresight_enabled_sink); + + return dev ? to_coresight_device(dev) : NULL; +} + /** * _coresight_build_path - recursively build a path from a @csdev to a sink. * @csdev: The device to start from. @@ -380,6 +426,7 @@ struct coresight_device *coresight_get_sink(struct list_head *path) * last one. */ static int _coresight_build_path(struct coresight_device *csdev, + struct coresight_device *sink, struct list_head *path) { int i; @@ -387,15 +434,15 @@ static int _coresight_build_path(struct coresight_device *csdev, struct coresight_node *node; /* An activated sink has been found. Enqueue the element */ - if ((csdev->type == CORESIGHT_DEV_TYPE_SINK || - csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) && csdev->activated) + if (csdev == sink) goto out; /* Not a sink - recursively explore each port found on this element */ for (i = 0; i < csdev->nr_outport; i++) { struct coresight_device *child_dev = csdev->conns[i].child_dev; - if (child_dev && _coresight_build_path(child_dev, path) == 0) { + if (child_dev && + _coresight_build_path(child_dev, sink, path) == 0) { found = true; break; } @@ -422,18 +469,22 @@ static int _coresight_build_path(struct coresight_device *csdev, return 0; } -struct list_head *coresight_build_path(struct coresight_device *csdev) +struct list_head *coresight_build_path(struct coresight_device *source, + struct coresight_device *sink) { struct list_head *path; int rc; + if (!sink) + return ERR_PTR(-EINVAL); + path = kzalloc(sizeof(struct list_head), GFP_KERNEL); if (!path) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(path); - rc = _coresight_build_path(csdev, path); + rc = _coresight_build_path(source, sink, path); if (rc) { kfree(path); return ERR_PTR(rc); @@ -497,6 +548,7 @@ static int coresight_validate_source(struct coresight_device *csdev, int coresight_enable(struct coresight_device *csdev) { int cpu, ret = 0; + struct coresight_device *sink; struct list_head *path; mutex_lock(&coresight_mutex); @@ -508,7 +560,17 @@ int coresight_enable(struct coresight_device *csdev) if (csdev->enable) goto out; - path = coresight_build_path(csdev); + /* + * Search for a valid sink for this session but don't reset the + * "enable_sink" flag in sysFS. Users get to do that explicitly. + */ + sink = coresight_get_enabled_sink(false); + if (!sink) { + ret = -EINVAL; + goto out; + } + + path = coresight_build_path(csdev, sink); if (IS_ERR(path)) { pr_err("building path(s) failed\n"); ret = PTR_ERR(path); -- GitLab From ef8490f7fdc78992ec4f6a26266efa8b5362ac23 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Tue, 29 Nov 2016 09:47:15 -0700 Subject: [PATCH 1275/1398] UPSTREAM: coresight: tmc: Cleanup operation mode handling The mode of operation of the TMC tracked in drvdata->mode is defined as a local_t type. This is always checked and modified under the drvdata->spinlock and hence we don't need local_t for it and the unnecessary synchronisation instructions that comes with it. This change makes the code a bit more cleaner. Also fixes the order in which we update the drvdata->mode to CS_MODE_DISABLED. i.e, in tmc_disable_etX_sink we change the mode to CS_MODE_DISABLED before invoking tmc_disable_etX_hw() which in turn depends on the mode to decide whether to dump the trace to a buffer. Applies on mathieu's coresight/next tree [1] https://git.linaro.org/kernel/coresight.git next Reported-by: Venkatesh Vivekanandan Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 297ab90f15f6222408380bbf4e8cbff57b02060d) Change-Id: I5f217d87f2e3a3e18764c1b5f909c88bf1a9dc91 --- .../hwtracing/coresight/coresight-tmc-etf.c | 32 ++++++++----------- .../hwtracing/coresight/coresight-tmc-etr.c | 26 +++++++-------- drivers/hwtracing/coresight/coresight-tmc.h | 2 +- 3 files changed, 26 insertions(+), 34 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index d6941ea24d8d..e80a8f4cd12e 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -70,7 +70,7 @@ static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) * When operating in sysFS mode the content of the buffer needs to be * read before the TMC is disabled. */ - if (local_read(&drvdata->mode) == CS_MODE_SYSFS) + if (drvdata->mode == CS_MODE_SYSFS) tmc_etb_dump_hw(drvdata); tmc_disable_hw(drvdata); @@ -108,7 +108,6 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev, u32 mode) int ret = 0; bool used = false; char *buf = NULL; - long val; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -138,13 +137,12 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev, u32 mode) goto out; } - val = local_xchg(&drvdata->mode, mode); /* * In sysFS mode we can have multiple writers per sink. Since this * sink is already enabled no memory is needed and the HW need not be * touched. */ - if (val == CS_MODE_SYSFS) + if (drvdata->mode == CS_MODE_SYSFS) goto out; /* @@ -163,6 +161,7 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev, u32 mode) drvdata->buf = buf; } + drvdata->mode = CS_MODE_SYSFS; tmc_etb_enable_hw(drvdata); out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -180,7 +179,6 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev, u32 mode) static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, u32 mode) { int ret = 0; - long val; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -194,17 +192,17 @@ static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, u32 mode) goto out; } - val = local_xchg(&drvdata->mode, mode); /* * In Perf mode there can be only one writer per sink. There * is also no need to continue if the ETB/ETR is already operated * from sysFS. */ - if (val != CS_MODE_DISABLED) { + if (drvdata->mode != CS_MODE_DISABLED) { ret = -EINVAL; goto out; } + drvdata->mode = mode; tmc_etb_enable_hw(drvdata); out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -227,7 +225,6 @@ static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode) static void tmc_disable_etf_sink(struct coresight_device *csdev) { - long val; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -237,10 +234,11 @@ static void tmc_disable_etf_sink(struct coresight_device *csdev) return; } - val = local_xchg(&drvdata->mode, CS_MODE_DISABLED); /* Disable the TMC only if it needs to */ - if (val != CS_MODE_DISABLED) + if (drvdata->mode != CS_MODE_DISABLED) { tmc_etb_disable_hw(drvdata); + drvdata->mode = CS_MODE_DISABLED; + } spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -260,7 +258,7 @@ static int tmc_enable_etf_link(struct coresight_device *csdev, } tmc_etf_enable_hw(drvdata); - local_set(&drvdata->mode, CS_MODE_SYSFS); + drvdata->mode = CS_MODE_SYSFS; spin_unlock_irqrestore(&drvdata->spinlock, flags); dev_info(drvdata->dev, "TMC-ETF enabled\n"); @@ -280,7 +278,7 @@ static void tmc_disable_etf_link(struct coresight_device *csdev, } tmc_etf_disable_hw(drvdata); - local_set(&drvdata->mode, CS_MODE_DISABLED); + drvdata->mode = CS_MODE_DISABLED; spin_unlock_irqrestore(&drvdata->spinlock, flags); dev_info(drvdata->dev, "TMC disabled\n"); @@ -383,7 +381,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, return; /* This shouldn't happen */ - if (WARN_ON_ONCE(local_read(&drvdata->mode) != CS_MODE_PERF)) + if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF)) return; CS_UNLOCK(drvdata->base); @@ -504,7 +502,6 @@ const struct coresight_ops tmc_etf_cs_ops = { int tmc_read_prepare_etb(struct tmc_drvdata *drvdata) { - long val; enum tmc_mode mode; int ret = 0; unsigned long flags; @@ -528,9 +525,8 @@ int tmc_read_prepare_etb(struct tmc_drvdata *drvdata) goto out; } - val = local_read(&drvdata->mode); /* Don't interfere if operated from Perf */ - if (val == CS_MODE_PERF) { + if (drvdata->mode == CS_MODE_PERF) { ret = -EINVAL; goto out; } @@ -542,7 +538,7 @@ int tmc_read_prepare_etb(struct tmc_drvdata *drvdata) } /* Disable the TMC if need be */ - if (val == CS_MODE_SYSFS) + if (drvdata->mode == CS_MODE_SYSFS) tmc_etb_disable_hw(drvdata); drvdata->reading = true; @@ -573,7 +569,7 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata) } /* Re-enable the TMC if need be */ - if (local_read(&drvdata->mode) == CS_MODE_SYSFS) { + if (drvdata->mode == CS_MODE_SYSFS) { /* * The trace run will continue with the same allocated trace * buffer. As such zero-out the buffer so that we don't end diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 886ea83c68e0..f23ef0c23303 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -86,7 +86,7 @@ static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) * When operating in sysFS mode the content of the buffer needs to be * read before the TMC is disabled. */ - if (local_read(&drvdata->mode) == CS_MODE_SYSFS) + if (drvdata->mode == CS_MODE_SYSFS) tmc_etr_dump_hw(drvdata); tmc_disable_hw(drvdata); @@ -97,7 +97,6 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev, u32 mode) { int ret = 0; bool used = false; - long val; unsigned long flags; void __iomem *vaddr = NULL; dma_addr_t paddr; @@ -134,13 +133,12 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev, u32 mode) goto out; } - val = local_xchg(&drvdata->mode, mode); /* * In sysFS mode we can have multiple writers per sink. Since this * sink is already enabled no memory is needed and the HW need not be * touched. */ - if (val == CS_MODE_SYSFS) + if (drvdata->mode == CS_MODE_SYSFS) goto out; /* @@ -157,6 +155,7 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev, u32 mode) memset(drvdata->vaddr, 0, drvdata->size); + drvdata->mode = CS_MODE_SYSFS; tmc_etr_enable_hw(drvdata); out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -174,7 +173,6 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev, u32 mode) static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, u32 mode) { int ret = 0; - long val; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -188,17 +186,17 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, u32 mode) goto out; } - val = local_xchg(&drvdata->mode, mode); /* * In Perf mode there can be only one writer per sink. There * is also no need to continue if the ETR is already operated * from sysFS. */ - if (val != CS_MODE_DISABLED) { + if (drvdata->mode != CS_MODE_DISABLED) { ret = -EINVAL; goto out; } + drvdata->mode = CS_MODE_PERF; tmc_etr_enable_hw(drvdata); out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -221,7 +219,6 @@ static int tmc_enable_etr_sink(struct coresight_device *csdev, u32 mode) static void tmc_disable_etr_sink(struct coresight_device *csdev) { - long val; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -231,10 +228,11 @@ static void tmc_disable_etr_sink(struct coresight_device *csdev) return; } - val = local_xchg(&drvdata->mode, CS_MODE_DISABLED); /* Disable the TMC only if it needs to */ - if (val != CS_MODE_DISABLED) + if (drvdata->mode != CS_MODE_DISABLED) { tmc_etr_disable_hw(drvdata); + drvdata->mode = CS_MODE_DISABLED; + } spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -253,7 +251,6 @@ const struct coresight_ops tmc_etr_cs_ops = { int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) { int ret = 0; - long val; unsigned long flags; /* config types are set a boot time and never change */ @@ -266,9 +263,8 @@ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) goto out; } - val = local_read(&drvdata->mode); /* Don't interfere if operated from Perf */ - if (val == CS_MODE_PERF) { + if (drvdata->mode == CS_MODE_PERF) { ret = -EINVAL; goto out; } @@ -280,7 +276,7 @@ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) } /* Disable the TMC if need be */ - if (val == CS_MODE_SYSFS) + if (drvdata->mode == CS_MODE_SYSFS) tmc_etr_disable_hw(drvdata); drvdata->reading = true; @@ -303,7 +299,7 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) spin_lock_irqsave(&drvdata->spinlock, flags); /* RE-enable the TMC if need be */ - if (local_read(&drvdata->mode) == CS_MODE_SYSFS) { + if (drvdata->mode == CS_MODE_SYSFS) { /* * The trace run will continue with the same allocated trace * buffer. The trace buffer is cleared in tmc_etr_enable_hw(), diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 44b3ae346118..51c01851533e 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -117,7 +117,7 @@ struct tmc_drvdata { void __iomem *vaddr; u32 size; u32 len; - local_t mode; + u32 mode; enum tmc_config_type config_type; enum tmc_mem_intf_width memwidth; u32 trigger_cntr; -- GitLab From d61eabcc2b0064f4a195389bab5cf145cbd88462 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Tue, 29 Nov 2016 09:47:16 -0700 Subject: [PATCH 1276/1398] UPSTREAM: coresight: tmc: Get rid of mode parameter for helper routines Get rid of the superfluous mode parameter and the check for the mode in tmc_etX_enable_sink_{perf/sysfs}. While at it, also remove the unnecessary WARN_ON() checks. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c38e505e27017268b86ca89e51582a57e67b39b7) Change-Id: I0394e5dd6335d1764d2ee6526a1c32d62d661bcd --- .../hwtracing/coresight/coresight-tmc-etf.c | 18 +++++------------- .../hwtracing/coresight/coresight-tmc-etr.c | 15 ++++----------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index e80a8f4cd12e..1549436e2492 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -103,7 +103,7 @@ static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); } -static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev, u32 mode) +static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev) { int ret = 0; bool used = false; @@ -111,10 +111,6 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev, u32 mode) unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - /* This shouldn't be happening */ - if (WARN_ON(mode != CS_MODE_SYSFS)) - return -EINVAL; - /* * If we don't have a buffer release the lock and allocate memory. * Otherwise keep the lock and move along. @@ -176,16 +172,12 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev, u32 mode) return ret; } -static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, u32 mode) +static int tmc_enable_etf_sink_perf(struct coresight_device *csdev) { int ret = 0; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - /* This shouldn't be happening */ - if (WARN_ON(mode != CS_MODE_PERF)) - return -EINVAL; - spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { ret = -EINVAL; @@ -202,7 +194,7 @@ static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, u32 mode) goto out; } - drvdata->mode = mode; + drvdata->mode = CS_MODE_PERF; tmc_etb_enable_hw(drvdata); out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -214,9 +206,9 @@ static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode) { switch (mode) { case CS_MODE_SYSFS: - return tmc_enable_etf_sink_sysfs(csdev, mode); + return tmc_enable_etf_sink_sysfs(csdev); case CS_MODE_PERF: - return tmc_enable_etf_sink_perf(csdev, mode); + return tmc_enable_etf_sink_perf(csdev); } /* We shouldn't be here */ diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index f23ef0c23303..3b84d0d38c22 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -93,7 +93,7 @@ static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); } -static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev, u32 mode) +static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) { int ret = 0; bool used = false; @@ -102,9 +102,6 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev, u32 mode) dma_addr_t paddr; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - /* This shouldn't be happening */ - if (WARN_ON(mode != CS_MODE_SYSFS)) - return -EINVAL; /* * If we don't have a buffer release the lock and allocate memory. @@ -170,16 +167,12 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev, u32 mode) return ret; } -static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, u32 mode) +static int tmc_enable_etr_sink_perf(struct coresight_device *csdev) { int ret = 0; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - /* This shouldn't be happening */ - if (WARN_ON(mode != CS_MODE_PERF)) - return -EINVAL; - spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { ret = -EINVAL; @@ -208,9 +201,9 @@ static int tmc_enable_etr_sink(struct coresight_device *csdev, u32 mode) { switch (mode) { case CS_MODE_SYSFS: - return tmc_enable_etr_sink_sysfs(csdev, mode); + return tmc_enable_etr_sink_sysfs(csdev); case CS_MODE_PERF: - return tmc_enable_etr_sink_perf(csdev, mode); + return tmc_enable_etr_sink_perf(csdev); } /* We shouldn't be here */ -- GitLab From 788e3f2a25f904b325698593671ba40886dc7f44 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Tue, 29 Nov 2016 09:47:17 -0700 Subject: [PATCH 1277/1398] UPSTREAM: coresight: tmc: Remove duplicate memset The tmc_etr_enable_hw() fills the buffer with 0's before enabling the hardware. So, we don't need an explicit memset() in tmc_enable_etr_sink_sysfs() before calling the tmc_etr_enable_hw(). This patch removes the explicit memset from tmc_enable_etr_sink_sysfs. Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 675a502305c49c6778a53ac47061783defc386ad) Change-Id: I36354f6124f2e25c7266d7dcd76029a5fb8bcb3d --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 3b84d0d38c22..5d312699b3b9 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -150,8 +150,6 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) drvdata->buf = drvdata->vaddr; } - memset(drvdata->vaddr, 0, drvdata->size); - drvdata->mode = CS_MODE_SYSFS; tmc_etr_enable_hw(drvdata); out: -- GitLab From 1576bec69edf73e698c8dacceea61173caa43764 Mon Sep 17 00:00:00 2001 From: Quentin Lambert Date: Tue, 29 Nov 2016 09:47:19 -0700 Subject: [PATCH 1278/1398] UPSTREAM: coresight: perf: Add a missing call to etm_free_aux Most error branches following the call to alloc_event_data contain a call to etm_free_aux. This patch add a call to etm_free_aux to an error branch that does not call it. This issue was found with Hector. Signed-off-by: Quentin Lambert Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit ec98960e0bbbe49b569b09c14ccd82efeb232dc8) Change-Id: I69e4867aa0df992a4251da8dbdc99c37c41a6d35 --- drivers/hwtracing/coresight/coresight-etm-perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 5a346fc8ce06..17741969026e 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -215,7 +215,7 @@ static void *etm_setup_aux(int event_cpu, void **pages, */ sink = coresight_get_enabled_sink(true); if (!sink) - return NULL; + goto err; INIT_WORK(&event_data->work, free_event_data); -- GitLab From 3ed60046769f28f17241b8f9e5e444bd4ae2e147 Mon Sep 17 00:00:00 2001 From: Robert Walker Date: Mon, 18 Dec 2017 11:05:44 -0700 Subject: [PATCH 1279/1398] UPSTREAM: coresight: Fix disabling of CoreSight TPIU The CoreSight TPIU should be disabled when tracing to other sinks to allow them to operate at full bandwidth. This patch fixes tpiu_disable_hw() to correctly disable the TPIU by configuring the TPIU to stop on flush, initiating a manual flush, waiting for the flush to complete and then waits for the TPIU to indicate it has stopped. Signed-off-by: Robert Walker Tested-by: Mike Leach Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 11595db8e17faaa05fadc25746c870e31276962f) Change-Id: Ic59805e784442dfe6ebe1a4eb0856805c508785a --- drivers/hwtracing/coresight/coresight-tpiu.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 0673baf0f2f5..ff579a7c6d00 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -46,8 +46,11 @@ #define TPIU_ITATBCTR0 0xef8 /** register definition **/ +/* FFSR - 0x300 */ +#define FFSR_FT_STOPPED BIT(1) /* FFCR - 0x304 */ #define FFCR_FON_MAN BIT(6) +#define FFCR_STOP_FI BIT(12) /** * @base: memory mapped base address for this component. @@ -85,10 +88,14 @@ static void tpiu_disable_hw(struct tpiu_drvdata *drvdata) { CS_UNLOCK(drvdata->base); - /* Clear formatter controle reg. */ - writel_relaxed(0x0, drvdata->base + TPIU_FFCR); + /* Clear formatter and stop on flush */ + writel_relaxed(FFCR_STOP_FI, drvdata->base + TPIU_FFCR); /* Generate manual flush */ - writel_relaxed(FFCR_FON_MAN, drvdata->base + TPIU_FFCR); + writel_relaxed(FFCR_STOP_FI | FFCR_FON_MAN, drvdata->base + TPIU_FFCR); + /* Wait for flush to complete */ + coresight_timeout(drvdata->base, TPIU_FFCR, FFCR_FON_MAN, 0); + /* Wait for formatter to stop */ + coresight_timeout(drvdata->base, TPIU_FFSR, FFSR_FT_STOPPED, 1); CS_LOCK(drvdata->base); } -- GitLab From 61be6a1e672bbad9df06abb6c80b225b7e7a15a4 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 23 Jan 2017 10:41:22 -0700 Subject: [PATCH 1280/1398] UPSTREAM: coresight: fix kernel panic caused by invalid CPU Commit d52c9750f150 ("coresight: reset "enable_sink" flag when need be") caused a kernel panic because of the using of an invalid value: after 'for_each_cpu(cpu, mask)', value of local variable 'cpu' become invalid, causes following 'cpu_to_node' access invalid memory area. This patch brings the deleted 'cpu = cpumask_first(mask)' back. Panic log: $ perf record -e cs_etm// ls Unable to handle kernel paging request at virtual address fffe801804af4f10 pgd = ffff8017ce031600 [fffe801804af4f10] *pgd=0000000000000000, *pud=0000000000000000 Internal error: Oops: 96000004 [#1] SMP Modules linked in: CPU: 33 PID: 1619 Comm: perf Not tainted 4.7.1+ #16 Hardware name: Huawei Taishan 2280 /CH05TEVBA, BIOS 1.10 11/24/2016 task: ffff8017cb0c8400 ti: ffff8017cb154000 task.ti: ffff8017cb154000 PC is at tmc_alloc_etf_buffer+0x60/0xd4 LR is at tmc_alloc_etf_buffer+0x44/0xd4 pc : [] lr : [] pstate: 60000145 sp : ffff8017cb157b40 x29: ffff8017cb157b40 x28: 0000000000000000 ...skip... 7a60: ffff000008c64dc8 0000000000000006 0000000000000253 ffffffffffffffff 7a80: 0000000000000000 0000000000000000 ffff0000080872cc 0000000000000001 [] tmc_alloc_etf_buffer+0x60/0xd4 [] etm_setup_aux+0x1dc/0x1e8 [] rb_alloc_aux+0x2b0/0x338 [] perf_mmap+0x414/0x568 [] mmap_region+0x324/0x544 [] do_mmap+0x334/0x3e0 [] vm_mmap_pgoff+0xa4/0xc8 [] SyS_mmap_pgoff+0xb0/0x22c [] sys_mmap+0x18/0x28 [] el0_svc_naked+0x24/0x28 Code: 912040a5 d0001c00 f873d821 911c6000 (b8656822) ---[ end trace 98933da8f92b0c9a ]--- Signed-off-by: Wang Nan Cc: Xia Kaixu Cc: Li Zefan Cc: Mathieu Poirier Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Fixes: d52c9750f150 ("coresight: reset "enable_sink" flag when need be") Signed-off-by: Mathieu Poirier Cc: stable # 4.10 Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f09444639099584bc4784dfcd85ada67c6f33e0f) Change-Id: I6b42e02c2e85f4d480fbf138527eed4895ec100c --- drivers/hwtracing/coresight/coresight-etm-perf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 17741969026e..26cfac3e6de7 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -242,6 +242,7 @@ static void *etm_setup_aux(int event_cpu, void **pages, if (!sink_ops(sink)->alloc_buffer) goto err; + cpu = cpumask_first(mask); /* Get the AUX specific data from the sink buffer */ event_data->snk_config = sink_ops(sink)->alloc_buffer(sink, cpu, pages, -- GitLab From 0d968ed1ad963f5d1252b8c3ff06e76d604a7529 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 5 Jun 2017 14:15:04 -0600 Subject: [PATCH 1281/1398] UPSTREAM: coresight: etm_perf: Fix using uninitialised work With 4.11-rc4, the following command triggers a WARN_ON, when a sink is not enabled. perf record -e cs_etm/@20010000.etf/ [88286.547741] ------------[ cut here ]------------ [88286.552332] WARNING: CPU: 3 PID: 2156 at kernel/workqueue.c:1442 __queue_work+0x29c/0x3b8 [88286.560427] Modules linked in: [88286.563451] [88286.564928] CPU: 3 PID: 2156 Comm: perf_v4.11 Not tainted 4.11.0-rc4 #217 [88286.573453] Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Aug 15 2016 [88286.584128] task: ffff80097597c200 task.stack: ffff8009768b0000 [88286.589990] PC is at __queue_work+0x29c/0x3b8 [88286.594303] LR is at __queue_work+0x104/0x3b8 [88286.598614] pc : [] lr : [] pstate: a00001c5 [88286.605934] sp : ffff8009768b3aa0 [88286.609212] x29: ffff8009768b3aa0 x28: ffff80097ff3da00 [88286.614477] x27: ffff80097ff89c00 x26: ffff8009751b0e00 [88286.619741] x25: ffff000008c9f000 x24: 0000000000000003 [88286.625004] x23: 0000000000000040 x22: ffff000008d3dab8 [88286.630268] x21: ffff800977804400 x20: 0000000000000007 [88286.635532] x19: ffff000008c54000 x18: 0000fffff9185160 [88286.640795] x17: 0000ffffb33d9a38 x16: ffff000008088270 [88286.646059] x15: 0000ffffb345b590 x14: 0000000000000000 [88286.651322] x13: 0000000000000004 x12: 0000000000000040 [88286.656586] x11: 0000000000000068 x10: 0000000000000000 [88286.661849] x9 : ffff800977400028 x8 : 0000000000000000 [88286.667113] x7 : 0000000000000000 x6 : ffff0000080d8ae4 [88286.672376] x5 : 0000000000000000 x4 : 0000000000000080 [88286.677639] x3 : 0000000000000000 x2 : 0000000000000000 [88286.682903] x1 : 0000000000000000 x0 : ffff8009751b0e08 [88286.688166] [88286.689638] ---[ end trace 31633f18fd33d4cb ]--- [88286.694206] Call trace: [88286.696627] Exception stack(0xffff8009768b38d0 to 0xffff8009768b3a00) [88286.703004] 38c0: ffff000008c54000 0001000000000000 [88286.710757] 38e0: ffff8009768b3aa0 ffff0000080d8c7c ffff8009768b3b50 ffff80097ff8a5b0 [88286.718511] 3900: 0000800977325000 0000000000000000 0000000000000040 ffff80097ffc6180 [88286.726264] 3920: ffff8009768b3940 ffff0000088a8694 ffff80097ffc5800 0000000000000000 [88286.734017] 3940: ffff8009768b3960 ffff0000081919c0 ffff80097ffc5280 0000000000000001 [88286.741771] 3960: ffff8009768b3a50 ffff00000819206c ffff8009751b0e08 0000000000000000 [88286.749523] 3980: 0000000000000000 0000000000000000 0000000000000080 0000000000000000 [88286.757277] 39a0: ffff0000080d8ae4 0000000000000000 0000000000000000 ffff800977400028 [88286.765029] 39c0: 0000000000000000 0000000000000068 0000000000000040 0000000000000004 [88286.772783] 39e0: 0000000000000000 0000ffffb345b590 ffff000008088270 0000ffffb33d9a38 [88286.780537] [] __queue_work+0x29c/0x3b8 [88286.785883] [] queue_work_on+0x60/0x78 [88286.791146] [] etm_setup_aux+0x178/0x238 [88286.796578] [] rb_alloc_aux+0x228/0x310 [88286.801925] [] perf_mmap+0x404/0x5a8 [88286.807015] [] mmap_region+0x394/0x5c0 [88286.812276] [] do_mmap+0x254/0x388 [88286.817191] [] vm_mmap_pgoff+0xbc/0xe0 [88286.822452] [] SyS_mmap_pgoff+0xac/0x228 [88286.827884] [] sys_mmap+0x18/0x28 [88286.832714] [] el0_svc_naked+0x24/0x28 The patch makes sure that the event_data->work is initialised properly before we could possibly use it. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Tested-by: Mike Leach Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit d755209f6afddecfb1bb33efebb2a87039959205) Change-Id: I7a12c9fcf58e5ec2eb5998d0d6cc2cfbef289fcd --- drivers/hwtracing/coresight/coresight-etm-perf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 26cfac3e6de7..df6331536938 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -201,6 +201,7 @@ static void *etm_setup_aux(int event_cpu, void **pages, event_data = alloc_event_data(event_cpu); if (!event_data) return NULL; + INIT_WORK(&event_data->work, free_event_data); /* * In theory nothing prevent tracers in a trace session from being @@ -217,8 +218,6 @@ static void *etm_setup_aux(int event_cpu, void **pages, if (!sink) goto err; - INIT_WORK(&event_data->work, free_event_data); - mask = &event_data->mask; /* Setup the path for each CPU in a trace session */ -- GitLab From 62593d16d539cd09c792d6ec963a7444bcf258f9 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 10 Dec 2015 11:36:15 -0700 Subject: [PATCH 1282/1398] FROMLIST: coresight: tmc: implementing TMC-ETR AUX space API This patch implement the AUX area interfaces required to use the TMC (configured as an ETR) from the Perf sub-system. The ETR is configured to work with contiguous memory only. Although not optimal, it allows the IP block to be used while the scatter-gather mode of operation is being worked on. The heuristic is heavily borrowed from the ETB10 and TMC-ETF implementation. Signed-off-by: Mathieu Poirier (cherry picked from commit ea817df0ff9e657c2bc1014787562dbf49bfdccd) Change-Id: I6e5e28b63e6ffbfd0bde8db8c279206fec55ac5e --- .../hwtracing/coresight/coresight-tmc-etr.c | 243 ++++++++++++++++++ 1 file changed, 243 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 5d312699b3b9..2db48576be5b 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -15,11 +15,30 @@ * this program. If not, see . */ +#include #include #include +#include + #include "coresight-priv.h" #include "coresight-tmc.h" +/** + * struct cs_etr_buffer - keep track of a recording session' specifics + * @tmc: generic portion of the TMC buffers + * @paddr: the physical address of a DMA'able contiguous memory area + * @vaddr: the virtual address associated to @paddr + * @size: how much memory we have, starting at @paddr + * @dev: the device @vaddr has been tied to + */ +struct cs_etr_buffers { + struct cs_buffers tmc; + dma_addr_t paddr; + void __iomem *vaddr; + u32 size; + struct device *dev; +}; + static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) { u32 axictl; @@ -230,9 +249,233 @@ static void tmc_disable_etr_sink(struct coresight_device *csdev) dev_info(drvdata->dev, "TMC-ETR disabled\n"); } +static void *tmc_alloc_etr_buffer(struct coresight_device *csdev, int cpu, + void **pages, int nr_pages, bool overwrite) +{ + int node; + struct cs_etr_buffers *buf; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + if (cpu == -1) + cpu = smp_processor_id(); + node = cpu_to_node(cpu); + + /* Allocate memory structure for interaction with Perf */ + buf = kzalloc_node(sizeof(struct cs_etr_buffers), GFP_KERNEL, node); + if (!buf) + return NULL; + + buf->dev = drvdata->dev; + buf->size = drvdata->size; + buf->vaddr = dma_alloc_coherent(buf->dev, buf->size, + &buf->paddr, GFP_KERNEL); + if (!buf->vaddr) { + kfree(buf); + return NULL; + } + + buf->tmc.snapshot = overwrite; + buf->tmc.nr_pages = nr_pages; + buf->tmc.data_pages = pages; + + return buf; +} + +static void tmc_free_etr_buffer(void *config) +{ + struct cs_etr_buffers *buf = config; + + dma_free_coherent(buf->dev, buf->size, buf->vaddr, buf->paddr); + kfree(buf); +} + +static int tmc_set_etr_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config) +{ + int ret = 0; + unsigned long head; + struct cs_etr_buffers *buf = sink_config; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + /* wrap head around to the amount of space we have */ + head = handle->head & ((buf->tmc.nr_pages << PAGE_SHIFT) - 1); + + /* find the page to write to */ + buf->tmc.cur = head / PAGE_SIZE; + + /* and offset within that page */ + buf->tmc.offset = head % PAGE_SIZE; + + local_set(&buf->tmc.data_size, 0); + + /* Tell the HW where to put the trace data */ + drvdata->vaddr = buf->vaddr; + drvdata->paddr = buf->paddr; + memset(drvdata->vaddr, 0, drvdata->size); + + return ret; +} + +static unsigned long tmc_reset_etr_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config, bool *lost) +{ + long size = 0; + struct cs_etr_buffers *buf = sink_config; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + if (buf) { + /* + * In snapshot mode ->data_size holds the new address of the + * ring buffer's head. The size itself is the whole address + * range since we want the latest information. + */ + if (buf->tmc.snapshot) { + size = buf->tmc.nr_pages << PAGE_SHIFT; + handle->head = local_xchg(&buf->tmc.data_size, size); + } + + /* + * Tell the tracer PMU how much we got in this run and if + * something went wrong along the way. Nobody else can use + * this cs_etr_buffers instance until we are done. As such + * resetting parameters here and squaring off with the ring + * buffer API in the tracer PMU is fine. + */ + *lost = !!local_xchg(&buf->tmc.lost, 0); + size = local_xchg(&buf->tmc.data_size, 0); + } + + /* Get ready for another run */ + drvdata->vaddr = NULL; + drvdata->paddr = 0; + + return size; +} + +static void tmc_update_etr_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config) +{ + int i, cur; + u32 *buf_ptr; + u32 read_ptr, write_ptr; + u32 status, to_read; + unsigned long offset; + struct cs_buffers *buf = sink_config; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + if (!buf) + return; + + /* This shouldn't happen */ + if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF)) + return; + + CS_UNLOCK(drvdata->base); + + tmc_flush_and_stop(drvdata); + + read_ptr = readl_relaxed(drvdata->base + TMC_RRP); + write_ptr = readl_relaxed(drvdata->base + TMC_RWP); + + /* + * Get a hold of the status register and see if a wrap around + * has occurred. If so adjust things accordingly. + */ + status = readl_relaxed(drvdata->base + TMC_STS); + if (status & TMC_STS_FULL) { + local_inc(&buf->lost); + to_read = drvdata->size; + } else { + to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size); + } + + /* + * The TMC RAM buffer may be bigger than the space available in the + * perf ring buffer (handle->size). If so advance the RRP so that we + * get the latest trace data. + */ + if (to_read > handle->size) { + u32 buffer_start, mask = 0; + + /* Read buffer start address in system memory */ + buffer_start = readl_relaxed(drvdata->base + TMC_DBALO); + + /* + * The value written to RRP must be byte-address aligned to + * the width of the trace memory databus _and_ to a frame + * boundary (16 byte), whichever is the biggest. For example, + * for 32-bit, 64-bit and 128-bit wide trace memory, the four + * LSBs must be 0s. For 256-bit wide trace memory, the five + * LSBs must be 0s. + */ + switch (drvdata->memwidth) { + case TMC_MEM_INTF_WIDTH_32BITS: + case TMC_MEM_INTF_WIDTH_64BITS: + case TMC_MEM_INTF_WIDTH_128BITS: + mask = GENMASK(31, 5); + break; + case TMC_MEM_INTF_WIDTH_256BITS: + mask = GENMASK(31, 6); + break; + } + + /* + * Make sure the new size is aligned in accordance with the + * requirement explained above. + */ + to_read = handle->size & mask; + /* Move the RAM read pointer up */ + read_ptr = (write_ptr + drvdata->size) - to_read; + /* Make sure we are still within our limits */ + if (read_ptr > (buffer_start + (drvdata->size - 1))) + read_ptr -= drvdata->size; + /* Tell the HW */ + writel_relaxed(read_ptr, drvdata->base + TMC_RRP); + local_inc(&buf->lost); + } + + cur = buf->cur; + offset = buf->offset; + + /* for every byte to read */ + for (i = 0; i < to_read; i += 4) { + buf_ptr = buf->data_pages[cur] + offset; + *buf_ptr = readl_relaxed(drvdata->base + TMC_RRD); + + offset += 4; + if (offset >= PAGE_SIZE) { + offset = 0; + cur++; + /* wrap around at the end of the buffer */ + cur &= buf->nr_pages - 1; + } + } + + /* + * In snapshot mode all we have to do is communicate to + * perf_aux_output_end() the address of the current head. In full + * trace mode the same function expects a size to move rb->aux_head + * forward. + */ + if (buf->snapshot) + local_set(&buf->data_size, (cur * PAGE_SIZE) + offset); + else + local_add(to_read, &buf->data_size); + + CS_LOCK(drvdata->base); +} + static const struct coresight_ops_sink tmc_etr_sink_ops = { .enable = tmc_enable_etr_sink, .disable = tmc_disable_etr_sink, + .alloc_buffer = tmc_alloc_etr_buffer, + .free_buffer = tmc_free_etr_buffer, + .set_buffer = tmc_set_etr_buffer, + .reset_buffer = tmc_reset_etr_buffer, + .update_buffer = tmc_update_etr_buffer, }; const struct coresight_ops tmc_etr_cs_ops = { -- GitLab From 1a938310b8af142ab21278da180e36c6d920d201 Mon Sep 17 00:00:00 2001 From: shiwanglai Date: Wed, 7 Feb 2018 16:47:02 +0800 Subject: [PATCH 1283/1398] FROMLIST: coresight: ETM: Add support for ARM Cortex-A73 Add ARM Cortex A-73 ETM PIDs to the known ETM ips. While at it also add description of the CPU to which the ETM belongs, to make it easier to identify the ETM devices. Change-Id: I031859b31e99d8b1fcd41474d4d339b685ef0ef4 Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose --- drivers/hwtracing/coresight/coresight-etm4x.c | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 4db8d6a4d0cb..83358ebad762 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -1026,7 +1026,8 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) } pm_runtime_put(&adev->dev); - dev_info(dev, "%s initialized\n", (char *)id->data); + dev_info(dev, "CPU%d: %s initialized\n", + drvdata->cpu, (char *)id->data); if (boot_enable) { coresight_enable(drvdata->csdev); @@ -1045,20 +1046,25 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) } static struct amba_id etm4_ids[] = { - { /* ETM 4.0 - Cortex-A53 */ + { .id = 0x000bb95d, .mask = 0x000fffff, - .data = "ETM 4.0", + .data = "Cortex-A53 ETM v4.0", }, - { /* ETM 4.0 - Cortex-A57 */ + { .id = 0x000bb95e, .mask = 0x000fffff, - .data = "ETM 4.0", + .data = "Cortex-A57 ETM v4.0", }, - { /* ETM 4.0 - A72, Maia, HiSilicon */ + { .id = 0x000bb95a, .mask = 0x000fffff, - .data = "ETM 4.0", + .data = "Cortex-A72 ETM v4.0", + }, + { + .id = 0x000bb959, + .mask = 0x000fffff, + .data = "Cortex-A73 ETM v4.0", }, { 0, 0}, }; -- GitLab From d2c57b60569e8e2fe88eecfa1abe5e6aae217801 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 25 Aug 2017 15:57:04 -0700 Subject: [PATCH 1284/1398] time: Fix ktime_get_raw() incorrect base accumulation In comqit fc6eead7c1e2 ("time: Clean up CLOCK_MONOTONIC_RAW time handling"), the following code got mistakenly added to the update of the raw timekeeper: /* Update the monotonic raw base */ seconds = tk->raw_sec; nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift); tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); Which adds the raw_sec value and the shifted down raw xtime_nsec to the base value. But the read function adds the shifted down tk->tkr_raw.xtime_nsec value another time, The result of this is that ktime_get_raw() users (which are all internal users) see the raw time move faster then it should (the rate at which can vary with the current size of tkr_raw.xtime_nsec), which has resulted in at least problems with graphics rendering performance. The change tried to match the monotonic base update logic: seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); nsec = (u32) tk->wall_to_monotonic.tv_nsec; tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); Which adds the wall_to_monotonic.tv_nsec value, but not the tk->tkr_mono.xtime_nsec value to the base. To fix this, simplify the tkr_raw.base accumulation to only accumulate the raw_sec portion, and do not include the tkr_raw.xtime_nsec portion, which will be added at read time. Fixes: fc6eead7c1e2 ("time: Clean up CLOCK_MONOTONIC_RAW time handling") Reported-and-tested-by: Chris Wilson Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: Kevin Brodsky Cc: Richard Cochran Cc: Stephen Boyd Cc: Will Deacon Cc: Miroslav Lichvar Cc: Daniel Mentz Link: http://lkml.kernel.org/r/1503701824-1645-1-git-send-email-john.stultz@linaro.org (cherry picked from commit 0bcdc0987cce9880436b70836c6a92bb8e744fd1) Change-Id: I91d552bef42005d954f77963beafdca3cb6eb246 --- kernel/time/timekeeping.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 17c0d1490754..427e33dbb98d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -636,9 +636,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) tk->ktime_sec = seconds; /* Update the monotonic raw base */ - seconds = tk->raw_sec; - nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift); - tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); + tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC); } /* must hold timekeeper_lock */ -- GitLab From 2a5cc53ea19ee8c8364a291745f73832673d2c04 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 31 Aug 2017 10:37:00 +0200 Subject: [PATCH 1285/1398] BACKPORT: xfrm: Fix return value check of copy_sec_ctx. A recent commit added an output_mark. When copying this output_mark, the return value of copy_sec_ctx is overwitten without a check. Fix this by copying the output_mark before the security context. Fixes: 077fbac405bf ("net: xfrm: support setting an output mark.") Signed-off-by: Steffen Klassert (cherry picked from commit 8598112d04af21cf6c895670e72dcb8a9f58e74f) Change-Id: I25e9ac6cf79dc8d0ee599bbd23e9d5b5f34a4284 --- net/xfrm/xfrm_user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2cade0276299..b09febe8782e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -872,13 +872,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, &x->replay); if (ret) goto out; - if (x->security) - ret = copy_sec_ctx(x->security, skb); if (x->props.output_mark) { ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark); if (ret) goto out; } + if (x->security) + ret = copy_sec_ctx(x->security, skb); out: return ret; } -- GitLab From 35724bccae7926557ebffd1f41f878616f5dff03 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 1 Mar 2017 13:29:48 -0800 Subject: [PATCH 1286/1398] BACKPORT: tcp: fix potential double free issue for fastopen_req commit 7db92362d2fee5887f6b0c41653b8c9f8f5d6020 upstream. tp->fastopen_req could potentially be double freed if a malicious user does the following: 1. Enable TCP_FASTOPEN_CONNECT sockopt and do a connect() on the socket. 2. Call connect() with AF_UNSPEC to disconnect the socket. 3. Make this socket a listening socket by calling listen(). 4. Accept incoming connections and generate child sockets. All child sockets will get a copy of the pointer of fastopen_req. 5. Call close() on all sockets. fastopen_req will get freed multiple times. Fixes: 19f6d3f3c842 ("net/tcp-fastopen: Add new API support") Reported-by: Andrey Konovalov Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Change-Id: I6bb174d6997470643df5ba34e1f9858e16f4989d Fixes: Change-Id: Icc181febd74e3117c2fc835d7ed935e107b5815e ("BACKPORT: net/tcp-fastopen: Add new API support") Signed-off-by: Amit Pundir --- net/ipv4/tcp.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7f701b16e068..ee3dde5ff32f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1115,9 +1115,14 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, uaddr, msg->msg_namelen, flags); - inet->defer_connect = 0; - *copied = tp->fastopen_req->copied; - tcp_free_fastopen_req(tp); + /* fastopen_req could already be freed in __inet_stream_connect + * if the connection times out or gets rst + */ + if (tp->fastopen_req) { + *copied = tp->fastopen_req->copied; + tcp_free_fastopen_req(tp); + inet->defer_connect = 0; + } return err; } @@ -2331,6 +2336,10 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); + /* Clean up fastopen related fields */ + tcp_free_fastopen_req(tp); + inet->defer_connect = 0; + WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); if (sk->sk_frag.page) { -- GitLab From a25ea24f7b7d71c7338713bfe050342c3289aa86 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 26 Apr 2017 19:07:35 +0200 Subject: [PATCH 1287/1398] UPSTREAM: tcp: fix access to sk->sk_state in tcp_poll() commit d68be71ea14d609a5f31534003319be5db422595 upstream. avoid direct access to sk->sk_state when tcp_poll() is called on a socket using active TCP fastopen with deferred connect. Use local variable 'state', which stores the result of sk_state_load(), like it was done in commit 00fd38d938db ("tcp: ensure proper barriers in lockless contexts"). Fixes: 19f6d3f3c842 ("net/tcp-fastopen: Add new API support") Signed-off-by: Davide Caratti Acked-by: Wei Wang Signed-off-by: David S. Miller Change-Id: I5f5510a070a3d634bac28c4edbb64e362bab59b0 Fixes: Change-Id: Icc181febd74e3117c2fc835d7ed935e107b5815e ("BACKPORT: net/tcp-fastopen: Add new API support") Signed-off-by: Amit Pundir --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ee3dde5ff32f..7ea802775a98 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -538,7 +538,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; - } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { + } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { /* Active TCP fastopen socket with defer_connect * Return POLLOUT so application can call write() * in order for kernel to generate SYN+data -- GitLab From ba4f9c192d3b655af9a24e0b8273e58d9ed8aa63 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Mon, 15 Jan 2018 14:30:03 +0100 Subject: [PATCH 1288/1398] powerpc/pseries: include linux/types.h in asm/hvcall.h commit 1b689a95ce7427075f9ac9fb4aea1af530742b7f upstream. Commit 6e032b350cd1 ("powerpc/powernv: Check device-tree for RFI flush settings") uses u64 in asm/hvcall.h without including linux/types.h This breaks hvcall.h users that do not include the header themselves. Fixes: 6e032b350cd1 ("powerpc/powernv: Check device-tree for RFI flush settings") Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/hvcall.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 0e12cb2437d1..dc0996b9d75d 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -319,6 +319,7 @@ #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 #ifndef __ASSEMBLY__ +#include /** * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments -- GitLab From ee6858f72a39226728e5085fc7388785a8ddc5d3 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 15 Dec 2017 12:48:32 -0800 Subject: [PATCH 1289/1398] cifs: Fix missing put_xid in cifs_file_strict_mmap commit f04a703c3d613845ae3141bfaf223489de8ab3eb upstream. If cifs_zap_mapping() returned an error, we would return without putting the xid that we got earlier. Restructure cifs_file_strict_mmap() and cifs_file_mmap() to be more similar to each other and have a single point of return that always puts the xid. Signed-off-by: Matthew Wilcox Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cf192f9ce254..02e403af9518 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3285,20 +3285,18 @@ static const struct vm_operations_struct cifs_file_vm_ops = { int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) { - int rc, xid; + int xid, rc = 0; struct inode *inode = file_inode(file); xid = get_xid(); - if (!CIFS_CACHE_READ(CIFS_I(inode))) { + if (!CIFS_CACHE_READ(CIFS_I(inode))) rc = cifs_zap_mapping(inode); - if (rc) - return rc; - } - - rc = generic_file_mmap(file, vma); - if (rc == 0) + if (!rc) + rc = generic_file_mmap(file, vma); + if (!rc) vma->vm_ops = &cifs_file_vm_ops; + free_xid(xid); return rc; } @@ -3308,16 +3306,16 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) int rc, xid; xid = get_xid(); + rc = cifs_revalidate_file(file); - if (rc) { + if (rc) cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", rc); - free_xid(xid); - return rc; - } - rc = generic_file_mmap(file, vma); - if (rc == 0) + if (!rc) + rc = generic_file_mmap(file, vma); + if (!rc) vma->vm_ops = &cifs_file_vm_ops; + free_xid(xid); return rc; } -- GitLab From f59eda16646ab3b0de9e7cac9528c40df46606ac Mon Sep 17 00:00:00 2001 From: Daniel N Pettersson Date: Thu, 11 Jan 2018 16:00:12 +0100 Subject: [PATCH 1290/1398] cifs: Fix autonegotiate security settings mismatch commit 9aca7e454415f7878b28524e76bebe1170911a88 upstream. Autonegotiation gives a security settings mismatch error if the SMB server selects an SMBv3 dialect that isn't SMB3.02. The exact error is "protocol revalidation - security settings mismatch". This can be tested using Samba v4.2 or by setting the global Samba setting max protocol = SMB3_00. The check that fails in smb3_validate_negotiate is the dialect verification of the negotiate info response. This is because it tries to verify against the protocol_id in the global smbdefault_values. The protocol_id in smbdefault_values is SMB3.02. In SMB2_negotiate the protocol_id in smbdefault_values isn't updated, it is global so it probably shouldn't be, but server->dialect is. This patch changes the check in smb3_validate_negotiate to use server->dialect instead of server->vals->protocol_id. The patch works with autonegotiate and when using a specific version in the vers mount option. Signed-off-by: Daniel N Pettersson Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 69b610ad3fdc..94c4c1901222 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -585,8 +585,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) } /* check validate negotiate info response matches what we got earlier */ - if (pneg_rsp->Dialect != - cpu_to_le16(tcon->ses->server->vals->protocol_id)) + if (pneg_rsp->Dialect != cpu_to_le16(tcon->ses->server->dialect)) goto vneg_out; if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode)) -- GitLab From 7e68916c361ac6d730601bef4e8a4c43aa93495e Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 25 Jan 2018 15:59:39 +0100 Subject: [PATCH 1291/1398] CIFS: zero sensitive data when freeing commit 97f4b7276b829a8927ac903a119bef2f963ccc58 upstream. also replaces memset()+kfree() by kzfree(). Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsencrypt.c | 3 +-- fs/cifs/connect.c | 6 +++--- fs/cifs/misc.c | 14 ++++---------- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 5eb04129f938..73360df52ce9 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -318,9 +318,8 @@ int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, { int i; int rc; - char password_with_pad[CIFS_ENCPWD_SIZE]; + char password_with_pad[CIFS_ENCPWD_SIZE] = {0}; - memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); if (password) strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 580b3a4ca53a..441d434a48c1 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1667,7 +1667,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, tmp_end++; if (!(tmp_end < end && tmp_end[1] == delim)) { /* No it is not. Set the password to NULL */ - kfree(vol->password); + kzfree(vol->password); vol->password = NULL; break; } @@ -1705,7 +1705,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, options = end; } - kfree(vol->password); + kzfree(vol->password); /* Now build new password string */ temp_len = strlen(value); vol->password = kzalloc(temp_len+1, GFP_KERNEL); @@ -4159,7 +4159,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) reset_cifs_unix_caps(0, tcon, NULL, vol_info); out: kfree(vol_info->username); - kfree(vol_info->password); + kzfree(vol_info->password); kfree(vol_info); return tcon; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 5419afea0a36..323d8e34abde 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -99,14 +99,11 @@ sesInfoFree(struct cifs_ses *buf_to_free) kfree(buf_to_free->serverOS); kfree(buf_to_free->serverDomain); kfree(buf_to_free->serverNOS); - if (buf_to_free->password) { - memset(buf_to_free->password, 0, strlen(buf_to_free->password)); - kfree(buf_to_free->password); - } + kzfree(buf_to_free->password); kfree(buf_to_free->user_name); kfree(buf_to_free->domainName); - kfree(buf_to_free->auth_key.response); - kfree(buf_to_free); + kzfree(buf_to_free->auth_key.response); + kzfree(buf_to_free); } struct cifs_tcon * @@ -137,10 +134,7 @@ tconInfoFree(struct cifs_tcon *buf_to_free) } atomic_dec(&tconInfoAllocCount); kfree(buf_to_free->nativeFileSystem); - if (buf_to_free->password) { - memset(buf_to_free->password, 0, strlen(buf_to_free->password)); - kfree(buf_to_free->password); - } + kzfree(buf_to_free->password); kfree(buf_to_free); } -- GitLab From 297c7cc4b5651b174a62925b6c961085f04979fd Mon Sep 17 00:00:00 2001 From: Yang Shunyong Date: Mon, 29 Jan 2018 14:40:11 +0800 Subject: [PATCH 1292/1398] dmaengine: dmatest: fix container_of member in dmatest_callback commit 66b3bd2356e0a1531c71a3dcf96944621e25c17c upstream. The type of arg passed to dmatest_callback is struct dmatest_done. It refers to test_done in struct dmatest_thread, not done_wait. Fixes: 6f6a23a213be ("dmaengine: dmatest: move callback wait ...") Signed-off-by: Yang Shunyong Acked-by: Adam Wallis Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dmatest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index e0bd578a253a..ebe72a466587 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -339,7 +339,7 @@ static void dmatest_callback(void *arg) { struct dmatest_done *done = arg; struct dmatest_thread *thread = - container_of(arg, struct dmatest_thread, done_wait); + container_of(done, struct dmatest_thread, test_done); if (!thread->done) { done->done = true; wake_up_all(done->wait); -- GitLab From 83946c33b9b99b5bc6157cfbf3970265f006c2bf Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 13 Feb 2018 16:45:20 +0100 Subject: [PATCH 1293/1398] kaiser: fix compile error without vsyscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tobias noticed a compile error on 4.4.115, and it's the same on 4.9.80: arch/x86/mm/kaiser.c: In function ‘kaiser_init’: arch/x86/mm/kaiser.c:348:8: error: ‘vsyscall_pgprot’ undeclared (first use in this function) It seems like his combination of kernel options doesn't work for KAISER. X86_VSYSCALL_EMULATION is not set on his system, while LEGACY_VSYSCALL is set to NONE (LEGACY_VSYSCALL_NONE=y). He managed to get things compiling again, by moving the 'extern unsigned long vsyscall_pgprot' outside of the preprocessor statement. This works because the optimizer removes that code (vsyscall_enabled() is always false) - and that's how it was done in some older backports. Reported-by: Tobias Jakobi Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/vsyscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index 9ee85066f407..62210da19a92 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -13,7 +13,6 @@ extern void map_vsyscall(void); */ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); extern bool vsyscall_enabled(void); -extern unsigned long vsyscall_pgprot; #else static inline void map_vsyscall(void) {} static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) @@ -22,5 +21,6 @@ static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) } static inline bool vsyscall_enabled(void) { return false; } #endif +extern unsigned long vsyscall_pgprot; #endif /* _ASM_X86_VSYSCALL_H */ -- GitLab From 0b376535ad5493d2fcf70ab5f6539551aadb493e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Dec 2017 10:32:03 +0100 Subject: [PATCH 1294/1398] posix-timer: Properly check sigevent->sigev_notify commit cef31d9af908243421258f1df35a4a644604efbe upstream. timer_create() specifies via sigevent->sigev_notify the signal delivery for the new timer. The valid modes are SIGEV_NONE, SIGEV_SIGNAL, SIGEV_THREAD and (SIGEV_SIGNAL | SIGEV_THREAD_ID). The sanity check in good_sigevent() is only checking the valid combination for the SIGEV_THREAD_ID bit, i.e. SIGEV_SIGNAL, but if SIGEV_THREAD_ID is not set it accepts any random value. This has no real effects on the posix timer and signal delivery code, but it affects show_timer() which handles the output of /proc/$PID/timers. That function uses a string array to pretty print sigev_notify. The access to that array has no bound checks, so random sigev_notify cause access beyond the array bounds. Add proper checks for the valid notify modes and remove the SIGEV_THREAD_ID masking from various code pathes as SIGEV_NONE can never be set in combination with SIGEV_THREAD_ID. Reported-by: Eric Biggers Reported-by: Dmitry Vyukov Reported-by: Alexey Dobriyan Signed-off-by: Thomas Gleixner Cc: John Stultz Signed-off-by: Greg Kroah-Hartman --- kernel/time/posix-timers.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index f2826c35e918..fc7c37ad90a0 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -507,17 +507,22 @@ static struct pid *good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; - if ((event->sigev_notify & SIGEV_THREAD_ID ) && - (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || - !same_thread_group(rtn, current) || - (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) + switch (event->sigev_notify) { + case SIGEV_SIGNAL | SIGEV_THREAD_ID: + rtn = find_task_by_vpid(event->sigev_notify_thread_id); + if (!rtn || !same_thread_group(rtn, current)) + return NULL; + /* FALLTHRU */ + case SIGEV_SIGNAL: + case SIGEV_THREAD: + if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX) + return NULL; + /* FALLTHRU */ + case SIGEV_NONE: + return task_pid(rtn); + default: return NULL; - - if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) - return NULL; - - return task_pid(rtn); + } } void posix_timers_register_clock(const clockid_t clock_id, @@ -745,8 +750,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) /* interval timer ? */ if (iv.tv64) cur_setting->it_interval = ktime_to_timespec(iv); - else if (!hrtimer_active(timer) && - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) + else if (!hrtimer_active(timer) && timr->it_sigev_notify != SIGEV_NONE) return; now = timer->base->get_time(); @@ -757,7 +761,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) * expiry is > now. */ if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING || - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) + timr->it_sigev_notify == SIGEV_NONE)) timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); remaining = __hrtimer_expires_remaining_adjusted(timer, now); @@ -767,7 +771,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) * A single shot SIGEV_NONE timer must return 0, when * it is expired ! */ - if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) + if (timr->it_sigev_notify != SIGEV_NONE) cur_setting->it_value.tv_nsec = 1; } else cur_setting->it_value = ktime_to_timespec(remaining); @@ -865,7 +869,7 @@ common_timer_set(struct k_itimer *timr, int flags, timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); /* SIGEV_NONE timers are not queued ! See common_timer_get */ - if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { + if (timr->it_sigev_notify == SIGEV_NONE) { /* Setup correct expiry time for relative timers */ if (mode == HRTIMER_MODE_REL) { hrtimer_add_expires(timer, timer->base->get_time()); -- GitLab From 57ddb8eae517315f811c4915f1d5c11e59423707 Mon Sep 17 00:00:00 2001 From: Petr Cvek Date: Tue, 7 Mar 2017 00:57:20 +0100 Subject: [PATCH 1295/1398] usb: gadget: uvc: Missing files for configfs interface commit c8cd751060b149997b9de53a494fb1490ded72c5 upstream. Commit 76e0da34c7ce ("usb-gadget/uvc: use per-attribute show and store methods") caused a stringification of an undefined macro argument "aname", so three UVC parameters (streaming_interval, streaming_maxpacket and streaming_maxburst) were named "aname". Add the definition of "aname" to the main macro and name the filenames as originaly intended. Signed-off-by: Petr Cvek Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/uvc_configfs.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 31125a4a2658..d7dcd39fe12c 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -2140,7 +2140,7 @@ static struct configfs_item_operations uvc_item_ops = { .release = uvc_attr_release, }; -#define UVCG_OPTS_ATTR(cname, conv, str2u, uxx, vnoc, limit) \ +#define UVCG_OPTS_ATTR(cname, aname, conv, str2u, uxx, vnoc, limit) \ static ssize_t f_uvc_opts_##cname##_show( \ struct config_item *item, char *page) \ { \ @@ -2183,16 +2183,16 @@ end: \ return ret; \ } \ \ -UVC_ATTR(f_uvc_opts_, cname, aname) +UVC_ATTR(f_uvc_opts_, cname, cname) #define identity_conv(x) (x) -UVCG_OPTS_ATTR(streaming_interval, identity_conv, kstrtou8, u8, identity_conv, - 16); -UVCG_OPTS_ATTR(streaming_maxpacket, le16_to_cpu, kstrtou16, u16, le16_to_cpu, - 3072); -UVCG_OPTS_ATTR(streaming_maxburst, identity_conv, kstrtou8, u8, identity_conv, - 15); +UVCG_OPTS_ATTR(streaming_interval, streaming_interval, identity_conv, + kstrtou8, u8, identity_conv, 16); +UVCG_OPTS_ATTR(streaming_maxpacket, streaming_maxpacket, le16_to_cpu, + kstrtou16, u16, le16_to_cpu, 3072); +UVCG_OPTS_ATTR(streaming_maxburst, streaming_maxburst, identity_conv, + kstrtou8, u8, identity_conv, 15); #undef identity_conv -- GitLab From 1c679981309b4d36b024fc954cfcf2111a007de0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 23 Jan 2018 20:45:37 -0500 Subject: [PATCH 1296/1398] sched/rt: Use container_of() to get root domain in rto_push_irq_work_func() commit ad0f1d9d65938aec72a698116cd73a980916895e upstream. When the rto_push_irq_work_func() is called, it looks at the RT overloaded bitmask in the root domain via the runqueue (rq->rd). The problem is that during CPU up and down, nothing here stops rq->rd from changing between taking the rq->rd->rto_lock and releasing it. That means the lock that is released is not the same lock that was taken. Instead of using this_rq()->rd to get the root domain, as the irq work is part of the root domain, we can simply get the root domain from the irq work that is passed to the routine: container_of(work, struct root_domain, rto_push_work) This keeps the root domain consistent. Reported-by: Pavan Kondeti Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 4bdced5c9a292 ("sched/rt: Simplify the IPI based RT balancing logic") Link: http://lkml.kernel.org/r/CAEU1=PkiHO35Dzna8EQqNSKW1fr1y1zRQ5y66X117MG06sQtNA@mail.gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/rt.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7a360d6f6798..27b6209a2928 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1895,9 +1895,8 @@ static void push_rt_tasks(struct rq *rq) * the rt_loop_next will cause the iterator to perform another scan. * */ -static int rto_next_cpu(struct rq *rq) +static int rto_next_cpu(struct root_domain *rd) { - struct root_domain *rd = rq->rd; int next; int cpu; @@ -1973,7 +1972,7 @@ static void tell_cpu_to_push(struct rq *rq) * Otherwise it is finishing up and an ipi needs to be sent. */ if (rq->rd->rto_cpu < 0) - cpu = rto_next_cpu(rq); + cpu = rto_next_cpu(rq->rd); raw_spin_unlock(&rq->rd->rto_lock); @@ -1986,6 +1985,8 @@ static void tell_cpu_to_push(struct rq *rq) /* Called from hardirq context */ void rto_push_irq_work_func(struct irq_work *work) { + struct root_domain *rd = + container_of(work, struct root_domain, rto_push_work); struct rq *rq; int cpu; @@ -2001,18 +2002,18 @@ void rto_push_irq_work_func(struct irq_work *work) raw_spin_unlock(&rq->lock); } - raw_spin_lock(&rq->rd->rto_lock); + raw_spin_lock(&rd->rto_lock); /* Pass the IPI to the next rt overloaded queue */ - cpu = rto_next_cpu(rq); + cpu = rto_next_cpu(rd); - raw_spin_unlock(&rq->rd->rto_lock); + raw_spin_unlock(&rd->rto_lock); if (cpu < 0) return; /* Try the next RT overloaded CPU */ - irq_work_queue_on(&rq->rd->rto_push_work, cpu); + irq_work_queue_on(&rd->rto_push_work, cpu); } #endif /* HAVE_RT_PUSH_IPI */ -- GitLab From a384e5437f705972d2884cea17b931c1a2cd3277 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 23 Jan 2018 20:45:38 -0500 Subject: [PATCH 1297/1398] sched/rt: Up the root domain ref count when passing it around via IPIs commit 364f56653708ba8bcdefd4f0da2a42904baa8eeb upstream. When issuing an IPI RT push, where an IPI is sent to each CPU that has more than one RT task scheduled on it, it references the root domain's rto_mask, that contains all the CPUs within the root domain that has more than one RT task in the runable state. The problem is, after the IPIs are initiated, the rq->lock is released. This means that the root domain that is associated to the run queue could be freed while the IPIs are going around. Add a sched_get_rd() and a sched_put_rd() that will increment and decrement the root domain's ref count respectively. This way when initiating the IPIs, the scheduler will up the root domain's ref count before releasing the rq->lock, ensuring that the root domain does not go away until the IPI round is complete. Reported-by: Pavan Kondeti Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 4bdced5c9a292 ("sched/rt: Simplify the IPI based RT balancing logic") Link: http://lkml.kernel.org/r/CAEU1=PkiHO35Dzna8EQqNSKW1fr1y1zRQ5y66X117MG06sQtNA@mail.gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 13 +++++++++++++ kernel/sched/rt.c | 9 +++++++-- kernel/sched/sched.h | 2 ++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e5066955cc3a..bce3a7ad4253 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5864,6 +5864,19 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) call_rcu_sched(&old_rd->rcu, free_rootdomain); } +void sched_get_rd(struct root_domain *rd) +{ + atomic_inc(&rd->refcount); +} + +void sched_put_rd(struct root_domain *rd) +{ + if (!atomic_dec_and_test(&rd->refcount)) + return; + + call_rcu_sched(&rd->rcu, free_rootdomain); +} + static int init_rootdomain(struct root_domain *rd) { memset(rd, 0, sizeof(*rd)); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 27b6209a2928..f6d68ddfa2f3 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1978,8 +1978,11 @@ static void tell_cpu_to_push(struct rq *rq) rto_start_unlock(&rq->rd->rto_loop_start); - if (cpu >= 0) + if (cpu >= 0) { + /* Make sure the rd does not get freed while pushing */ + sched_get_rd(rq->rd); irq_work_queue_on(&rq->rd->rto_push_work, cpu); + } } /* Called from hardirq context */ @@ -2009,8 +2012,10 @@ void rto_push_irq_work_func(struct irq_work *work) raw_spin_unlock(&rd->rto_lock); - if (cpu < 0) + if (cpu < 0) { + sched_put_rd(rd); return; + } /* Try the next RT overloaded CPU */ irq_work_queue_on(&rd->rto_push_work, cpu); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index cff985feb6e7..f564a1d2c9d5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -590,6 +590,8 @@ struct root_domain { }; extern struct root_domain def_root_domain; +extern void sched_get_rd(struct root_domain *rd); +extern void sched_put_rd(struct root_domain *rd); #ifdef HAVE_RT_PUSH_IPI extern void rto_push_irq_work_func(struct irq_work *work); -- GitLab From 7e2fb808d3c7c52f88ebc670949dbf1bae48f2a2 Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Tue, 5 Dec 2017 20:58:35 +0000 Subject: [PATCH 1298/1398] dccp: CVE-2017-8824: use-after-free in DCCP code commit 69c64866ce072dea1d1e59a0d61e0f66c0dffb76 upstream. Whenever the sock object is in DCCP_CLOSED state, dccp_disconnect() must free dccps_hc_tx_ccid and dccps_hc_rx_ccid and set to NULL. Signed-off-by: Mohamed Ghannam Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/proto.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index b68168fcc06a..9d43c1f40274 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -259,6 +259,7 @@ int dccp_disconnect(struct sock *sk, int flags) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); int err = 0; const int old_state = sk->sk_state; @@ -278,6 +279,10 @@ int dccp_disconnect(struct sock *sk, int flags) sk->sk_err = ECONNRESET; dccp_clear_xmit_timers(sk); + ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_rx_ccid = NULL; + dp->dccps_hc_tx_ccid = NULL; __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_write_queue); -- GitLab From 1ff1353a03c6cde5334a94cb67f8632141b0589b Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Tue, 26 Sep 2017 17:10:20 -0400 Subject: [PATCH 1299/1398] media: dvb-usb-v2: lmedm04: Improve logic checking of warm start commit 3d932ee27e852e4904647f15b64dedca51187ad7 upstream. Warm start has no check as whether a genuine device has connected and proceeds to next execution path. Check device should read 0x47 at offset of 2 on USB descriptor read and it is the amount requested of 6 bytes. Fix for kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access as Reported-by: Andrey Konovalov Signed-off-by: Malcolm Priestley Signed-off-by: Mauro Carvalho Chehab Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb-v2/lmedm04.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c index 0e8fb89896c4..50d02f879949 100644 --- a/drivers/media/usb/dvb-usb-v2/lmedm04.c +++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c @@ -504,18 +504,23 @@ static int lme2510_pid_filter(struct dvb_usb_adapter *adap, int index, u16 pid, static int lme2510_return_status(struct dvb_usb_device *d) { - int ret = 0; + int ret; u8 *data; - data = kzalloc(10, GFP_KERNEL); + data = kzalloc(6, GFP_KERNEL); if (!data) return -ENOMEM; - ret |= usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0), - 0x06, 0x80, 0x0302, 0x00, data, 0x0006, 200); - info("Firmware Status: %x (%x)", ret , data[2]); + ret = usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0), + 0x06, 0x80, 0x0302, 0x00, + data, 0x6, 200); + if (ret != 6) + ret = -EINVAL; + else + ret = data[2]; + + info("Firmware Status: %6ph", data); - ret = (ret < 0) ? -ENODEV : data[2]; kfree(data); return ret; } @@ -1200,6 +1205,7 @@ static int lme2510_get_adapter_count(struct dvb_usb_device *d) static int lme2510_identify_state(struct dvb_usb_device *d, const char **name) { struct lme2510_state *st = d->priv; + int status; usb_reset_configuration(d->udev); @@ -1208,12 +1214,16 @@ static int lme2510_identify_state(struct dvb_usb_device *d, const char **name) st->dvb_usb_lme2510_firmware = dvb_usb_lme2510_firmware; - if (lme2510_return_status(d) == 0x44) { + status = lme2510_return_status(d); + if (status == 0x44) { *name = lme_firmware_switch(d, 0); return COLD; } - return 0; + if (status != 0x47) + return -EINVAL; + + return WARM; } static int lme2510_get_stream_config(struct dvb_frontend *fe, u8 *ts_type, -- GitLab From f320dd20224ca9193040c379f0243030dd67fa4b Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Tue, 26 Sep 2017 17:10:21 -0400 Subject: [PATCH 1300/1398] media: dvb-usb-v2: lmedm04: move ts2020 attach to dm04_lme2510_tuner commit 7bf7a7116ed313c601307f7e585419369926ab05 upstream. When the tuner was split from m88rs2000 the attach function is in wrong place. Move to dm04_lme2510_tuner to trap errors on failure and removing a call to lme_coldreset. Prevents driver starting up without any tuner connected. Fixes to trap for ts2020 fail. LME2510(C): FE Found M88RS2000 ts2020: probe of 0-0060 failed with error -11 ... LME2510(C): TUN Found RS2000 tuner kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN Reported-by: Andrey Konovalov Signed-off-by: Malcolm Priestley Tested-by: Andrey Konovalov Signed-off-by: Mauro Carvalho Chehab Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb-v2/lmedm04.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c index 50d02f879949..5c4aa247d650 100644 --- a/drivers/media/usb/dvb-usb-v2/lmedm04.c +++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c @@ -1084,8 +1084,6 @@ static int dm04_lme2510_frontend_attach(struct dvb_usb_adapter *adap) if (adap->fe[0]) { info("FE Found M88RS2000"); - dvb_attach(ts2020_attach, adap->fe[0], &ts2020_config, - &d->i2c_adap); st->i2c_tuner_gate_w = 5; st->i2c_tuner_gate_r = 5; st->i2c_tuner_addr = 0x60; @@ -1151,17 +1149,18 @@ static int dm04_lme2510_tuner(struct dvb_usb_adapter *adap) ret = st->tuner_config; break; case TUNER_RS2000: - ret = st->tuner_config; + if (dvb_attach(ts2020_attach, adap->fe[0], + &ts2020_config, &d->i2c_adap)) + ret = st->tuner_config; break; default: break; } - if (ret) + if (ret) { info("TUN Found %s tuner", tun_msg[ret]); - else { - info("TUN No tuner found --- resetting device"); - lme_coldreset(d); + } else { + info("TUN No tuner found"); return -ENODEV; } -- GitLab From 198a7ddaf5d2c76130b28f19ed6d768860ea2b8e Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Fri, 22 Sep 2017 09:07:06 -0400 Subject: [PATCH 1301/1398] media: hdpvr: Fix an error handling path in hdpvr_probe() commit c0f71bbb810237a38734607ca4599632f7f5d47f upstream. Here, hdpvr_register_videodev() is responsible for setup and register a video device. Also defining and initializing a worker. hdpvr_register_videodev() is calling by hdpvr_probe at last. So no need to flush any work here. Unregister v4l2, free buffers and memory. If hdpvr_probe() will fail. Signed-off-by: Arvind Yadav Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/hdpvr/hdpvr-core.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/media/usb/hdpvr/hdpvr-core.c b/drivers/media/usb/hdpvr/hdpvr-core.c index a61d8fd63c12..a20b60ac66ca 100644 --- a/drivers/media/usb/hdpvr/hdpvr-core.c +++ b/drivers/media/usb/hdpvr/hdpvr-core.c @@ -295,7 +295,7 @@ static int hdpvr_probe(struct usb_interface *interface, /* register v4l2_device early so it can be used for printks */ if (v4l2_device_register(&interface->dev, &dev->v4l2_dev)) { dev_err(&interface->dev, "v4l2_device_register failed\n"); - goto error; + goto error_free_dev; } mutex_init(&dev->io_mutex); @@ -304,7 +304,7 @@ static int hdpvr_probe(struct usb_interface *interface, dev->usbc_buf = kmalloc(64, GFP_KERNEL); if (!dev->usbc_buf) { v4l2_err(&dev->v4l2_dev, "Out of memory\n"); - goto error; + goto error_v4l2_unregister; } init_waitqueue_head(&dev->wait_buffer); @@ -342,13 +342,13 @@ static int hdpvr_probe(struct usb_interface *interface, } if (!dev->bulk_in_endpointAddr) { v4l2_err(&dev->v4l2_dev, "Could not find bulk-in endpoint\n"); - goto error; + goto error_put_usb; } /* init the device */ if (hdpvr_device_init(dev)) { v4l2_err(&dev->v4l2_dev, "device init failed\n"); - goto error; + goto error_put_usb; } mutex_lock(&dev->io_mutex); @@ -356,7 +356,7 @@ static int hdpvr_probe(struct usb_interface *interface, mutex_unlock(&dev->io_mutex); v4l2_err(&dev->v4l2_dev, "allocating transfer buffers failed\n"); - goto error; + goto error_put_usb; } mutex_unlock(&dev->io_mutex); @@ -364,7 +364,7 @@ static int hdpvr_probe(struct usb_interface *interface, retval = hdpvr_register_i2c_adapter(dev); if (retval < 0) { v4l2_err(&dev->v4l2_dev, "i2c adapter register failed\n"); - goto error; + goto error_free_buffers; } client = hdpvr_register_ir_rx_i2c(dev); @@ -397,13 +397,17 @@ static int hdpvr_probe(struct usb_interface *interface, reg_fail: #if IS_ENABLED(CONFIG_I2C) i2c_del_adapter(&dev->i2c_adapter); +error_free_buffers: #endif + hdpvr_free_buffers(dev); +error_put_usb: + usb_put_dev(dev->udev); + kfree(dev->usbc_buf); +error_v4l2_unregister: + v4l2_device_unregister(&dev->v4l2_dev); +error_free_dev: + kfree(dev); error: - if (dev) { - flush_work(&dev->worker); - /* this frees allocated memory */ - hdpvr_delete(dev); - } return retval; } -- GitLab From cbdabc7027b1fea4d6a7d86bc573096ef5126657 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Oct 2017 15:54:10 +0200 Subject: [PATCH 1302/1398] mtd: cfi: convert inline functions to macros commit 9e343e87d2c4c707ef8fae2844864d4dde3a2d13 upstream. The map_word_() functions, dating back to linux-2.6.8, try to perform bitwise operations on a 'map_word' structure. This may have worked with compilers that were current then (gcc-3.4 or earlier), but end up being rather inefficient on any version I could try now (gcc-4.4 or higher). Specifically we hit a problem analyzed in gcc PR81715 where we fail to reuse the stack space for local variables. This can be seen immediately in the stack consumption for cfi_staa_erase_varsize() and other functions that (with CONFIG_KASAN) can be up to 2200 bytes. Changing the inline functions into macros brings this down to 1280 bytes. Without KASAN, the same problem exists, but the stack consumption is lower to start with, my patch shrinks it from 920 to 496 bytes on with arm-linux-gnueabi-gcc-5.4, and saves around 1KB in .text size for cfi_cmdset_0020.c, as it avoids copying map_word structures for each call to one of these helpers. With the latest gcc-8 snapshot, the problem is fixed in upstream gcc, but nobody uses that yet, so we should still work around it in mainline kernels and probably backport the workaround to stable kernels as well. We had a couple of other functions that suffered from the same gcc bug, and all of those had a simpler workaround involving dummy variables in the inline function. Unfortunately that did not work here, the macro hack was the best I could come up with. It would also be helpful to have someone to a little performance testing on the patch, to see how much it helps in terms of CPU utilitzation. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 Signed-off-by: Arnd Bergmann Acked-by: Richard Weinberger Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- include/linux/mtd/map.h | 130 +++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 69 deletions(-) diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 3aa56e3104bb..b5b43f94f311 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -270,75 +270,67 @@ void map_destroy(struct mtd_info *mtd); #define INVALIDATE_CACHED_RANGE(map, from, size) \ do { if (map->inval_cache) map->inval_cache(map, from, size); } while (0) - -static inline int map_word_equal(struct map_info *map, map_word val1, map_word val2) -{ - int i; - - for (i = 0; i < map_words(map); i++) { - if (val1.x[i] != val2.x[i]) - return 0; - } - - return 1; -} - -static inline map_word map_word_and(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; - - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] & val2.x[i]; - - return r; -} - -static inline map_word map_word_clr(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; - - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] & ~val2.x[i]; - - return r; -} - -static inline map_word map_word_or(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; - - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] | val2.x[i]; - - return r; -} - -static inline int map_word_andequal(struct map_info *map, map_word val1, map_word val2, map_word val3) -{ - int i; - - for (i = 0; i < map_words(map); i++) { - if ((val1.x[i] & val2.x[i]) != val3.x[i]) - return 0; - } - - return 1; -} - -static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word val2) -{ - int i; - - for (i = 0; i < map_words(map); i++) { - if (val1.x[i] & val2.x[i]) - return 1; - } - - return 0; -} +#define map_word_equal(map, val1, val2) \ +({ \ + int i, ret = 1; \ + for (i = 0; i < map_words(map); i++) \ + if ((val1).x[i] != (val2).x[i]) { \ + ret = 0; \ + break; \ + } \ + ret; \ +}) + +#define map_word_and(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] & (val2).x[i]; \ + r; \ +}) + +#define map_word_clr(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] & ~(val2).x[i]; \ + r; \ +}) + +#define map_word_or(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] | (val2).x[i]; \ + r; \ +}) + +#define map_word_andequal(map, val1, val2, val3) \ +({ \ + int i, ret = 1; \ + for (i = 0; i < map_words(map); i++) { \ + if (((val1).x[i] & (val2).x[i]) != (val2).x[i]) { \ + ret = 0; \ + break; \ + } \ + } \ + ret; \ +}) + +#define map_word_bitsset(map, val1, val2) \ +({ \ + int i, ret = 0; \ + for (i = 0; i < map_words(map); i++) { \ + if ((val1).x[i] & (val2).x[i]) { \ + ret = 1; \ + break; \ + } \ + } \ + ret; \ +}) static inline map_word map_word_load(struct map_info *map, const void *ptr) { -- GitLab From d25d52ff1011faaee8174bac39b0c58a59833f22 Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Mon, 8 Jan 2018 15:36:48 -0500 Subject: [PATCH 1303/1398] mtd: nand: brcmnand: Disable prefetch by default commit f953f0f89663c39f08f4baaa8a4a881401b65654 upstream. Brcm nand controller prefetch feature needs to be disabled by default. Enabling affects performance on random reads as well as dma reads. Signed-off-by: Kamal Dasu Fixes: 27c5b17cd1b1 ("mtd: nand: add NAND driver "library" for Broadcom STB NAND controller") Acked-by: Florian Fainelli Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/brcmnand/brcmnand.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index d9fab2222eb3..1a4a790054e4 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -2193,16 +2193,9 @@ static int brcmnand_setup_dev(struct brcmnand_host *host) if (ctrl->nand_version >= 0x0702) tmp |= ACC_CONTROL_RD_ERASED; tmp &= ~ACC_CONTROL_FAST_PGM_RDIN; - if (ctrl->features & BRCMNAND_HAS_PREFETCH) { - /* - * FIXME: Flash DMA + prefetch may see spurious erased-page ECC - * errors - */ - if (has_flash_dma(ctrl)) - tmp &= ~ACC_CONTROL_PREFETCH; - else - tmp |= ACC_CONTROL_PREFETCH; - } + if (ctrl->features & BRCMNAND_HAS_PREFETCH) + tmp &= ~ACC_CONTROL_PREFETCH; + nand_writereg(ctrl, offs, tmp); return 0; -- GitLab From d80cd3e93653e4868d99aed4eb18ff2fcd134574 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 12 Jan 2018 10:13:36 +0100 Subject: [PATCH 1304/1398] mtd: nand: Fix nand_do_read_oob() return value commit 87e89ce8d0d14f573c068c61bec2117751fb5103 upstream. Starting from commit 041e4575f034 ("mtd: nand: handle ECC errors in OOB"), nand_do_read_oob() (from the NAND core) did return 0 or a negative error, and the MTD layer expected it. However, the trend for the NAND layer is now to return an error or a positive number of bitflips. Deciding which status to return to the user belongs to the MTD layer. Commit e47f68587b82 ("mtd: check for max_bitflips in mtd_read_oob()") brought this logic to the mtd_read_oob() function while the return value coming from nand_do_read_oob() (called by the ->_read_oob() hook) was left unchanged. Fixes: e47f68587b82 ("mtd: check for max_bitflips in mtd_read_oob()") Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/nand_base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index a77cfd74a92e..21c03086bb7f 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2320,6 +2320,7 @@ EXPORT_SYMBOL(nand_write_oob_syndrome); static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { + unsigned int max_bitflips = 0; int page, realpage, chipnr; struct nand_chip *chip = mtd_to_nand(mtd); struct mtd_ecc_stats stats; @@ -2377,6 +2378,8 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, nand_wait_ready(mtd); } + max_bitflips = max_t(unsigned int, max_bitflips, ret); + readlen -= len; if (!readlen) break; @@ -2402,7 +2405,7 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, if (mtd->ecc_stats.failed - stats.failed) return -EBADMSG; - return mtd->ecc_stats.corrected - stats.corrected ? -EUCLEAN : 0; + return max_bitflips; } /** -- GitLab From 44ebd641be56653803482756aca487b5152a22d5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 24 Jan 2018 23:49:31 +0100 Subject: [PATCH 1305/1398] mtd: nand: sunxi: Fix ECC strength choice commit f4c6cd1a7f2275d5bc0e494b21fff26f8dde80f0 upstream. When the requested ECC strength does not exactly match the strengths supported by the ECC engine, the driver is selecting the closest strength meeting the 'selected_strength > requested_strength' constraint. Fix the fact that, in this particular case, ecc->strength value was not updated to match the 'selected_strength'. For instance, one can encounter this issue when no ECC requirement is filled in the device tree while the NAND chip minimum requirement is not a strength/step_size combo natively supported by the ECC engine. Fixes: 1fef62c1423b ("mtd: nand: add sunxi NAND flash controller support") Suggested-by: Boris Brezillon Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/sunxi_nand.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index f9b2a771096b..e26c4f880df6 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -1835,8 +1835,14 @@ static int sunxi_nand_hw_common_ecc_ctrl_init(struct mtd_info *mtd, /* Add ECC info retrieval from DT */ for (i = 0; i < ARRAY_SIZE(strengths); i++) { - if (ecc->strength <= strengths[i]) + if (ecc->strength <= strengths[i]) { + /* + * Update ecc->strength value with the actual strength + * that will be used by the ECC engine. + */ + ecc->strength = strengths[i]; break; + } } if (i >= ARRAY_SIZE(strengths)) { -- GitLab From 84f9d8536c8bf440d84093eb749290d88f7e1d76 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 5 Dec 2017 16:01:20 +0100 Subject: [PATCH 1306/1398] ubi: fastmap: Erase outdated anchor PEBs during attach commit f78e5623f45bab2b726eec29dc5cefbbab2d0b1c upstream. The fastmap update code might erase the current fastmap anchor PEB in case it doesn't find any new free PEB. When a power cut happens in this situation we must not have any outdated fastmap anchor PEB on the device, because that would be used to attach during next boot. The easiest way to make that sure is to erase all outdated fastmap anchor PEBs synchronously during attach. Signed-off-by: Sascha Hauer Reviewed-by: Richard Weinberger Fixes: dbb7d2a88d2a ("UBI: Add fastmap core") Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/wl.c | 77 ++++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index b5b8cd6f481c..668b46202507 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1528,6 +1528,46 @@ static void shutdown_work(struct ubi_device *ubi) } } +/** + * erase_aeb - erase a PEB given in UBI attach info PEB + * @ubi: UBI device description object + * @aeb: UBI attach info PEB + * @sync: If true, erase synchronously. Otherwise schedule for erasure + */ +static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync) +{ + struct ubi_wl_entry *e; + int err; + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->pnum = aeb->pnum; + e->ec = aeb->ec; + ubi->lookuptbl[e->pnum] = e; + + if (sync) { + err = sync_erase(ubi, e, false); + if (err) + goto out_free; + + wl_tree_add(e, &ubi->free); + ubi->free_count++; + } else { + err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false); + if (err) + goto out_free; + } + + return 0; + +out_free: + wl_entry_destroy(ubi, e); + + return err; +} + /** * ubi_wl_init - initialize the WL sub-system using attaching information. * @ubi: UBI device description object @@ -1566,18 +1606,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { cond_resched(); - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) + err = erase_aeb(ubi, aeb, false); + if (err) goto out_free; - e->pnum = aeb->pnum; - e->ec = aeb->ec; - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { - wl_entry_destroy(ubi, e); - goto out_free; - } - found_pebs++; } @@ -1635,6 +1667,8 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) ubi_assert(!ubi->lookuptbl[e->pnum]); ubi->lookuptbl[e->pnum] = e; } else { + bool sync = false; + /* * Usually old Fastmap PEBs are scheduled for erasure * and we don't have to care about them but if we face @@ -1644,18 +1678,21 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (ubi->lookuptbl[aeb->pnum]) continue; - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) - goto out_free; + /* + * The fastmap update code might not find a free PEB for + * writing the fastmap anchor to and then reuses the + * current fastmap anchor PEB. When this PEB gets erased + * and a power cut happens before it is written again we + * must make sure that the fastmap attach code doesn't + * find any outdated fastmap anchors, hence we erase the + * outdated fastmap anchor PEBs synchronously here. + */ + if (aeb->vol_id == UBI_FM_SB_VOLUME_ID) + sync = true; - e->pnum = aeb->pnum; - e->ec = aeb->ec; - ubi_assert(!ubi->lookuptbl[e->pnum]); - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { - wl_entry_destroy(ubi, e); + err = erase_aeb(ubi, aeb, sync); + if (err) goto out_free; - } } found_pebs++; -- GitLab From de14d0c124ca496381872a42cd32a53433ed28b2 Mon Sep 17 00:00:00 2001 From: Bradley Bolen Date: Thu, 18 Jan 2018 08:55:20 -0500 Subject: [PATCH 1307/1398] ubi: block: Fix locking for idr_alloc/idr_remove commit 7f29ae9f977bcdc3654e68bc36d170223c52fd48 upstream. This fixes a race with idr_alloc where gd->first_minor can be set to the same value for two simultaneous calls to ubiblock_create. Each instance calls device_add_disk with the same first_minor. device_add_disk calls bdi_register_owner which generates several warnings. WARNING: CPU: 1 PID: 179 at kernel-source/fs/sysfs/dir.c:31 sysfs_warn_dup+0x68/0x88 sysfs: cannot create duplicate filename '/devices/virtual/bdi/252:2' WARNING: CPU: 1 PID: 179 at kernel-source/lib/kobject.c:240 kobject_add_internal+0x1ec/0x2f8 kobject_add_internal failed for 252:2 with -EEXIST, don't try to register things with the same name in the same directory WARNING: CPU: 1 PID: 179 at kernel-source/fs/sysfs/dir.c:31 sysfs_warn_dup+0x68/0x88 sysfs: cannot create duplicate filename '/dev/block/252:2' However, device_add_disk does not error out when bdi_register_owner returns an error. Control continues until reaching blk_register_queue. It then BUGs. kernel BUG at kernel-source/fs/sysfs/group.c:113! [] (internal_create_group) from [] (sysfs_create_group+0x20/0x24) [] (sysfs_create_group) from [] (blk_trace_init_sysfs+0x18/0x20) [] (blk_trace_init_sysfs) from [] (blk_register_queue+0xd8/0x154) [] (blk_register_queue) from [] (device_add_disk+0x194/0x44c) [] (device_add_disk) from [] (ubiblock_create+0x284/0x2e0) [] (ubiblock_create) from [] (vol_cdev_ioctl+0x450/0x554) [] (vol_cdev_ioctl) from [] (vfs_ioctl+0x30/0x44) [] (vfs_ioctl) from [] (do_vfs_ioctl+0xa0/0x790) [] (do_vfs_ioctl) from [] (SyS_ioctl+0x44/0x68) [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x34) Locking idr_alloc/idr_remove removes the race and keeps gd->first_minor unique. Fixes: 2bf50d42f3a4 ("UBI: block: Dynamically allocate minor numbers") Signed-off-by: Bradley Bolen Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/block.c | 42 +++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index d1e6931c132f..46913ef25bc0 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -99,6 +99,8 @@ struct ubiblock { /* Linked list of all ubiblock instances */ static LIST_HEAD(ubiblock_devices); +static DEFINE_IDR(ubiblock_minor_idr); +/* Protects ubiblock_devices and ubiblock_minor_idr */ static DEFINE_MUTEX(devices_mutex); static int ubiblock_major; @@ -353,8 +355,6 @@ static struct blk_mq_ops ubiblock_mq_ops = { .init_request = ubiblock_init_request, }; -static DEFINE_IDR(ubiblock_minor_idr); - int ubiblock_create(struct ubi_volume_info *vi) { struct ubiblock *dev; @@ -367,14 +367,15 @@ int ubiblock_create(struct ubi_volume_info *vi) /* Check that the volume isn't already handled */ mutex_lock(&devices_mutex); if (find_dev_nolock(vi->ubi_num, vi->vol_id)) { - mutex_unlock(&devices_mutex); - return -EEXIST; + ret = -EEXIST; + goto out_unlock; } - mutex_unlock(&devices_mutex); dev = kzalloc(sizeof(struct ubiblock), GFP_KERNEL); - if (!dev) - return -ENOMEM; + if (!dev) { + ret = -ENOMEM; + goto out_unlock; + } mutex_init(&dev->dev_mutex); @@ -439,14 +440,13 @@ int ubiblock_create(struct ubi_volume_info *vi) goto out_free_queue; } - mutex_lock(&devices_mutex); list_add_tail(&dev->list, &ubiblock_devices); - mutex_unlock(&devices_mutex); /* Must be the last step: anyone can call file ops from now on */ add_disk(dev->gd); dev_info(disk_to_dev(dev->gd), "created from ubi%d:%d(%s)", dev->ubi_num, dev->vol_id, vi->name); + mutex_unlock(&devices_mutex); return 0; out_free_queue: @@ -459,6 +459,8 @@ int ubiblock_create(struct ubi_volume_info *vi) put_disk(dev->gd); out_free_dev: kfree(dev); +out_unlock: + mutex_unlock(&devices_mutex); return ret; } @@ -480,30 +482,36 @@ static void ubiblock_cleanup(struct ubiblock *dev) int ubiblock_remove(struct ubi_volume_info *vi) { struct ubiblock *dev; + int ret; mutex_lock(&devices_mutex); dev = find_dev_nolock(vi->ubi_num, vi->vol_id); if (!dev) { - mutex_unlock(&devices_mutex); - return -ENODEV; + ret = -ENODEV; + goto out_unlock; } /* Found a device, let's lock it so we can check if it's busy */ mutex_lock(&dev->dev_mutex); if (dev->refcnt > 0) { - mutex_unlock(&dev->dev_mutex); - mutex_unlock(&devices_mutex); - return -EBUSY; + ret = -EBUSY; + goto out_unlock_dev; } /* Remove from device list */ list_del(&dev->list); - mutex_unlock(&devices_mutex); - ubiblock_cleanup(dev); mutex_unlock(&dev->dev_mutex); + mutex_unlock(&devices_mutex); + kfree(dev); return 0; + +out_unlock_dev: + mutex_unlock(&dev->dev_mutex); +out_unlock: + mutex_unlock(&devices_mutex); + return ret; } static int ubiblock_resize(struct ubi_volume_info *vi) @@ -632,6 +640,7 @@ static void ubiblock_remove_all(void) struct ubiblock *next; struct ubiblock *dev; + mutex_lock(&devices_mutex); list_for_each_entry_safe(dev, next, &ubiblock_devices, list) { /* The module is being forcefully removed */ WARN_ON(dev->desc); @@ -640,6 +649,7 @@ static void ubiblock_remove_all(void) ubiblock_cleanup(dev); kfree(dev); } + mutex_unlock(&devices_mutex); } int __init ubiblock_init(void) -- GitLab From 298dc6c6696b3d2abcdf18614d37278acb64abd3 Mon Sep 17 00:00:00 2001 From: Xiaolei Li Date: Fri, 23 Jun 2017 10:37:23 +0800 Subject: [PATCH 1308/1398] ubifs: Massage assert in ubifs_xattr_set() wrt. init_xattrs commit d8db5b1ca9d4c57e49893d0f78e6d5ce81450cc8 upstream. The inode is not locked in init_xattrs when creating a new inode. Without this patch, there will occurs assert when booting or creating a new file, if the kernel config CONFIG_SECURITY_SMACK is enabled. Log likes: UBIFS assert failed in ubifs_xattr_set at 298 (pid 1156) CPU: 1 PID: 1156 Comm: ldconfig Tainted: G S 4.12.0-rc1-207440-g1e70b02 #2 Hardware name: MediaTek MT2712 evaluation board (DT) Call trace: [] dump_backtrace+0x0/0x238 [] show_stack+0x14/0x20 [] dump_stack+0x9c/0xc0 [] ubifs_xattr_set+0x374/0x5e0 [] init_xattrs+0x5c/0xb8 [] security_inode_init_security+0x110/0x190 [] ubifs_init_security+0x30/0x68 [] ubifs_mkdir+0x100/0x200 [] vfs_mkdir+0x11c/0x1b8 [] SyS_mkdirat+0x74/0xd0 [] __sys_trace_return+0x0/0x4 Signed-off-by: Xiaolei Li Signed-off-by: Richard Weinberger Cc: stable@vger.kernel.org (julia: massaged to apply to 4.9.y, which doesn't contain fscrypto support) Signed-off-by: Julia Cartwright Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/xattr.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index d9f9615bfd71..3979d767a0cb 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -270,7 +270,8 @@ static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum) } static int __ubifs_setxattr(struct inode *host, const char *name, - const void *value, size_t size, int flags) + const void *value, size_t size, int flags, + bool check_lock) { struct inode *inode; struct ubifs_info *c = host->i_sb->s_fs_info; @@ -279,7 +280,8 @@ static int __ubifs_setxattr(struct inode *host, const char *name, union ubifs_key key; int err; - ubifs_assert(inode_is_locked(host)); + if (check_lock) + ubifs_assert(inode_is_locked(host)); if (size > UBIFS_MAX_INO_DATA) return -ERANGE; @@ -548,7 +550,8 @@ static int init_xattrs(struct inode *inode, const struct xattr *xattr_array, } strcpy(name, XATTR_SECURITY_PREFIX); strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); - err = __ubifs_setxattr(inode, name, xattr->value, xattr->value_len, 0); + err = __ubifs_setxattr(inode, name, xattr->value, + xattr->value_len, 0, false); kfree(name); if (err < 0) break; @@ -594,7 +597,8 @@ static int ubifs_xattr_set(const struct xattr_handler *handler, name = xattr_full_name(handler, name); if (value) - return __ubifs_setxattr(inode, name, value, size, flags); + return __ubifs_setxattr(inode, name, value, size, flags, + true); else return __ubifs_removexattr(inode, name); } -- GitLab From e1df8c682df646223dedc5352263f4a5286f9a86 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 15 Dec 2017 16:12:32 -0500 Subject: [PATCH 1309/1398] nfs/pnfs: fix nfs_direct_req ref leak when i/o falls back to the mds commit ba4a76f703ab7eb72941fdaac848502073d6e9ee upstream. Currently when falling back to doing I/O through the MDS (via pnfs_{read|write}_through_mds), the client frees the nfs_pgio_header without releasing the reference taken on the dreq via pnfs_generic_pg_{read|write}pages -> nfs_pgheader_init -> nfs_direct_pgio_init. It then takes another reference on the dreq via nfs_generic_pg_pgios -> nfs_pgheader_init -> nfs_direct_pgio_init and as a result the requester will become stuck in inode_dio_wait. Once that happens, other processes accessing the inode will become stuck as well. Ensure that pnfs_read_through_mds() and pnfs_write_through_mds() clean up correctly by calling hdr->completion_ops->completion() instead of calling hdr->release() directly. This can be reproduced (sometimes) by performing "storage failover takeover" commands on NetApp filer while doing direct I/O from a client. This can also be reproduced using SystemTap to simulate a failure while doing direct I/O from a client (from Dave Wysochanski ): stap -v -g -e 'probe module("nfs_layout_nfsv41_files").function("nfs4_fl_prepare_ds").return { $return=NULL; exit(); }' Suggested-by: Trond Myklebust Signed-off-by: Scott Mayhew Fixes: 1ca018d28d ("pNFS: Fix a memory leak when attempted pnfs fails") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pnfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b7a07ba8783a..b8e44746f761 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2145,7 +2145,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_write_mds(desc); mirror->pg_recoalesce = 1; } - hdr->release(hdr); + hdr->completion_ops->completion(hdr); } static enum pnfs_try_status @@ -2256,7 +2256,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_read_mds(desc); mirror->pg_recoalesce = 1; } - hdr->release(hdr); + hdr->completion_ops->completion(hdr); } /* -- GitLab From d1840343f9483b240b4ceb7bb21a909b7ba3eb9c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Dec 2017 14:39:13 -0500 Subject: [PATCH 1310/1398] NFS: Add a cond_resched() to nfs_commit_release_pages() commit 7f1bda447c9bd48b415acedba6b830f61591601f upstream. The commit list can get very large, and so we need a cond_resched() in nfs_commit_release_pages() in order to ensure we don't hog the CPU for excessive periods of time. Reported-by: Mike Galbraith Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/write.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9905735463a4..9a3b3820306d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1806,6 +1806,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); next: nfs_unlock_and_release_request(req); + /* Latency breaker */ + cond_resched(); } nfss = NFS_SERVER(data->inode); if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) -- GitLab From ca2c316f7cb490c350b101d59c1b892970ac33d0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 16 Jan 2018 10:08:00 -0500 Subject: [PATCH 1311/1398] NFS: commit direct writes even if they fail partially commit 1b8d97b0a837beaf48a8449955b52c650a7114b4 upstream. If some of the WRITE calls making up an O_DIRECT write syscall fail, we neglect to commit, even if some of the WRITEs succeed. We also depend on the commit code to free the reference count on the nfs_page taken in the "if (request_commit)" case at the end of nfs_direct_write_completion(). The problem was originally noticed because ENOSPC's encountered partway through a write would result in a closed file being sillyrenamed when it should have been unlinked. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/direct.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bd81bcf3ffcf..1ac1593aded3 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -787,10 +787,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) spin_lock(&dreq->lock); - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { - dreq->flags = 0; + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) dreq->error = hdr->error; - } if (dreq->error == 0) { nfs_direct_good_bytes(dreq, hdr); if (nfs_write_need_commit(hdr)) { -- GitLab From 967f650f8835012e9d8bf96bdace29a6b84b43db Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 19 Jan 2018 15:15:34 -0800 Subject: [PATCH 1312/1398] NFS: reject request for id_legacy key without auxdata commit 49686cbbb3ebafe42e63868222f269d8053ead00 upstream. nfs_idmap_legacy_upcall() is supposed to be called with 'aux' pointing to a 'struct idmap', via the call to request_key_with_auxdata() in nfs_idmap_request_key(). However it can also be reached via the request_key() system call in which case 'aux' will be NULL, causing a NULL pointer dereference in nfs_idmap_prepare_pipe_upcall(), assuming that the key description is valid enough to get that far. Fix this by making nfs_idmap_legacy_upcall() negate the key if no auxdata is provided. As usual, this bug was found by syzkaller. A simple reproducer using the command-line keyctl program is: keyctl request2 id_legacy uid:0 '' @s Fixes: 57e62324e469 ("NFS: Store the legacy idmapper result in the keyring") Reported-by: syzbot+5dfdbcf7b3eb5912abbb@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4idmap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index c444285bb1b1..f1160cdd4682 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -567,9 +567,13 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, struct idmap_msg *im; struct idmap *idmap = (struct idmap *)aux; struct key *key = cons->key; - int ret = -ENOMEM; + int ret = -ENOKEY; + + if (!aux) + goto out1; /* msg and im are freed in idmap_pipe_destroy_msg */ + ret = -ENOMEM; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) goto out1; -- GitLab From b79d8854ee0e222c5beb52fc50246ad0b6d9088e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 28 Jan 2018 09:29:41 -0500 Subject: [PATCH 1313/1398] NFS: Fix a race between mmap() and O_DIRECT commit e231c6879cfd44e4fffd384bb6dd7d313249a523 upstream. When locking the file in order to do O_DIRECT on it, we must unmap any mmapped ranges on the pagecache so that we can flush out the dirty data. Fixes: a5864c999de67 ("NFS: Do not serialise O_DIRECT reads and writes") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/io.c b/fs/nfs/io.c index 1fc5d1ce327e..d18ccc1ce0b5 100644 --- a/fs/nfs/io.c +++ b/fs/nfs/io.c @@ -98,7 +98,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode) { if (!test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { set_bit(NFS_INO_ODIRECT, &nfsi->flags); - nfs_wb_all(inode); + nfs_sync_mapping(inode->i_mapping); } } -- GitLab From 058d13f85da6909c44319a21bc275f22c51451dc Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 19 Jan 2018 09:18:54 +0100 Subject: [PATCH 1314/1398] kernfs: fix regression in kernfs_fop_write caused by wrong type commit ba87977a49913129962af8ac35b0e13e0fa4382d upstream. Commit b7ce40cff0b9 ("kernfs: cache atomic_write_len in kernfs_open_file") changes type of local variable 'len' from ssize_t to size_t. This change caused that the *ppos value is updated also when the previous write callback failed. Mentioned snippet: ... len = ops->write(...); <- return value can be negative ... if (len > 0) <- true here in this case *ppos += len; ... Fixes: b7ce40cff0b9 ("kernfs: cache atomic_write_len in kernfs_open_file") Acked-by: Tejun Heo Signed-off-by: Ivan Vecera Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 78219d5644e9..d6512cd9ba02 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -275,7 +275,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; - size_t len; + ssize_t len; char *buf; if (of->atomic_write_len) { -- GitLab From 3332b6f3276b8d20fa0c8665529b43605d9b494b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 6 Dec 2017 16:41:08 +0100 Subject: [PATCH 1315/1398] ahci: Annotate PCI ids for mobile Intel chipsets as such commit ca1b4974bd237f2373b0e980b11957aac3499b56 upstream. Intel uses different SATA PCI ids for the Desktop and Mobile SKUs of their chipsets. For older models the comment describing which chipset the PCI id is for, aksi indicates when we're dealing with a mobile SKU. Extend the comments for recent chipsets to also indicate mobile SKUs. The information this commit adds comes from Intel's chipset datasheets. This commit is a preparation patch for allowing a different default sata link powermanagement policy for mobile chipsets. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index c94038206c3a..15ac8c255b60 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -265,9 +265,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x3b23), board_ahci }, /* PCH AHCI */ { PCI_VDEVICE(INTEL, 0x3b24), board_ahci }, /* PCH RAID */ { PCI_VDEVICE(INTEL, 0x3b25), board_ahci }, /* PCH RAID */ - { PCI_VDEVICE(INTEL, 0x3b29), board_ahci }, /* PCH AHCI */ + { PCI_VDEVICE(INTEL, 0x3b29), board_ahci }, /* PCH M AHCI */ { PCI_VDEVICE(INTEL, 0x3b2b), board_ahci }, /* PCH RAID */ - { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH RAID */ + { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH M RAID */ { PCI_VDEVICE(INTEL, 0x3b2f), board_ahci }, /* PCH AHCI */ { PCI_VDEVICE(INTEL, 0x19b0), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x19b1), board_ahci }, /* DNV AHCI */ @@ -290,9 +290,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x19cE), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x19cF), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x1c02), board_ahci }, /* CPT AHCI */ - { PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT AHCI */ + { PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT M AHCI */ { PCI_VDEVICE(INTEL, 0x1c04), board_ahci }, /* CPT RAID */ - { PCI_VDEVICE(INTEL, 0x1c05), board_ahci }, /* CPT RAID */ + { PCI_VDEVICE(INTEL, 0x1c05), board_ahci }, /* CPT M RAID */ { PCI_VDEVICE(INTEL, 0x1c06), board_ahci }, /* CPT RAID */ { PCI_VDEVICE(INTEL, 0x1c07), board_ahci }, /* CPT RAID */ { PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */ @@ -301,20 +301,20 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* PBG RAID */ { PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */ { PCI_VDEVICE(INTEL, 0x1e02), board_ahci }, /* Panther Point AHCI */ - { PCI_VDEVICE(INTEL, 0x1e03), board_ahci }, /* Panther Point AHCI */ + { PCI_VDEVICE(INTEL, 0x1e03), board_ahci }, /* Panther Point M AHCI */ { PCI_VDEVICE(INTEL, 0x1e04), board_ahci }, /* Panther Point RAID */ { PCI_VDEVICE(INTEL, 0x1e05), board_ahci }, /* Panther Point RAID */ { PCI_VDEVICE(INTEL, 0x1e06), board_ahci }, /* Panther Point RAID */ - { PCI_VDEVICE(INTEL, 0x1e07), board_ahci }, /* Panther Point RAID */ + { PCI_VDEVICE(INTEL, 0x1e07), board_ahci }, /* Panther Point M RAID */ { PCI_VDEVICE(INTEL, 0x1e0e), board_ahci }, /* Panther Point RAID */ { PCI_VDEVICE(INTEL, 0x8c02), board_ahci }, /* Lynx Point AHCI */ - { PCI_VDEVICE(INTEL, 0x8c03), board_ahci }, /* Lynx Point AHCI */ + { PCI_VDEVICE(INTEL, 0x8c03), board_ahci }, /* Lynx Point M AHCI */ { PCI_VDEVICE(INTEL, 0x8c04), board_ahci }, /* Lynx Point RAID */ - { PCI_VDEVICE(INTEL, 0x8c05), board_ahci }, /* Lynx Point RAID */ + { PCI_VDEVICE(INTEL, 0x8c05), board_ahci }, /* Lynx Point M RAID */ { PCI_VDEVICE(INTEL, 0x8c06), board_ahci }, /* Lynx Point RAID */ - { PCI_VDEVICE(INTEL, 0x8c07), board_ahci }, /* Lynx Point RAID */ + { PCI_VDEVICE(INTEL, 0x8c07), board_ahci }, /* Lynx Point M RAID */ { PCI_VDEVICE(INTEL, 0x8c0e), board_ahci }, /* Lynx Point RAID */ - { PCI_VDEVICE(INTEL, 0x8c0f), board_ahci }, /* Lynx Point RAID */ + { PCI_VDEVICE(INTEL, 0x8c0f), board_ahci }, /* Lynx Point M RAID */ { PCI_VDEVICE(INTEL, 0x9c02), board_ahci }, /* Lynx Point-LP AHCI */ { PCI_VDEVICE(INTEL, 0x9c03), board_ahci }, /* Lynx Point-LP AHCI */ { PCI_VDEVICE(INTEL, 0x9c04), board_ahci }, /* Lynx Point-LP RAID */ @@ -355,21 +355,21 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x9c87), board_ahci }, /* Wildcat Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x9c8f), board_ahci }, /* Wildcat Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x8c82), board_ahci }, /* 9 Series AHCI */ - { PCI_VDEVICE(INTEL, 0x8c83), board_ahci }, /* 9 Series AHCI */ + { PCI_VDEVICE(INTEL, 0x8c83), board_ahci }, /* 9 Series M AHCI */ { PCI_VDEVICE(INTEL, 0x8c84), board_ahci }, /* 9 Series RAID */ - { PCI_VDEVICE(INTEL, 0x8c85), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c85), board_ahci }, /* 9 Series M RAID */ { PCI_VDEVICE(INTEL, 0x8c86), board_ahci }, /* 9 Series RAID */ - { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series M RAID */ { PCI_VDEVICE(INTEL, 0x8c8e), board_ahci }, /* 9 Series RAID */ - { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series M RAID */ { PCI_VDEVICE(INTEL, 0x9d03), board_ahci }, /* Sunrise Point-LP AHCI */ { PCI_VDEVICE(INTEL, 0x9d05), board_ahci }, /* Sunrise Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x9d07), board_ahci }, /* Sunrise Point-LP RAID */ { PCI_VDEVICE(INTEL, 0xa102), board_ahci }, /* Sunrise Point-H AHCI */ - { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H AHCI */ + { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H M AHCI */ { PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0xa106), board_ahci }, /* Sunrise Point-H RAID */ - { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H M RAID */ { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/ -- GitLab From 72c0031a914e973c343f76bed7651308f3c31667 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 6 Dec 2017 16:41:09 +0100 Subject: [PATCH 1316/1398] ahci: Add PCI ids for Intel Bay Trail, Cherry Trail and Apollo Lake AHCI commit 998008b779e424bd7513c434d0ab9c1268459009 upstream. Add PCI ids for Intel Bay Trail, Cherry Trail and Apollo Lake AHCI SATA controllers. This commit is a preparation patch for allowing a different default sata link powermanagement policy for mobile chipsets. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 15ac8c255b60..4cc8942817b3 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -383,6 +383,10 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x0f22), board_ahci }, /* Bay Trail AHCI */ + { PCI_VDEVICE(INTEL, 0x0f23), board_ahci }, /* Bay Trail AHCI */ + { PCI_VDEVICE(INTEL, 0x22a3), board_ahci }, /* Cherry Trail AHCI */ + { PCI_VDEVICE(INTEL, 0x5ae3), board_ahci }, /* Apollo Lake AHCI */ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, -- GitLab From 016572d31d325220ab2e1b54a80a5dcda0884eaf Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 11 Jan 2018 15:55:50 +0300 Subject: [PATCH 1317/1398] ahci: Add Intel Cannon Lake PCH-H PCI ID commit f919dde0772a894c693a1eeabc77df69d6a9b937 upstream. Add Intel Cannon Lake PCH-H PCI ID to the list of supported controllers. Signed-off-by: Mika Westerberg Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 4cc8942817b3..9b46ef4c851e 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -383,6 +383,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa356), board_ahci }, /* Cannon Lake PCH-H RAID */ { PCI_VDEVICE(INTEL, 0x0f22), board_ahci }, /* Bay Trail AHCI */ { PCI_VDEVICE(INTEL, 0x0f23), board_ahci }, /* Bay Trail AHCI */ { PCI_VDEVICE(INTEL, 0x22a3), board_ahci }, /* Cherry Trail AHCI */ -- GitLab From d2b492bda56046fca0ae4579c142e2d15d235156 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:22 -0800 Subject: [PATCH 1318/1398] crypto: hash - introduce crypto_hash_alg_has_setkey() commit cd6ed77ad5d223dc6299fb58f62e0f5267f7e2ba upstream. Templates that use an shash spawn can use crypto_shash_alg_has_setkey() to determine whether the underlying algorithm requires a key or not. But there was no corresponding function for ahash spawns. Add it. Note that the new function actually has to support both shash and ahash algorithms, since the ahash API can be used with either. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ahash.c | 11 +++++++++++ include/crypto/internal/hash.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/crypto/ahash.c b/crypto/ahash.c index cce0268a13fe..f3fa104de479 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -625,5 +625,16 @@ struct hash_alg_common *ahash_attr_alg(struct rtattr *rta, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(ahash_attr_alg); +bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg) +{ + struct crypto_alg *alg = &halg->base; + + if (alg->cra_type != &crypto_ahash_type) + return crypto_shash_alg_has_setkey(__crypto_shash_alg(alg)); + + return __crypto_ahash_alg(alg)->setkey != NULL; +} +EXPORT_SYMBOL_GPL(crypto_hash_alg_has_setkey); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Asynchronous cryptographic hash type"); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index cac57358f7af..5203560f992e 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -88,6 +88,8 @@ static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg) return alg->setkey != shash_no_setkey; } +bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg); + int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn, struct hash_alg_common *alg, struct crypto_instance *inst); -- GitLab From c1ebf9f8354702eb229e623280f90ad5e456d75a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:23 -0800 Subject: [PATCH 1319/1398] crypto: cryptd - pass through absence of ->setkey() commit 841a3ff329713f796a63356fef6e2f72e4a3f6a3 upstream. When the cryptd template is used to wrap an unkeyed hash algorithm, don't install a ->setkey() method to the cryptd instance. This change is necessary for cryptd to keep working with unkeyed hash algorithms once we start enforcing that ->setkey() is called when present. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/cryptd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 0c654e59f215..af9ad45d1909 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -691,7 +691,8 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.finup = cryptd_hash_finup_enqueue; inst->alg.export = cryptd_hash_export; inst->alg.import = cryptd_hash_import; - inst->alg.setkey = cryptd_hash_setkey; + if (crypto_shash_alg_has_setkey(salg)) + inst->alg.setkey = cryptd_hash_setkey; inst->alg.digest = cryptd_hash_digest_enqueue; err = ahash_register_instance(tmpl, inst); -- GitLab From 45f31106baa3fb77f5bdebca824bbc2956c51b7b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:24 -0800 Subject: [PATCH 1320/1398] crypto: mcryptd - pass through absence of ->setkey() commit fa59b92d299f2787e6bae1ff078ee0982e80211f upstream. When the mcryptd template is used to wrap an unkeyed hash algorithm, don't install a ->setkey() method to the mcryptd instance. This change is necessary for mcryptd to keep working with unkeyed hash algorithms once we start enforcing that ->setkey() is called when present. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/mcryptd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index a14100e74754..6e9389c8bfbd 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -534,7 +534,8 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.finup = mcryptd_hash_finup_enqueue; inst->alg.export = mcryptd_hash_export; inst->alg.import = mcryptd_hash_import; - inst->alg.setkey = mcryptd_hash_setkey; + if (crypto_hash_alg_has_setkey(halg)) + inst->alg.setkey = mcryptd_hash_setkey; inst->alg.digest = mcryptd_hash_digest_enqueue; err = ahash_register_instance(tmpl, inst); -- GitLab From b93728341fb72fe0cf7ab24ada232ce0273e92db Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:25 -0800 Subject: [PATCH 1321/1398] crypto: poly1305 - remove ->setkey() method commit a16e772e664b9a261424107784804cffc8894977 upstream. Since Poly1305 requires a nonce per invocation, the Linux kernel implementations of Poly1305 don't use the crypto API's keying mechanism and instead expect the key and nonce as the first 32 bytes of the data. But ->setkey() is still defined as a stub returning an error code. This prevents Poly1305 from being used through AF_ALG and will also break it completely once we start enforcing that all crypto API users (not just AF_ALG) call ->setkey() if present. Fix it by removing crypto_poly1305_setkey(), leaving ->setkey as NULL. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/poly1305_glue.c | 1 - crypto/poly1305_generic.c | 17 +++++------------ include/crypto/poly1305.h | 2 -- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index e32142bc071d..28c372003e44 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -164,7 +164,6 @@ static struct shash_alg alg = { .init = poly1305_simd_init, .update = poly1305_simd_update, .final = crypto_poly1305_final, - .setkey = crypto_poly1305_setkey, .descsize = sizeof(struct poly1305_simd_desc_ctx), .base = { .cra_name = "poly1305", diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index 2df9835dfbc0..bca99238948f 100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c @@ -51,17 +51,6 @@ int crypto_poly1305_init(struct shash_desc *desc) } EXPORT_SYMBOL_GPL(crypto_poly1305_init); -int crypto_poly1305_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen) -{ - /* Poly1305 requires a unique key for each tag, which implies that - * we can't set it on the tfm that gets accessed by multiple users - * simultaneously. Instead we expect the key as the first 32 bytes in - * the update() call. */ - return -ENOTSUPP; -} -EXPORT_SYMBOL_GPL(crypto_poly1305_setkey); - static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key) { /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ @@ -80,6 +69,11 @@ static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key) dctx->s[3] = le32_to_cpuvp(key + 12); } +/* + * Poly1305 requires a unique key for each tag, which implies that we can't set + * it on the tfm that gets accessed by multiple users simultaneously. Instead we + * expect the key as the first 32 bytes in the update() call. + */ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { @@ -285,7 +279,6 @@ static struct shash_alg poly1305_alg = { .init = crypto_poly1305_init, .update = crypto_poly1305_update, .final = crypto_poly1305_final, - .setkey = crypto_poly1305_setkey, .descsize = sizeof(struct poly1305_desc_ctx), .base = { .cra_name = "poly1305", diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 894df59b74e4..d586f741cab5 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -30,8 +30,6 @@ struct poly1305_desc_ctx { }; int crypto_poly1305_init(struct shash_desc *desc); -int crypto_poly1305_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen); unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen); int crypto_poly1305_update(struct shash_desc *desc, -- GitLab From daaa81c48402da28cc9e32ad55c48fb05e61b005 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 19 Apr 2017 15:11:00 -0700 Subject: [PATCH 1322/1398] nsfs: mark dentry with DCACHE_RCUACCESS commit 073c516ff73557a8f7315066856c04b50383ac34 upstream. Andrey reported a use-after-free in __ns_get_path(): spin_lock include/linux/spinlock.h:299 [inline] lockref_get_not_dead+0x19/0x80 lib/lockref.c:179 __ns_get_path+0x197/0x860 fs/nsfs.c:66 open_related_ns+0xda/0x200 fs/nsfs.c:143 sock_ioctl+0x39d/0x440 net/socket.c:1001 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1bf/0x1780 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 We are under rcu read lock protection at that point: rcu_read_lock(); d = atomic_long_read(&ns->stashed); if (!d) goto slow; dentry = (struct dentry *)d; if (!lockref_get_not_dead(&dentry->d_lockref)) goto slow; rcu_read_unlock(); but don't use a proper RCU API on the free path, therefore a parallel __d_free() could free it at the same time. We need to mark the stashed dentry with DCACHE_RCUACCESS so that __d_free() will be called after all readers leave RCU. Fixes: e149ed2b805f ("take the targets of /proc/*/ns/* symlinks to separate fs") Cc: Alexander Viro Cc: Andrew Morton Reported-by: Andrey Konovalov Signed-off-by: Cong Wang Signed-off-by: Linus Torvalds Cc: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- fs/nsfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nsfs.c b/fs/nsfs.c index 8718af895eab..80fdfad7c215 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -90,6 +90,7 @@ static void *__ns_get_path(struct path *path, struct ns_common *ns) return ERR_PTR(-ENOMEM); } d_instantiate(dentry, inode); + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_fsdata = (void *)ns->ops; d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); if (d) { -- GitLab From e78d9fdf5ecce2830d76d54017c3d8531bf9b119 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:18 +0100 Subject: [PATCH 1323/1398] media: v4l2-ioctl.c: don't copy back the result for -ENOTTY commit 181a4a2d5a0a7b43cab08a70710d727e7764ccdd upstream. If the ioctl returned -ENOTTY, then don't bother copying back the result as there is no point. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-ioctl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index c52d94c018bb..4510e8a37244 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -2862,8 +2862,11 @@ video_usercopy(struct file *file, unsigned int cmd, unsigned long arg, /* Handles IOCTL */ err = func(file, cmd, parg); - if (err == -ENOIOCTLCMD) + if (err == -ENOTTY || err == -ENOIOCTLCMD) { err = -ENOTTY; + goto out; + } + if (err == 0) { if (cmd == VIDIOC_DQBUF) trace_v4l2_dqbuf(video_devdata(file)->minor, parg); -- GitLab From f294548da6455cae64456a9dfeff1e96390171c0 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:19 +0100 Subject: [PATCH 1324/1398] media: v4l2-compat-ioctl32.c: add missing VIDIOC_PREPARE_BUF commit 3ee6d040719ae09110e5cdf24d5386abe5d1b776 upstream. The result of the VIDIOC_PREPARE_BUF ioctl was never copied back to userspace since it was missing in the switch. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index dc51dd86377d..049380bbf4cf 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -1022,6 +1022,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar err = put_v4l2_create32(&karg.v2crt, up); break; + case VIDIOC_PREPARE_BUF: case VIDIOC_QUERYBUF: case VIDIOC_QBUF: case VIDIOC_DQBUF: -- GitLab From 02129c9bc23582a48194e89cbbeb15169115b8b9 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:20 +0100 Subject: [PATCH 1325/1398] media: v4l2-compat-ioctl32.c: fix the indentation commit b7b957d429f601d6d1942122b339474f31191d75 upstream. The indentation of this source is all over the place. Fix this. This patch only changes whitespace. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 208 +++++++++--------- 1 file changed, 104 insertions(+), 104 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 049380bbf4cf..57211c7fc491 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -48,11 +48,11 @@ struct v4l2_window32 { static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_window32)) || - copy_from_user(&kp->w, &up->w, sizeof(up->w)) || - get_user(kp->field, &up->field) || - get_user(kp->chromakey, &up->chromakey) || - get_user(kp->clipcount, &up->clipcount)) - return -EFAULT; + copy_from_user(&kp->w, &up->w, sizeof(up->w)) || + get_user(kp->field, &up->field) || + get_user(kp->chromakey, &up->chromakey) || + get_user(kp->clipcount, &up->clipcount)) + return -EFAULT; if (kp->clipcount > 2048) return -EINVAL; if (kp->clipcount) { @@ -82,10 +82,10 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { if (copy_to_user(&up->w, &kp->w, sizeof(kp->w)) || - put_user(kp->field, &up->field) || - put_user(kp->chromakey, &up->chromakey) || - put_user(kp->clipcount, &up->clipcount)) - return -EFAULT; + put_user(kp->field, &up->field) || + put_user(kp->chromakey, &up->chromakey) || + put_user(kp->clipcount, &up->clipcount)) + return -EFAULT; return 0; } @@ -97,7 +97,7 @@ static inline int get_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pi } static inline int get_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) + struct v4l2_pix_format_mplane __user *up) { if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format_mplane))) return -EFAULT; @@ -112,7 +112,7 @@ static inline int put_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pi } static inline int put_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) + struct v4l2_pix_format_mplane __user *up) { if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format_mplane))) return -EFAULT; @@ -218,7 +218,7 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us return get_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); + kp->type); return -EINVAL; } } @@ -265,7 +265,7 @@ static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us return put_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); + kp->type); return -EINVAL; } } @@ -299,7 +299,7 @@ static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 { /* other fields are not set by the user, nor used by the driver */ if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_standard32)) || - get_user(kp->index, &up->index)) + get_user(kp->index, &up->index)) return -EFAULT; return 0; } @@ -307,13 +307,13 @@ static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_standard32)) || - put_user(kp->index, &up->index) || - put_user(kp->id, &up->id) || - copy_to_user(up->name, kp->name, 24) || - copy_to_user(&up->frameperiod, &kp->frameperiod, sizeof(kp->frameperiod)) || - put_user(kp->framelines, &up->framelines) || - copy_to_user(up->reserved, kp->reserved, 4 * sizeof(__u32))) - return -EFAULT; + put_user(kp->index, &up->index) || + put_user(kp->id, &up->id) || + copy_to_user(up->name, kp->name, 24) || + copy_to_user(&up->frameperiod, &kp->frameperiod, sizeof(kp->frameperiod)) || + put_user(kp->framelines, &up->framelines) || + copy_to_user(up->reserved, kp->reserved, 4 * sizeof(__u32))) + return -EFAULT; return 0; } @@ -353,14 +353,14 @@ struct v4l2_buffer32 { }; static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, - enum v4l2_memory memory) + enum v4l2_memory memory) { void __user *up_pln; compat_long_t p; if (copy_in_user(up, up32, 2 * sizeof(__u32)) || - copy_in_user(&up->data_offset, &up32->data_offset, - sizeof(__u32))) + copy_in_user(&up->data_offset, &up32->data_offset, + sizeof(__u32))) return -EFAULT; if (memory == V4L2_MEMORY_USERPTR) { @@ -374,7 +374,7 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return -EFAULT; } else { if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, - sizeof(__u32))) + sizeof(__u32))) return -EFAULT; } @@ -382,23 +382,23 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ } static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, - enum v4l2_memory memory) + enum v4l2_memory memory) { if (copy_in_user(up32, up, 2 * sizeof(__u32)) || - copy_in_user(&up32->data_offset, &up->data_offset, - sizeof(__u32))) + copy_in_user(&up32->data_offset, &up->data_offset, + sizeof(__u32))) return -EFAULT; /* For MMAP, driver might've set up the offset, so copy it back. * USERPTR stays the same (was userspace-provided), so no copying. */ if (memory == V4L2_MEMORY_MMAP) if (copy_in_user(&up32->m.mem_offset, &up->m.mem_offset, - sizeof(__u32))) + sizeof(__u32))) return -EFAULT; /* For DMABUF, driver might've set up the fd, so copy it back. */ if (memory == V4L2_MEMORY_DMABUF) if (copy_in_user(&up32->m.fd, &up->m.fd, - sizeof(int))) + sizeof(int))) return -EFAULT; return 0; @@ -413,19 +413,19 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user int ret; if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_buffer32)) || - get_user(kp->index, &up->index) || - get_user(kp->type, &up->type) || - get_user(kp->flags, &up->flags) || - get_user(kp->memory, &up->memory) || - get_user(kp->length, &up->length)) - return -EFAULT; + get_user(kp->index, &up->index) || + get_user(kp->type, &up->type) || + get_user(kp->flags, &up->flags) || + get_user(kp->memory, &up->memory) || + get_user(kp->length, &up->length)) + return -EFAULT; if (V4L2_TYPE_IS_OUTPUT(kp->type)) if (get_user(kp->bytesused, &up->bytesused) || - get_user(kp->field, &up->field) || - get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - get_user(kp->timestamp.tv_usec, - &up->timestamp.tv_usec)) + get_user(kp->field, &up->field) || + get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || + get_user(kp->timestamp.tv_usec, + &up->timestamp.tv_usec)) return -EFAULT; if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { @@ -442,13 +442,13 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user uplane32 = compat_ptr(p); if (!access_ok(VERIFY_READ, uplane32, - num_planes * sizeof(struct v4l2_plane32))) + num_planes * sizeof(struct v4l2_plane32))) return -EFAULT; /* We don't really care if userspace decides to kill itself * by passing a very big num_planes value */ uplane = compat_alloc_user_space(num_planes * - sizeof(struct v4l2_plane)); + sizeof(struct v4l2_plane)); kp->m.planes = (__force struct v4l2_plane *)uplane; while (--num_planes >= 0) { @@ -466,12 +466,12 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user break; case V4L2_MEMORY_USERPTR: { - compat_long_t tmp; + compat_long_t tmp; - if (get_user(tmp, &up->m.userptr)) - return -EFAULT; + if (get_user(tmp, &up->m.userptr)) + return -EFAULT; - kp->m.userptr = (unsigned long)compat_ptr(tmp); + kp->m.userptr = (unsigned long)compat_ptr(tmp); } break; case V4L2_MEMORY_OVERLAY: @@ -497,22 +497,22 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user int ret; if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_buffer32)) || - put_user(kp->index, &up->index) || - put_user(kp->type, &up->type) || - put_user(kp->flags, &up->flags) || - put_user(kp->memory, &up->memory)) - return -EFAULT; + put_user(kp->index, &up->index) || + put_user(kp->type, &up->type) || + put_user(kp->flags, &up->flags) || + put_user(kp->memory, &up->memory)) + return -EFAULT; if (put_user(kp->bytesused, &up->bytesused) || - put_user(kp->field, &up->field) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) || - copy_to_user(&up->timecode, &kp->timecode, sizeof(struct v4l2_timecode)) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->reserved2, &up->reserved2) || - put_user(kp->reserved, &up->reserved) || - put_user(kp->length, &up->length)) - return -EFAULT; + put_user(kp->field, &up->field) || + put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || + put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) || + copy_to_user(&up->timecode, &kp->timecode, sizeof(struct v4l2_timecode)) || + put_user(kp->sequence, &up->sequence) || + put_user(kp->reserved2, &up->reserved2) || + put_user(kp->reserved, &up->reserved) || + put_user(kp->length, &up->length)) + return -EFAULT; if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { num_planes = kp->length; @@ -576,11 +576,11 @@ static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame u32 tmp; if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_framebuffer32)) || - get_user(tmp, &up->base) || - get_user(kp->capability, &up->capability) || - get_user(kp->flags, &up->flags) || - copy_from_user(&kp->fmt, &up->fmt, sizeof(up->fmt))) - return -EFAULT; + get_user(tmp, &up->base) || + get_user(kp->capability, &up->capability) || + get_user(kp->flags, &up->flags) || + copy_from_user(&kp->fmt, &up->fmt, sizeof(up->fmt))) + return -EFAULT; kp->base = (__force void *)compat_ptr(tmp); return 0; } @@ -590,11 +590,11 @@ static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame u32 tmp = (u32)((unsigned long)kp->base); if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_framebuffer32)) || - put_user(tmp, &up->base) || - put_user(kp->capability, &up->capability) || - put_user(kp->flags, &up->flags) || - copy_to_user(&up->fmt, &kp->fmt, sizeof(up->fmt))) - return -EFAULT; + put_user(tmp, &up->base) || + put_user(kp->capability, &up->capability) || + put_user(kp->flags, &up->flags) || + copy_to_user(&up->fmt, &kp->fmt, sizeof(up->fmt))) + return -EFAULT; return 0; } @@ -669,12 +669,12 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext compat_caddr_t p; if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_ext_controls32)) || - get_user(kp->which, &up->which) || - get_user(kp->count, &up->count) || - get_user(kp->error_idx, &up->error_idx) || - copy_from_user(kp->reserved, up->reserved, - sizeof(kp->reserved))) - return -EFAULT; + get_user(kp->which, &up->which) || + get_user(kp->count, &up->count) || + get_user(kp->error_idx, &up->error_idx) || + copy_from_user(kp->reserved, up->reserved, + sizeof(kp->reserved))) + return -EFAULT; n = kp->count; if (n == 0) { kp->controls = NULL; @@ -684,7 +684,7 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext return -EFAULT; ucontrols = compat_ptr(p); if (!access_ok(VERIFY_READ, ucontrols, - n * sizeof(struct v4l2_ext_control32))) + n * sizeof(struct v4l2_ext_control32))) return -EFAULT; kcontrols = compat_alloc_user_space(n * sizeof(struct v4l2_ext_control)); kp->controls = (__force struct v4l2_ext_control *)kcontrols; @@ -719,11 +719,11 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext compat_caddr_t p; if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_ext_controls32)) || - put_user(kp->which, &up->which) || - put_user(kp->count, &up->count) || - put_user(kp->error_idx, &up->error_idx) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) - return -EFAULT; + put_user(kp->which, &up->which) || + put_user(kp->count, &up->count) || + put_user(kp->error_idx, &up->error_idx) || + copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) + return -EFAULT; if (!kp->count) return 0; @@ -731,7 +731,7 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext return -EFAULT; ucontrols = compat_ptr(p); if (!access_ok(VERIFY_WRITE, ucontrols, - n * sizeof(struct v4l2_ext_control32))) + n * sizeof(struct v4l2_ext_control32))) return -EFAULT; while (--n >= 0) { @@ -769,15 +769,15 @@ struct v4l2_event32 { static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_event32)) || - put_user(kp->type, &up->type) || - copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || - put_user(kp->pending, &up->pending) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || - put_user(kp->id, &up->id) || - copy_to_user(up->reserved, kp->reserved, 8 * sizeof(__u32))) - return -EFAULT; + put_user(kp->type, &up->type) || + copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || + put_user(kp->pending, &up->pending) || + put_user(kp->sequence, &up->sequence) || + put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || + put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || + put_user(kp->id, &up->id) || + copy_to_user(up->reserved, kp->reserved, 8 * sizeof(__u32))) + return -EFAULT; return 0; } @@ -794,12 +794,12 @@ static int get_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) u32 tmp; if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_edid32)) || - get_user(kp->pad, &up->pad) || - get_user(kp->start_block, &up->start_block) || - get_user(kp->blocks, &up->blocks) || - get_user(tmp, &up->edid) || - copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) - return -EFAULT; + get_user(kp->pad, &up->pad) || + get_user(kp->start_block, &up->start_block) || + get_user(kp->blocks, &up->blocks) || + get_user(tmp, &up->edid) || + copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) + return -EFAULT; kp->edid = (__force u8 *)compat_ptr(tmp); return 0; } @@ -809,12 +809,12 @@ static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) u32 tmp = (u32)((unsigned long)kp->edid); if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_edid32)) || - put_user(kp->pad, &up->pad) || - put_user(kp->start_block, &up->start_block) || - put_user(kp->blocks, &up->blocks) || - put_user(tmp, &up->edid) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) - return -EFAULT; + put_user(kp->pad, &up->pad) || + put_user(kp->start_block, &up->start_block) || + put_user(kp->blocks, &up->blocks) || + put_user(tmp, &up->edid) || + copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) + return -EFAULT; return 0; } -- GitLab From 81e0acf07015dbd3e0b45e8f8a053d64b804bb46 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:21 +0100 Subject: [PATCH 1326/1398] media: v4l2-compat-ioctl32.c: move 'helper' functions to __get/put_v4l2_format32 commit 486c521510c44a04cd756a9267e7d1e271c8a4ba upstream. These helper functions do not really help. Move the code to the __get/put_v4l2_format32 functions. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 104 ++++-------------- 1 file changed, 20 insertions(+), 84 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 57211c7fc491..64bc493edd7f 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -89,78 +89,6 @@ static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user return 0; } -static inline int get_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pix_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format_mplane))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pix_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format_mplane))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vbi_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_vbi_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vbi_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_vbi_format))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_sliced_vbi_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_sliced_vbi_format))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_sdr_format(struct v4l2_sdr_format *kp, struct v4l2_sdr_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_sdr_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_sdr_format(struct v4l2_sdr_format *kp, struct v4l2_sdr_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_sdr_format))) - return -EFAULT; - return 0; -} - struct v4l2_format32 { __u32 type; /* enum v4l2_buf_type */ union { @@ -199,23 +127,27 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us switch (kp->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return get_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix); + return copy_from_user(&kp->fmt.pix, &up->fmt.pix, + sizeof(kp->fmt.pix)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return get_v4l2_pix_format_mplane(&kp->fmt.pix_mp, - &up->fmt.pix_mp); + return copy_from_user(&kp->fmt.pix_mp, &up->fmt.pix_mp, + sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_OVERLAY: case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: return get_v4l2_window32(&kp->fmt.win, &up->fmt.win); case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: - return get_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi); + return copy_from_user(&kp->fmt.vbi, &up->fmt.vbi, + sizeof(kp->fmt.vbi)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return get_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced); + return copy_from_user(&kp->fmt.sliced, &up->fmt.sliced, + sizeof(kp->fmt.sliced)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - return get_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); + return copy_from_user(&kp->fmt.sdr, &up->fmt.sdr, + sizeof(kp->fmt.sdr)) ? -EFAULT : 0; default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", kp->type); @@ -246,23 +178,27 @@ static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us switch (kp->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return put_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix); + return copy_to_user(&up->fmt.pix, &kp->fmt.pix, + sizeof(kp->fmt.pix)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return put_v4l2_pix_format_mplane(&kp->fmt.pix_mp, - &up->fmt.pix_mp); + return copy_to_user(&up->fmt.pix_mp, &kp->fmt.pix_mp, + sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_OVERLAY: case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: return put_v4l2_window32(&kp->fmt.win, &up->fmt.win); case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: - return put_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi); + return copy_to_user(&up->fmt.vbi, &kp->fmt.vbi, + sizeof(kp->fmt.vbi)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return put_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced); + return copy_to_user(&up->fmt.sliced, &kp->fmt.sliced, + sizeof(kp->fmt.sliced)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - return put_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); + return copy_to_user(&up->fmt.sdr, &kp->fmt.sdr, + sizeof(kp->fmt.sdr)) ? -EFAULT : 0; default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", kp->type); -- GitLab From daff4d009f4f7fb3b1f041b76c0782cb96d99d56 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:22 +0100 Subject: [PATCH 1327/1398] media: v4l2-compat-ioctl32.c: avoid sizeof(type) commit 333b1e9f96ce05f7498b581509bb30cde03018bf upstream. Instead of doing sizeof(struct foo) use sizeof(*up). There even were cases where 4 * sizeof(__u32) was used instead of sizeof(kp->reserved), which is very dangerous when the size of the reserved array changes. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 77 +++++++++---------- 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 64bc493edd7f..64e3977ab851 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -47,7 +47,7 @@ struct v4l2_window32 { static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_window32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || copy_from_user(&kp->w, &up->w, sizeof(up->w)) || get_user(kp->field, &up->field) || get_user(kp->chromakey, &up->chromakey) || @@ -64,7 +64,7 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user if (get_user(p, &up->clips)) return -EFAULT; uclips = compat_ptr(p); - kclips = compat_alloc_user_space(n * sizeof(struct v4l2_clip)); + kclips = compat_alloc_user_space(n * sizeof(*kclips)); kp->clips = kclips; while (--n >= 0) { if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) @@ -157,14 +157,14 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us static int get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) { - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_format32))) + if (!access_ok(VERIFY_READ, up, sizeof(*up))) return -EFAULT; return __get_v4l2_format32(kp, up); } static int get_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) { - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_create_buffers32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || copy_from_user(kp, up, offsetof(struct v4l2_create_buffers32, format))) return -EFAULT; return __get_v4l2_format32(&kp->format, &up->format); @@ -208,14 +208,14 @@ static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us static int put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_format32))) + if (!access_ok(VERIFY_WRITE, up, sizeof(*up))) return -EFAULT; return __put_v4l2_format32(kp, up); } static int put_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_create_buffers32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || copy_to_user(up, kp, offsetof(struct v4l2_create_buffers32, format)) || copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) return -EFAULT; @@ -234,7 +234,7 @@ struct v4l2_standard32 { static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) { /* other fields are not set by the user, nor used by the driver */ - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_standard32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(kp->index, &up->index)) return -EFAULT; return 0; @@ -242,13 +242,13 @@ static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_standard32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->index, &up->index) || put_user(kp->id, &up->id) || - copy_to_user(up->name, kp->name, 24) || + copy_to_user(up->name, kp->name, sizeof(up->name)) || copy_to_user(&up->frameperiod, &kp->frameperiod, sizeof(kp->frameperiod)) || put_user(kp->framelines, &up->framelines) || - copy_to_user(up->reserved, kp->reserved, 4 * sizeof(__u32))) + copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) return -EFAULT; return 0; } @@ -296,7 +296,7 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ if (copy_in_user(up, up32, 2 * sizeof(__u32)) || copy_in_user(&up->data_offset, &up32->data_offset, - sizeof(__u32))) + sizeof(up->data_offset))) return -EFAULT; if (memory == V4L2_MEMORY_USERPTR) { @@ -306,11 +306,11 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ if (put_user((unsigned long)up_pln, &up->m.userptr)) return -EFAULT; } else if (memory == V4L2_MEMORY_DMABUF) { - if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(int))) + if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(up32->m.fd))) return -EFAULT; } else { if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, - sizeof(__u32))) + sizeof(up32->m.mem_offset))) return -EFAULT; } @@ -322,19 +322,19 @@ static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ { if (copy_in_user(up32, up, 2 * sizeof(__u32)) || copy_in_user(&up32->data_offset, &up->data_offset, - sizeof(__u32))) + sizeof(up->data_offset))) return -EFAULT; /* For MMAP, driver might've set up the offset, so copy it back. * USERPTR stays the same (was userspace-provided), so no copying. */ if (memory == V4L2_MEMORY_MMAP) if (copy_in_user(&up32->m.mem_offset, &up->m.mem_offset, - sizeof(__u32))) + sizeof(up->m.mem_offset))) return -EFAULT; /* For DMABUF, driver might've set up the fd, so copy it back. */ if (memory == V4L2_MEMORY_DMABUF) if (copy_in_user(&up32->m.fd, &up->m.fd, - sizeof(int))) + sizeof(up->m.fd))) return -EFAULT; return 0; @@ -348,7 +348,7 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user int num_planes; int ret; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_buffer32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(kp->index, &up->index) || get_user(kp->type, &up->type) || get_user(kp->flags, &up->flags) || @@ -360,8 +360,7 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user if (get_user(kp->bytesused, &up->bytesused) || get_user(kp->field, &up->field) || get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - get_user(kp->timestamp.tv_usec, - &up->timestamp.tv_usec)) + get_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec)) return -EFAULT; if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { @@ -378,13 +377,12 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user uplane32 = compat_ptr(p); if (!access_ok(VERIFY_READ, uplane32, - num_planes * sizeof(struct v4l2_plane32))) + num_planes * sizeof(*uplane32))) return -EFAULT; /* We don't really care if userspace decides to kill itself * by passing a very big num_planes value */ - uplane = compat_alloc_user_space(num_planes * - sizeof(struct v4l2_plane)); + uplane = compat_alloc_user_space(num_planes * sizeof(*uplane)); kp->m.planes = (__force struct v4l2_plane *)uplane; while (--num_planes >= 0) { @@ -432,7 +430,7 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user int num_planes; int ret; - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_buffer32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->index, &up->index) || put_user(kp->type, &up->type) || put_user(kp->flags, &up->flags) || @@ -443,7 +441,7 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user put_user(kp->field, &up->field) || put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) || - copy_to_user(&up->timecode, &kp->timecode, sizeof(struct v4l2_timecode)) || + copy_to_user(&up->timecode, &kp->timecode, sizeof(kp->timecode)) || put_user(kp->sequence, &up->sequence) || put_user(kp->reserved2, &up->reserved2) || put_user(kp->reserved, &up->reserved) || @@ -511,7 +509,7 @@ static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame { u32 tmp; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_framebuffer32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(tmp, &up->base) || get_user(kp->capability, &up->capability) || get_user(kp->flags, &up->flags) || @@ -525,7 +523,7 @@ static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame { u32 tmp = (u32)((unsigned long)kp->base); - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_framebuffer32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(tmp, &up->base) || put_user(kp->capability, &up->capability) || put_user(kp->flags, &up->flags) || @@ -549,14 +547,14 @@ struct v4l2_input32 { Otherwise it is identical to the 32-bit version. */ static inline int get_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) { - if (copy_from_user(kp, up, sizeof(struct v4l2_input32))) + if (copy_from_user(kp, up, sizeof(*up))) return -EFAULT; return 0; } static inline int put_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) { - if (copy_to_user(up, kp, sizeof(struct v4l2_input32))) + if (copy_to_user(up, kp, sizeof(*up))) return -EFAULT; return 0; } @@ -604,12 +602,11 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext int n; compat_caddr_t p; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_ext_controls32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(kp->which, &up->which) || get_user(kp->count, &up->count) || get_user(kp->error_idx, &up->error_idx) || - copy_from_user(kp->reserved, up->reserved, - sizeof(kp->reserved))) + copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) return -EFAULT; n = kp->count; if (n == 0) { @@ -619,10 +616,9 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_READ, ucontrols, - n * sizeof(struct v4l2_ext_control32))) + if (!access_ok(VERIFY_READ, ucontrols, n * sizeof(*ucontrols))) return -EFAULT; - kcontrols = compat_alloc_user_space(n * sizeof(struct v4l2_ext_control)); + kcontrols = compat_alloc_user_space(n * sizeof(*kcontrols)); kp->controls = (__force struct v4l2_ext_control *)kcontrols; while (--n >= 0) { u32 id; @@ -654,7 +650,7 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext int n = kp->count; compat_caddr_t p; - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_ext_controls32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->which, &up->which) || put_user(kp->count, &up->count) || put_user(kp->error_idx, &up->error_idx) || @@ -666,8 +662,7 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_WRITE, ucontrols, - n * sizeof(struct v4l2_ext_control32))) + if (!access_ok(VERIFY_WRITE, ucontrols, n * sizeof(*ucontrols))) return -EFAULT; while (--n >= 0) { @@ -704,7 +699,7 @@ struct v4l2_event32 { static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_event32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->type, &up->type) || copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || put_user(kp->pending, &up->pending) || @@ -712,7 +707,7 @@ static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *u put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || put_user(kp->id, &up->id) || - copy_to_user(up->reserved, kp->reserved, 8 * sizeof(__u32))) + copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) return -EFAULT; return 0; } @@ -729,7 +724,7 @@ static int get_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) { u32 tmp; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_edid32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(kp->pad, &up->pad) || get_user(kp->start_block, &up->start_block) || get_user(kp->blocks, &up->blocks) || @@ -744,7 +739,7 @@ static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) { u32 tmp = (u32)((unsigned long)kp->edid); - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_edid32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->pad, &up->pad) || put_user(kp->start_block, &up->start_block) || put_user(kp->blocks, &up->blocks) || -- GitLab From eec955463de3259c0db5b38952f79c3e39e03f65 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:23 +0100 Subject: [PATCH 1328/1398] media: v4l2-compat-ioctl32.c: copy m.userptr in put_v4l2_plane32 commit 8ed5a59dcb47a6f76034ee760b36e089f3e82529 upstream. The struct v4l2_plane32 should set m.userptr as well. The same happens in v4l2_buffer32 and v4l2-compliance tests for this. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 47 +++++++++++-------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 64e3977ab851..2ddeecdababe 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -299,19 +299,24 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ sizeof(up->data_offset))) return -EFAULT; - if (memory == V4L2_MEMORY_USERPTR) { + switch (memory) { + case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: + if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, + sizeof(up32->m.mem_offset))) + return -EFAULT; + break; + case V4L2_MEMORY_USERPTR: if (get_user(p, &up32->m.userptr)) return -EFAULT; up_pln = compat_ptr(p); if (put_user((unsigned long)up_pln, &up->m.userptr)) return -EFAULT; - } else if (memory == V4L2_MEMORY_DMABUF) { + break; + case V4L2_MEMORY_DMABUF: if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(up32->m.fd))) return -EFAULT; - } else { - if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, - sizeof(up32->m.mem_offset))) - return -EFAULT; + break; } return 0; @@ -320,22 +325,32 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, enum v4l2_memory memory) { + unsigned long p; + if (copy_in_user(up32, up, 2 * sizeof(__u32)) || copy_in_user(&up32->data_offset, &up->data_offset, sizeof(up->data_offset))) return -EFAULT; - /* For MMAP, driver might've set up the offset, so copy it back. - * USERPTR stays the same (was userspace-provided), so no copying. */ - if (memory == V4L2_MEMORY_MMAP) + switch (memory) { + case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: if (copy_in_user(&up32->m.mem_offset, &up->m.mem_offset, sizeof(up->m.mem_offset))) return -EFAULT; - /* For DMABUF, driver might've set up the fd, so copy it back. */ - if (memory == V4L2_MEMORY_DMABUF) + break; + case V4L2_MEMORY_USERPTR: + if (get_user(p, &up->m.userptr) || + put_user((compat_ulong_t)ptr_to_compat((__force void *)p), + &up32->m.userptr)) + return -EFAULT; + break; + case V4L2_MEMORY_DMABUF: if (copy_in_user(&up32->m.fd, &up->m.fd, sizeof(up->m.fd))) return -EFAULT; + break; + } return 0; } @@ -395,6 +410,7 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user } else { switch (kp->memory) { case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: if (get_user(kp->m.offset, &up->m.offset)) return -EFAULT; break; @@ -408,10 +424,6 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user kp->m.userptr = (unsigned long)compat_ptr(tmp); } break; - case V4L2_MEMORY_OVERLAY: - if (get_user(kp->m.offset, &up->m.offset)) - return -EFAULT; - break; case V4L2_MEMORY_DMABUF: if (get_user(kp->m.fd, &up->m.fd)) return -EFAULT; @@ -468,6 +480,7 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user } else { switch (kp->memory) { case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: if (put_user(kp->m.offset, &up->m.offset)) return -EFAULT; break; @@ -475,10 +488,6 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user if (put_user(kp->m.userptr, &up->m.userptr)) return -EFAULT; break; - case V4L2_MEMORY_OVERLAY: - if (put_user(kp->m.offset, &up->m.offset)) - return -EFAULT; - break; case V4L2_MEMORY_DMABUF: if (put_user(kp->m.fd, &up->m.fd)) return -EFAULT; -- GitLab From 9a7cd41be3ade82208da3a9792a0bca175f6aafc Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:24 +0100 Subject: [PATCH 1329/1398] media: v4l2-compat-ioctl32.c: fix ctrl_is_pointer commit b8c601e8af2d08f733d74defa8465303391bb930 upstream. ctrl_is_pointer just hardcoded two known string controls, but that caused problems when using e.g. custom controls that use a pointer for the payload. Reimplement this function: it now finds the v4l2_ctrl (if the driver uses the control framework) or it calls vidioc_query_ext_ctrl (if the driver implements that directly). In both cases it can now check if the control is a pointer control or not. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 57 ++++++++++++------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 2ddeecdababe..c8dd39884f6e 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -587,24 +589,39 @@ struct v4l2_ext_control32 { }; } __attribute__ ((packed)); -/* The following function really belong in v4l2-common, but that causes - a circular dependency between modules. We need to think about this, but - for now this will do. */ - -/* Return non-zero if this control is a pointer type. Currently only - type STRING is a pointer type. */ -static inline int ctrl_is_pointer(u32 id) +/* Return true if this control is a pointer type. */ +static inline bool ctrl_is_pointer(struct file *file, u32 id) { - switch (id) { - case V4L2_CID_RDS_TX_PS_NAME: - case V4L2_CID_RDS_TX_RADIO_TEXT: - return 1; - default: - return 0; + struct video_device *vdev = video_devdata(file); + struct v4l2_fh *fh = NULL; + struct v4l2_ctrl_handler *hdl = NULL; + struct v4l2_query_ext_ctrl qec = { id }; + const struct v4l2_ioctl_ops *ops = vdev->ioctl_ops; + + if (test_bit(V4L2_FL_USES_V4L2_FH, &vdev->flags)) + fh = file->private_data; + + if (fh && fh->ctrl_handler) + hdl = fh->ctrl_handler; + else if (vdev->ctrl_handler) + hdl = vdev->ctrl_handler; + + if (hdl) { + struct v4l2_ctrl *ctrl = v4l2_ctrl_find(hdl, id); + + return ctrl && ctrl->is_ptr; } + + if (!ops->vidioc_query_ext_ctrl) + return false; + + return !ops->vidioc_query_ext_ctrl(file, fh, &qec) && + (qec.flags & V4L2_CTRL_FLAG_HAS_PAYLOAD); } -static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up) +static int get_v4l2_ext_controls32(struct file *file, + struct v4l2_ext_controls *kp, + struct v4l2_ext_controls32 __user *up) { struct v4l2_ext_control32 __user *ucontrols; struct v4l2_ext_control __user *kcontrols; @@ -636,7 +653,7 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext return -EFAULT; if (get_user(id, &kcontrols->id)) return -EFAULT; - if (ctrl_is_pointer(id)) { + if (ctrl_is_pointer(file, id)) { void __user *s; if (get_user(p, &ucontrols->string)) @@ -651,7 +668,9 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext return 0; } -static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up) +static int put_v4l2_ext_controls32(struct file *file, + struct v4l2_ext_controls *kp, + struct v4l2_ext_controls32 __user *up) { struct v4l2_ext_control32 __user *ucontrols; struct v4l2_ext_control __user *kcontrols = @@ -683,7 +702,7 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext /* Do not modify the pointer when copying a pointer control. The contents of the pointer was changed, not the pointer itself. */ - if (ctrl_is_pointer(id)) + if (ctrl_is_pointer(file, id)) size -= sizeof(ucontrols->value64); if (copy_in_user(ucontrols, kcontrols, size)) return -EFAULT; @@ -897,7 +916,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - err = get_v4l2_ext_controls32(&karg.v2ecs, up); + err = get_v4l2_ext_controls32(file, &karg.v2ecs, up); compatible_arg = 0; break; case VIDIOC_DQEVENT: @@ -924,7 +943,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - if (put_v4l2_ext_controls32(&karg.v2ecs, up)) + if (put_v4l2_ext_controls32(file, &karg.v2ecs, up)) err = -EFAULT; break; } -- GitLab From 8465657a3be4421d564b01cc92188db9b4b06dc1 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:25 +0100 Subject: [PATCH 1330/1398] media: v4l2-compat-ioctl32.c: make ctrl_is_pointer work for subdevs commit 273caa260035c03d89ad63d72d8cd3d9e5c5e3f1 upstream. If the device is of type VFL_TYPE_SUBDEV then vdev->ioctl_ops is NULL so the 'if (!ops->vidioc_query_ext_ctrl)' check would crash. Add a test for !ops to the condition. All sub-devices that have controls will use the control framework, so they do not have an equivalent to ops->vidioc_query_ext_ctrl. Returning false if ops is NULL is the correct thing to do here. Fixes: b8c601e8af ("v4l2-compat-ioctl32.c: fix ctrl_is_pointer") Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Reported-by: Laurent Pinchart Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index c8dd39884f6e..da55322bbb0f 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -612,7 +612,7 @@ static inline bool ctrl_is_pointer(struct file *file, u32 id) return ctrl && ctrl->is_ptr; } - if (!ops->vidioc_query_ext_ctrl) + if (!ops || !ops->vidioc_query_ext_ctrl) return false; return !ops->vidioc_query_ext_ctrl(file, fh, &qec) && -- GitLab From 55e3f3e6846c5cef80a66c7a0c4b5ae9c2888224 Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Wed, 14 Feb 2018 12:48:26 +0100 Subject: [PATCH 1331/1398] media: v4l2-compat-ioctl32: Copy v4l2_window->global_alpha commit 025a26fa14f8fd55d50ab284a30c016a5be953d0 upstream. Commit b2787845fb91 ("V4L/DVB (5289): Add support for video output overlays.") added the field global_alpha to struct v4l2_window but did not update the compat layer accordingly. This change adds global_alpha to struct v4l2_window32 and copies the value for global_alpha back and forth. Signed-off-by: Daniel Mentz Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index da55322bbb0f..c32feb94b3e5 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -45,6 +45,7 @@ struct v4l2_window32 { compat_caddr_t clips; /* actually struct v4l2_clip32 * */ __u32 clipcount; compat_caddr_t bitmap; + __u8 global_alpha; }; static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) @@ -53,7 +54,8 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user copy_from_user(&kp->w, &up->w, sizeof(up->w)) || get_user(kp->field, &up->field) || get_user(kp->chromakey, &up->chromakey) || - get_user(kp->clipcount, &up->clipcount)) + get_user(kp->clipcount, &up->clipcount) || + get_user(kp->global_alpha, &up->global_alpha)) return -EFAULT; if (kp->clipcount > 2048) return -EINVAL; @@ -86,7 +88,8 @@ static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user if (copy_to_user(&up->w, &kp->w, sizeof(kp->w)) || put_user(kp->field, &up->field) || put_user(kp->chromakey, &up->chromakey) || - put_user(kp->clipcount, &up->clipcount)) + put_user(kp->clipcount, &up->clipcount) || + put_user(kp->global_alpha, &up->global_alpha)) return -EFAULT; return 0; } -- GitLab From 30ac343c422270df7cedb4513243196412dee3b2 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:27 +0100 Subject: [PATCH 1332/1398] media: v4l2-compat-ioctl32.c: copy clip list in put_v4l2_window32 commit a751be5b142ef6bcbbb96d9899516f4d9c8d0ef4 upstream. put_v4l2_window32() didn't copy back the clip list to userspace. Drivers can update the clip rectangles, so this should be done. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 59 +++++++++++++------ 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index c32feb94b3e5..3b5f3c8956f2 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -50,6 +50,11 @@ struct v4l2_window32 { static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { + struct v4l2_clip32 __user *uclips; + struct v4l2_clip __user *kclips; + compat_caddr_t p; + u32 n; + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || copy_from_user(&kp->w, &up->w, sizeof(up->w)) || get_user(kp->field, &up->field) || @@ -59,38 +64,54 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user return -EFAULT; if (kp->clipcount > 2048) return -EINVAL; - if (kp->clipcount) { - struct v4l2_clip32 __user *uclips; - struct v4l2_clip __user *kclips; - int n = kp->clipcount; - compat_caddr_t p; + if (!kp->clipcount) { + kp->clips = NULL; + return 0; + } - if (get_user(p, &up->clips)) + n = kp->clipcount; + if (get_user(p, &up->clips)) + return -EFAULT; + uclips = compat_ptr(p); + kclips = compat_alloc_user_space(n * sizeof(*kclips)); + kp->clips = kclips; + while (n--) { + if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) return -EFAULT; - uclips = compat_ptr(p); - kclips = compat_alloc_user_space(n * sizeof(*kclips)); - kp->clips = kclips; - while (--n >= 0) { - if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) - return -EFAULT; - if (put_user(n ? kclips + 1 : NULL, &kclips->next)) - return -EFAULT; - uclips += 1; - kclips += 1; - } - } else - kp->clips = NULL; + if (put_user(n ? kclips + 1 : NULL, &kclips->next)) + return -EFAULT; + uclips++; + kclips++; + } return 0; } static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { + struct v4l2_clip __user *kclips = kp->clips; + struct v4l2_clip32 __user *uclips; + u32 n = kp->clipcount; + compat_caddr_t p; + if (copy_to_user(&up->w, &kp->w, sizeof(kp->w)) || put_user(kp->field, &up->field) || put_user(kp->chromakey, &up->chromakey) || put_user(kp->clipcount, &up->clipcount) || put_user(kp->global_alpha, &up->global_alpha)) return -EFAULT; + + if (!kp->clipcount) + return 0; + + if (get_user(p, &up->clips)) + return -EFAULT; + uclips = compat_ptr(p); + while (n--) { + if (copy_in_user(&uclips->c, &kclips->c, sizeof(uclips->c))) + return -EFAULT; + uclips++; + kclips++; + } return 0; } -- GitLab From 30dcb0756b0f026324b8f1537d01c6c9d6ca92fc Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:28 +0100 Subject: [PATCH 1333/1398] media: v4l2-compat-ioctl32.c: drop pr_info for unknown buffer type commit 169f24ca68bf0f247d111aef07af00dd3a02ae88 upstream. There is nothing wrong with using an unknown buffer type. So stop spamming the kernel log whenever this happens. The kernel will just return -EINVAL to signal this. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 3b5f3c8956f2..75653d756dd9 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -175,8 +175,6 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us return copy_from_user(&kp->fmt.sdr, &up->fmt.sdr, sizeof(kp->fmt.sdr)) ? -EFAULT : 0; default: - pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); return -EINVAL; } } @@ -226,8 +224,6 @@ static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us return copy_to_user(&up->fmt.sdr, &kp->fmt.sdr, sizeof(kp->fmt.sdr)) ? -EFAULT : 0; default: - pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); return -EINVAL; } } -- GitLab From 437c4ec62efbc6704e95cfe8a05103708a02b9d7 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:29 +0100 Subject: [PATCH 1334/1398] media: v4l2-compat-ioctl32.c: don't copy back the result for certain errors commit d83a8243aaefe62ace433e4384a4f077bed86acb upstream. Some ioctls need to copy back the result even if the ioctl returned an error. However, don't do this for the error code -ENOTTY. It makes no sense in that cases. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 75653d756dd9..a40f40f65fc9 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -956,6 +956,9 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar set_fs(old_fs); } + if (err == -ENOTTY) + return err; + /* Special case: even after an error we need to put the results back for these ioctls since the error_idx will contain information on which control failed. */ -- GitLab From f2d4bed9eabf1636adbf538865e06876e18807e8 Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Wed, 14 Feb 2018 12:48:30 +0100 Subject: [PATCH 1335/1398] media: v4l2-compat-ioctl32.c: refactor compat ioctl32 logic commit a1dfb4c48cc1e64eeb7800a27c66a6f7e88d075a upstream. The 32-bit compat v4l2 ioctl handling is implemented based on its 64-bit equivalent. It converts 32-bit data structures into its 64-bit equivalents and needs to provide the data to the 64-bit ioctl in user space memory which is commonly allocated using compat_alloc_user_space(). However, due to how that function is implemented, it can only be called a single time for every syscall invocation. Supposedly to avoid this limitation, the existing code uses a mix of memory from the kernel stack and memory allocated through compat_alloc_user_space(). Under normal circumstances, this would not work, because the 64-bit ioctl expects all pointers to point to user space memory. As a workaround, set_fs(KERNEL_DS) is called to temporarily disable this extra safety check and allow kernel pointers. However, this might introduce a security vulnerability: The result of the 32-bit to 64-bit conversion is writeable by user space because the output buffer has been allocated via compat_alloc_user_space(). A malicious user space process could then manipulate pointers inside this output buffer, and due to the previous set_fs(KERNEL_DS) call, functions like get_user() or put_user() no longer prevent kernel memory access. The new approach is to pre-calculate the total amount of user space memory that is needed, allocate it using compat_alloc_user_space() and then divide up the allocated memory to accommodate all data structures that need to be converted. An alternative approach would have been to retain the union type karg that they allocated on the kernel stack in do_video_ioctl(), copy all data from user space into karg and then back to user space. However, we decided against this approach because it does not align with other compat syscall implementations. Instead, we tried to replicate the get_user/put_user pairs as found in other places in the kernel: if (get_user(clipcount, &up->clipcount) || put_user(clipcount, &kp->clipcount)) return -EFAULT; Notes from hans.verkuil@cisco.com: This patch was taken from: https://github.com/LineageOS/android_kernel_samsung_apq8084/commit/97b733953c06e4f0398ade18850f0817778255f7 Clearly nobody could be bothered to upstream this patch or at minimum tell us :-( We only heard about this a week ago. This patch was rebased and cleaned up. Compared to the original I also swapped the order of the convert_in_user arguments so that they matched copy_in_user. It was hard to review otherwise. I also replaced the ALLOC_USER_SPACE/ALLOC_AND_GET by a normal function. Fixes: 6b5a9492ca ("v4l: introduce string control support.") Signed-off-by: Daniel Mentz Co-developed-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 744 ++++++++++++------ 1 file changed, 483 insertions(+), 261 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index a40f40f65fc9..48a39222fdf9 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -22,6 +22,14 @@ #include #include +/* Use the same argument order as copy_in_user */ +#define assign_in_user(to, from) \ +({ \ + typeof(*from) __assign_tmp; \ + \ + get_user(__assign_tmp, from) || put_user(__assign_tmp, to); \ +}) + static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { long ret = -ENOIOCTLCMD; @@ -35,12 +43,12 @@ static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct v4l2_clip32 { struct v4l2_rect c; - compat_caddr_t next; + compat_caddr_t next; }; struct v4l2_window32 { struct v4l2_rect w; - __u32 field; /* enum v4l2_field */ + __u32 field; /* enum v4l2_field */ __u32 chromakey; compat_caddr_t clips; /* actually struct v4l2_clip32 * */ __u32 clipcount; @@ -48,37 +56,41 @@ struct v4l2_window32 { __u8 global_alpha; }; -static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) +static int get_v4l2_window32(struct v4l2_window __user *kp, + struct v4l2_window32 __user *up, + void __user *aux_buf, u32 aux_space) { struct v4l2_clip32 __user *uclips; struct v4l2_clip __user *kclips; compat_caddr_t p; - u32 n; + u32 clipcount; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - copy_from_user(&kp->w, &up->w, sizeof(up->w)) || - get_user(kp->field, &up->field) || - get_user(kp->chromakey, &up->chromakey) || - get_user(kp->clipcount, &up->clipcount) || - get_user(kp->global_alpha, &up->global_alpha)) + copy_in_user(&kp->w, &up->w, sizeof(up->w)) || + assign_in_user(&kp->field, &up->field) || + assign_in_user(&kp->chromakey, &up->chromakey) || + assign_in_user(&kp->global_alpha, &up->global_alpha) || + get_user(clipcount, &up->clipcount) || + put_user(clipcount, &kp->clipcount)) return -EFAULT; - if (kp->clipcount > 2048) + if (clipcount > 2048) return -EINVAL; - if (!kp->clipcount) { - kp->clips = NULL; - return 0; - } + if (!clipcount) + return put_user(NULL, &kp->clips); - n = kp->clipcount; if (get_user(p, &up->clips)) return -EFAULT; uclips = compat_ptr(p); - kclips = compat_alloc_user_space(n * sizeof(*kclips)); - kp->clips = kclips; - while (n--) { + if (aux_space < clipcount * sizeof(*kclips)) + return -EFAULT; + kclips = aux_buf; + if (put_user(kclips, &kp->clips)) + return -EFAULT; + + while (clipcount--) { if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) return -EFAULT; - if (put_user(n ? kclips + 1 : NULL, &kclips->next)) + if (put_user(clipcount ? kclips + 1 : NULL, &kclips->next)) return -EFAULT; uclips++; kclips++; @@ -86,27 +98,28 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user return 0; } -static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) +static int put_v4l2_window32(struct v4l2_window __user *kp, + struct v4l2_window32 __user *up) { struct v4l2_clip __user *kclips = kp->clips; struct v4l2_clip32 __user *uclips; - u32 n = kp->clipcount; compat_caddr_t p; - - if (copy_to_user(&up->w, &kp->w, sizeof(kp->w)) || - put_user(kp->field, &up->field) || - put_user(kp->chromakey, &up->chromakey) || - put_user(kp->clipcount, &up->clipcount) || - put_user(kp->global_alpha, &up->global_alpha)) + u32 clipcount; + + if (copy_in_user(&up->w, &kp->w, sizeof(kp->w)) || + assign_in_user(&up->field, &kp->field) || + assign_in_user(&up->chromakey, &kp->chromakey) || + assign_in_user(&up->global_alpha, &kp->global_alpha) || + get_user(clipcount, &kp->clipcount) || + put_user(clipcount, &up->clipcount)) return -EFAULT; - - if (!kp->clipcount) + if (!clipcount) return 0; if (get_user(p, &up->clips)) return -EFAULT; uclips = compat_ptr(p); - while (n--) { + while (clipcount--) { if (copy_in_user(&uclips->c, &kclips->c, sizeof(uclips->c))) return -EFAULT; uclips++; @@ -145,101 +158,158 @@ struct v4l2_create_buffers32 { __u32 reserved[8]; }; -static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int __bufsize_v4l2_format(struct v4l2_format32 __user *up, u32 *size) { - if (get_user(kp->type, &up->type)) + u32 type; + + if (get_user(type, &up->type)) return -EFAULT; - switch (kp->type) { + switch (type) { + case V4L2_BUF_TYPE_VIDEO_OVERLAY: + case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: { + u32 clipcount; + + if (get_user(clipcount, &up->fmt.win.clipcount)) + return -EFAULT; + if (clipcount > 2048) + return -EINVAL; + *size = clipcount * sizeof(struct v4l2_clip); + return 0; + } + default: + *size = 0; + return 0; + } +} + +static int bufsize_v4l2_format(struct v4l2_format32 __user *up, u32 *size) +{ + if (!access_ok(VERIFY_READ, up, sizeof(*up))) + return -EFAULT; + return __bufsize_v4l2_format(up, size); +} + +static int __get_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up, + void __user *aux_buf, u32 aux_space) +{ + u32 type; + + if (get_user(type, &up->type) || put_user(type, &kp->type)) + return -EFAULT; + + switch (type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return copy_from_user(&kp->fmt.pix, &up->fmt.pix, - sizeof(kp->fmt.pix)) ? -EFAULT : 0; + return copy_in_user(&kp->fmt.pix, &up->fmt.pix, + sizeof(kp->fmt.pix)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return copy_from_user(&kp->fmt.pix_mp, &up->fmt.pix_mp, - sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; + return copy_in_user(&kp->fmt.pix_mp, &up->fmt.pix_mp, + sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_OVERLAY: case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: - return get_v4l2_window32(&kp->fmt.win, &up->fmt.win); + return get_v4l2_window32(&kp->fmt.win, &up->fmt.win, + aux_buf, aux_space); case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: - return copy_from_user(&kp->fmt.vbi, &up->fmt.vbi, - sizeof(kp->fmt.vbi)) ? -EFAULT : 0; + return copy_in_user(&kp->fmt.vbi, &up->fmt.vbi, + sizeof(kp->fmt.vbi)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return copy_from_user(&kp->fmt.sliced, &up->fmt.sliced, - sizeof(kp->fmt.sliced)) ? -EFAULT : 0; + return copy_in_user(&kp->fmt.sliced, &up->fmt.sliced, + sizeof(kp->fmt.sliced)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - return copy_from_user(&kp->fmt.sdr, &up->fmt.sdr, - sizeof(kp->fmt.sdr)) ? -EFAULT : 0; + return copy_in_user(&kp->fmt.sdr, &up->fmt.sdr, + sizeof(kp->fmt.sdr)) ? -EFAULT : 0; default: return -EINVAL; } } -static int get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int get_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up, + void __user *aux_buf, u32 aux_space) +{ + if (!access_ok(VERIFY_READ, up, sizeof(*up))) + return -EFAULT; + return __get_v4l2_format32(kp, up, aux_buf, aux_space); +} + +static int bufsize_v4l2_create(struct v4l2_create_buffers32 __user *up, + u32 *size) { if (!access_ok(VERIFY_READ, up, sizeof(*up))) return -EFAULT; - return __get_v4l2_format32(kp, up); + return __bufsize_v4l2_format(&up->format, size); } -static int get_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) +static int get_v4l2_create32(struct v4l2_create_buffers __user *kp, + struct v4l2_create_buffers32 __user *up, + void __user *aux_buf, u32 aux_space) { if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - copy_from_user(kp, up, offsetof(struct v4l2_create_buffers32, format))) + copy_in_user(kp, up, + offsetof(struct v4l2_create_buffers32, format))) return -EFAULT; - return __get_v4l2_format32(&kp->format, &up->format); + return __get_v4l2_format32(&kp->format, &up->format, + aux_buf, aux_space); } -static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int __put_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up) { - if (put_user(kp->type, &up->type)) + u32 type; + + if (get_user(type, &kp->type)) return -EFAULT; - switch (kp->type) { + switch (type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return copy_to_user(&up->fmt.pix, &kp->fmt.pix, + return copy_in_user(&up->fmt.pix, &kp->fmt.pix, sizeof(kp->fmt.pix)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return copy_to_user(&up->fmt.pix_mp, &kp->fmt.pix_mp, + return copy_in_user(&up->fmt.pix_mp, &kp->fmt.pix_mp, sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_OVERLAY: case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: return put_v4l2_window32(&kp->fmt.win, &up->fmt.win); case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: - return copy_to_user(&up->fmt.vbi, &kp->fmt.vbi, + return copy_in_user(&up->fmt.vbi, &kp->fmt.vbi, sizeof(kp->fmt.vbi)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return copy_to_user(&up->fmt.sliced, &kp->fmt.sliced, + return copy_in_user(&up->fmt.sliced, &kp->fmt.sliced, sizeof(kp->fmt.sliced)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - return copy_to_user(&up->fmt.sdr, &kp->fmt.sdr, + return copy_in_user(&up->fmt.sdr, &kp->fmt.sdr, sizeof(kp->fmt.sdr)) ? -EFAULT : 0; default: return -EINVAL; } } -static int put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int put_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(*up))) return -EFAULT; return __put_v4l2_format32(kp, up); } -static int put_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) +static int put_v4l2_create32(struct v4l2_create_buffers __user *kp, + struct v4l2_create_buffers32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - copy_to_user(up, kp, offsetof(struct v4l2_create_buffers32, format)) || - copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) + copy_in_user(up, kp, + offsetof(struct v4l2_create_buffers32, format)) || + copy_in_user(up->reserved, kp->reserved, sizeof(kp->reserved))) return -EFAULT; return __put_v4l2_format32(&kp->format, &up->format); } @@ -253,24 +323,27 @@ struct v4l2_standard32 { __u32 reserved[4]; }; -static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) +static int get_v4l2_standard32(struct v4l2_standard __user *kp, + struct v4l2_standard32 __user *up) { /* other fields are not set by the user, nor used by the driver */ if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - get_user(kp->index, &up->index)) + assign_in_user(&kp->index, &up->index)) return -EFAULT; return 0; } -static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) +static int put_v4l2_standard32(struct v4l2_standard __user *kp, + struct v4l2_standard32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->index, &up->index) || - put_user(kp->id, &up->id) || - copy_to_user(up->name, kp->name, sizeof(up->name)) || - copy_to_user(&up->frameperiod, &kp->frameperiod, sizeof(kp->frameperiod)) || - put_user(kp->framelines, &up->framelines) || - copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) + assign_in_user(&up->index, &kp->index) || + assign_in_user(&up->id, &kp->id) || + copy_in_user(up->name, kp->name, sizeof(up->name)) || + copy_in_user(&up->frameperiod, &kp->frameperiod, + sizeof(up->frameperiod)) || + assign_in_user(&up->framelines, &kp->framelines) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved))) return -EFAULT; return 0; } @@ -310,11 +383,11 @@ struct v4l2_buffer32 { __u32 reserved; }; -static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, +static int get_v4l2_plane32(struct v4l2_plane __user *up, + struct v4l2_plane32 __user *up32, enum v4l2_memory memory) { - void __user *up_pln; - compat_long_t p; + compat_ulong_t p; if (copy_in_user(up, up32, 2 * sizeof(__u32)) || copy_in_user(&up->data_offset, &up32->data_offset, @@ -329,10 +402,8 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return -EFAULT; break; case V4L2_MEMORY_USERPTR: - if (get_user(p, &up32->m.userptr)) - return -EFAULT; - up_pln = compat_ptr(p); - if (put_user((unsigned long)up_pln, &up->m.userptr)) + if (get_user(p, &up32->m.userptr) || + put_user((unsigned long)compat_ptr(p), &up->m.userptr)) return -EFAULT; break; case V4L2_MEMORY_DMABUF: @@ -344,7 +415,8 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return 0; } -static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, +static int put_v4l2_plane32(struct v4l2_plane __user *up, + struct v4l2_plane32 __user *up32, enum v4l2_memory memory) { unsigned long p; @@ -368,8 +440,7 @@ static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return -EFAULT; break; case V4L2_MEMORY_DMABUF: - if (copy_in_user(&up32->m.fd, &up->m.fd, - sizeof(up->m.fd))) + if (copy_in_user(&up32->m.fd, &up->m.fd, sizeof(up->m.fd))) return -EFAULT; break; } @@ -377,37 +448,75 @@ static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return 0; } -static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user *up) +static int bufsize_v4l2_buffer(struct v4l2_buffer32 __user *up, u32 *size) +{ + u32 type; + u32 length; + + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + get_user(type, &up->type) || + get_user(length, &up->length)) + return -EFAULT; + + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + if (length > VIDEO_MAX_PLANES) + return -EINVAL; + + /* + * We don't really care if userspace decides to kill itself + * by passing a very big length value + */ + *size = length * sizeof(struct v4l2_plane); + } else { + *size = 0; + } + return 0; +} + +static int get_v4l2_buffer32(struct v4l2_buffer __user *kp, + struct v4l2_buffer32 __user *up, + void __user *aux_buf, u32 aux_space) { + u32 type; + u32 length; + enum v4l2_memory memory; struct v4l2_plane32 __user *uplane32; struct v4l2_plane __user *uplane; compat_caddr_t p; - int num_planes; int ret; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - get_user(kp->index, &up->index) || - get_user(kp->type, &up->type) || - get_user(kp->flags, &up->flags) || - get_user(kp->memory, &up->memory) || - get_user(kp->length, &up->length)) + assign_in_user(&kp->index, &up->index) || + get_user(type, &up->type) || + put_user(type, &kp->type) || + assign_in_user(&kp->flags, &up->flags) || + get_user(memory, &up->memory) || + put_user(memory, &kp->memory) || + get_user(length, &up->length) || + put_user(length, &kp->length)) return -EFAULT; - if (V4L2_TYPE_IS_OUTPUT(kp->type)) - if (get_user(kp->bytesused, &up->bytesused) || - get_user(kp->field, &up->field) || - get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - get_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec)) + if (V4L2_TYPE_IS_OUTPUT(type)) + if (assign_in_user(&kp->bytesused, &up->bytesused) || + assign_in_user(&kp->field, &up->field) || + assign_in_user(&kp->timestamp.tv_sec, + &up->timestamp.tv_sec) || + assign_in_user(&kp->timestamp.tv_usec, + &up->timestamp.tv_usec)) return -EFAULT; - if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { - num_planes = kp->length; + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + u32 num_planes = length; + if (num_planes == 0) { - kp->m.planes = NULL; - /* num_planes == 0 is legal, e.g. when userspace doesn't - * need planes array on DQBUF*/ - return 0; + /* + * num_planes == 0 is legal, e.g. when userspace doesn't + * need planes array on DQBUF + */ + return put_user(NULL, &kp->m.planes); } + if (num_planes > VIDEO_MAX_PLANES) + return -EINVAL; if (get_user(p, &up->m.planes)) return -EFAULT; @@ -417,37 +526,43 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user num_planes * sizeof(*uplane32))) return -EFAULT; - /* We don't really care if userspace decides to kill itself - * by passing a very big num_planes value */ - uplane = compat_alloc_user_space(num_planes * sizeof(*uplane)); - kp->m.planes = (__force struct v4l2_plane *)uplane; + /* + * We don't really care if userspace decides to kill itself + * by passing a very big num_planes value + */ + if (aux_space < num_planes * sizeof(*uplane)) + return -EFAULT; - while (--num_planes >= 0) { - ret = get_v4l2_plane32(uplane, uplane32, kp->memory); + uplane = aux_buf; + if (put_user((__force struct v4l2_plane *)uplane, + &kp->m.planes)) + return -EFAULT; + + while (num_planes--) { + ret = get_v4l2_plane32(uplane, uplane32, memory); if (ret) return ret; - ++uplane; - ++uplane32; + uplane++; + uplane32++; } } else { - switch (kp->memory) { + switch (memory) { case V4L2_MEMORY_MMAP: case V4L2_MEMORY_OVERLAY: - if (get_user(kp->m.offset, &up->m.offset)) + if (assign_in_user(&kp->m.offset, &up->m.offset)) return -EFAULT; break; - case V4L2_MEMORY_USERPTR: - { - compat_long_t tmp; + case V4L2_MEMORY_USERPTR: { + compat_ulong_t userptr; - if (get_user(tmp, &up->m.userptr)) - return -EFAULT; - - kp->m.userptr = (unsigned long)compat_ptr(tmp); - } + if (get_user(userptr, &up->m.userptr) || + put_user((unsigned long)compat_ptr(userptr), + &kp->m.userptr)) + return -EFAULT; break; + } case V4L2_MEMORY_DMABUF: - if (get_user(kp->m.fd, &up->m.fd)) + if (assign_in_user(&kp->m.fd, &up->m.fd)) return -EFAULT; break; } @@ -456,62 +571,70 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user return 0; } -static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user *up) +static int put_v4l2_buffer32(struct v4l2_buffer __user *kp, + struct v4l2_buffer32 __user *up) { + u32 type; + u32 length; + enum v4l2_memory memory; struct v4l2_plane32 __user *uplane32; struct v4l2_plane __user *uplane; compat_caddr_t p; - int num_planes; int ret; if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->index, &up->index) || - put_user(kp->type, &up->type) || - put_user(kp->flags, &up->flags) || - put_user(kp->memory, &up->memory)) + assign_in_user(&up->index, &kp->index) || + get_user(type, &kp->type) || + put_user(type, &up->type) || + assign_in_user(&up->flags, &kp->flags) || + get_user(memory, &kp->memory) || + put_user(memory, &up->memory)) return -EFAULT; - if (put_user(kp->bytesused, &up->bytesused) || - put_user(kp->field, &up->field) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) || - copy_to_user(&up->timecode, &kp->timecode, sizeof(kp->timecode)) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->reserved2, &up->reserved2) || - put_user(kp->reserved, &up->reserved) || - put_user(kp->length, &up->length)) + if (assign_in_user(&up->bytesused, &kp->bytesused) || + assign_in_user(&up->field, &kp->field) || + assign_in_user(&up->timestamp.tv_sec, &kp->timestamp.tv_sec) || + assign_in_user(&up->timestamp.tv_usec, &kp->timestamp.tv_usec) || + copy_in_user(&up->timecode, &kp->timecode, sizeof(kp->timecode)) || + assign_in_user(&up->sequence, &kp->sequence) || + assign_in_user(&up->reserved2, &kp->reserved2) || + assign_in_user(&up->reserved, &kp->reserved) || + get_user(length, &kp->length) || + put_user(length, &up->length)) return -EFAULT; - if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { - num_planes = kp->length; + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + u32 num_planes = length; + if (num_planes == 0) return 0; - uplane = (__force struct v4l2_plane __user *)kp->m.planes; + if (get_user(uplane, ((__force struct v4l2_plane __user **)&kp->m.planes))) + return -EFAULT; if (get_user(p, &up->m.planes)) return -EFAULT; uplane32 = compat_ptr(p); - while (--num_planes >= 0) { - ret = put_v4l2_plane32(uplane, uplane32, kp->memory); + while (num_planes--) { + ret = put_v4l2_plane32(uplane, uplane32, memory); if (ret) return ret; ++uplane; ++uplane32; } } else { - switch (kp->memory) { + switch (memory) { case V4L2_MEMORY_MMAP: case V4L2_MEMORY_OVERLAY: - if (put_user(kp->m.offset, &up->m.offset)) + if (assign_in_user(&up->m.offset, &kp->m.offset)) return -EFAULT; break; case V4L2_MEMORY_USERPTR: - if (put_user(kp->m.userptr, &up->m.userptr)) + if (assign_in_user(&up->m.userptr, &kp->m.userptr)) return -EFAULT; break; case V4L2_MEMORY_DMABUF: - if (put_user(kp->m.fd, &up->m.fd)) + if (assign_in_user(&up->m.fd, &kp->m.fd)) return -EFAULT; break; } @@ -523,7 +646,7 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user struct v4l2_framebuffer32 { __u32 capability; __u32 flags; - compat_caddr_t base; + compat_caddr_t base; struct { __u32 width; __u32 height; @@ -536,29 +659,32 @@ struct v4l2_framebuffer32 { } fmt; }; -static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_framebuffer32 __user *up) +static int get_v4l2_framebuffer32(struct v4l2_framebuffer __user *kp, + struct v4l2_framebuffer32 __user *up) { - u32 tmp; + compat_caddr_t tmp; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(tmp, &up->base) || - get_user(kp->capability, &up->capability) || - get_user(kp->flags, &up->flags) || - copy_from_user(&kp->fmt, &up->fmt, sizeof(up->fmt))) + put_user((__force void *)compat_ptr(tmp), &kp->base) || + assign_in_user(&kp->capability, &up->capability) || + assign_in_user(&kp->flags, &up->flags) || + copy_in_user(&kp->fmt, &up->fmt, sizeof(kp->fmt))) return -EFAULT; - kp->base = (__force void *)compat_ptr(tmp); return 0; } -static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_framebuffer32 __user *up) +static int put_v4l2_framebuffer32(struct v4l2_framebuffer __user *kp, + struct v4l2_framebuffer32 __user *up) { - u32 tmp = (u32)((unsigned long)kp->base); + void *base; if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(tmp, &up->base) || - put_user(kp->capability, &up->capability) || - put_user(kp->flags, &up->flags) || - copy_to_user(&up->fmt, &kp->fmt, sizeof(up->fmt))) + get_user(base, &kp->base) || + put_user(ptr_to_compat(base), &up->base) || + assign_in_user(&up->capability, &kp->capability) || + assign_in_user(&up->flags, &kp->flags) || + copy_in_user(&up->fmt, &kp->fmt, sizeof(kp->fmt))) return -EFAULT; return 0; } @@ -571,21 +697,26 @@ struct v4l2_input32 { __u32 tuner; /* Associated tuner */ compat_u64 std; __u32 status; - __u32 reserved[4]; + __u32 capabilities; + __u32 reserved[3]; }; -/* The 64-bit v4l2_input struct has extra padding at the end of the struct. - Otherwise it is identical to the 32-bit version. */ -static inline int get_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) +/* + * The 64-bit v4l2_input struct has extra padding at the end of the struct. + * Otherwise it is identical to the 32-bit version. + */ +static inline int get_v4l2_input32(struct v4l2_input __user *kp, + struct v4l2_input32 __user *up) { - if (copy_from_user(kp, up, sizeof(*up))) + if (copy_in_user(kp, up, sizeof(*up))) return -EFAULT; return 0; } -static inline int put_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) +static inline int put_v4l2_input32(struct v4l2_input __user *kp, + struct v4l2_input32 __user *up) { - if (copy_to_user(up, kp, sizeof(*up))) + if (copy_in_user(up, kp, sizeof(*up))) return -EFAULT; return 0; } @@ -639,40 +770,64 @@ static inline bool ctrl_is_pointer(struct file *file, u32 id) (qec.flags & V4L2_CTRL_FLAG_HAS_PAYLOAD); } +static int bufsize_v4l2_ext_controls(struct v4l2_ext_controls32 __user *up, + u32 *size) +{ + u32 count; + + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + get_user(count, &up->count)) + return -EFAULT; + if (count > V4L2_CID_MAX_CTRLS) + return -EINVAL; + *size = count * sizeof(struct v4l2_ext_control); + return 0; +} + static int get_v4l2_ext_controls32(struct file *file, - struct v4l2_ext_controls *kp, - struct v4l2_ext_controls32 __user *up) + struct v4l2_ext_controls __user *kp, + struct v4l2_ext_controls32 __user *up, + void __user *aux_buf, u32 aux_space) { struct v4l2_ext_control32 __user *ucontrols; struct v4l2_ext_control __user *kcontrols; - int n; + u32 count; + u32 n; compat_caddr_t p; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - get_user(kp->which, &up->which) || - get_user(kp->count, &up->count) || - get_user(kp->error_idx, &up->error_idx) || - copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) + assign_in_user(&kp->which, &up->which) || + get_user(count, &up->count) || + put_user(count, &kp->count) || + assign_in_user(&kp->error_idx, &up->error_idx) || + copy_in_user(kp->reserved, up->reserved, sizeof(kp->reserved))) return -EFAULT; - n = kp->count; - if (n == 0) { - kp->controls = NULL; - return 0; - } + + if (count == 0) + return put_user(NULL, &kp->controls); + if (count > V4L2_CID_MAX_CTRLS) + return -EINVAL; if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_READ, ucontrols, n * sizeof(*ucontrols))) + if (!access_ok(VERIFY_READ, ucontrols, count * sizeof(*ucontrols))) return -EFAULT; - kcontrols = compat_alloc_user_space(n * sizeof(*kcontrols)); - kp->controls = (__force struct v4l2_ext_control *)kcontrols; - while (--n >= 0) { + if (aux_space < count * sizeof(*kcontrols)) + return -EFAULT; + kcontrols = aux_buf; + if (put_user((__force struct v4l2_ext_control *)kcontrols, + &kp->controls)) + return -EFAULT; + + for (n = 0; n < count; n++) { u32 id; if (copy_in_user(kcontrols, ucontrols, sizeof(*ucontrols))) return -EFAULT; + if (get_user(id, &kcontrols->id)) return -EFAULT; + if (ctrl_is_pointer(file, id)) { void __user *s; @@ -689,43 +844,54 @@ static int get_v4l2_ext_controls32(struct file *file, } static int put_v4l2_ext_controls32(struct file *file, - struct v4l2_ext_controls *kp, + struct v4l2_ext_controls __user *kp, struct v4l2_ext_controls32 __user *up) { struct v4l2_ext_control32 __user *ucontrols; - struct v4l2_ext_control __user *kcontrols = - (__force struct v4l2_ext_control __user *)kp->controls; - int n = kp->count; + struct v4l2_ext_control __user *kcontrols; + u32 count; + u32 n; compat_caddr_t p; if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->which, &up->which) || - put_user(kp->count, &up->count) || - put_user(kp->error_idx, &up->error_idx) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) + assign_in_user(&up->which, &kp->which) || + get_user(count, &kp->count) || + put_user(count, &up->count) || + assign_in_user(&up->error_idx, &kp->error_idx) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved)) || + get_user(kcontrols, &kp->controls)) return -EFAULT; - if (!kp->count) - return 0; + if (!count) + return 0; if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_WRITE, ucontrols, n * sizeof(*ucontrols))) + if (!access_ok(VERIFY_WRITE, ucontrols, count * sizeof(*ucontrols))) return -EFAULT; - while (--n >= 0) { - unsigned size = sizeof(*ucontrols); + for (n = 0; n < count; n++) { + unsigned int size = sizeof(*ucontrols); u32 id; - if (get_user(id, &kcontrols->id)) + if (get_user(id, &kcontrols->id) || + put_user(id, &ucontrols->id) || + assign_in_user(&ucontrols->size, &kcontrols->size) || + copy_in_user(&ucontrols->reserved2, &kcontrols->reserved2, + sizeof(ucontrols->reserved2))) return -EFAULT; - /* Do not modify the pointer when copying a pointer control. - The contents of the pointer was changed, not the pointer - itself. */ + + /* + * Do not modify the pointer when copying a pointer control. + * The contents of the pointer was changed, not the pointer + * itself. + */ if (ctrl_is_pointer(file, id)) size -= sizeof(ucontrols->value64); + if (copy_in_user(ucontrols, kcontrols, size)) return -EFAULT; + ucontrols++; kcontrols++; } @@ -745,17 +911,18 @@ struct v4l2_event32 { __u32 reserved[8]; }; -static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *up) +static int put_v4l2_event32(struct v4l2_event __user *kp, + struct v4l2_event32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->type, &up->type) || - copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || - put_user(kp->pending, &up->pending) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || - put_user(kp->id, &up->id) || - copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) + assign_in_user(&up->type, &kp->type) || + copy_in_user(&up->u, &kp->u, sizeof(kp->u)) || + assign_in_user(&up->pending, &kp->pending) || + assign_in_user(&up->sequence, &kp->sequence) || + assign_in_user(&up->timestamp.tv_sec, &kp->timestamp.tv_sec) || + assign_in_user(&up->timestamp.tv_nsec, &kp->timestamp.tv_nsec) || + assign_in_user(&up->id, &kp->id) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved))) return -EFAULT; return 0; } @@ -768,31 +935,34 @@ struct v4l2_edid32 { compat_caddr_t edid; }; -static int get_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) +static int get_v4l2_edid32(struct v4l2_edid __user *kp, + struct v4l2_edid32 __user *up) { - u32 tmp; + compat_uptr_t tmp; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - get_user(kp->pad, &up->pad) || - get_user(kp->start_block, &up->start_block) || - get_user(kp->blocks, &up->blocks) || + assign_in_user(&kp->pad, &up->pad) || + assign_in_user(&kp->start_block, &up->start_block) || + assign_in_user(&kp->blocks, &up->blocks) || get_user(tmp, &up->edid) || - copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) + put_user(compat_ptr(tmp), &kp->edid) || + copy_in_user(kp->reserved, up->reserved, sizeof(kp->reserved))) return -EFAULT; - kp->edid = (__force u8 *)compat_ptr(tmp); return 0; } -static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) +static int put_v4l2_edid32(struct v4l2_edid __user *kp, + struct v4l2_edid32 __user *up) { - u32 tmp = (u32)((unsigned long)kp->edid); + void *edid; if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->pad, &up->pad) || - put_user(kp->start_block, &up->start_block) || - put_user(kp->blocks, &up->blocks) || - put_user(tmp, &up->edid) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) + assign_in_user(&up->pad, &kp->pad) || + assign_in_user(&up->start_block, &kp->start_block) || + assign_in_user(&up->blocks, &kp->blocks) || + get_user(edid, &kp->edid) || + put_user(ptr_to_compat(edid), &up->edid) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved))) return -EFAULT; return 0; } @@ -809,7 +979,7 @@ static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) #define VIDIOC_ENUMINPUT32 _IOWR('V', 26, struct v4l2_input32) #define VIDIOC_G_EDID32 _IOWR('V', 40, struct v4l2_edid32) #define VIDIOC_S_EDID32 _IOWR('V', 41, struct v4l2_edid32) -#define VIDIOC_TRY_FMT32 _IOWR('V', 64, struct v4l2_format32) +#define VIDIOC_TRY_FMT32 _IOWR('V', 64, struct v4l2_format32) #define VIDIOC_G_EXT_CTRLS32 _IOWR('V', 71, struct v4l2_ext_controls32) #define VIDIOC_S_EXT_CTRLS32 _IOWR('V', 72, struct v4l2_ext_controls32) #define VIDIOC_TRY_EXT_CTRLS32 _IOWR('V', 73, struct v4l2_ext_controls32) @@ -825,22 +995,23 @@ static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) #define VIDIOC_G_OUTPUT32 _IOR ('V', 46, s32) #define VIDIOC_S_OUTPUT32 _IOWR('V', 47, s32) +static int alloc_userspace(unsigned int size, u32 aux_space, + void __user **up_native) +{ + *up_native = compat_alloc_user_space(size + aux_space); + if (!*up_native) + return -ENOMEM; + if (clear_user(*up_native, size)) + return -EFAULT; + return 0; +} + static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - union { - struct v4l2_format v2f; - struct v4l2_buffer v2b; - struct v4l2_framebuffer v2fb; - struct v4l2_input v2i; - struct v4l2_standard v2s; - struct v4l2_ext_controls v2ecs; - struct v4l2_event v2ev; - struct v4l2_create_buffers v2crt; - struct v4l2_edid v2edid; - unsigned long vx; - int vi; - } karg; void __user *up = compat_ptr(arg); + void __user *up_native = NULL; + void __user *aux_buf; + u32 aux_space; int compatible_arg = 1; long err = 0; @@ -879,30 +1050,52 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_STREAMOFF: case VIDIOC_S_INPUT: case VIDIOC_S_OUTPUT: - err = get_user(karg.vi, (s32 __user *)up); + err = alloc_userspace(sizeof(unsigned int), 0, &up_native); + if (!err && assign_in_user((unsigned int __user *)up_native, + (compat_uint_t __user *)up)) + err = -EFAULT; compatible_arg = 0; break; case VIDIOC_G_INPUT: case VIDIOC_G_OUTPUT: + err = alloc_userspace(sizeof(unsigned int), 0, &up_native); compatible_arg = 0; break; case VIDIOC_G_EDID: case VIDIOC_S_EDID: - err = get_v4l2_edid32(&karg.v2edid, up); + err = alloc_userspace(sizeof(struct v4l2_edid), 0, &up_native); + if (!err) + err = get_v4l2_edid32(up_native, up); compatible_arg = 0; break; case VIDIOC_G_FMT: case VIDIOC_S_FMT: case VIDIOC_TRY_FMT: - err = get_v4l2_format32(&karg.v2f, up); + err = bufsize_v4l2_format(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_format), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_format); + err = get_v4l2_format32(up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; case VIDIOC_CREATE_BUFS: - err = get_v4l2_create32(&karg.v2crt, up); + err = bufsize_v4l2_create(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_create_buffers), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_create_buffers); + err = get_v4l2_create32(up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; @@ -910,36 +1103,63 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_QUERYBUF: case VIDIOC_QBUF: case VIDIOC_DQBUF: - err = get_v4l2_buffer32(&karg.v2b, up); + err = bufsize_v4l2_buffer(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_buffer), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_buffer); + err = get_v4l2_buffer32(up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; case VIDIOC_S_FBUF: - err = get_v4l2_framebuffer32(&karg.v2fb, up); + err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, + &up_native); + if (!err) + err = get_v4l2_framebuffer32(up_native, up); compatible_arg = 0; break; case VIDIOC_G_FBUF: + err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, + &up_native); compatible_arg = 0; break; case VIDIOC_ENUMSTD: - err = get_v4l2_standard32(&karg.v2s, up); + err = alloc_userspace(sizeof(struct v4l2_standard), 0, + &up_native); + if (!err) + err = get_v4l2_standard32(up_native, up); compatible_arg = 0; break; case VIDIOC_ENUMINPUT: - err = get_v4l2_input32(&karg.v2i, up); + err = alloc_userspace(sizeof(struct v4l2_input), 0, &up_native); + if (!err) + err = get_v4l2_input32(up_native, up); compatible_arg = 0; break; case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - err = get_v4l2_ext_controls32(file, &karg.v2ecs, up); + err = bufsize_v4l2_ext_controls(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_ext_controls), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_ext_controls); + err = get_v4l2_ext_controls32(file, up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; case VIDIOC_DQEVENT: + err = alloc_userspace(sizeof(struct v4l2_event), 0, &up_native); compatible_arg = 0; break; } @@ -948,25 +1168,26 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar if (compatible_arg) err = native_ioctl(file, cmd, (unsigned long)up); - else { - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - err = native_ioctl(file, cmd, (unsigned long)&karg); - set_fs(old_fs); - } + else + err = native_ioctl(file, cmd, (unsigned long)up_native); if (err == -ENOTTY) return err; - /* Special case: even after an error we need to put the - results back for these ioctls since the error_idx will - contain information on which control failed. */ + /* + * Special case: even after an error we need to put the + * results back for these ioctls since the error_idx will + * contain information on which control failed. + */ switch (cmd) { case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - if (put_v4l2_ext_controls32(file, &karg.v2ecs, up)) + if (put_v4l2_ext_controls32(file, up_native, up)) + err = -EFAULT; + break; + case VIDIOC_S_EDID: + if (put_v4l2_edid32(up_native, up)) err = -EFAULT; break; } @@ -978,45 +1199,46 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_S_OUTPUT: case VIDIOC_G_INPUT: case VIDIOC_G_OUTPUT: - err = put_user(((s32)karg.vi), (s32 __user *)up); + if (assign_in_user((compat_uint_t __user *)up, + ((unsigned int __user *)up_native))) + err = -EFAULT; break; case VIDIOC_G_FBUF: - err = put_v4l2_framebuffer32(&karg.v2fb, up); + err = put_v4l2_framebuffer32(up_native, up); break; case VIDIOC_DQEVENT: - err = put_v4l2_event32(&karg.v2ev, up); + err = put_v4l2_event32(up_native, up); break; case VIDIOC_G_EDID: - case VIDIOC_S_EDID: - err = put_v4l2_edid32(&karg.v2edid, up); + err = put_v4l2_edid32(up_native, up); break; case VIDIOC_G_FMT: case VIDIOC_S_FMT: case VIDIOC_TRY_FMT: - err = put_v4l2_format32(&karg.v2f, up); + err = put_v4l2_format32(up_native, up); break; case VIDIOC_CREATE_BUFS: - err = put_v4l2_create32(&karg.v2crt, up); + err = put_v4l2_create32(up_native, up); break; case VIDIOC_PREPARE_BUF: case VIDIOC_QUERYBUF: case VIDIOC_QBUF: case VIDIOC_DQBUF: - err = put_v4l2_buffer32(&karg.v2b, up); + err = put_v4l2_buffer32(up_native, up); break; case VIDIOC_ENUMSTD: - err = put_v4l2_standard32(&karg.v2s, up); + err = put_v4l2_standard32(up_native, up); break; case VIDIOC_ENUMINPUT: - err = put_v4l2_input32(&karg.v2i, up); + err = put_v4l2_input32(up_native, up); break; } return err; -- GitLab From a96e820790253c8971ef08670c9c8dd43b619fda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Mon, 5 Feb 2018 11:15:52 +0200 Subject: [PATCH 1336/1398] crypto: caam - fix endless loop when DECO acquire fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 225ece3e7dad4cfc44cca38ce7a3a80f255ea8f1 upstream. In case DECO0 cannot be acquired - i.e. run_descriptor_deco0() fails with -ENODEV, caam_probe() enters an endless loop: run_descriptor_deco0 ret -ENODEV -> instantiate_rng -ENODEV, overwritten by -EAGAIN ret -EAGAIN -> caam_probe -EAGAIN results in endless loop It turns out the error path in instantiate_rng() is incorrect, the checks are done in the wrong order. Fixes: 1005bccd7a4a6 ("crypto: caam - enable instantiation of all RNG4 state handles") Reported-by: Bryan O'Donoghue Suggested-by: Auer Lukas Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/ctrl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 98468b96c32f..2ca101ac0c17 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -228,12 +228,16 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, * without any error (HW optimizations for later * CAAM eras), then try again. */ + if (ret) + break; + rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) || - !(rdsta_val & (1 << sh_idx))) + !(rdsta_val & (1 << sh_idx))) { ret = -EAGAIN; - if (ret) break; + } + dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx); /* Clear the contents before recreating the descriptor */ memset(desc, 0x00, CAAM_CMD_SZ * 7); -- GitLab From 68f2013e1ff864dad8bd09f1d346bce5535764bd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 24 Jan 2018 00:31:27 -0800 Subject: [PATCH 1337/1398] crypto: sha512-mb - initialize pending lengths correctly commit eff84b379089cd8b4e83599639c1f5f6e34ef7bf upstream. The SHA-512 multibuffer code keeps track of the number of blocks pending in each lane. The minimum of these values is used to identify the next lane that will be completed. Unused lanes are set to a large number (0xFFFFFFFF) so that they don't affect this calculation. However, it was forgotten to set the lengths to this value in the initial state, where all lanes are unused. As a result it was possible for sha512_mb_mgr_get_comp_job_avx2() to select an unused lane, causing a NULL pointer dereference. Specifically this could happen in the case where ->update() was passed fewer than SHA512_BLOCK_SIZE bytes of data, so it then called sha_complete_job() without having actually submitted any blocks to the multi-buffer code. This hit a NULL pointer dereference if another task happened to have submitted blocks concurrently to the same CPU and the flush timer had not yet expired. Fix this by initializing sha512_mb_mgr->lens correctly. As usual, this bug was found by syzkaller. Fixes: 45691e2d9b18 ("crypto: sha512-mb - submit/flush routines for AVX2") Reported-by: syzbot Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c index 36870b26067a..d08805032f01 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c @@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) { unsigned int j; - state->lens[0] = 0; - state->lens[1] = 1; - state->lens[2] = 2; - state->lens[3] = 3; + /* initially all lanes are unused */ + state->lens[0] = 0xFFFFFFFF00000000; + state->lens[1] = 0xFFFFFFFF00000001; + state->lens[2] = 0xFFFFFFFF00000002; + state->lens[3] = 0xFFFFFFFF00000003; + state->unused_lanes = 0xFF03020100; for (j = 0; j < 4; j++) state->ldata[j].job_in_lane = NULL; -- GitLab From 51e22c571fadce7d016979cf88d8411ee03032ea Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:06 +0000 Subject: [PATCH 1338/1398] arm: KVM: Fix SMCCC handling of unimplemented SMC/HVC calls commit 20e8175d246e9f9deb377f2784b3e7dfb2ad3e86 upstream. KVM doesn't follow the SMCCC when it comes to unimplemented calls, and inject an UNDEF instead of returning an error. Since firmware calls are now used for security mitigation, they are becoming more common, and the undef is counter productive. Instead, let's follow the SMCCC which states that -1 must be returned to the caller when getting an unknown function number. Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/handle_exit.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 42f5daf715d0..4e57ebca6e69 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -38,7 +38,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_psci_call(vcpu); if (ret < 0) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } @@ -47,7 +47,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_inject_undefined(vcpu); + /* + * "If an SMC instruction executed at Non-secure EL1 is + * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a + * Trap exception, not a Secure Monitor Call exception [...]" + * + * We need to advance the PC after the trap, as it would + * otherwise return to the same address... + */ + vcpu_set_reg(vcpu, 0, ~0UL); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 1; } -- GitLab From ba88289e7acbf609b44e27883a621d8f0c3e4d04 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Thu, 9 Nov 2017 20:27:20 +0200 Subject: [PATCH 1339/1398] KVM: nVMX: Fix races when sending nested PI while dest enters/leaves L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6b6977117f50d60455ace86b2d256f6fb4f3de05 upstream. Consider the following scenario: 1. CPU A calls vmx_deliver_nested_posted_interrupt() to send an IPI to CPU B via virtual posted-interrupt mechanism. 2. CPU B is currently executing L2 guest. 3. vmx_deliver_nested_posted_interrupt() calls kvm_vcpu_trigger_posted_interrupt() which will note that vcpu->mode == IN_GUEST_MODE. 4. Assume that before CPU A sends the physical POSTED_INTR_NESTED_VECTOR IPI, CPU B exits from L2 to L0 during event-delivery (valid IDT-vectoring-info). 5. CPU A now sends the physical IPI. The IPI is received in host and it's handler (smp_kvm_posted_intr_nested_ipi()) does nothing. 6. Assume that before CPU A sets pi_pending=true and KVM_REQ_EVENT, CPU B continues to run in L0 and reach vcpu_enter_guest(). As KVM_REQ_EVENT is not set yet, vcpu_enter_guest() will continue and resume L2 guest. 7. At this point, CPU A sets pi_pending=true and KVM_REQ_EVENT but it's too late! CPU B already entered L2 and KVM_REQ_EVENT will only be consumed at next L2 entry! Another scenario to consider: 1. CPU A calls vmx_deliver_nested_posted_interrupt() to send an IPI to CPU B via virtual posted-interrupt mechanism. 2. Assume that before CPU A calls kvm_vcpu_trigger_posted_interrupt(), CPU B is at L0 and is about to resume into L2. Further assume that it is in vcpu_enter_guest() after check for KVM_REQ_EVENT. 3. At this point, CPU A calls kvm_vcpu_trigger_posted_interrupt() which will note that vcpu->mode != IN_GUEST_MODE. Therefore, do nothing and return false. Then, will set pi_pending=true and KVM_REQ_EVENT. 4. Now CPU B continue and resumes into L2 guest without processing the posted-interrupt until next L2 entry! To fix both issues, we just need to change vmx_deliver_nested_posted_interrupt() to set pi_pending=true and KVM_REQ_EVENT before calling kvm_vcpu_trigger_posted_interrupt(). It will fix the first scenario by chaging step (6) to note that KVM_REQ_EVENT and pi_pending=true and therefore process nested posted-interrupt. It will fix the second scenario by two possible ways: 1. If kvm_vcpu_trigger_posted_interrupt() is called while CPU B has changed vcpu->mode to IN_GUEST_MODE, physical IPI will be sent and will be received when CPU resumes into L2. 2. If kvm_vcpu_trigger_posted_interrupt() is called while CPU B hasn't yet changed vcpu->mode to IN_GUEST_MODE, then after CPU B will change vcpu->mode it will call kvm_request_pending() which will return true and therefore force another round of vcpu_enter_guest() which will note that KVM_REQ_EVENT and pi_pending=true and therefore process nested posted-interrupt. Fixes: 705699a13994 ("KVM: nVMX: Enable nested posted interrupt processing") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Krish Sadhukhan [Add kvm_vcpu_kick to also handle the case where L1 doesn't intercept L2 HLT and L2 executes HLT instruction. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d49da86e3099..d66224e695cf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4967,14 +4967,15 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, if (is_guest_mode(vcpu) && vector == vmx->nested.posted_intr_nv) { - /* the PIR and ON have been set by L1. */ - kvm_vcpu_trigger_posted_interrupt(vcpu); /* * If a posted intr is not recognized by hardware, * we will accomplish it in the next vmentry. */ vmx->nested.pi_pending = true; kvm_make_request(KVM_REQ_EVENT, vcpu); + /* the PIR and ON have been set by L1. */ + if (!kvm_vcpu_trigger_posted_interrupt(vcpu)) + kvm_vcpu_kick(vcpu); return 0; } return -1; -- GitLab From f6741799aa5321c52a83ff5d9ce753165774d63c Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 22 Jan 2018 18:19:06 +0000 Subject: [PATCH 1340/1398] KVM: arm/arm64: Handle CPU_PM_ENTER_FAILED commit 58d6b15e9da5042a99c9c30ad725792e4569150e upstream. cpu_pm_enter() calls the pm notifier chain with CPU_PM_ENTER, then if there is a failure: CPU_PM_ENTER_FAILED. When KVM receives CPU_PM_ENTER it calls cpu_hyp_reset() which will return us to the hyp-stub. If we subsequently get a CPU_PM_ENTER_FAILED, KVM does nothing, leaving the CPU running with the hyp-stub, at odds with kvm_arm_hardware_enabled. Add CPU_PM_ENTER_FAILED as a fallthrough for CPU_PM_EXIT, this reloads KVM based on kvm_arm_hardware_enabled. This is safe even if CPU_PM_ENTER never gets as far as KVM, as cpu_hyp_reinit() calls cpu_hyp_reset() to make sure the hyp-stub is loaded before reloading KVM. Fixes: 67f691976662 ("arm64: kvm: allows kvm cpu hotplug") CC: Lorenzo Pieralisi Reviewed-by: Christoffer Dall Signed-off-by: James Morse Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/arm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 19b5f5c1c0ff..c38bfbeec306 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -1165,6 +1165,7 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self, cpu_hyp_reset(); return NOTIFY_OK; + case CPU_PM_ENTER_FAILED: case CPU_PM_EXIT: if (__this_cpu_read(kvm_arm_hardware_enabled)) /* The hardware was enabled before suspend. */ -- GitLab From 76376783a453c6893d38266cbbaed9fc0d3b7f74 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Mon, 8 Jan 2018 16:01:04 +0000 Subject: [PATCH 1341/1398] ASoC: rockchip: i2s: fix playback after runtime resume commit c66234cfedfc3e6e3b62563a5f2c1562be09a35d upstream. When restoring registers during runtime resume, we must not write to I2S_TXDR which is the transmit FIFO as this queues up a sample to be output and pushes all of the output channels down by one. This can be demonstrated with the speaker-test utility: for i in a b c; do speaker-test -c 2 -s 1; done which should play a test through the left speaker three times but if the I2S hardware starts runtime suspended the first sample will be played through the right speaker. Fix this by marking I2S_TXDR as volatile (which also requires marking it as readble, even though it technically isn't). This seems to be the most robust fix, the alternative of giving I2S_TXDR a default value is more fragile since it does not prevent regcache writing to the register in all circumstances. While here, also fix the configuration of I2S_RXDR and I2S_FIFOLR; these are not writable so they do not suffer from the same problem as I2S_TXDR but reading from I2S_RXDR does suffer from a similar problem. Fixes: f0447f6cbb20 ("ASoC: rockchip: i2s: restore register during runtime_suspend/resume cycle", 2016-09-07) Signed-off-by: John Keeping Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/rockchip/rockchip_i2s.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index 974915cb4c4f..08bfee447a36 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -476,6 +476,7 @@ static bool rockchip_i2s_rd_reg(struct device *dev, unsigned int reg) case I2S_INTCR: case I2S_XFER: case I2S_CLR: + case I2S_TXDR: case I2S_RXDR: case I2S_FIFOLR: case I2S_INTSR: @@ -490,6 +491,9 @@ static bool rockchip_i2s_volatile_reg(struct device *dev, unsigned int reg) switch (reg) { case I2S_INTSR: case I2S_CLR: + case I2S_FIFOLR: + case I2S_TXDR: + case I2S_RXDR: return true; default: return false; @@ -499,6 +503,8 @@ static bool rockchip_i2s_volatile_reg(struct device *dev, unsigned int reg) static bool rockchip_i2s_precious_reg(struct device *dev, unsigned int reg) { switch (reg) { + case I2S_RXDR: + return true; default: return false; } -- GitLab From eb10c5973eb2bf293fdfb1421577afb8acd37555 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Jan 2018 16:38:46 +0100 Subject: [PATCH 1342/1398] ASoC: skl: Fix kernel warning due to zero NHTL entry commit 20a1ea2222e7cbf96e9bf8579362e971491e6aea upstream. I got the following kernel warning when loading snd-soc-skl module on Dell Latitude 7270 laptop: memremap attempted on mixed range 0x0000000000000000 size: 0x0 WARNING: CPU: 0 PID: 484 at kernel/memremap.c:98 memremap+0x8a/0x180 Call Trace: skl_nhlt_init+0x82/0xf0 [snd_soc_skl] skl_probe+0x2ee/0x7c0 [snd_soc_skl] .... It seems that the machine doesn't support the SKL DSP gives the empty NHLT entry, and it triggers the warning. For avoiding it, let do the zero check before calling memremap(). Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/skylake/skl-nhlt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index 3f8e6f0b7eb5..dcf03691ebc8 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -41,7 +41,8 @@ struct nhlt_acpi_table *skl_nhlt_init(struct device *dev) obj = acpi_evaluate_dsm(handle, OSC_UUID, 1, 1, NULL); if (obj && obj->type == ACPI_TYPE_BUFFER) { nhlt_ptr = (struct nhlt_resource_desc *)obj->buffer.pointer; - nhlt_table = (struct nhlt_acpi_table *) + if (nhlt_ptr->length) + nhlt_table = (struct nhlt_acpi_table *) memremap(nhlt_ptr->min_addr, nhlt_ptr->length, MEMREMAP_WB); ACPI_FREE(obj); -- GitLab From b7f9df60f4107d683bfda88dbc2c83039ee89ce8 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 1 Jan 2018 18:26:47 +0100 Subject: [PATCH 1343/1398] watchdog: imx2_wdt: restore previous timeout after suspend+resume commit 0be267255cef64e1c58475baa7b25568355a3816 upstream. When the watchdog device is suspended, its timeout is set to the maximum value. During resume, the previously set timeout should be restored. This does not work at the moment. The suspend function calls imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME); and resume reverts this by calling imx2_wdt_set_timeout(wdog, wdog->timeout); However, imx2_wdt_set_timeout() updates wdog->timeout. Therefore, wdog->timeout is set to IMX2_WDT_MAX_TIME when we enter the resume function. Fix this by adding a new function __imx2_wdt_set_timeout() which only updates the hardware settings. imx2_wdt_set_timeout() now calls __imx2_wdt_set_timeout() and then saves the new timeout to wdog->timeout. During suspend, we call __imx2_wdt_set_timeout() directly so that wdog->timeout won't be updated and we can restore the previous value during resume. This approach makes wdog->timeout different from the actual setting in the hardware which is usually not a good thing. However, the two differ only while we're suspended and no kernel code is running, so it should be ok in this case. Signed-off-by: Martin Kaiser Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/imx2_wdt.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c index 4874b0f18650..518dfa1047cb 100644 --- a/drivers/watchdog/imx2_wdt.c +++ b/drivers/watchdog/imx2_wdt.c @@ -169,15 +169,21 @@ static int imx2_wdt_ping(struct watchdog_device *wdog) return 0; } -static int imx2_wdt_set_timeout(struct watchdog_device *wdog, - unsigned int new_timeout) +static void __imx2_wdt_set_timeout(struct watchdog_device *wdog, + unsigned int new_timeout) { struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog); - wdog->timeout = new_timeout; - regmap_update_bits(wdev->regmap, IMX2_WDT_WCR, IMX2_WDT_WCR_WT, WDOG_SEC_TO_COUNT(new_timeout)); +} + +static int imx2_wdt_set_timeout(struct watchdog_device *wdog, + unsigned int new_timeout) +{ + __imx2_wdt_set_timeout(wdog, new_timeout); + + wdog->timeout = new_timeout; return 0; } @@ -371,7 +377,11 @@ static int imx2_wdt_suspend(struct device *dev) /* The watchdog IP block is running */ if (imx2_wdt_is_running(wdev)) { - imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME); + /* + * Don't update wdog->timeout, we'll restore the current value + * during resume. + */ + __imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME); imx2_wdt_ping(wdog); } -- GitLab From d1d85ae79d5e5592dccba6890658c0999b864ddc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 30 Nov 2017 11:55:46 -0500 Subject: [PATCH 1344/1398] media: dvb-frontends: fix i2c access helpers for KASAN commit 3cd890dbe2a4f14cc44c85bb6cf37e5e22d4dd0e upstream. A typical code fragment was copied across many dvb-frontend drivers and causes large stack frames when built with with CONFIG_KASAN on gcc-5/6/7: drivers/media/dvb-frontends/cxd2841er.c:3225:1: error: the frame size of 3992 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] drivers/media/dvb-frontends/cxd2841er.c:3404:1: error: the frame size of 3136 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] drivers/media/dvb-frontends/stv0367.c:3143:1: error: the frame size of 4016 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] drivers/media/dvb-frontends/stv090x.c:3430:1: error: the frame size of 5312 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] drivers/media/dvb-frontends/stv090x.c:4248:1: error: the frame size of 4872 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] gcc-8 now solves this by consolidating the stack slots for the argument variables, but on older compilers we can get the same behavior by taking the pointer of a local variable rather than the inline function argument. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/ascot2e.c | 4 +++- drivers/media/dvb-frontends/cxd2841er.c | 4 +++- drivers/media/dvb-frontends/helene.c | 4 +++- drivers/media/dvb-frontends/horus3a.c | 4 +++- drivers/media/dvb-frontends/itd1000.c | 5 +++-- drivers/media/dvb-frontends/mt312.c | 5 ++++- drivers/media/dvb-frontends/stb0899_drv.c | 3 ++- drivers/media/dvb-frontends/stb6100.c | 6 ++++-- drivers/media/dvb-frontends/stv0367.c | 4 +++- drivers/media/dvb-frontends/stv090x.c | 4 +++- drivers/media/dvb-frontends/stv6110x.c | 4 +++- drivers/media/dvb-frontends/zl10039.c | 4 +++- 12 files changed, 37 insertions(+), 14 deletions(-) diff --git a/drivers/media/dvb-frontends/ascot2e.c b/drivers/media/dvb-frontends/ascot2e.c index ad304eed656d..c61227cfff25 100644 --- a/drivers/media/dvb-frontends/ascot2e.c +++ b/drivers/media/dvb-frontends/ascot2e.c @@ -155,7 +155,9 @@ static int ascot2e_write_regs(struct ascot2e_priv *priv, static int ascot2e_write_reg(struct ascot2e_priv *priv, u8 reg, u8 val) { - return ascot2e_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return ascot2e_write_regs(priv, reg, &tmp, 1); } static int ascot2e_read_regs(struct ascot2e_priv *priv, diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c index fd0f25ee251f..b97647cd7dc6 100644 --- a/drivers/media/dvb-frontends/cxd2841er.c +++ b/drivers/media/dvb-frontends/cxd2841er.c @@ -261,7 +261,9 @@ static int cxd2841er_write_regs(struct cxd2841er_priv *priv, static int cxd2841er_write_reg(struct cxd2841er_priv *priv, u8 addr, u8 reg, u8 val) { - return cxd2841er_write_regs(priv, addr, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return cxd2841er_write_regs(priv, addr, reg, &tmp, 1); } static int cxd2841er_read_regs(struct cxd2841er_priv *priv, diff --git a/drivers/media/dvb-frontends/helene.c b/drivers/media/dvb-frontends/helene.c index dc43c5f6d0ea..e06bcd4b3ddc 100644 --- a/drivers/media/dvb-frontends/helene.c +++ b/drivers/media/dvb-frontends/helene.c @@ -331,7 +331,9 @@ static int helene_write_regs(struct helene_priv *priv, static int helene_write_reg(struct helene_priv *priv, u8 reg, u8 val) { - return helene_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return helene_write_regs(priv, reg, &tmp, 1); } static int helene_read_regs(struct helene_priv *priv, diff --git a/drivers/media/dvb-frontends/horus3a.c b/drivers/media/dvb-frontends/horus3a.c index 0c089b5986a1..4ebddc895137 100644 --- a/drivers/media/dvb-frontends/horus3a.c +++ b/drivers/media/dvb-frontends/horus3a.c @@ -89,7 +89,9 @@ static int horus3a_write_regs(struct horus3a_priv *priv, static int horus3a_write_reg(struct horus3a_priv *priv, u8 reg, u8 val) { - return horus3a_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return horus3a_write_regs(priv, reg, &tmp, 1); } static int horus3a_enter_power_save(struct horus3a_priv *priv) diff --git a/drivers/media/dvb-frontends/itd1000.c b/drivers/media/dvb-frontends/itd1000.c index cadcae4cff89..ac9d2591bb6f 100644 --- a/drivers/media/dvb-frontends/itd1000.c +++ b/drivers/media/dvb-frontends/itd1000.c @@ -99,8 +99,9 @@ static int itd1000_read_reg(struct itd1000_state *state, u8 reg) static inline int itd1000_write_reg(struct itd1000_state *state, u8 r, u8 v) { - int ret = itd1000_write_regs(state, r, &v, 1); - state->shadow[r] = v; + u8 tmp = v; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + int ret = itd1000_write_regs(state, r, &tmp, 1); + state->shadow[r] = tmp; return ret; } diff --git a/drivers/media/dvb-frontends/mt312.c b/drivers/media/dvb-frontends/mt312.c index fc08429c99b7..7824926a3744 100644 --- a/drivers/media/dvb-frontends/mt312.c +++ b/drivers/media/dvb-frontends/mt312.c @@ -142,7 +142,10 @@ static inline int mt312_readreg(struct mt312_state *state, static inline int mt312_writereg(struct mt312_state *state, const enum mt312_reg_addr reg, const u8 val) { - return mt312_write(state, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + + return mt312_write(state, reg, &tmp, 1); } static inline u32 mt312_div(u32 a, u32 b) diff --git a/drivers/media/dvb-frontends/stb0899_drv.c b/drivers/media/dvb-frontends/stb0899_drv.c index 3d171b0e00c2..3deddbcaa8b7 100644 --- a/drivers/media/dvb-frontends/stb0899_drv.c +++ b/drivers/media/dvb-frontends/stb0899_drv.c @@ -552,7 +552,8 @@ int stb0899_write_regs(struct stb0899_state *state, unsigned int reg, u8 *data, int stb0899_write_reg(struct stb0899_state *state, unsigned int reg, u8 data) { - return stb0899_write_regs(state, reg, &data, 1); + u8 tmp = data; + return stb0899_write_regs(state, reg, &tmp, 1); } /* diff --git a/drivers/media/dvb-frontends/stb6100.c b/drivers/media/dvb-frontends/stb6100.c index 5add1182c3ca..4746b1e0d637 100644 --- a/drivers/media/dvb-frontends/stb6100.c +++ b/drivers/media/dvb-frontends/stb6100.c @@ -226,12 +226,14 @@ static int stb6100_write_reg_range(struct stb6100_state *state, u8 buf[], int st static int stb6100_write_reg(struct stb6100_state *state, u8 reg, u8 data) { + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + if (unlikely(reg >= STB6100_NUMREGS)) { dprintk(verbose, FE_ERROR, 1, "Invalid register offset 0x%x", reg); return -EREMOTEIO; } - data = (data & stb6100_template[reg].mask) | stb6100_template[reg].set; - return stb6100_write_reg_range(state, &data, reg, 1); + tmp = (tmp & stb6100_template[reg].mask) | stb6100_template[reg].set; + return stb6100_write_reg_range(state, &tmp, reg, 1); } diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c index abc379aea713..94cec81d0a5c 100644 --- a/drivers/media/dvb-frontends/stv0367.c +++ b/drivers/media/dvb-frontends/stv0367.c @@ -804,7 +804,9 @@ int stv0367_writeregs(struct stv0367_state *state, u16 reg, u8 *data, int len) static int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data) { - return stv0367_writeregs(state, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv0367_writeregs(state, reg, &tmp, 1); } static u8 stv0367_readreg(struct stv0367_state *state, u16 reg) diff --git a/drivers/media/dvb-frontends/stv090x.c b/drivers/media/dvb-frontends/stv090x.c index 25bdf6e0f963..f0377e2b341b 100644 --- a/drivers/media/dvb-frontends/stv090x.c +++ b/drivers/media/dvb-frontends/stv090x.c @@ -761,7 +761,9 @@ static int stv090x_write_regs(struct stv090x_state *state, unsigned int reg, u8 static int stv090x_write_reg(struct stv090x_state *state, unsigned int reg, u8 data) { - return stv090x_write_regs(state, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv090x_write_regs(state, reg, &tmp, 1); } static int stv090x_i2c_gate_ctrl(struct stv090x_state *state, int enable) diff --git a/drivers/media/dvb-frontends/stv6110x.c b/drivers/media/dvb-frontends/stv6110x.c index c611ad210b5c..924f16fee1fb 100644 --- a/drivers/media/dvb-frontends/stv6110x.c +++ b/drivers/media/dvb-frontends/stv6110x.c @@ -97,7 +97,9 @@ static int stv6110x_write_regs(struct stv6110x_state *stv6110x, int start, u8 da static int stv6110x_write_reg(struct stv6110x_state *stv6110x, u8 reg, u8 data) { - return stv6110x_write_regs(stv6110x, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv6110x_write_regs(stv6110x, reg, &tmp, 1); } static int stv6110x_init(struct dvb_frontend *fe) diff --git a/drivers/media/dvb-frontends/zl10039.c b/drivers/media/dvb-frontends/zl10039.c index f8c271be196c..0d2bef62ff05 100644 --- a/drivers/media/dvb-frontends/zl10039.c +++ b/drivers/media/dvb-frontends/zl10039.c @@ -138,7 +138,9 @@ static inline int zl10039_writereg(struct zl10039_state *state, const enum zl10039_reg_addr reg, const u8 val) { - return zl10039_write(state, reg, &val, 1); + const u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return zl10039_write(state, reg, &tmp, 1); } static int zl10039_init(struct dvb_frontend *fe) -- GitLab From b2e7c63cad18341525643716bc62e5ac4ee50485 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 10 Jan 2018 07:20:39 -0500 Subject: [PATCH 1345/1398] media: ts2020: avoid integer overflows on 32 bit machines commit 81742be14b6a90c9fd0ff6eb4218bdf696ad8e46 upstream. Before this patch, when compiled for arm32, the signal strength were reported as: Lock (0x1f) Signal= 4294908.66dBm C/N= 12.79dB Because of a 32 bit integer overflow. After it, it is properly reported as: Lock (0x1f) Signal= -58.64dBm C/N= 12.79dB Fixes: 0f91c9d6bab9 ("[media] TS2020: Calculate tuner gain correctly") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/ts2020.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/dvb-frontends/ts2020.c b/drivers/media/dvb-frontends/ts2020.c index a9f6bbea6df3..103b9c824f1f 100644 --- a/drivers/media/dvb-frontends/ts2020.c +++ b/drivers/media/dvb-frontends/ts2020.c @@ -369,7 +369,7 @@ static int ts2020_read_tuner_gain(struct dvb_frontend *fe, unsigned v_agc, gain2 = clamp_t(long, gain2, 0, 13); v_agc = clamp_t(long, v_agc, 400, 1100); - *_gain = -(gain1 * 2330 + + *_gain = -((__s64)gain1 * 2330 + gain2 * 3500 + v_agc * 24 / 10 * 10 + 10000); @@ -387,7 +387,7 @@ static int ts2020_read_tuner_gain(struct dvb_frontend *fe, unsigned v_agc, gain3 = clamp_t(long, gain3, 0, 6); v_agc = clamp_t(long, v_agc, 600, 1600); - *_gain = -(gain1 * 2650 + + *_gain = -((__s64)gain1 * 2650 + gain2 * 3380 + gain3 * 2850 + v_agc * 176 / 100 * 10 - -- GitLab From 1666d38f4ed6cbf2bacaad21f04d77ca64c4808a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 24 Jan 2018 06:01:57 -0500 Subject: [PATCH 1346/1398] media: cxusb, dib0700: ignore XC2028_I2C_FLUSH commit 9893b905e743ded332575ca04486bd586c0772f7 upstream. The XC2028_I2C_FLUSH only needs to be implemented on a few devices. Others can safely ignore it. That prevents filling the dmesg with lots of messages like: dib0700: stk7700ph_xc3028_callback: unknown command 2, arg 0 Fixes: 4d37ece757a8 ("[media] tuner/xc2028: Add I2C flush callback") Reported-by: Enrico Mioso Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/cxusb.c | 2 ++ drivers/media/usb/dvb-usb/dib0700_devices.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/media/usb/dvb-usb/cxusb.c b/drivers/media/usb/dvb-usb/cxusb.c index 9fd43a37154c..b20f03d86e00 100644 --- a/drivers/media/usb/dvb-usb/cxusb.c +++ b/drivers/media/usb/dvb-usb/cxusb.c @@ -820,6 +820,8 @@ static int dvico_bluebird_xc2028_callback(void *ptr, int component, case XC2028_RESET_CLK: deb_info("%s: XC2028_RESET_CLK %d\n", __func__, arg); break; + case XC2028_I2C_FLUSH: + break; default: deb_info("%s: unknown command %d, arg %d\n", __func__, command, arg); diff --git a/drivers/media/usb/dvb-usb/dib0700_devices.c b/drivers/media/usb/dvb-usb/dib0700_devices.c index caa55402052e..2868766893c8 100644 --- a/drivers/media/usb/dvb-usb/dib0700_devices.c +++ b/drivers/media/usb/dvb-usb/dib0700_devices.c @@ -431,6 +431,7 @@ static int stk7700ph_xc3028_callback(void *ptr, int component, state->dib7000p_ops.set_gpio(adap->fe_adap[0].fe, 8, 0, 1); break; case XC2028_RESET_CLK: + case XC2028_I2C_FLUSH: break; default: err("%s: unknown command %d, arg %d\n", __func__, -- GitLab From da3b224658d3d8aa8ec7877fb1fe808b4b4da029 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 6 Feb 2018 15:37:13 -0800 Subject: [PATCH 1347/1398] fs/proc/kcore.c: use probe_kernel_read() instead of memcpy() commit d0290bc20d4739b7a900ae37eb5d4cc3be2b393f upstream. Commit df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data") added a bounce buffer to avoid hardened usercopy checks. Copying to the bounce buffer was implemented with a simple memcpy() assuming that it is always valid to read from kernel memory iff the kern_addr_valid() check passed. A simple, but pointless, test case like "dd if=/proc/kcore of=/dev/null" now can easily crash the kernel, since the former execption handling on invalid kernel addresses now doesn't work anymore. Also adding a kern_addr_valid() implementation wouldn't help here. Most architectures simply return 1 here, while a couple implemented a page table walk to figure out if something is mapped at the address in question. With DEBUG_PAGEALLOC active mappings are established and removed all the time, so that relying on the result of kern_addr_valid() before executing the memcpy() also doesn't work. Therefore simply use probe_kernel_read() to copy to the bounce buffer. This also allows to simplify read_kcore(). At least on s390 this fixes the observed crashes and doesn't introduce warnings that were removed with df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data"), even though the generic probe_kernel_read() implementation uses uaccess functions. While looking into this I'm also wondering if kern_addr_valid() could be completely removed...(?) Link: http://lkml.kernel.org/r/20171202132739.99971-1-heiko.carstens@de.ibm.com Fixes: df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data") Fixes: f5509cc18daa ("mm: Hardened usercopy") Signed-off-by: Heiko Carstens Acked-by: Kees Cook Cc: Jiri Olsa Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/kcore.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 5c89a07e3d7f..df7e07986ead 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -507,23 +507,15 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) return -EFAULT; } else { if (kern_addr_valid(start)) { - unsigned long n; - /* * Using bounce buffer to bypass the * hardened user copy kernel text checks. */ - memcpy(buf, (char *) start, tsz); - n = copy_to_user(buffer, buf, tsz); - /* - * We cannot distinguish between fault on source - * and fault on destination. When this happens - * we clear too and hope it will trigger the - * EFAULT again. - */ - if (n) { - if (clear_user(buffer + tsz - n, - n)) + if (probe_kernel_read(buf, (void *) start, tsz)) { + if (clear_user(buffer, tsz)) + return -EFAULT; + } else { + if (copy_to_user(buffer, buf, tsz)) return -EFAULT; } } else { -- GitLab From 33a4459bdef12df44f31abbf645489a39fcff321 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 6 Feb 2018 15:37:55 -0800 Subject: [PATCH 1348/1398] kernel/async.c: revert "async: simplify lowest_in_progress()" commit 4f7e988e63e336827f4150de48163bed05d653bd upstream. This reverts commit 92266d6ef60c ("async: simplify lowest_in_progress()") which was simply wrong: In the case where domain is NULL, we now use the wrong offsetof() in the list_first_entry macro, so we don't actually fetch the ->cookie value, but rather the eight bytes located sizeof(struct list_head) further into the struct async_entry. On 64 bit, that's the data member, while on 32 bit, that's a u64 built from func and data in some order. I think the bug happens to be harmless in practice: It obviously only affects callers which pass a NULL domain, and AFAICT the only such caller is async_synchronize_full() -> async_synchronize_full_domain(NULL) -> async_synchronize_cookie_domain(ASYNC_COOKIE_MAX, NULL) and the ASYNC_COOKIE_MAX means that in practice we end up waiting for the async_global_pending list to be empty - but it would break if somebody happened to pass (void*)-1 as the data element to async_schedule, and of course also if somebody ever does a async_synchronize_cookie_domain(, NULL) with a "finite" cookie value. Maybe the "harmless in practice" means this isn't -stable material. But I'm not completely confident my quick git grep'ing is enough, and there might be affected code in one of the earlier kernels that has since been removed, so I'll leave the decision to the stable guys. Link: http://lkml.kernel.org/r/20171128104938.3921-1-linux@rasmusvillemoes.dk Fixes: 92266d6ef60c "async: simplify lowest_in_progress()" Signed-off-by: Rasmus Villemoes Acked-by: Tejun Heo Cc: Arjan van de Ven Cc: Adam Wallis Cc: Lai Jiangshan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/async.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/async.c b/kernel/async.c index d2edd6efec56..d84d4860992e 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -84,20 +84,24 @@ static atomic_t entry_count; static async_cookie_t lowest_in_progress(struct async_domain *domain) { - struct list_head *pending; + struct async_entry *first = NULL; async_cookie_t ret = ASYNC_COOKIE_MAX; unsigned long flags; spin_lock_irqsave(&async_lock, flags); - if (domain) - pending = &domain->pending; - else - pending = &async_global_pending; + if (domain) { + if (!list_empty(&domain->pending)) + first = list_first_entry(&domain->pending, + struct async_entry, domain_list); + } else { + if (!list_empty(&async_global_pending)) + first = list_first_entry(&async_global_pending, + struct async_entry, global_list); + } - if (!list_empty(pending)) - ret = list_first_entry(pending, struct async_entry, - domain_list)->cookie; + if (first) + ret = first->cookie; spin_unlock_irqrestore(&async_lock, flags); return ret; -- GitLab From 91cebf98cd940099a25a351558e52003f53a7e25 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:40:24 -0800 Subject: [PATCH 1349/1398] kernel/relay.c: revert "kernel/relay.c: fix potential memory leak" commit a1be1f3931bfe0a42b46fef77a04593c2b136e7f upstream. This reverts commit ba62bafe942b ("kernel/relay.c: fix potential memory leak"). This commit introduced a double free bug, because 'chan' is already freed by the line: kref_put(&chan->kref, relay_destroy_channel); This bug was found by syzkaller, using the BLKTRACESETUP ioctl. Link: http://lkml.kernel.org/r/20180127004759.101823-1-ebiggers3@gmail.com Fixes: ba62bafe942b ("kernel/relay.c: fix potential memory leak") Signed-off-by: Eric Biggers Reported-by: syzbot Reviewed-by: Andrew Morton Cc: Zhouyi Zhou Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/relay.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/relay.c b/kernel/relay.c index 8f18d314a96a..2603e04f55f9 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -611,7 +611,6 @@ struct rchan *relay_open(const char *base_filename, kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); - kfree(chan); return NULL; } EXPORT_SYMBOL_GPL(relay_open); -- GitLab From a705c24b5d5012be1165f0bc43997babd9ed2614 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:41:53 -0800 Subject: [PATCH 1350/1398] pipe: actually allow root to exceed the pipe buffer limits commit 85c2dd5473b2718b4b63e74bfeb1ca876868e11f upstream. pipe-user-pages-hard and pipe-user-pages-soft are only supposed to apply to unprivileged users, as documented in both Documentation/sysctl/fs.txt and the pipe(7) man page. However, the capabilities are actually only checked when increasing a pipe's size using F_SETPIPE_SZ, not when creating a new pipe. Therefore, if pipe-user-pages-hard has been set, the root user can run into it and be unable to create pipes. Similarly, if pipe-user-pages-soft has been set, the root user can run into it and have their pipes limited to 1 page each. Fix this by allowing the privileged override in both cases. Link: http://lkml.kernel.org/r/20180111052902.14409-4-ebiggers3@gmail.com Fixes: 759c01142a5d ("pipe: limit the per-user amount of pages allocated in pipes") Signed-off-by: Eric Biggers Acked-by: Kees Cook Acked-by: Joe Lawrence Cc: Alexander Viro Cc: "Luis R . Rodriguez" Cc: Michael Kerrisk Cc: Mikulas Patocka Cc: Willy Tarreau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 9faecf1b4a27..54e803f89743 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -617,6 +617,11 @@ static bool too_many_pipe_buffers_hard(unsigned long user_bufs) return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard; } +static bool is_unprivileged_user(void) +{ + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); +} + struct pipe_inode_info *alloc_pipe_info(void) { struct pipe_inode_info *pipe; @@ -633,12 +638,12 @@ struct pipe_inode_info *alloc_pipe_info(void) user_bufs = account_pipe_buffers(user, 0, pipe_bufs); - if (too_many_pipe_buffers_soft(user_bufs)) { + if (too_many_pipe_buffers_soft(user_bufs) && is_unprivileged_user()) { user_bufs = account_pipe_buffers(user, pipe_bufs, 1); pipe_bufs = 1; } - if (too_many_pipe_buffers_hard(user_bufs)) + if (too_many_pipe_buffers_hard(user_bufs) && is_unprivileged_user()) goto out_revert_acct; pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer), @@ -1069,7 +1074,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) if (nr_pages > pipe->buffers && (too_many_pipe_buffers_hard(user_bufs) || too_many_pipe_buffers_soft(user_bufs)) && - !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) { + is_unprivileged_user()) { ret = -EPERM; goto out_revert_acct; } -- GitLab From 71baf27d8c2b3db340e07412756093474be90883 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:41:56 -0800 Subject: [PATCH 1351/1398] pipe: fix off-by-one error when checking buffer limits commit 9903a91c763ecdae333a04a9d89d79d2b8966503 upstream. With pipe-user-pages-hard set to 'N', users were actually only allowed up to 'N - 1' buffers; and likewise for pipe-user-pages-soft. Fix this to allow up to 'N' buffers, as would be expected. Link: http://lkml.kernel.org/r/20180111052902.14409-5-ebiggers3@gmail.com Fixes: b0b91d18e2e9 ("pipe: fix limit checking in pipe_set_size()") Signed-off-by: Eric Biggers Acked-by: Willy Tarreau Acked-by: Kees Cook Acked-by: Joe Lawrence Cc: Alexander Viro Cc: "Luis R . Rodriguez" Cc: Michael Kerrisk Cc: Mikulas Patocka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 54e803f89743..34345535f63d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -609,12 +609,12 @@ static unsigned long account_pipe_buffers(struct user_struct *user, static bool too_many_pipe_buffers_soft(unsigned long user_bufs) { - return pipe_user_pages_soft && user_bufs >= pipe_user_pages_soft; + return pipe_user_pages_soft && user_bufs > pipe_user_pages_soft; } static bool too_many_pipe_buffers_hard(unsigned long user_bufs) { - return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard; + return pipe_user_pages_hard && user_bufs > pipe_user_pages_hard; } static bool is_unprivileged_user(void) -- GitLab From df9658e806052cb9f2b0f9bbfb2aaac5bc9e9993 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 17 Jan 2018 21:05:55 +0100 Subject: [PATCH 1352/1398] HID: quirks: Fix keyboard + touchpad on Toshiba Click Mini not working commit edfc3722cfef4217c7fe92b272cbe0288ba1ff57 upstream. The Toshiba Click Mini uses an i2c attached keyboard/touchpad combo (single i2c_hid device for both) which has a vid:pid of 04F3:0401, which is also used by a bunch of Elan touchpads which are handled by the drivers/input/mouse/elan_i2c driver, but that driver deals with pure touchpads and does not work for a combo device such as the one on the Toshiba Click Mini. The combo on the Mini has an ACPI id of ELAN0800, which is not claimed by the elan_i2c driver, so check for that and if it is found do not ignore the device. This fixes the keyboard/touchpad combo on the Mini not working (although with the touchpad in mouse emulation mode). Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e32862ca5223..03cac5731afc 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2365,7 +2365,6 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) }, { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0400) }, - { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0401) }, { HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) }, { HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC5UH) }, { HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC4UM) }, @@ -2635,6 +2634,17 @@ bool hid_ignore(struct hid_device *hdev) strncmp(hdev->name, "www.masterkit.ru MA901", 22) == 0) return true; break; + case USB_VENDOR_ID_ELAN: + /* + * Many Elan devices have a product id of 0x0401 and are handled + * by the elan_i2c input driver. But the ACPI HID ELAN0800 dev + * is not (and cannot be) handled by that driver -> + * Ignore all 0x0401 devs except for the ELAN0800 dev. + */ + if (hdev->product == 0x0401 && + strncmp(hdev->name, "ELAN0800", 8) != 0) + return true; + break; } if (hdev->type == HID_TYPE_USBMOUSE && -- GitLab From 6913d1b190b99855b740f648ea70cede43e52eff Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 29 Nov 2017 20:29:07 +0100 Subject: [PATCH 1353/1398] Bluetooth: btsdio: Do not bind to non-removable BCM43341 commit b4cdaba274247c9c841c6a682c08fa91fb3aa549 upstream. BCM43341 devices soldered onto the PCB (non-removable) always (AFAICT) use an UART connection for bluetooth. But they also advertise btsdio support on their 3th sdio function, this causes 2 problems: 1) A non functioning BT HCI getting registered 2) Since the btsdio driver does not have suspend/resume callbacks, mmc_sdio_pre_suspend will return -ENOSYS, causing mmc_pm_notify() to react as if the SDIO-card is removed and since the slot is marked as non-removable it will never get detected as inserted again. Which results in wifi no longer working after a suspend/resume. This commit fixes both by making btsdio ignore BCM43341 devices when connected to a slot which is marked non-removable. Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btsdio.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index 1cb958e199eb..94e914a33a99 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -291,6 +292,14 @@ static int btsdio_probe(struct sdio_func *func, tuple = tuple->next; } + /* BCM43341 devices soldered onto the PCB (non-removable) use an + * uart connection for bluetooth, ignore the BT SDIO interface. + */ + if (func->vendor == SDIO_VENDOR_ID_BROADCOM && + func->device == SDIO_DEVICE_ID_BROADCOM_43341 && + !mmc_card_is_removable(func->card->host)) + return -ENODEV; + data = devm_kzalloc(&func->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; -- GitLab From 84bf682f53422a08086dd3f1b0db1dbda46fead3 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 20 Dec 2017 19:00:07 +0800 Subject: [PATCH 1354/1398] Revert "Bluetooth: btusb: fix QCA Rome suspend/resume" commit 7d06d5895c159f64c46560dc258e553ad8670fe0 upstream. This reverts commit fd865802c66bc451dc515ed89360f84376ce1a56. This commit causes a regression on some QCA ROME chips. The USB device reset happens in btusb_open(), hence firmware loading gets interrupted. Furthermore, this commit stops working after commit ("a0085f2510e8976614ad8f766b209448b385492f Bluetooth: btusb: driver to enable the usb-wakeup feature"). Reset-resume quirk only gets enabled in btusb_suspend() when it's not a wakeup source. If we really want to reset the USB device, we need to do it before btusb_open(). Let's handle it in drivers/usb/core/quirks.c. Cc: Leif Liddy Cc: Matthias Kaehlcke Cc: Brian Norris Cc: Daniel Drake Signed-off-by: Kai-Heng Feng Reviewed-by: Brian Norris Tested-by: Brian Norris Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 693028659ccc..74e677ac8e37 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2925,12 +2925,6 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_QCA_ROME) { data->setup_on_usb = btusb_setup_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; - - /* QCA Rome devices lose their updated firmware over suspend, - * but the USB hub doesn't notice any status change. - * Explicitly request a device reset on resume. - */ - set_bit(BTUSB_RESET_RESUME, &data->flags); } #ifdef CONFIG_BT_HCIBTUSB_RTL -- GitLab From 5795b076bd7f98fc80ce088f8f79bbe02e2553ca Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 8 Jan 2018 10:44:16 +0100 Subject: [PATCH 1355/1398] Bluetooth: btusb: Restore QCA Rome suspend/resume fix with a "rewritten" version commit 61f5acea8737d9b717fcc22bb6679924f3c82b98 upstream. Commit 7d06d5895c15 ("Revert "Bluetooth: btusb: fix QCA...suspend/resume"") removed the setting of the BTUSB_RESET_RESUME quirk for QCA Rome devices, instead favoring adding USB_QUIRK_RESET_RESUME quirks in usb/core/quirks.c. This was done because the DIY BTUSB_RESET_RESUME reset-resume handling has several issues (see the original commit message). An added advantage of moving over to the USB-core reset-resume handling is that it also disables autosuspend for these devices, which is similarly broken on these. But there are 2 issues with this approach: 1) It leaves the broken DIY BTUSB_RESET_RESUME code in place for Realtek devices. 2) Sofar only 2 of the 10 QCA devices known to the btusb code have been added to usb/core/quirks.c and if we fix the Realtek case the same way we need to add an additional 14 entries. So in essence we need to duplicate a large part of the usb_device_id table in btusb.c in usb/core/quirks.c and manually keep them in sync. This commit instead restores setting a reset-resume quirk for QCA devices in the btusb.c code, avoiding the duplicate usb_device_id table problem. This commit avoids the problems with the original DIY BTUSB_RESET_RESUME code by simply setting the USB_QUIRK_RESET_RESUME quirk directly on the usb_device. This commit also moves the BTUSB_REALTEK case over to directly setting the USB_QUIRK_RESET_RESUME on the usb_device and removes the now unused BTUSB_RESET_RESUME code. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1514836 Fixes: 7d06d5895c15 ("Revert "Bluetooth: btusb: fix QCA...suspend/resume"") Cc: Leif Liddy Cc: Matthias Kaehlcke Cc: Brian Norris Cc: Daniel Drake Cc: Kai-Heng Feng Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 74e677ac8e37..3257647d4f74 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -369,8 +370,8 @@ static const struct usb_device_id blacklist_table[] = { #define BTUSB_FIRMWARE_LOADED 7 #define BTUSB_FIRMWARE_FAILED 8 #define BTUSB_BOOTING 9 -#define BTUSB_RESET_RESUME 10 -#define BTUSB_DIAG_RUNNING 11 +#define BTUSB_DIAG_RUNNING 10 +#define BTUSB_OOB_WAKE_ENABLED 11 struct btusb_data { struct hci_dev *hdev; @@ -2925,6 +2926,12 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_QCA_ROME) { data->setup_on_usb = btusb_setup_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; + + /* QCA Rome devices lose their updated firmware over suspend, + * but the USB hub doesn't notice any status change. + * explicitly request a device reset on resume. + */ + interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; } #ifdef CONFIG_BT_HCIBTUSB_RTL @@ -2935,7 +2942,7 @@ static int btusb_probe(struct usb_interface *intf, * but the USB hub doesn't notice any status change. * Explicitly request a device reset on resume. */ - set_bit(BTUSB_RESET_RESUME, &data->flags); + interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; } #endif @@ -3092,14 +3099,6 @@ static int btusb_suspend(struct usb_interface *intf, pm_message_t message) btusb_stop_traffic(data); usb_kill_anchored_urbs(&data->tx_anchor); - /* Optionally request a device reset on resume, but only when - * wakeups are disabled. If wakeups are enabled we assume the - * device will stay powered up throughout suspend. - */ - if (test_bit(BTUSB_RESET_RESUME, &data->flags) && - !device_may_wakeup(&data->udev->dev)) - data->udev->reset_resume = 1; - return 0; } -- GitLab From 498b8b7453a345460fa22a13f5011d43d311b60a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 1 Aug 2017 04:16:47 -0500 Subject: [PATCH 1356/1398] signal/openrisc: Fix do_unaligned_access to send the proper signal commit 500d58300571b6602341b041f97c082a461ef994 upstream. While reviewing the signal sending on openrisc the do_unaligned_access function stood out because it is obviously wrong. A comment about an si_code set above when actually si_code is never set. Leading to a random si_code being sent to userspace in the event of an unaligned access. Looking further SIGBUS BUS_ADRALN is the proper pair of signal and si_code to send for an unaligned access. That is what other architectures do and what is required by posix. Given that do_unaligned_access is broken in a way that no one can be relying on it on openrisc fix the code to just do the right thing. Fixes: 769a8a96229e ("OpenRISC: Traps") Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: Arnd Bergmann Cc: openrisc@lists.librecores.org Acked-by: Stafford Horne Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/openrisc/kernel/traps.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 3d3f6062f49c..605a284922fb 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -302,12 +302,12 @@ asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address) siginfo_t info; if (user_mode(regs)) { - /* Send a SIGSEGV */ - info.si_signo = SIGSEGV; + /* Send a SIGBUS */ + info.si_signo = SIGBUS; info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void *)address; - force_sig_info(SIGSEGV, &info, current); + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); } else { printk("KERNEL: Unaligned Access 0x%.8lx\n", address); show_registers(regs); -- GitLab From 21f94109d0f17d2fb775ae0e51ef98feda2b9a47 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 24 Jul 2017 17:30:30 -0500 Subject: [PATCH 1357/1398] signal/sh: Ensure si_signo is initialized in do_divide_error commit 0e88bb002a9b2ee8cc3cc9478ce2dc126f849696 upstream. Set si_signo. Cc: Yoshinori Sato Cc: Rich Felker Cc: Paul Mundt Cc: linux-sh@vger.kernel.org Fixes: 0983b31849bb ("sh: Wire up division and address error exceptions on SH-2A.") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/sh/kernel/traps_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index ff639342a8be..c5b997757988 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -607,7 +607,8 @@ asmlinkage void do_divide_error(unsigned long r4) break; } - force_sig_info(SIGFPE, &info, current); + info.si_signo = SIGFPE; + force_sig_info(info.si_signo, &info, current); } #endif -- GitLab From 68d18e90eeec864b38e1313e0d6d10b4e1e5a1fc Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Jan 2018 14:01:34 -0500 Subject: [PATCH 1358/1398] alpha: fix crash if pthread_create races with signal delivery commit 21ffceda1c8b3807615c40d440d7815e0c85d366 upstream. On alpha, a process will crash if it attempts to start a thread and a signal is delivered at the same time. The crash can be reproduced with this program: https://cygwin.com/ml/cygwin/2014-11/msg00473.html The reason for the crash is this: * we call the clone syscall * we go to the function copy_process * copy process calls copy_thread_tls, it is a wrapper around copy_thread * copy_thread sets the tls pointer: childti->pcb.unique = regs->r20 * copy_thread sets regs->r20 to zero * we go back to copy_process * copy process checks "if (signal_pending(current))" and returns -ERESTARTNOINTR * the clone syscall is restarted, but this time, regs->r20 is zero, so the new thread is created with zero tls pointer * the new thread crashes in start_thread when attempting to access tls The comment in the code says that setting the register r20 is some compatibility with OSF/1. But OSF/1 doesn't use the CLONE_SETTLS flag, so we don't have to zero r20 if CLONE_SETTLS is set. This patch fixes the bug by zeroing regs->r20 only if CLONE_SETTLS is not set. Signed-off-by: Mikulas Patocka Signed-off-by: Matt Turner Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/process.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index b483156698d5..60c17b9bf04d 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -265,12 +265,13 @@ copy_thread(unsigned long clone_flags, unsigned long usp, application calling fork. */ if (clone_flags & CLONE_SETTLS) childti->pcb.unique = regs->r20; + else + regs->r20 = 0; /* OSF/1 has some strange fork() semantics. */ childti->pcb.usp = usp ?: rdusp(); *childregs = *regs; childregs->r0 = 0; childregs->r19 = 0; childregs->r20 = 1; /* OSF/1 has some strange fork() semantics. */ - regs->r20 = 0; stack = ((struct switch_stack *) regs) - 1; *childstack = *stack; childstack->r26 = (unsigned long) ret_from_fork; -- GitLab From 7d22d92ca6c9cc72629eb00b576de4f0f0dc8f74 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Jan 2018 13:59:54 -0500 Subject: [PATCH 1359/1398] alpha: fix reboot on Avanti platform commit 55fc633c41a08ce9244ff5f528f420b16b1e04d6 upstream. We need to define NEED_SRM_SAVE_RESTORE on the Avanti, otherwise we get machine check exception when attempting to reboot the machine. Signed-off-by: Mikulas Patocka Signed-off-by: Matt Turner Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/pci_impl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h index 2b0ac429f5eb..412bb3c24f36 100644 --- a/arch/alpha/kernel/pci_impl.h +++ b/arch/alpha/kernel/pci_impl.h @@ -143,7 +143,8 @@ struct pci_iommu_arena }; #if defined(CONFIG_ALPHA_SRM) && \ - (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA)) + (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA) || \ + defined(CONFIG_ALPHA_AVANTI)) # define NEED_SRM_SAVE_RESTORE #else # undef NEED_SRM_SAVE_RESTORE -- GitLab From 71611b37cca4982da64d1807ab54749826628805 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Jan 2018 14:00:32 -0500 Subject: [PATCH 1360/1398] alpha: fix formating of stack content commit 4b01abdb32fc36abe877503bfbd33019159fad71 upstream. Since version 4.9, the kernel automatically breaks printk calls into multiple newlines unless pr_cont is used. Fix the alpha stacktrace code, so that it prints stack trace in four columns, as it was initially intended. Signed-off-by: Mikulas Patocka Signed-off-by: Matt Turner Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/traps.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 74aceead06e9..32ba92cdf5f6 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -158,11 +158,16 @@ void show_stack(struct task_struct *task, unsigned long *sp) for(i=0; i < kstack_depth_to_print; i++) { if (((long) stack & (THREAD_SIZE-1)) == 0) break; - if (i && ((i % 4) == 0)) - printk("\n "); - printk("%016lx ", *stack++); + if ((i % 4) == 0) { + if (i) + pr_cont("\n"); + printk(" "); + } else { + pr_cont(" "); + } + pr_cont("%016lx", *stack++); } - printk("\n"); + pr_cont("\n"); dik_show_trace(sp); } -- GitLab From 2d4e295284a9614726cf02b3da1bb2591ec60770 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 5 Jan 2018 14:27:58 -0800 Subject: [PATCH 1361/1398] xtensa: fix futex_atomic_cmpxchg_inatomic commit ca47480921587ae30417dd234a9f79af188e3666 upstream. Return 0 if the operation was successful, not the userspace memory value. Check that userspace value equals passed oldval, not itself. Don't update *uval if the value wasn't read from userspace memory. This fixes process hang due to infinite loop in futex_lock_pi. It also fixes a bunch of glibc tests nptl/tst-mutexpi*. Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/include/asm/futex.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h index b39531babec0..72bfc1cbc2b5 100644 --- a/arch/xtensa/include/asm/futex.h +++ b/arch/xtensa/include/asm/futex.h @@ -109,7 +109,6 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { int ret = 0; - u32 prev; if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; @@ -120,26 +119,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, __asm__ __volatile__ ( " # futex_atomic_cmpxchg_inatomic\n" - "1: l32i %1, %3, 0\n" - " mov %0, %5\n" - " wsr %1, scompare1\n" - "2: s32c1i %0, %3, 0\n" - "3:\n" + " wsr %5, scompare1\n" + "1: s32c1i %1, %4, 0\n" + " s32i %1, %6, 0\n" + "2:\n" " .section .fixup,\"ax\"\n" " .align 4\n" - "4: .long 3b\n" - "5: l32r %1, 4b\n" - " movi %0, %6\n" + "3: .long 2b\n" + "4: l32r %1, 3b\n" + " movi %0, %7\n" " jx %1\n" " .previous\n" " .section __ex_table,\"a\"\n" - " .long 1b,5b,2b,5b\n" + " .long 1b,4b\n" " .previous\n" - : "+r" (ret), "=&r" (prev), "+m" (*uaddr) - : "r" (uaddr), "r" (oldval), "r" (newval), "I" (-EFAULT) + : "+r" (ret), "+r" (newval), "+m" (*uaddr), "+m" (*uval) + : "r" (uaddr), "r" (oldval), "r" (uval), "I" (-EFAULT) : "memory"); - *uval = prev; return ret; } -- GitLab From 10ddc77ffb92152fe97341b66fcb2d735c2955be Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 13 Nov 2017 16:12:06 +0000 Subject: [PATCH 1362/1398] EDAC, octeon: Fix an uninitialized variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 544e92581a2ac44607d7cc602c6b54d18656f56d upstream. Fix an uninitialized variable warning in the Octeon EDAC driver, as seen in MIPS cavium_octeon_defconfig builds since v4.14 with Codescape GNU Tools 2016.05-03: drivers/edac/octeon_edac-lmc.c In function ‘octeon_lmc_edac_poll_o2’: drivers/edac/octeon_edac-lmc.c:87:24: warning: ‘((long unsigned int*)&int_reg)[1]’ may \ be used uninitialized in this function [-Wmaybe-uninitialized] if (int_reg.s.sec_err || int_reg.s.ded_err) { ^ Iinitialise the whole int_reg variable to zero before the conditional assignments in the error injection case. Signed-off-by: James Hogan Acked-by: David Daney Cc: linux-edac Cc: linux-mips@linux-mips.org Fixes: 1bc021e81565 ("EDAC: Octeon: Add error injection support") Link: http://lkml.kernel.org/r/20171113161206.20990-1-james.hogan@mips.com Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/octeon_edac-lmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index cda6dab5067a..6b65a102b49d 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -79,6 +79,7 @@ static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci) if (!pvt->inject) int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx)); else { + int_reg.u64 = 0; if (pvt->error_type == 1) int_reg.s.sec_err = 1; if (pvt->error_type == 2) -- GitLab From 86d408d10efd1d6161f456ddba59cb2a0fb5f763 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 29 Nov 2017 16:25:44 +0300 Subject: [PATCH 1363/1398] pinctrl: intel: Initialize GPIO properly when used through irqchip commit f5a26acf0162477af6ee4c11b4fb9cffe5d3e257 upstream. When a GPIO is requested using gpiod_get_* APIs the intel pinctrl driver switches the pin to GPIO mode and makes sure interrupts are routed to the GPIO hardware instead of IOAPIC. However, if the GPIO is used directly through irqchip, as is the case with many I2C-HID devices where I2C core automatically configures interrupt for the device, the pin is not initialized as GPIO. Instead we rely that the BIOS configures the pin accordingly which seems not to be the case at least in Asus X540NA SKU3 with Focaltech touchpad. When the pin is not properly configured it might result weird behaviour like interrupts suddenly stop firing completely and the touchpad stops responding to user input. Fix this by properly initializing the pin to GPIO mode also when it is used directly through irqchip. Fixes: 7981c0015af2 ("pinctrl: intel: Add Intel Sunrisepoint pin controller and GPIO support") Reported-by: Daniel Drake Reported-and-tested-by: Chris Chiu Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-intel.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index b40a074822cf..df63b7d997e8 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -368,6 +368,18 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) writel(value, padcfg0); } +static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) +{ + u32 value; + + /* Put the pad into GPIO mode */ + value = readl(padcfg0) & ~PADCFG0_PMODE_MASK; + /* Disable SCI/SMI/NMI generation */ + value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI); + value &= ~(PADCFG0_GPIROUTSMI | PADCFG0_GPIROUTNMI); + writel(value, padcfg0); +} + static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned pin) @@ -375,7 +387,6 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, struct intel_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); void __iomem *padcfg0; unsigned long flags; - u32 value; raw_spin_lock_irqsave(&pctrl->lock, flags); @@ -385,13 +396,7 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, } padcfg0 = intel_get_padcfg(pctrl, pin, PADCFG0); - /* Put the pad into GPIO mode */ - value = readl(padcfg0) & ~PADCFG0_PMODE_MASK; - /* Disable SCI/SMI/NMI generation */ - value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI); - value &= ~(PADCFG0_GPIROUTSMI | PADCFG0_GPIROUTNMI); - writel(value, padcfg0); - + intel_gpio_set_gpio_mode(padcfg0); /* Disable TX buffer and enable RX (this will be input) */ __intel_gpio_set_direction(padcfg0, true); @@ -770,6 +775,8 @@ static int intel_gpio_irq_type(struct irq_data *d, unsigned type) raw_spin_lock_irqsave(&pctrl->lock, flags); + intel_gpio_set_gpio_mode(reg); + value = readl(reg); value &= ~(PADCFG0_RXEVCFG_MASK | PADCFG0_RXINV); -- GitLab From 944723bf84d3ff53d09f949d9a4a659c26b53ccf Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 2 Jan 2018 11:39:47 -0800 Subject: [PATCH 1364/1398] pktcdvd: Fix pkt_setup_dev() error path commit 5a0ec388ef0f6e33841aeb810d7fa23f049ec4cd upstream. Commit 523e1d399ce0 ("block: make gendisk hold a reference to its queue") modified add_disk() and disk_release() but did not update any of the error paths that trigger a put_disk() call after disk->queue has been assigned. That introduced the following behavior in the pktcdvd driver if pkt_new_dev() fails: Kernel BUG at 00000000e98fd882 [verbose debug info unavailable] Since disk_release() calls blk_put_queue() anyway if disk->queue != NULL, fix this by removing the blk_cleanup_queue() call from the pkt_setup_dev() error path. Fixes: commit 523e1d399ce0 ("block: make gendisk hold a reference to its queue") Signed-off-by: Bart Van Assche Cc: Tejun Heo Cc: Maciej S. Szmigiero Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/pktcdvd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 90fa4ac149db..7e4ef0502796 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2779,7 +2779,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) pd->pkt_dev = MKDEV(pktdev_major, idx); ret = pkt_new_dev(pd, dev); if (ret) - goto out_new_dev; + goto out_mem2; /* inherit events of the host device */ disk->events = pd->bdev->bd_disk->events; @@ -2797,8 +2797,6 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) mutex_unlock(&ctl_mutex); return 0; -out_new_dev: - blk_cleanup_queue(disk->queue); out_mem2: put_disk(disk); out_mem: -- GitLab From 1bb09d05a41cbfcdc5cfe6ba953780e9bc63abcc Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:50 +0100 Subject: [PATCH 1365/1398] clocksource/drivers/stm32: Fix kernel panic with multiple timers commit e0aeca3d8cbaea514eb98df1149faa918f9ec42d upstream. The current code hides a couple of bugs: - The global variable 'clock_event_ddata' is overwritten each time the init function is invoked. This is fixed with a kmemdup() instead of assigning the global variable. That prevents a memory corruption when several timers are defined in the DT. - The clockevent's event_handler is NULL if the time framework does not select the clockevent when registering it, this is fine but the init code generates in any case an interrupt leading to dereference this NULL pointer. The stm32 timer works with shadow registers, a mechanism to cache the registers. When a change is done in one buffered register, we need to artificially generate an event to force the timer to copy the content of the register to the shadowed register. The auto-reload register (ARR) is one of the shadowed register as well as the prescaler register (PSC), so in order to force the copy, we issue an event which in turn leads to an interrupt and the NULL dereference. This is fixed by inverting two lines where we clear the status register before enabling the update event interrupt. As this kernel crash is resulting from the combination of these two bugs, the fixes are grouped into a single patch. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-11-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/timer-stm32.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 1b2574c4fb97..b167cc634fae 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -16,6 +16,7 @@ #include #include #include +#include #define TIM_CR1 0x00 #define TIM_DIER 0x0c @@ -106,6 +107,10 @@ static int __init stm32_clockevent_init(struct device_node *np) unsigned long rate, max_delta; int irq, ret, bits, prescaler = 1; + data = kmemdup(&clock_event_ddata, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + clk = of_clk_get(np, 0); if (IS_ERR(clk)) { ret = PTR_ERR(clk); @@ -156,8 +161,8 @@ static int __init stm32_clockevent_init(struct device_node *np) writel_relaxed(prescaler - 1, data->base + TIM_PSC); writel_relaxed(TIM_EGR_UG, data->base + TIM_EGR); - writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); writel_relaxed(0, data->base + TIM_SR); + writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); data->periodic_top = DIV_ROUND_CLOSEST(rate, prescaler * HZ); @@ -184,6 +189,7 @@ static int __init stm32_clockevent_init(struct device_node *np) err_clk_enable: clk_put(clk); err_clk_get: + kfree(data); return ret; } -- GitLab From 3f8130127c0c2439559050ff0ee2ba85af5d8693 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 6 Feb 2018 15:40:38 -0800 Subject: [PATCH 1366/1398] lib/ubsan.c: s/missaligned/misaligned/ commit b8fe1120b4ba342b4f156d24e952d6e686b20298 upstream. A vist from the spelling fairy. Cc: David Laight Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/ubsan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index fb0409df1bcf..1e2328fa002d 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -281,7 +281,7 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data) ubsan_epilogue(&flags); } -static void handle_missaligned_access(struct type_mismatch_data *data, +static void handle_misaligned_access(struct type_mismatch_data *data, unsigned long ptr) { unsigned long flags; @@ -322,7 +322,7 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, if (!ptr) handle_null_ptr_deref(data); else if (data->alignment && !IS_ALIGNED(ptr, data->alignment)) - handle_missaligned_access(data, ptr); + handle_misaligned_access(data, ptr); else handle_object_size_mismatch(data, ptr); } -- GitLab From 3c83fe52b5c1559ea8c1da23ad2c600acc9c6ab3 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 6 Feb 2018 15:40:42 -0800 Subject: [PATCH 1367/1398] lib/ubsan: add type mismatch handler for new GCC/Clang commit 42440c1f9911b4b7b8ba3dc4e90c1197bc561211 upstream. UBSAN=y fails to build with new GCC/clang: arch/x86/kernel/head64.o: In function `sanitize_boot_params': arch/x86/include/asm/bootparam_utils.h:37: undefined reference to `__ubsan_handle_type_mismatch_v1' because Clang and GCC 8 slightly changed ABI for 'type mismatch' errors. Compiler now uses new __ubsan_handle_type_mismatch_v1() function with slightly modified 'struct type_mismatch_data'. Let's add new 'struct type_mismatch_data_common' which is independent from compiler's layout of 'struct type_mismatch_data'. And make __ubsan_handle_type_mismatch[_v1]() functions transform compiler-dependent type mismatch data to our internal representation. This way, we can support both old and new compilers with minimal amount of change. Link: http://lkml.kernel.org/r/20180119152853.16806-1-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Reported-by: Sodagudi Prasad Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/ubsan.c | 48 ++++++++++++++++++++++++++++++++++++++---------- lib/ubsan.h | 14 ++++++++++++++ 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index 1e2328fa002d..50d1d5c25deb 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -265,14 +265,14 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data, } EXPORT_SYMBOL(__ubsan_handle_divrem_overflow); -static void handle_null_ptr_deref(struct type_mismatch_data *data) +static void handle_null_ptr_deref(struct type_mismatch_data_common *data) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s null pointer of type %s\n", type_check_kinds[data->type_check_kind], @@ -281,15 +281,15 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data) ubsan_epilogue(&flags); } -static void handle_misaligned_access(struct type_mismatch_data *data, +static void handle_misaligned_access(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s misaligned address %p for type %s\n", type_check_kinds[data->type_check_kind], @@ -299,15 +299,15 @@ static void handle_misaligned_access(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -static void handle_object_size_mismatch(struct type_mismatch_data *data, +static void handle_object_size_mismatch(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s address %p with insufficient space\n", type_check_kinds[data->type_check_kind], (void *) ptr); @@ -315,7 +315,7 @@ static void handle_object_size_mismatch(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, +static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, unsigned long ptr) { @@ -326,8 +326,36 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, else handle_object_size_mismatch(data, ptr); } + +void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, + unsigned long ptr) +{ + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = data->alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} EXPORT_SYMBOL(__ubsan_handle_type_mismatch); +void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, + unsigned long ptr) +{ + + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = 1UL << data->log_alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} +EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); + void __ubsan_handle_nonnull_return(struct nonnull_return_data *data) { unsigned long flags; diff --git a/lib/ubsan.h b/lib/ubsan.h index b2d18d4a53f5..d8b8085e5dac 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -36,6 +36,20 @@ struct type_mismatch_data { unsigned char type_check_kind; }; +struct type_mismatch_data_v1 { + struct source_location location; + struct type_descriptor *type; + unsigned char log_alignment; + unsigned char type_check_kind; +}; + +struct type_mismatch_data_common { + struct source_location *location; + struct type_descriptor *type; + unsigned long alignment; + unsigned char type_check_kind; +}; + struct nonnull_arg_data { struct source_location location; struct source_location attr_location; -- GitLab From 8fe7ceaf8a4ef194793365339e3e84f7fabdca97 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 5 Dec 2017 09:29:19 +0200 Subject: [PATCH 1368/1398] btrfs: Handle btrfs_set_extent_delalloc failure in fixup worker commit f3038ee3a3f1017a1cbe9907e31fa12d366c5dcb upstream. This function was introduced by 247e743cbe6e ("Btrfs: Use async helpers to deal with pages that have been improperly dirtied") and it didn't do any error handling then. This function might very well fail in ENOMEM situation, yet it's not handled, this could lead to inconsistent state. So let's handle the failure by setting the mapping error bit. Signed-off-by: Nikolay Borisov Reviewed-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 894d56361ea9..a8a1fb40e258 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2063,8 +2063,15 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) goto out; } - btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state, - 0); + ret = btrfs_set_extent_delalloc(inode, page_start, page_end, + &cached_state, 0); + if (ret) { + mapping_set_error(page->mapping, ret); + end_extent_writepage(page, ret, page_start, page_end); + ClearPageChecked(page); + goto out; + } + ClearPageChecked(page); set_page_dirty(page); out: -- GitLab From 3169a7c06e913d3575713753f0807554b1165996 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 29 Nov 2017 19:51:37 +0200 Subject: [PATCH 1369/1398] drm/i915: Avoid PPS HW/SW state mismatch due to rounding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5643205c6340b565a3be0fe0e7305dc4aa551c74 upstream. We store a SW state of the t11_t12 timing in 100usec units but have to program it in 100msec as required by HW. The rounding used during programming means there will be a mismatch between the SW and HW states of this value triggering a "PPS state mismatch" error. Avoid this by storing the already rounded-up value in the SW state. Note that we still calculate panel_power_cycle_delay with the finer 100usec granularity to avoid any needless waits using that version of the delay. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103903 Cc: joks Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171129175137.2889-1-imre.deak@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 7fdc42e5aac8..74163a928cba 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5063,6 +5063,12 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, */ final->t8 = 1; final->t9 = 1; + + /* + * HW has only a 100msec granularity for t11_t12 so round it up + * accordingly. + */ + final->t11_t12 = roundup(final->t11_t12, 100 * 10); } static void -- GitLab From 623c28ee02b36a9f45780be0ded6d13ad74e2d0e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 19 Jan 2018 10:06:03 +0100 Subject: [PATCH 1370/1398] ACPI: sbshc: remove raw pointer from printk() message commit 43cdd1b716b26f6af16da4e145b6578f98798bf6 upstream. There's no need to be printing a raw kernel pointer to the kernel log at every boot. So just remove it, and change the whole message to use the correct dev_info() call at the same time. Reported-by: Wang Qize Signed-off-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/sbshc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 2fa8304171e0..7a3431018e0a 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -275,8 +275,8 @@ static int acpi_smbus_hc_add(struct acpi_device *device) device->driver_data = hc; acpi_ec_add_query_handler(hc->ec, hc->query_bit, NULL, smbus_alarm, hc); - printk(KERN_INFO PREFIX "SBS HC: EC = 0x%p, offset = 0x%0x, query_bit = 0x%0x\n", - hc->ec, hc->offset, hc->query_bit); + dev_info(&device->dev, "SBS HC: offset = 0x%0x, query_bit = 0x%0x\n", + hc->offset, hc->query_bit); return 0; } -- GitLab From a468a3749bb5630b8744fe2c1e41ed86f2a27f79 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 2 Feb 2018 14:00:36 -0700 Subject: [PATCH 1371/1398] acpi, nfit: fix register dimm error handling commit 23fbd7c70aec7600e3227eb24259fc55bf6e4881 upstream. A NULL pointer reference kernel bug was observed when acpi_nfit_add_dimm() called in acpi_nfit_register_dimms() failed. This error path does not set nfit_mem->nvdimm, but the 2nd list_for_each_entry() loop in the function assumes it's always set. Add a check to nfit_mem->nvdimm. Fixes: ba9c8dd3c222 ("acpi, nfit: add dimm device notification support") Signed-off-by: Toshi Kani Cc: "Rafael J. Wysocki" Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index fe03d00de22b..b1815b20a99c 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1535,6 +1535,9 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) struct kernfs_node *nfit_kernfs; nvdimm = nfit_mem->nvdimm; + if (!nvdimm) + continue; + nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit"); if (nfit_kernfs) nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs, -- GitLab From 38e3bc59e0dd2885b223dc5466304af3eba1dd4b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 8 Nov 2017 09:39:46 +0200 Subject: [PATCH 1372/1398] ovl: fix failure to fsync lower dir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d796e77f1dd541fe34481af2eee6454688d13982 upstream. As a writable mount, it is not expected for overlayfs to return EINVAL/EROFS for fsync, even if dir/file is not changed. This commit fixes the case of fsync of directory, which is easier to address, because overlayfs already implements fsync file operation for directories. The problem reported by Raphael is that new PostgreSQL 10.0 with a database in overlayfs where lower layer in squashfs fails to start. The failure is due to fsync error, when PostgreSQL does fsync on all existing db directories on startup and a specific directory exists lower layer with no changes. Reported-by: Raphael Hertzog Signed-off-by: Amir Goldstein Tested-by: Raphaël Hertzog Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/readdir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index f241b4ee3d8a..a1be6baabca3 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -434,10 +434,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, struct dentry *dentry = file->f_path.dentry; struct file *realfile = od->realfile; + /* Nothing to sync for lower */ + if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) + return 0; + /* * Need to check if we started out being a lower dir, but got copied up */ - if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { + if (!od->is_upper) { struct inode *inode = file_inode(file); realfile = lockless_dereference(od->upperfile); -- GitLab From df113487f844b9ba36cd87ed6c68d006b3f2e519 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 1 Aug 2017 05:02:38 -0500 Subject: [PATCH 1373/1398] mn10300/misalignment: Use SIGSEGV SEGV_MAPERR to report a failed user copy commit 6ac1dc736b323011a55ecd1fc5897c24c4f77cbd upstream. Setting si_code to 0 is the same a setting si_code to SI_USER which is definitely not correct. With si_code set to SI_USER si_pid and si_uid will be copied to userspace instead of si_addr. Which is very wrong. So fix this by using a sensible si_code (SEGV_MAPERR) for this failure. Fixes: b920de1b77b7 ("mn10300: add the MN10300/AM33 architecture to the kernel") Cc: David Howells Cc: Masakazu Urade Cc: Koichi Yasutake Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/mn10300/mm/misalignment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mn10300/mm/misalignment.c b/arch/mn10300/mm/misalignment.c index b9920b1edd5a..70cef54dc40f 100644 --- a/arch/mn10300/mm/misalignment.c +++ b/arch/mn10300/mm/misalignment.c @@ -437,7 +437,7 @@ asmlinkage void misalignment(struct pt_regs *regs, enum exception_code code) info.si_signo = SIGSEGV; info.si_errno = 0; - info.si_code = 0; + info.si_code = SEGV_MAPERR; info.si_addr = (void *) regs->pc; force_sig_info(SIGSEGV, &info, current); return; -- GitLab From 2de1085e8deb0339effd860c2929848301d20d3d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 5 Feb 2018 22:05:31 -0500 Subject: [PATCH 1374/1398] ftrace: Remove incorrect setting of glob search field commit 7b6586562708d2b3a04fe49f217ddbadbbbb0546 upstream. __unregister_ftrace_function_probe() will incorrectly parse the glob filter because it resets the search variable that was setup by filter_parse_regex(). Al Viro reported this: After that call of filter_parse_regex() we could have func_g.search not equal to glob only if glob started with '!' or '*'. In the former case we would've buggered off with -EINVAL (not = 1). In the latter we would've set func_g.search equal to glob + 1, calculated the length of that thing in func_g.len and proceeded to reset func_g.search back to glob. Suppose the glob is e.g. *foo*. We end up with func_g.type = MATCH_MIDDLE_ONLY; func_g.len = 3; func_g.search = "*foo"; Feeding that to ftrace_match_record() will not do anything sane - we will be looking for names containing "*foo" (->len is ignored for that one). Link: http://lkml.kernel.org/r/20180127031706.GE13338@ZenIV.linux.org.uk Fixes: 3ba009297149f ("ftrace: Introduce ftrace_glob structure") Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Reviewed-by: Masami Hiramatsu Reported-by: Al Viro Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5b8d7189e147..2884fe01cb54 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3911,7 +3911,6 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, func_g.type = filter_parse_regex(glob, strlen(glob), &func_g.search, ¬); func_g.len = strlen(func_g.search); - func_g.search = glob; /* we do not support '!' for function probes */ if (WARN_ON(not)) -- GitLab From 3e598a7089eef1fe04d5b87cc154295302960e62 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 17 Feb 2018 13:21:21 +0100 Subject: [PATCH 1375/1398] Linux 4.9.82 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4d5753f1c37b..d338530540e0 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 81 +SUBLEVEL = 82 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 525fb3cac2e1923c009c401ece114e219d755b97 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Fri, 23 Dec 2016 13:13:27 +0100 Subject: [PATCH 1376/1398] BACKPORT: tee: add tee_param_is_memref() for driver use Change-Id: I917f754955a9d89d3216cb551488f28d8ed25cb2 Reviewed-by: Etienne Carriere Signed-off-by: Jens Wiklander (cherry picked from commit 84debcc53533f162bf11f24e6a503d227c175cbe) Signed-off-by: Victor Chong --- drivers/tee/tee_core.c | 16 ++-------------- include/linux/tee_drv.h | 12 ++++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index 58a5009eacc3..c78104589e42 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -221,18 +221,6 @@ static int params_to_user(struct tee_ioctl_param __user *uparams, return 0; } -static bool param_is_memref(struct tee_param *param) -{ - switch (param->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { - case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: - case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: - case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: - return true; - default: - return false; - } -} - static int tee_ioctl_open_session(struct tee_context *ctx, struct tee_ioctl_buf_data __user *ubuf) { @@ -296,7 +284,7 @@ static int tee_ioctl_open_session(struct tee_context *ctx, if (params) { /* Decrease ref count for all valid shared memory pointers */ for (n = 0; n < arg.num_params; n++) - if (param_is_memref(params + n) && + if (tee_param_is_memref(params + n) && params[n].u.memref.shm) tee_shm_put(params[n].u.memref.shm); kfree(params); @@ -358,7 +346,7 @@ static int tee_ioctl_invoke(struct tee_context *ctx, if (params) { /* Decrease ref count for all valid shared memory pointers */ for (n = 0; n < arg.num_params; n++) - if (param_is_memref(params + n) && + if (tee_param_is_memref(params + n) && params[n].u.memref.shm) tee_shm_put(params[n].u.memref.shm); kfree(params); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index cb889afe576b..f4a0ac05ebb4 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -275,4 +275,16 @@ int tee_shm_get_id(struct tee_shm *shm); */ struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id); +static inline bool tee_param_is_memref(struct tee_param *param) +{ + switch (param->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + return true; + default: + return false; + } +} + #endif /*__TEE_DRV_H*/ -- GitLab From 67f68799f4bd4a6d69ff1ff73bfa7cb967d0dad4 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Fri, 23 Dec 2016 13:13:34 +0100 Subject: [PATCH 1377/1398] BACKPORT: tee: add TEE_IOCTL_PARAM_ATTR_META Adds TEE_IOCTL_PARAM_ATTR_META which can be used to indicate meta parameters when communicating with user space. These meta parameters can be used by supplicant support multiple parallel requests at a time. Change-Id: Ie866c6119b8125c08b892b7a1a1929832b2f162c Reviewed-by: Etienne Carriere Signed-off-by: Jens Wiklander (cherry picked from commit f2aa97240c84b8f258710e297ba60048bd9c153e) Signed-off-by: Victor Chong --- drivers/tee/optee/supp.c | 25 +++++++++++++++++++++++++ drivers/tee/tee_core.c | 16 ++++++++++------ include/uapi/linux/tee.h | 7 +++++++ 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c index b4ea0678a436..56aa8b929b8c 100644 --- a/drivers/tee/optee/supp.c +++ b/drivers/tee/optee/supp.c @@ -119,6 +119,27 @@ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, return ret; } +static int supp_check_recv_params(size_t num_params, struct tee_param *params) +{ + size_t n; + + /* + * If there's memrefs we need to decrease those as they where + * increased earlier and we'll even refuse to accept any below. + */ + for (n = 0; n < num_params; n++) + if (tee_param_is_memref(params + n) && params[n].u.memref.shm) + tee_shm_put(params[n].u.memref.shm); + + /* + * We only expect parameters as TEE_IOCTL_PARAM_ATTR_TYPE_NONE (0). + */ + for (n = 0; n < num_params; n++) + if (params[n].attr) + return -EINVAL; + return 0; +} + /** * optee_supp_recv() - receive request for supplicant * @ctx: context receiving the request @@ -137,6 +158,10 @@ int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params, struct optee_supp *supp = &optee->supp; int rc; + rc = supp_check_recv_params(*num_params, param); + if (rc) + return rc; + /* * In case two threads in one supplicant is calling this function * simultaneously we need to protect the data with a mutex which diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index c78104589e42..4d0ce606f0fc 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -152,11 +152,11 @@ static int params_from_user(struct tee_context *ctx, struct tee_param *params, return -EFAULT; /* All unused attribute bits has to be zero */ - if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_TYPE_MASK) + if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_MASK) return -EINVAL; params[n].attr = ip.attr; - switch (ip.attr) { + switch (ip.attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { case TEE_IOCTL_PARAM_ATTR_TYPE_NONE: case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: break; @@ -394,8 +394,8 @@ static int params_to_supp(struct tee_context *ctx, struct tee_ioctl_param ip; struct tee_param *p = params + n; - ip.attr = p->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK; - switch (p->attr) { + ip.attr = p->attr; + switch (p->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT: case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: ip.a = p->u.value.a; @@ -459,6 +459,10 @@ static int tee_ioctl_supp_recv(struct tee_context *ctx, if (!params) return -ENOMEM; + rc = params_from_user(ctx, params, num_params, uarg->params); + if (rc) + goto out; + rc = ctx->teedev->desc->ops->supp_recv(ctx, &func, &num_params, params); if (rc) goto out; @@ -488,11 +492,11 @@ static int params_from_supp(struct tee_param *params, size_t num_params, return -EFAULT; /* All unused attribute bits has to be zero */ - if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_TYPE_MASK) + if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_MASK) return -EINVAL; p->attr = ip.attr; - switch (ip.attr) { + switch (ip.attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: /* Only out and in/out values can be updated */ diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index 688782e90140..267c12e7fd79 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -154,6 +154,13 @@ struct tee_ioctl_buf_data { */ #define TEE_IOCTL_PARAM_ATTR_TYPE_MASK 0xff +/* Meta parameter carrying extra information about the message. */ +#define TEE_IOCTL_PARAM_ATTR_META 0x100 + +/* Mask of all known attr bits */ +#define TEE_IOCTL_PARAM_ATTR_MASK \ + (TEE_IOCTL_PARAM_ATTR_TYPE_MASK | TEE_IOCTL_PARAM_ATTR_META) + /* * Matches TEEC_LOGIN_* in GP TEE Client API * Are only defined for GP compliant TEEs -- GitLab From ad675fa04ac59b6563808c970d46ff7281a300ef Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Fri, 23 Dec 2016 13:13:39 +0100 Subject: [PATCH 1378/1398] BACKPORT: optee: support asynchronous supplicant requests Adds support for asynchronous supplicant requests, meaning that the supplicant can process several requests in parallel or block in a request for some time. Change-Id: Ifa77c329d33f6884fc7a64b3594be2b33def492c Acked-by: Etienne Carriere Tested-by: Etienne Carriere (b2260 pager=y/n) Signed-off-by: Jens Wiklander (cherry picked from commit 1647a5ac175490d7dac2e74532e85b6197fc74e9) Signed-off-by: Victor Chong --- drivers/tee/optee/core.c | 11 +- drivers/tee/optee/optee_private.h | 43 ++-- drivers/tee/optee/rpc.c | 4 +- drivers/tee/optee/supp.c | 358 ++++++++++++++++++------------ 4 files changed, 243 insertions(+), 173 deletions(-) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index edb6e4e9ef3a..d0dd09219795 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -187,12 +187,12 @@ static int optee_open(struct tee_context *ctx) if (teedev == optee->supp_teedev) { bool busy = true; - mutex_lock(&optee->supp.ctx_mutex); + mutex_lock(&optee->supp.mutex); if (!optee->supp.ctx) { busy = false; optee->supp.ctx = ctx; } - mutex_unlock(&optee->supp.ctx_mutex); + mutex_unlock(&optee->supp.mutex); if (busy) { kfree(ctxdata); return -EBUSY; @@ -252,11 +252,8 @@ static void optee_release(struct tee_context *ctx) ctx->data = NULL; - if (teedev == optee->supp_teedev) { - mutex_lock(&optee->supp.ctx_mutex); - optee->supp.ctx = NULL; - mutex_unlock(&optee->supp.ctx_mutex); - } + if (teedev == optee->supp_teedev) + optee_supp_release(&optee->supp); } static const struct tee_driver_ops optee_ops = { diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index c374cd594314..3e7da187acbe 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -53,36 +53,24 @@ struct optee_wait_queue { * @ctx the context of current connected supplicant. * if !NULL the supplicant device is available for use, * else busy - * @ctx_mutex: held while accessing @ctx - * @func: supplicant function id to call - * @ret: call return value - * @num_params: number of elements in @param - * @param: parameters for @func - * @req_posted: if true, a request has been posted to the supplicant - * @supp_next_send: if true, next step is for supplicant to send response - * @thrd_mutex: held by the thread doing a request to supplicant - * @supp_mutex: held by supplicant while operating on this struct - * @data_to_supp: supplicant is waiting on this for next request - * @data_from_supp: requesting thread is waiting on this to get the result + * @mutex: held while accessing content of this struct + * @req_id: current request id if supplicant is doing synchronous + * communication, else -1 + * @reqs: queued request not yet retrieved by supplicant + * @idr: IDR holding all requests currently being processed + * by supplicant + * @reqs_c: completion used by supplicant when waiting for a + * request to be queued. */ struct optee_supp { + /* Serializes access to this struct */ + struct mutex mutex; struct tee_context *ctx; - /* Serializes access of ctx */ - struct mutex ctx_mutex; - - u32 func; - u32 ret; - size_t num_params; - struct tee_param *param; - - bool req_posted; - bool supp_next_send; - /* Serializes access to this struct for requesting thread */ - struct mutex thrd_mutex; - /* Serializes access to this struct for supplicant threads */ - struct mutex supp_mutex; - struct completion data_to_supp; - struct completion data_from_supp; + + int req_id; + struct list_head reqs; + struct idr idr; + struct completion reqs_c; }; /** @@ -142,6 +130,7 @@ int optee_supp_read(struct tee_context *ctx, void __user *buf, size_t len); int optee_supp_write(struct tee_context *ctx, void __user *buf, size_t len); void optee_supp_init(struct optee_supp *supp); void optee_supp_uninit(struct optee_supp *supp); +void optee_supp_release(struct optee_supp *supp); int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params, struct tee_param *param); diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c index cef417f4f4d2..c6df4317ca9f 100644 --- a/drivers/tee/optee/rpc.c +++ b/drivers/tee/optee/rpc.c @@ -192,10 +192,10 @@ static struct tee_shm *cmd_alloc_suppl(struct tee_context *ctx, size_t sz) if (ret) return ERR_PTR(-ENOMEM); - mutex_lock(&optee->supp.ctx_mutex); + mutex_lock(&optee->supp.mutex); /* Increases count as secure world doesn't have a reference */ shm = tee_shm_get_from_id(optee->supp.ctx, param.u.value.c); - mutex_unlock(&optee->supp.ctx_mutex); + mutex_unlock(&optee->supp.mutex); return shm; } diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c index 56aa8b929b8c..df35fc01fd3e 100644 --- a/drivers/tee/optee/supp.c +++ b/drivers/tee/optee/supp.c @@ -16,21 +16,61 @@ #include #include "optee_private.h" +struct optee_supp_req { + struct list_head link; + + bool busy; + u32 func; + u32 ret; + size_t num_params; + struct tee_param *param; + + struct completion c; +}; + void optee_supp_init(struct optee_supp *supp) { memset(supp, 0, sizeof(*supp)); - mutex_init(&supp->ctx_mutex); - mutex_init(&supp->thrd_mutex); - mutex_init(&supp->supp_mutex); - init_completion(&supp->data_to_supp); - init_completion(&supp->data_from_supp); + mutex_init(&supp->mutex); + init_completion(&supp->reqs_c); + idr_init(&supp->idr); + INIT_LIST_HEAD(&supp->reqs); + supp->req_id = -1; } void optee_supp_uninit(struct optee_supp *supp) { - mutex_destroy(&supp->ctx_mutex); - mutex_destroy(&supp->thrd_mutex); - mutex_destroy(&supp->supp_mutex); + mutex_destroy(&supp->mutex); + idr_destroy(&supp->idr); +} + +void optee_supp_release(struct optee_supp *supp) +{ + int id; + struct optee_supp_req *req; + struct optee_supp_req *req_tmp; + + mutex_lock(&supp->mutex); + + /* Abort all request retrieved by supplicant */ + idr_for_each_entry(&supp->idr, req, id) { + req->busy = false; + idr_remove(&supp->idr, id); + req->ret = TEEC_ERROR_COMMUNICATION; + complete(&req->c); + } + + /* Abort all queued requests */ + list_for_each_entry_safe(req, req_tmp, &supp->reqs, link) { + list_del(&req->link); + req->ret = TEEC_ERROR_COMMUNICATION; + complete(&req->c); + } + + supp->ctx = NULL; + supp->req_id = -1; + + mutex_unlock(&supp->mutex); } /** @@ -44,53 +84,42 @@ void optee_supp_uninit(struct optee_supp *supp) */ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, struct tee_param *param) + { - bool interruptable; struct optee *optee = tee_get_drvdata(ctx->teedev); struct optee_supp *supp = &optee->supp; + struct optee_supp_req *req = kzalloc(sizeof(*req), GFP_KERNEL); + bool interruptable; u32 ret; - /* - * Other threads blocks here until we've copied our answer from - * supplicant. - */ - while (mutex_lock_interruptible(&supp->thrd_mutex)) { - /* See comment below on when the RPC can be interrupted. */ - mutex_lock(&supp->ctx_mutex); - interruptable = !supp->ctx; - mutex_unlock(&supp->ctx_mutex); - if (interruptable) - return TEEC_ERROR_COMMUNICATION; - } + if (!req) + return TEEC_ERROR_OUT_OF_MEMORY; - /* - * We have exclusive access now since the supplicant at this - * point is either doing a - * wait_for_completion_interruptible(&supp->data_to_supp) or is in - * userspace still about to do the ioctl() to enter - * optee_supp_recv() below. - */ + init_completion(&req->c); + req->func = func; + req->num_params = num_params; + req->param = param; - supp->func = func; - supp->num_params = num_params; - supp->param = param; - supp->req_posted = true; + /* Insert the request in the request list */ + mutex_lock(&supp->mutex); + list_add_tail(&req->link, &supp->reqs); + mutex_unlock(&supp->mutex); - /* Let supplicant get the data */ - complete(&supp->data_to_supp); + /* Tell an eventual waiter there's a new request */ + complete(&supp->reqs_c); /* * Wait for supplicant to process and return result, once we've - * returned from wait_for_completion(data_from_supp) we have + * returned from wait_for_completion(&req->c) successfully we have * exclusive access again. */ - while (wait_for_completion_interruptible(&supp->data_from_supp)) { - mutex_lock(&supp->ctx_mutex); + while (wait_for_completion_interruptible(&req->c)) { + mutex_lock(&supp->mutex); interruptable = !supp->ctx; if (interruptable) { /* * There's no supplicant available and since the - * supp->ctx_mutex currently is held none can + * supp->mutex currently is held none can * become available until the mutex released * again. * @@ -101,28 +130,65 @@ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, * will serve all requests in a timely manner and * interrupting then wouldn't make sense. */ - supp->ret = TEEC_ERROR_COMMUNICATION; - init_completion(&supp->data_to_supp); + interruptable = !req->busy; + if (!req->busy) + list_del(&req->link); } - mutex_unlock(&supp->ctx_mutex); - if (interruptable) + mutex_unlock(&supp->mutex); + + if (interruptable) { + req->ret = TEEC_ERROR_COMMUNICATION; break; + } } - ret = supp->ret; - supp->param = NULL; - supp->req_posted = false; - - /* We're done, let someone else talk to the supplicant now. */ - mutex_unlock(&supp->thrd_mutex); + ret = req->ret; + kfree(req); return ret; } -static int supp_check_recv_params(size_t num_params, struct tee_param *params) +static struct optee_supp_req *supp_pop_entry(struct optee_supp *supp, + int num_params, int *id) +{ + struct optee_supp_req *req; + + if (supp->req_id != -1) { + /* + * Supplicant should not mix synchronous and asnynchronous + * requests. + */ + return ERR_PTR(-EINVAL); + } + + if (list_empty(&supp->reqs)) + return NULL; + + req = list_first_entry(&supp->reqs, struct optee_supp_req, link); + + if (num_params < req->num_params) { + /* Not enough room for parameters */ + return ERR_PTR(-EINVAL); + } + + *id = idr_alloc(&supp->idr, req, 1, 0, GFP_KERNEL); + if (*id < 0) + return ERR_PTR(-ENOMEM); + + list_del(&req->link); + req->busy = true; + + return req; +} + +static int supp_check_recv_params(size_t num_params, struct tee_param *params, + size_t *num_meta) { size_t n; + if (!num_params) + return -EINVAL; + /* * If there's memrefs we need to decrease those as they where * increased earlier and we'll even refuse to accept any below. @@ -132,11 +198,20 @@ static int supp_check_recv_params(size_t num_params, struct tee_param *params) tee_shm_put(params[n].u.memref.shm); /* - * We only expect parameters as TEE_IOCTL_PARAM_ATTR_TYPE_NONE (0). + * We only expect parameters as TEE_IOCTL_PARAM_ATTR_TYPE_NONE with + * or without the TEE_IOCTL_PARAM_ATTR_META bit set. */ for (n = 0; n < num_params; n++) - if (params[n].attr) + if (params[n].attr && + params[n].attr != TEE_IOCTL_PARAM_ATTR_META) return -EINVAL; + + /* At most we'll need one meta parameter so no need to check for more */ + if (params->attr == TEE_IOCTL_PARAM_ATTR_META) + *num_meta = 1; + else + *num_meta = 0; + return 0; } @@ -156,69 +231,99 @@ int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params, struct tee_device *teedev = ctx->teedev; struct optee *optee = tee_get_drvdata(teedev); struct optee_supp *supp = &optee->supp; + struct optee_supp_req *req = NULL; + int id; + size_t num_meta; int rc; - rc = supp_check_recv_params(*num_params, param); + rc = supp_check_recv_params(*num_params, param, &num_meta); if (rc) return rc; - /* - * In case two threads in one supplicant is calling this function - * simultaneously we need to protect the data with a mutex which - * we'll release before returning. - */ - mutex_lock(&supp->supp_mutex); + while (true) { + mutex_lock(&supp->mutex); + req = supp_pop_entry(supp, *num_params - num_meta, &id); + mutex_unlock(&supp->mutex); + + if (req) { + if (IS_ERR(req)) + return PTR_ERR(req); + break; + } - if (supp->supp_next_send) { /* - * optee_supp_recv() has been called again without - * a optee_supp_send() in between. Supplicant has - * probably been restarted before it was able to - * write back last result. Abort last request and - * wait for a new. + * If we didn't get a request we'll block in + * wait_for_completion() to avoid needless spinning. + * + * This is where supplicant will be hanging most of + * the time, let's make this interruptable so we + * can easily restart supplicant if needed. */ - if (supp->req_posted) { - supp->ret = TEEC_ERROR_COMMUNICATION; - supp->supp_next_send = false; - complete(&supp->data_from_supp); - } + if (wait_for_completion_interruptible(&supp->reqs_c)) + return -ERESTARTSYS; } - /* - * This is where supplicant will be hanging most of the - * time, let's make this interruptable so we can easily - * restart supplicant if needed. - */ - if (wait_for_completion_interruptible(&supp->data_to_supp)) { - rc = -ERESTARTSYS; - goto out; + if (num_meta) { + /* + * tee-supplicant support meta parameters -> requsts can be + * processed asynchronously. + */ + param->attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT | + TEE_IOCTL_PARAM_ATTR_META; + param->u.value.a = id; + param->u.value.b = 0; + param->u.value.c = 0; + } else { + mutex_lock(&supp->mutex); + supp->req_id = id; + mutex_unlock(&supp->mutex); } - /* We have exlusive access to the data */ + *func = req->func; + *num_params = req->num_params + num_meta; + memcpy(param + num_meta, req->param, + sizeof(struct tee_param) * req->num_params); - if (*num_params < supp->num_params) { - /* - * Not enough room for parameters, tell supplicant - * it failed and abort last request. - */ - supp->ret = TEEC_ERROR_COMMUNICATION; - rc = -EINVAL; - complete(&supp->data_from_supp); - goto out; + return 0; +} + +static struct optee_supp_req *supp_pop_req(struct optee_supp *supp, + size_t num_params, + struct tee_param *param, + size_t *num_meta) +{ + struct optee_supp_req *req; + int id; + size_t nm; + const u32 attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT | + TEE_IOCTL_PARAM_ATTR_META; + + if (!num_params) + return ERR_PTR(-EINVAL); + + if (supp->req_id == -1) { + if (param->attr != attr) + return ERR_PTR(-EINVAL); + id = param->u.value.a; + nm = 1; + } else { + id = supp->req_id; + nm = 0; } - *func = supp->func; - *num_params = supp->num_params; - memcpy(param, supp->param, - sizeof(struct tee_param) * supp->num_params); + req = idr_find(&supp->idr, id); + if (!req) + return ERR_PTR(-ENOENT); + + if ((num_params - nm) != req->num_params) + return ERR_PTR(-EINVAL); - /* Allow optee_supp_send() below to do its work */ - supp->supp_next_send = true; + req->busy = false; + idr_remove(&supp->idr, id); + supp->req_id = -1; + *num_meta = nm; - rc = 0; -out: - mutex_unlock(&supp->supp_mutex); - return rc; + return req; } /** @@ -236,63 +341,42 @@ int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params, struct tee_device *teedev = ctx->teedev; struct optee *optee = tee_get_drvdata(teedev); struct optee_supp *supp = &optee->supp; + struct optee_supp_req *req; size_t n; - int rc = 0; + size_t num_meta; - /* - * We still have exclusive access to the data since that's how we - * left it when returning from optee_supp_read(). - */ + mutex_lock(&supp->mutex); + req = supp_pop_req(supp, num_params, param, &num_meta); + mutex_unlock(&supp->mutex); - /* See comment on mutex in optee_supp_read() above */ - mutex_lock(&supp->supp_mutex); - - if (!supp->supp_next_send) { - /* - * Something strange is going on, supplicant shouldn't - * enter optee_supp_send() in this state - */ - rc = -ENOENT; - goto out; - } - - if (num_params != supp->num_params) { - /* - * Something is wrong, let supplicant restart. Next call to - * optee_supp_recv() will give an error to the requesting - * thread and release it. - */ - rc = -EINVAL; - goto out; + if (IS_ERR(req)) { + /* Something is wrong, let supplicant restart. */ + return PTR_ERR(req); } /* Update out and in/out parameters */ - for (n = 0; n < num_params; n++) { - struct tee_param *p = supp->param + n; + for (n = 0; n < req->num_params; n++) { + struct tee_param *p = req->param + n; - switch (p->attr) { + switch (p->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: - p->u.value.a = param[n].u.value.a; - p->u.value.b = param[n].u.value.b; - p->u.value.c = param[n].u.value.c; + p->u.value.a = param[n + num_meta].u.value.a; + p->u.value.b = param[n + num_meta].u.value.b; + p->u.value.c = param[n + num_meta].u.value.c; break; case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: - p->u.memref.size = param[n].u.memref.size; + p->u.memref.size = param[n + num_meta].u.memref.size; break; default: break; } } - supp->ret = ret; - - /* Allow optee_supp_recv() above to do its work */ - supp->supp_next_send = false; + req->ret = ret; /* Let the requesting thread continue */ - complete(&supp->data_from_supp); -out: - mutex_unlock(&supp->supp_mutex); - return rc; + complete(&req->c); + + return 0; } -- GitLab From 2d46c7f26b0549dcd9eaf45be5517757d0cb58ff Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Wed, 29 Nov 2017 14:48:25 +0200 Subject: [PATCH 1379/1398] BACKPORT: tee: flexible shared memory pool creation Makes creation of shm pools more flexible by adding new more primitive functions to allocate a shm pool. This makes it easier to add driver specific shm pool management. Change-Id: I9ab51718083e0b86a47b9c56f32c57d69b43e30d Signed-off-by: Jens Wiklander Signed-off-by: Volodymyr Babchuk (cherry picked from commit e2aca5d8928acb9cc9a87802b02102d4f9b9b596) Signed-off-by: Victor Chong --- drivers/tee/tee_private.h | 57 +------------ drivers/tee/tee_shm.c | 8 +- drivers/tee/tee_shm_pool.c | 165 +++++++++++++++++++++++-------------- include/linux/tee_drv.h | 91 ++++++++++++++++++++ 4 files changed, 199 insertions(+), 122 deletions(-) diff --git a/drivers/tee/tee_private.h b/drivers/tee/tee_private.h index 21cb6be8bce9..2bc2b5ab1661 100644 --- a/drivers/tee/tee_private.h +++ b/drivers/tee/tee_private.h @@ -21,68 +21,15 @@ #include #include -struct tee_device; - -/** - * struct tee_shm - shared memory object - * @teedev: device used to allocate the object - * @ctx: context using the object, if NULL the context is gone - * @link link element - * @paddr: physical address of the shared memory - * @kaddr: virtual address of the shared memory - * @size: size of shared memory - * @dmabuf: dmabuf used to for exporting to user space - * @flags: defined by TEE_SHM_* in tee_drv.h - * @id: unique id of a shared memory object on this device - */ -struct tee_shm { - struct tee_device *teedev; - struct tee_context *ctx; - struct list_head link; - phys_addr_t paddr; - void *kaddr; - size_t size; - struct dma_buf *dmabuf; - u32 flags; - int id; -}; - -struct tee_shm_pool_mgr; - -/** - * struct tee_shm_pool_mgr_ops - shared memory pool manager operations - * @alloc: called when allocating shared memory - * @free: called when freeing shared memory - */ -struct tee_shm_pool_mgr_ops { - int (*alloc)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm, - size_t size); - void (*free)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm); -}; - -/** - * struct tee_shm_pool_mgr - shared memory manager - * @ops: operations - * @private_data: private data for the shared memory manager - */ -struct tee_shm_pool_mgr { - const struct tee_shm_pool_mgr_ops *ops; - void *private_data; -}; - /** * struct tee_shm_pool - shared memory pool * @private_mgr: pool manager for shared memory only between kernel * and secure world * @dma_buf_mgr: pool manager for shared memory exported to user space - * @destroy: called when destroying the pool - * @private_data: private data for the pool */ struct tee_shm_pool { - struct tee_shm_pool_mgr private_mgr; - struct tee_shm_pool_mgr dma_buf_mgr; - void (*destroy)(struct tee_shm_pool *pool); - void *private_data; + struct tee_shm_pool_mgr *private_mgr; + struct tee_shm_pool_mgr *dma_buf_mgr; }; #define TEE_DEVICE_FLAG_REGISTERED 0x1 diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 37207ae2de7b..a3172bc5be69 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -32,9 +32,9 @@ static void tee_shm_release(struct tee_shm *shm) mutex_unlock(&teedev->mutex); if (shm->flags & TEE_SHM_DMA_BUF) - poolm = &teedev->pool->dma_buf_mgr; + poolm = teedev->pool->dma_buf_mgr; else - poolm = &teedev->pool->private_mgr; + poolm = teedev->pool->private_mgr; poolm->ops->free(poolm, shm); kfree(shm); @@ -139,9 +139,9 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) shm->teedev = teedev; shm->ctx = ctx; if (flags & TEE_SHM_DMA_BUF) - poolm = &teedev->pool->dma_buf_mgr; + poolm = teedev->pool->dma_buf_mgr; else - poolm = &teedev->pool->private_mgr; + poolm = teedev->pool->private_mgr; rc = poolm->ops->alloc(poolm, shm, size); if (rc) { diff --git a/drivers/tee/tee_shm_pool.c b/drivers/tee/tee_shm_pool.c index fb4f8522a526..e6d4b9e4a864 100644 --- a/drivers/tee/tee_shm_pool.c +++ b/drivers/tee/tee_shm_pool.c @@ -44,49 +44,18 @@ static void pool_op_gen_free(struct tee_shm_pool_mgr *poolm, shm->kaddr = NULL; } +static void pool_op_gen_destroy_poolmgr(struct tee_shm_pool_mgr *poolm) +{ + gen_pool_destroy(poolm->private_data); + kfree(poolm); +} + static const struct tee_shm_pool_mgr_ops pool_ops_generic = { .alloc = pool_op_gen_alloc, .free = pool_op_gen_free, + .destroy_poolmgr = pool_op_gen_destroy_poolmgr, }; -static void pool_res_mem_destroy(struct tee_shm_pool *pool) -{ - gen_pool_destroy(pool->private_mgr.private_data); - gen_pool_destroy(pool->dma_buf_mgr.private_data); -} - -static int pool_res_mem_mgr_init(struct tee_shm_pool_mgr *mgr, - struct tee_shm_pool_mem_info *info, - int min_alloc_order) -{ - size_t page_mask = PAGE_SIZE - 1; - struct gen_pool *genpool = NULL; - int rc; - - /* - * Start and end must be page aligned - */ - if ((info->vaddr & page_mask) || (info->paddr & page_mask) || - (info->size & page_mask)) - return -EINVAL; - - genpool = gen_pool_create(min_alloc_order, -1); - if (!genpool) - return -ENOMEM; - - gen_pool_set_algo(genpool, gen_pool_best_fit, NULL); - rc = gen_pool_add_virt(genpool, info->vaddr, info->paddr, info->size, - -1); - if (rc) { - gen_pool_destroy(genpool); - return rc; - } - - mgr->private_data = genpool; - mgr->ops = &pool_ops_generic; - return 0; -} - /** * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved * memory range @@ -104,42 +73,109 @@ struct tee_shm_pool * tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info, struct tee_shm_pool_mem_info *dmabuf_info) { - struct tee_shm_pool *pool = NULL; - int ret; - - pool = kzalloc(sizeof(*pool), GFP_KERNEL); - if (!pool) { - ret = -ENOMEM; - goto err; - } + struct tee_shm_pool_mgr *priv_mgr; + struct tee_shm_pool_mgr *dmabuf_mgr; + void *rc; /* * Create the pool for driver private shared memory */ - ret = pool_res_mem_mgr_init(&pool->private_mgr, priv_info, - 3 /* 8 byte aligned */); - if (ret) - goto err; + rc = tee_shm_pool_mgr_alloc_res_mem(priv_info->vaddr, priv_info->paddr, + priv_info->size, + 3 /* 8 byte aligned */); + if (IS_ERR(rc)) + return rc; + priv_mgr = rc; /* * Create the pool for dma_buf shared memory */ - ret = pool_res_mem_mgr_init(&pool->dma_buf_mgr, dmabuf_info, - PAGE_SHIFT); - if (ret) + rc = tee_shm_pool_mgr_alloc_res_mem(dmabuf_info->vaddr, + dmabuf_info->paddr, + dmabuf_info->size, PAGE_SHIFT); + if (IS_ERR(rc)) + goto err_free_priv_mgr; + dmabuf_mgr = rc; + + rc = tee_shm_pool_alloc(priv_mgr, dmabuf_mgr); + if (IS_ERR(rc)) + goto err_free_dmabuf_mgr; + + return rc; + +err_free_dmabuf_mgr: + tee_shm_pool_mgr_destroy(dmabuf_mgr); +err_free_priv_mgr: + tee_shm_pool_mgr_destroy(priv_mgr); + + return rc; +} +EXPORT_SYMBOL_GPL(tee_shm_pool_alloc_res_mem); + +struct tee_shm_pool_mgr *tee_shm_pool_mgr_alloc_res_mem(unsigned long vaddr, + phys_addr_t paddr, + size_t size, + int min_alloc_order) +{ + const size_t page_mask = PAGE_SIZE - 1; + struct tee_shm_pool_mgr *mgr; + int rc; + + /* Start and end must be page aligned */ + if (vaddr & page_mask || paddr & page_mask || size & page_mask) + return ERR_PTR(-EINVAL); + + mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return ERR_PTR(-ENOMEM); + + mgr->private_data = gen_pool_create(min_alloc_order, -1); + if (!mgr->private_data) { + rc = -ENOMEM; goto err; + } - pool->destroy = pool_res_mem_destroy; - return pool; + gen_pool_set_algo(mgr->private_data, gen_pool_best_fit, NULL); + rc = gen_pool_add_virt(mgr->private_data, vaddr, paddr, size, -1); + if (rc) { + gen_pool_destroy(mgr->private_data); + goto err; + } + + mgr->ops = &pool_ops_generic; + + return mgr; err: - if (ret == -ENOMEM) - pr_err("%s: can't allocate memory for res_mem shared memory pool\n", __func__); - if (pool && pool->private_mgr.private_data) - gen_pool_destroy(pool->private_mgr.private_data); - kfree(pool); - return ERR_PTR(ret); + kfree(mgr); + + return ERR_PTR(rc); } -EXPORT_SYMBOL_GPL(tee_shm_pool_alloc_res_mem); +EXPORT_SYMBOL_GPL(tee_shm_pool_mgr_alloc_res_mem); + +static bool check_mgr_ops(struct tee_shm_pool_mgr *mgr) +{ + return mgr && mgr->ops && mgr->ops->alloc && mgr->ops->free && + mgr->ops->destroy_poolmgr; +} + +struct tee_shm_pool *tee_shm_pool_alloc(struct tee_shm_pool_mgr *priv_mgr, + struct tee_shm_pool_mgr *dmabuf_mgr) +{ + struct tee_shm_pool *pool; + + if (!check_mgr_ops(priv_mgr) || !check_mgr_ops(dmabuf_mgr)) + return ERR_PTR(-EINVAL); + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return ERR_PTR(-ENOMEM); + + pool->private_mgr = priv_mgr; + pool->dma_buf_mgr = dmabuf_mgr; + + return pool; +} +EXPORT_SYMBOL_GPL(tee_shm_pool_alloc); /** * tee_shm_pool_free() - Free a shared memory pool @@ -150,7 +186,10 @@ EXPORT_SYMBOL_GPL(tee_shm_pool_alloc_res_mem); */ void tee_shm_pool_free(struct tee_shm_pool *pool) { - pool->destroy(pool); + if (pool->private_mgr) + tee_shm_pool_mgr_destroy(pool->private_mgr); + if (pool->dma_buf_mgr) + tee_shm_pool_mgr_destroy(pool->dma_buf_mgr); kfree(pool); } EXPORT_SYMBOL_GPL(tee_shm_pool_free); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index f4a0ac05ebb4..ead79b560b9d 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -149,6 +149,97 @@ int tee_device_register(struct tee_device *teedev); */ void tee_device_unregister(struct tee_device *teedev); +/** + * struct tee_shm - shared memory object + * @teedev: device used to allocate the object + * @ctx: context using the object, if NULL the context is gone + * @link link element + * @paddr: physical address of the shared memory + * @kaddr: virtual address of the shared memory + * @size: size of shared memory + * @offset: offset of buffer in user space + * @pages: locked pages from userspace + * @num_pages: number of locked pages + * @dmabuf: dmabuf used to for exporting to user space + * @flags: defined by TEE_SHM_* in tee_drv.h + * @id: unique id of a shared memory object on this device + * + * This pool is only supposed to be accessed directly from the TEE + * subsystem and from drivers that implements their own shm pool manager. + */ +struct tee_shm { + struct tee_device *teedev; + struct tee_context *ctx; + struct list_head link; + phys_addr_t paddr; + void *kaddr; + size_t size; + unsigned int offset; + struct page **pages; + size_t num_pages; + struct dma_buf *dmabuf; + u32 flags; + int id; +}; + +/** + * struct tee_shm_pool_mgr - shared memory manager + * @ops: operations + * @private_data: private data for the shared memory manager + */ +struct tee_shm_pool_mgr { + const struct tee_shm_pool_mgr_ops *ops; + void *private_data; +}; + +/** + * struct tee_shm_pool_mgr_ops - shared memory pool manager operations + * @alloc: called when allocating shared memory + * @free: called when freeing shared memory + * @destroy_poolmgr: called when destroying the pool manager + */ +struct tee_shm_pool_mgr_ops { + int (*alloc)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm, + size_t size); + void (*free)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm); + void (*destroy_poolmgr)(struct tee_shm_pool_mgr *poolmgr); +}; + +/** + * tee_shm_pool_alloc() - Create a shared memory pool from shm managers + * @priv_mgr: manager for driver private shared memory allocations + * @dmabuf_mgr: manager for dma-buf shared memory allocations + * + * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied + * in @dmabuf, others will use the range provided by @priv. + * + * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. + */ +struct tee_shm_pool *tee_shm_pool_alloc(struct tee_shm_pool_mgr *priv_mgr, + struct tee_shm_pool_mgr *dmabuf_mgr); + +/* + * tee_shm_pool_mgr_alloc_res_mem() - Create a shm manager for reserved + * memory + * @vaddr: Virtual address of start of pool + * @paddr: Physical address of start of pool + * @size: Size in bytes of the pool + * + * @returns pointer to a 'struct tee_shm_pool_mgr' or an ERR_PTR on failure. + */ +struct tee_shm_pool_mgr *tee_shm_pool_mgr_alloc_res_mem(unsigned long vaddr, + phys_addr_t paddr, + size_t size, + int min_alloc_order); + +/** + * tee_shm_pool_mgr_destroy() - Free a shared memory manager + */ +static inline void tee_shm_pool_mgr_destroy(struct tee_shm_pool_mgr *poolm) +{ + poolm->ops->destroy_poolmgr(poolm); +} + /** * struct tee_shm_pool_mem_info - holds information needed to create a shared * memory pool -- GitLab From f902700b2419e66811d5276fb8c641a84d7aef39 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Wed, 29 Nov 2017 14:48:26 +0200 Subject: [PATCH 1380/1398] BACKPORT: tee: add register user memory Added new ioctl to allow users register own buffers as a shared memory. Change-Id: Icd25a3601884472fd42feac8aff03bb52e4d7e87 Signed-off-by: Volodymyr Babchuk [jw: moved tee_shm_is_registered() declaration] [jw: added space after __tee_shm_alloc() implementation] Signed-off-by: Jens Wiklander (cherry picked from commit 033ddf12bcf5326b93bd604f50a7474a434a35f9) Signed-off-by: Victor Chong --- drivers/tee/tee_core.c | 41 +++++++- drivers/tee/tee_shm.c | 206 ++++++++++++++++++++++++++++++++++----- include/linux/tee_drv.h | 47 ++++++++- include/uapi/linux/tee.h | 30 ++++++ 4 files changed, 294 insertions(+), 30 deletions(-) diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index 4d0ce606f0fc..bb88298cea3a 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -114,8 +114,6 @@ static int tee_ioctl_shm_alloc(struct tee_context *ctx, if (data.flags) return -EINVAL; - data.id = -1; - shm = tee_shm_alloc(ctx, data.size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); if (IS_ERR(shm)) return PTR_ERR(shm); @@ -138,6 +136,43 @@ static int tee_ioctl_shm_alloc(struct tee_context *ctx, return ret; } +static int +tee_ioctl_shm_register(struct tee_context *ctx, + struct tee_ioctl_shm_register_data __user *udata) +{ + long ret; + struct tee_ioctl_shm_register_data data; + struct tee_shm *shm; + + if (copy_from_user(&data, udata, sizeof(data))) + return -EFAULT; + + /* Currently no input flags are supported */ + if (data.flags) + return -EINVAL; + + shm = tee_shm_register(ctx, data.addr, data.length, + TEE_SHM_DMA_BUF | TEE_SHM_USER_MAPPED); + if (IS_ERR(shm)) + return PTR_ERR(shm); + + data.id = shm->id; + data.flags = shm->flags; + data.length = shm->size; + + if (copy_to_user(udata, &data, sizeof(data))) + ret = -EFAULT; + else + ret = tee_shm_get_fd(shm); + /* + * When user space closes the file descriptor the shared memory + * should be freed or if tee_shm_get_fd() failed then it will + * be freed immediately. + */ + tee_shm_put(shm); + return ret; +} + static int params_from_user(struct tee_context *ctx, struct tee_param *params, size_t num_params, struct tee_ioctl_param __user *uparams) @@ -578,6 +613,8 @@ static long tee_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return tee_ioctl_version(ctx, uarg); case TEE_IOC_SHM_ALLOC: return tee_ioctl_shm_alloc(ctx, uarg); + case TEE_IOC_SHM_REGISTER: + return tee_ioctl_shm_register(ctx, uarg); case TEE_IOC_OPEN_SESSION: return tee_ioctl_open_session(ctx, uarg); case TEE_IOC_INVOKE: diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index a3172bc5be69..ad06723c6dd2 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -23,7 +23,6 @@ static void tee_shm_release(struct tee_shm *shm) { struct tee_device *teedev = shm->teedev; - struct tee_shm_pool_mgr *poolm; mutex_lock(&teedev->mutex); idr_remove(&teedev->idr, shm->id); @@ -31,12 +30,29 @@ static void tee_shm_release(struct tee_shm *shm) list_del(&shm->link); mutex_unlock(&teedev->mutex); - if (shm->flags & TEE_SHM_DMA_BUF) - poolm = teedev->pool->dma_buf_mgr; - else - poolm = teedev->pool->private_mgr; + if (shm->flags & TEE_SHM_POOL) { + struct tee_shm_pool_mgr *poolm; + + if (shm->flags & TEE_SHM_DMA_BUF) + poolm = teedev->pool->dma_buf_mgr; + else + poolm = teedev->pool->private_mgr; + + poolm->ops->free(poolm, shm); + } else if (shm->flags & TEE_SHM_REGISTER) { + size_t n; + int rc = teedev->desc->ops->shm_unregister(shm->ctx, shm); + + if (rc) + dev_err(teedev->dev.parent, + "unregister shm %p failed: %d", shm, rc); + + for (n = 0; n < shm->num_pages; n++) + put_page(shm->pages[n]); + + kfree(shm->pages); + } - poolm->ops->free(poolm, shm); kfree(shm); tee_device_put(teedev); @@ -76,6 +92,10 @@ static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) struct tee_shm *shm = dmabuf->priv; size_t size = vma->vm_end - vma->vm_start; + /* Refuse sharing shared memory provided by application */ + if (shm->flags & TEE_SHM_REGISTER) + return -EINVAL; + return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT, size, vma->vm_page_prot); } @@ -89,26 +109,20 @@ static const struct dma_buf_ops tee_shm_dma_buf_ops = { .mmap = tee_shm_op_mmap, }; -/** - * tee_shm_alloc() - Allocate shared memory - * @ctx: Context that allocates the shared memory - * @size: Requested size of shared memory - * @flags: Flags setting properties for the requested shared memory. - * - * Memory allocated as global shared memory is automatically freed when the - * TEE file pointer is closed. The @flags field uses the bits defined by - * TEE_SHM_* in . TEE_SHM_MAPPED must currently always be - * set. If TEE_SHM_DMA_BUF global shared memory will be allocated and - * associated with a dma-buf handle, else driver private memory. - */ -struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) +struct tee_shm *__tee_shm_alloc(struct tee_context *ctx, + struct tee_device *teedev, + size_t size, u32 flags) { - struct tee_device *teedev = ctx->teedev; struct tee_shm_pool_mgr *poolm = NULL; struct tee_shm *shm; void *ret; int rc; + if (ctx && ctx->teedev != teedev) { + dev_err(teedev->dev.parent, "ctx and teedev mismatch\n"); + return ERR_PTR(-EINVAL); + } + if (!(flags & TEE_SHM_MAPPED)) { dev_err(teedev->dev.parent, "only mapped allocations supported\n"); @@ -135,7 +149,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) goto err_dev_put; } - shm->flags = flags; + shm->flags = flags | TEE_SHM_POOL; shm->teedev = teedev; shm->ctx = ctx; if (flags & TEE_SHM_DMA_BUF) @@ -171,9 +185,12 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) goto err_rem; } } - mutex_lock(&teedev->mutex); - list_add_tail(&shm->link, &ctx->list_shm); - mutex_unlock(&teedev->mutex); + + if (ctx) { + mutex_lock(&teedev->mutex); + list_add_tail(&shm->link, &ctx->list_shm); + mutex_unlock(&teedev->mutex); + } return shm; err_rem: @@ -188,8 +205,140 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) tee_device_put(teedev); return ret; } + +/** + * tee_shm_alloc() - Allocate shared memory + * @ctx: Context that allocates the shared memory + * @size: Requested size of shared memory + * @flags: Flags setting properties for the requested shared memory. + * + * Memory allocated as global shared memory is automatically freed when the + * TEE file pointer is closed. The @flags field uses the bits defined by + * TEE_SHM_* in . TEE_SHM_MAPPED must currently always be + * set. If TEE_SHM_DMA_BUF global shared memory will be allocated and + * associated with a dma-buf handle, else driver private memory. + */ +struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) +{ + return __tee_shm_alloc(ctx, ctx->teedev, size, flags); +} EXPORT_SYMBOL_GPL(tee_shm_alloc); +struct tee_shm *tee_shm_priv_alloc(struct tee_device *teedev, size_t size) +{ + return __tee_shm_alloc(NULL, teedev, size, TEE_SHM_MAPPED); +} +EXPORT_SYMBOL_GPL(tee_shm_priv_alloc); + +struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, + size_t length, u32 flags) +{ + struct tee_device *teedev = ctx->teedev; + const u32 req_flags = TEE_SHM_DMA_BUF | TEE_SHM_USER_MAPPED; + struct tee_shm *shm; + void *ret; + int rc; + int num_pages; + unsigned long start; + + if (flags != req_flags) + return ERR_PTR(-ENOTSUPP); + + if (!tee_device_get(teedev)) + return ERR_PTR(-EINVAL); + + if (!teedev->desc->ops->shm_register || + !teedev->desc->ops->shm_unregister) { + tee_device_put(teedev); + return ERR_PTR(-ENOTSUPP); + } + + shm = kzalloc(sizeof(*shm), GFP_KERNEL); + if (!shm) { + ret = ERR_PTR(-ENOMEM); + goto err; + } + + shm->flags = flags | TEE_SHM_REGISTER; + shm->teedev = teedev; + shm->ctx = ctx; + shm->id = -1; + start = rounddown(addr, PAGE_SIZE); + shm->offset = addr - start; + shm->size = length; + num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE; + shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL); + if (!shm->pages) { + ret = ERR_PTR(-ENOMEM); + goto err; + } + + rc = get_user_pages_fast(start, num_pages, 1, shm->pages); + if (rc > 0) + shm->num_pages = rc; + if (rc != num_pages) { + if (rc > 0) + rc = -ENOMEM; + ret = ERR_PTR(rc); + goto err; + } + + mutex_lock(&teedev->mutex); + shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL); + mutex_unlock(&teedev->mutex); + + if (shm->id < 0) { + ret = ERR_PTR(shm->id); + goto err; + } + + rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages, + shm->num_pages); + if (rc) { + ret = ERR_PTR(rc); + goto err; + } + + if (flags & TEE_SHM_DMA_BUF) { + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + exp_info.ops = &tee_shm_dma_buf_ops; + exp_info.size = shm->size; + exp_info.flags = O_RDWR; + exp_info.priv = shm; + + shm->dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(shm->dmabuf)) { + ret = ERR_CAST(shm->dmabuf); + teedev->desc->ops->shm_unregister(ctx, shm); + goto err; + } + } + + mutex_lock(&teedev->mutex); + list_add_tail(&shm->link, &ctx->list_shm); + mutex_unlock(&teedev->mutex); + + return shm; +err: + if (shm) { + size_t n; + + if (shm->id >= 0) { + mutex_lock(&teedev->mutex); + idr_remove(&teedev->idr, shm->id); + mutex_unlock(&teedev->mutex); + } + for (n = 0; n < shm->num_pages; n++) + put_page(shm->pages[n]); + kfree(shm->pages); + } + kfree(shm); + tee_device_put(teedev); + return ret; +} +EXPORT_SYMBOL_GPL(tee_shm_register); + /** * tee_shm_get_fd() - Increase reference count and return file descriptor * @shm: Shared memory handle @@ -197,10 +346,9 @@ EXPORT_SYMBOL_GPL(tee_shm_alloc); */ int tee_shm_get_fd(struct tee_shm *shm) { - u32 req_flags = TEE_SHM_MAPPED | TEE_SHM_DMA_BUF; int fd; - if ((shm->flags & req_flags) != req_flags) + if (!(shm->flags & TEE_SHM_DMA_BUF)) return -EINVAL; fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC); @@ -238,6 +386,8 @@ EXPORT_SYMBOL_GPL(tee_shm_free); */ int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa) { + if (!(shm->flags & TEE_SHM_MAPPED)) + return -EINVAL; /* Check that we're in the range of the shm */ if ((char *)va < (char *)shm->kaddr) return -EINVAL; @@ -258,6 +408,8 @@ EXPORT_SYMBOL_GPL(tee_shm_va2pa); */ int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va) { + if (!(shm->flags & TEE_SHM_MAPPED)) + return -EINVAL; /* Check that we're in the range of the shm */ if (pa < shm->paddr) return -EINVAL; @@ -284,6 +436,8 @@ EXPORT_SYMBOL_GPL(tee_shm_pa2va); */ void *tee_shm_get_va(struct tee_shm *shm, size_t offs) { + if (!(shm->flags & TEE_SHM_MAPPED)) + return ERR_PTR(-EINVAL); if (offs >= shm->size) return ERR_PTR(-EINVAL); return (char *)shm->kaddr + offs; diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index ead79b560b9d..0f9e574299b6 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -25,8 +25,12 @@ * specific TEE driver. */ -#define TEE_SHM_MAPPED 0x1 /* Memory mapped by the kernel */ -#define TEE_SHM_DMA_BUF 0x2 /* Memory with dma-buf handle */ +#define TEE_SHM_MAPPED BIT(0) /* Memory mapped by the kernel */ +#define TEE_SHM_DMA_BUF BIT(1) /* Memory with dma-buf handle */ +#define TEE_SHM_EXT_DMA_BUF BIT(2) /* Memory with dma-buf handle */ +#define TEE_SHM_REGISTER BIT(3) /* Memory registered in secure world */ +#define TEE_SHM_USER_MAPPED BIT(4) /* Memory mapped in user space */ +#define TEE_SHM_POOL BIT(5) /* Memory allocated from pool */ struct device; struct tee_device; @@ -76,6 +80,8 @@ struct tee_param { * @cancel_req: request cancel of an ongoing invoke or open * @supp_revc: called for supplicant to get a command * @supp_send: called for supplicant to send a response + * @shm_register: register shared memory buffer in TEE + * @shm_unregister: unregister shared memory buffer in TEE */ struct tee_driver_ops { void (*get_version)(struct tee_device *teedev, @@ -94,6 +100,9 @@ struct tee_driver_ops { struct tee_param *param); int (*supp_send)(struct tee_context *ctx, u32 ret, u32 num_params, struct tee_param *param); + int (*shm_register)(struct tee_context *ctx, struct tee_shm *shm, + struct page **pages, size_t num_pages); + int (*shm_unregister)(struct tee_context *ctx, struct tee_shm *shm); }; /** @@ -301,6 +310,40 @@ void *tee_get_drvdata(struct tee_device *teedev); */ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags); +/** + * tee_shm_priv_alloc() - Allocate shared memory privately + * @dev: Device that allocates the shared memory + * @size: Requested size of shared memory + * + * Allocates shared memory buffer that is not associated with any client + * context. Such buffers are owned by TEE driver and used for internal calls. + * + * @returns a pointer to 'struct tee_shm' + */ +struct tee_shm *tee_shm_priv_alloc(struct tee_device *teedev, size_t size); + +/** + * tee_shm_register() - Register shared memory buffer + * @ctx: Context that registers the shared memory + * @addr: Address is userspace of the shared buffer + * @length: Length of the shared buffer + * @flags: Flags setting properties for the requested shared memory. + * + * @returns a pointer to 'struct tee_shm' + */ +struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, + size_t length, u32 flags); + +/** + * tee_shm_is_registered() - Check if shared memory object in registered in TEE + * @shm: Shared memory handle + * @returns true if object is registered in TEE + */ +static inline bool tee_shm_is_registered(struct tee_shm *shm) +{ + return shm && (shm->flags & TEE_SHM_REGISTER); +} + /** * tee_shm_free() - Free shared memory * @shm: Handle to shared memory to free diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index 267c12e7fd79..4b9eb064d7e7 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -50,6 +50,7 @@ #define TEE_GEN_CAP_GP (1 << 0)/* GlobalPlatform compliant TEE */ #define TEE_GEN_CAP_PRIVILEGED (1 << 1)/* Privileged device (for supplicant) */ +#define TEE_GEN_CAP_REG_MEM (1 << 2)/* Supports registering shared memory */ /* * TEE Implementation ID @@ -339,6 +340,35 @@ struct tee_iocl_supp_send_arg { #define TEE_IOC_SUPPL_SEND _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 7, \ struct tee_ioctl_buf_data) +/** + * struct tee_ioctl_shm_register_data - Shared memory register argument + * @addr: [in] Start address of shared memory to register + * @length: [in/out] Length of shared memory to register + * @flags: [in/out] Flags to/from registration. + * @id: [out] Identifier of the shared memory + * + * The flags field should currently be zero as input. Updated by the call + * with actual flags as defined by TEE_IOCTL_SHM_* above. + * This structure is used as argument for TEE_IOC_SHM_REGISTER below. + */ +struct tee_ioctl_shm_register_data { + __u64 addr; + __u64 length; + __u32 flags; + __s32 id; +}; + +/** + * TEE_IOC_SHM_REGISTER - Register shared memory argument + * + * Registers shared memory between the user space process and secure OS. + * + * Returns a file descriptor on success or < 0 on failure + * + * The shared memory is unregisterred when the descriptor is closed. + */ +#define TEE_IOC_SHM_REGISTER _IOWR(TEE_IOC_MAGIC, TEE_IOC_BASE + 9, \ + struct tee_ioctl_shm_register_data) /* * Five syscalls are used when communicating with the TEE driver. * open(): opens the device associated with the driver -- GitLab From 006c193d43f3a59e9a150a493834cd04f5f3e0fa Mon Sep 17 00:00:00 2001 From: Volodymyr Babchuk Date: Wed, 29 Nov 2017 14:48:27 +0200 Subject: [PATCH 1381/1398] BACKPORT: tee: shm: add accessors for buffer size and page offset These two function will be needed for shared memory registration in OP-TEE Change-Id: If6a16ec82a2880c05a37c05bf00e5722b11efc64 Signed-off-by: Volodymyr Babchuk Signed-off-by: Jens Wiklander (cherry picked from commit b25946ad951c013c31d0a0e82d2017004bdc8fed) Signed-off-by: Victor Chong --- include/linux/tee_drv.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 0f9e574299b6..71cd1f2841c9 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -393,6 +393,26 @@ void *tee_shm_get_va(struct tee_shm *shm, size_t offs); */ int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa); +/** + * tee_shm_get_size() - Get size of shared memory buffer + * @shm: Shared memory handle + * @returns size of shared memory + */ +static inline size_t tee_shm_get_size(struct tee_shm *shm) +{ + return shm->size; +} + +/** + * tee_shm_get_page_offset() - Get shared buffer offset from page start + * @shm: Shared memory handle + * @returns page offset of shared buffer + */ +static inline size_t tee_shm_get_page_offset(struct tee_shm *shm) +{ + return shm->offset; +} + /** * tee_shm_get_id() - Get id of a shared memory object * @shm: Shared memory handle -- GitLab From 7bc052564eb034edaaf63da70e36e834227a0aa2 Mon Sep 17 00:00:00 2001 From: Volodymyr Babchuk Date: Wed, 29 Nov 2017 14:48:28 +0200 Subject: [PATCH 1382/1398] BACKPORT: tee: shm: add page accessor functions In order to register a shared buffer in TEE, we need accessor function that return list of pages for that buffer. Change-Id: I34630ae5d69813639ac8f0988631a65c16432846 Signed-off-by: Volodymyr Babchuk Signed-off-by: Jens Wiklander (cherry picked from commit e0c69ae8bfb500facebe1fa331f9400c216eaab0) Signed-off-by: Victor Chong --- include/linux/tee_drv.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 71cd1f2841c9..80963f665db3 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -403,6 +403,19 @@ static inline size_t tee_shm_get_size(struct tee_shm *shm) return shm->size; } +/** + * tee_shm_get_pages() - Get list of pages that hold shared buffer + * @shm: Shared memory handle + * @num_pages: Number of pages will be stored there + * @returns pointer to pages array + */ +static inline struct page **tee_shm_get_pages(struct tee_shm *shm, + size_t *num_pages) +{ + *num_pages = shm->num_pages; + return shm->pages; +} + /** * tee_shm_get_page_offset() - Get shared buffer offset from page start * @shm: Shared memory handle -- GitLab From f442bb9bb1bc5d3adb0d4709467129fc8bcb69cc Mon Sep 17 00:00:00 2001 From: Volodymyr Babchuk Date: Wed, 29 Nov 2017 14:48:29 +0200 Subject: [PATCH 1383/1398] BACKPORT: tee: optee: Update protocol definitions There were changes in REE<->OP-TEE ABI recently. Now ABI allows us to pass non-contiguous memory buffers as list of pages to OP-TEE. This can be achieved by using new parameter attribute OPTEE_MSG_ATTR_NONCONTIG. OP-TEE also is able to use all non-secure RAM for shared buffers. This new capability is enabled with OPTEE_SMC_SEC_CAP_DYNAMIC_SHM flag. This patch adds necessary definitions to the protocol definition files at Linux side. Change-Id: I9a1dbe642b4e6bf5544a579f80abb940b59723ab Signed-off-by: Volodymyr Babchuk Signed-off-by: Jens Wiklander (cherry picked from commit de5c6dfc43daa59feb824505f80fe4591f8f8f85) Signed-off-by: Victor Chong --- drivers/tee/optee/optee_msg.h | 38 +++++++++++++++++++++++++++++------ drivers/tee/optee/optee_smc.h | 7 +++++++ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/drivers/tee/optee/optee_msg.h b/drivers/tee/optee/optee_msg.h index dd7a06ee0462..30504901be80 100644 --- a/drivers/tee/optee/optee_msg.h +++ b/drivers/tee/optee/optee_msg.h @@ -67,11 +67,32 @@ #define OPTEE_MSG_ATTR_META BIT(8) /* - * The temporary shared memory object is not physically contigous and this - * temp memref is followed by another fragment until the last temp memref - * that doesn't have this bit set. + * Pointer to a list of pages used to register user-defined SHM buffer. + * Used with OPTEE_MSG_ATTR_TYPE_TMEM_*. + * buf_ptr should point to the beginning of the buffer. Buffer will contain + * list of page addresses. OP-TEE core can reconstruct contiguous buffer from + * that page addresses list. Page addresses are stored as 64 bit values. + * Last entry on a page should point to the next page of buffer. + * Every entry in buffer should point to a 4k page beginning (12 least + * significant bits must be equal to zero). + * + * 12 least significant bints of optee_msg_param.u.tmem.buf_ptr should hold page + * offset of the user buffer. + * + * So, entries should be placed like members of this structure: + * + * struct page_data { + * uint64_t pages_array[OPTEE_MSG_NONCONTIG_PAGE_SIZE/sizeof(uint64_t) - 1]; + * uint64_t next_page_data; + * }; + * + * Structure is designed to exactly fit into the page size + * OPTEE_MSG_NONCONTIG_PAGE_SIZE which is a standard 4KB page. + * + * The size of 4KB is chosen because this is the smallest page size for ARM + * architectures. If REE uses larger pages, it should divide them to 4KB ones. */ -#define OPTEE_MSG_ATTR_FRAGMENT BIT(9) +#define OPTEE_MSG_ATTR_NONCONTIG BIT(9) /* * Memory attributes for caching passed with temp memrefs. The actual value @@ -94,6 +115,11 @@ #define OPTEE_MSG_LOGIN_APPLICATION_USER 0x00000005 #define OPTEE_MSG_LOGIN_APPLICATION_GROUP 0x00000006 +/* + * Page size used in non-contiguous buffer entries + */ +#define OPTEE_MSG_NONCONTIG_PAGE_SIZE 4096 + /** * struct optee_msg_param_tmem - temporary memory reference parameter * @buf_ptr: Address of the buffer @@ -145,8 +171,8 @@ struct optee_msg_param_value { * * @attr & OPTEE_MSG_ATTR_TYPE_MASK indicates if tmem, rmem or value is used in * the union. OPTEE_MSG_ATTR_TYPE_VALUE_* indicates value, - * OPTEE_MSG_ATTR_TYPE_TMEM_* indicates tmem and - * OPTEE_MSG_ATTR_TYPE_RMEM_* indicates rmem. + * OPTEE_MSG_ATTR_TYPE_TMEM_* indicates @tmem and + * OPTEE_MSG_ATTR_TYPE_RMEM_* indicates @rmem, * OPTEE_MSG_ATTR_TYPE_NONE indicates that none of the members are used. */ struct optee_msg_param { diff --git a/drivers/tee/optee/optee_smc.h b/drivers/tee/optee/optee_smc.h index 069c8e1429de..7cd327243ada 100644 --- a/drivers/tee/optee/optee_smc.h +++ b/drivers/tee/optee/optee_smc.h @@ -222,6 +222,13 @@ struct optee_smc_get_shm_config_result { #define OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM BIT(0) /* Secure world can communicate via previously unregistered shared memory */ #define OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM BIT(1) + +/* + * Secure world supports commands "register/unregister shared memory", + * secure world accepts command buffers located in any parts of non-secure RAM + */ +#define OPTEE_SMC_SEC_CAP_DYNAMIC_SHM BIT(2) + #define OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES 9 #define OPTEE_SMC_EXCHANGE_CAPABILITIES \ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES) -- GitLab From 9cba6e753a49a03c20055b32ed2f673e11e6912e Mon Sep 17 00:00:00 2001 From: Volodymyr Babchuk Date: Wed, 29 Nov 2017 14:48:30 +0200 Subject: [PATCH 1384/1398] BACKPORT: tee: optee: add page list manipulation functions These functions will be used to pass information about shared buffers to OP-TEE. ABI between Linux and OP-TEE is defined in optee_msg.h and optee_smc.h. optee_msg.h defines OPTEE_MSG_ATTR_NONCONTIG attribute for shared memory references and describes how such references should be passed. Note that it uses 64-bit page addresses even on 32 bit systems. This is done to support LPAE and to unify interface. Change-Id: I9285315d48979e75c54021163f3e8c7a2772db55 Signed-off-by: Volodymyr Babchuk [jw: replacing uint64_t with u64 in optee_fill_pages_list()] Signed-off-by: Jens Wiklander (cherry picked from commit 3bb48ba5cd60f9685aa8f1ccd9b14a72e237c13f) Signed-off-by: Victor Chong --- drivers/tee/optee/call.c | 91 +++++++++++++++++++++++++++++++ drivers/tee/optee/optee_private.h | 5 ++ 2 files changed, 96 insertions(+) diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index f7b7b404c990..e85860f6e057 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -11,6 +11,7 @@ * GNU General Public License for more details. * */ +#include #include #include #include @@ -442,3 +443,93 @@ void optee_disable_shm_cache(struct optee *optee) } optee_cq_wait_final(&optee->call_queue, &w); } + +#define PAGELIST_ENTRIES_PER_PAGE \ + ((OPTEE_MSG_NONCONTIG_PAGE_SIZE / sizeof(u64)) - 1) + +/** + * optee_fill_pages_list() - write list of user pages to given shared + * buffer. + * + * @dst: page-aligned buffer where list of pages will be stored + * @pages: array of pages that represents shared buffer + * @num_pages: number of entries in @pages + * @page_offset: offset of user buffer from page start + * + * @dst should be big enough to hold list of user page addresses and + * links to the next pages of buffer + */ +void optee_fill_pages_list(u64 *dst, struct page **pages, int num_pages, + size_t page_offset) +{ + int n = 0; + phys_addr_t optee_page; + /* + * Refer to OPTEE_MSG_ATTR_NONCONTIG description in optee_msg.h + * for details. + */ + struct { + u64 pages_list[PAGELIST_ENTRIES_PER_PAGE]; + u64 next_page_data; + } *pages_data; + + /* + * Currently OP-TEE uses 4k page size and it does not looks + * like this will change in the future. On other hand, there are + * no know ARM architectures with page size < 4k. + * Thus the next built assert looks redundant. But the following + * code heavily relies on this assumption, so it is better be + * safe than sorry. + */ + BUILD_BUG_ON(PAGE_SIZE < OPTEE_MSG_NONCONTIG_PAGE_SIZE); + + pages_data = (void *)dst; + /* + * If linux page is bigger than 4k, and user buffer offset is + * larger than 4k/8k/12k/etc this will skip first 4k pages, + * because they bear no value data for OP-TEE. + */ + optee_page = page_to_phys(*pages) + + round_down(page_offset, OPTEE_MSG_NONCONTIG_PAGE_SIZE); + + while (true) { + pages_data->pages_list[n++] = optee_page; + + if (n == PAGELIST_ENTRIES_PER_PAGE) { + pages_data->next_page_data = + virt_to_phys(pages_data + 1); + pages_data++; + n = 0; + } + + optee_page += OPTEE_MSG_NONCONTIG_PAGE_SIZE; + if (!(optee_page & ~PAGE_MASK)) { + if (!--num_pages) + break; + pages++; + optee_page = page_to_phys(*pages); + } + } +} + +/* + * The final entry in each pagelist page is a pointer to the next + * pagelist page. + */ +static size_t get_pages_list_size(size_t num_entries) +{ + int pages = DIV_ROUND_UP(num_entries, PAGELIST_ENTRIES_PER_PAGE); + + return pages * OPTEE_MSG_NONCONTIG_PAGE_SIZE; +} + +u64 *optee_allocate_pages_list(size_t num_entries) +{ + return alloc_pages_exact(get_pages_list_size(num_entries), GFP_KERNEL); +} + +void optee_free_pages_list(void *list, size_t num_entries) +{ + free_pages_exact(list, get_pages_list_size(num_entries)); +} + diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index 3e7da187acbe..3696f5b4a792 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -154,6 +154,11 @@ int optee_from_msg_param(struct tee_param *params, size_t num_params, int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params, const struct tee_param *params); +u64 *optee_allocate_pages_list(size_t num_entries); +void optee_free_pages_list(void *array, size_t num_entries); +void optee_fill_pages_list(u64 *dst, struct page **pages, int num_pages, + size_t page_offset); + /* * Small helpers */ -- GitLab From 88be5b8bb8cc8b0c6953d3053f54dbe4f2e9f253 Mon Sep 17 00:00:00 2001 From: Volodymyr Babchuk Date: Wed, 29 Nov 2017 14:48:31 +0200 Subject: [PATCH 1385/1398] BACKPORT: tee: optee: add shared buffer registration functions This change adds ops for shm_(un)register functions in tee interface. Client application can use these functions to (un)register an own shared buffer in OP-TEE address space. This allows zero copy data sharing between Normal and Secure Worlds. Please note that while those functions were added to optee code, it does not report to userspace that those functions are available. OP-TEE code does not set TEE_GEN_CAP_REG_MEM flag. This flag will be enabled only after all other features of dynamic shared memory will be implemented in subsequent patches. Of course user can ignore presence of TEE_GEN_CAP_REG_MEM flag and try do call those functions. This is okay, driver will register shared buffer in OP-TEE, but any attempts to use this shared buffer will fail. Change-Id: I918fd93ff2a5e8a371f6e7fd81f45bbfaed69315 Signed-off-by: Volodymyr Babchuk Signed-off-by: Jens Wiklander (cherry picked from commit 06ca79179c4e00efe53cfe43456f1586f944f04f) Signed-off-by: Victor Chong --- drivers/tee/optee/call.c | 69 +++++++++++++++++++++++++++++++ drivers/tee/optee/core.c | 2 + drivers/tee/optee/optee_private.h | 4 ++ 3 files changed, 75 insertions(+) diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index e85860f6e057..a05e9e61105f 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -533,3 +533,72 @@ void optee_free_pages_list(void *list, size_t num_entries) free_pages_exact(list, get_pages_list_size(num_entries)); } +int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm, + struct page **pages, size_t num_pages) +{ + struct tee_shm *shm_arg = NULL; + struct optee_msg_arg *msg_arg; + u64 *pages_list; + phys_addr_t msg_parg; + int rc = 0; + + if (!num_pages) + return -EINVAL; + + pages_list = optee_allocate_pages_list(num_pages); + if (!pages_list) + return -ENOMEM; + + shm_arg = get_msg_arg(ctx, 1, &msg_arg, &msg_parg); + if (IS_ERR(shm_arg)) { + rc = PTR_ERR(shm_arg); + goto out; + } + + optee_fill_pages_list(pages_list, pages, num_pages, + tee_shm_get_page_offset(shm)); + + msg_arg->cmd = OPTEE_MSG_CMD_REGISTER_SHM; + msg_arg->params->attr = OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT | + OPTEE_MSG_ATTR_NONCONTIG; + msg_arg->params->u.tmem.shm_ref = (unsigned long)shm; + msg_arg->params->u.tmem.size = tee_shm_get_size(shm); + /* + * In the least bits of msg_arg->params->u.tmem.buf_ptr we + * store buffer offset from 4k page, as described in OP-TEE ABI. + */ + msg_arg->params->u.tmem.buf_ptr = virt_to_phys(pages_list) | + (tee_shm_get_page_offset(shm) & (OPTEE_MSG_NONCONTIG_PAGE_SIZE - 1)); + + if (optee_do_call_with_arg(ctx, msg_parg) || + msg_arg->ret != TEEC_SUCCESS) + rc = -EINVAL; + + tee_shm_free(shm_arg); +out: + optee_free_pages_list(pages_list, num_pages); + return rc; +} + +int optee_shm_unregister(struct tee_context *ctx, struct tee_shm *shm) +{ + struct tee_shm *shm_arg; + struct optee_msg_arg *msg_arg; + phys_addr_t msg_parg; + int rc = 0; + + shm_arg = get_msg_arg(ctx, 1, &msg_arg, &msg_parg); + if (IS_ERR(shm_arg)) + return PTR_ERR(shm_arg); + + msg_arg->cmd = OPTEE_MSG_CMD_UNREGISTER_SHM; + + msg_arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_RMEM_INPUT; + msg_arg->params[0].u.rmem.shm_ref = (unsigned long)shm; + + if (optee_do_call_with_arg(ctx, msg_parg) || + msg_arg->ret != TEEC_SUCCESS) + rc = -EINVAL; + tee_shm_free(shm_arg); + return rc; +} diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index d0dd09219795..0aa7dbb6b2fe 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -264,6 +264,8 @@ static const struct tee_driver_ops optee_ops = { .close_session = optee_close_session, .invoke_func = optee_invoke_func, .cancel_req = optee_cancel_req, + .shm_register = optee_shm_register, + .shm_unregister = optee_shm_unregister, }; static const struct tee_desc optee_desc = { diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index 3696f5b4a792..cdda1fa7241f 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -149,6 +149,10 @@ int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session); void optee_enable_shm_cache(struct optee *optee); void optee_disable_shm_cache(struct optee *optee); +int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm, + struct page **pages, size_t num_pages); +int optee_shm_unregister(struct tee_context *ctx, struct tee_shm *shm); + int optee_from_msg_param(struct tee_param *params, size_t num_params, const struct optee_msg_param *msg_params); int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params, -- GitLab From 7ec482be026ab54c1642a33657ffc28347eb2dc1 Mon Sep 17 00:00:00 2001 From: Volodymyr Babchuk Date: Wed, 29 Nov 2017 14:48:32 +0200 Subject: [PATCH 1386/1398] BACKPORT: tee: optee: add registered shared parameters handling Now, when client applications can register own shared buffers in OP-TEE, we need to extend ABI for parameter passing to/from OP-TEE. So, if OP-TEE core detects that parameter belongs to registered shared memory, it will use corresponding parameter attribute. Change-Id: I52b6b3a0092aac238b232b6d24668bbe3f3015e2 Signed-off-by: Volodymyr Babchuk Signed-off-by: Jens Wiklander (cherry picked from commit 64cf9d8a672e770fed85a65b5c6767fc0aa1473b) Signed-off-by: Victor Chong --- drivers/tee/optee/core.c | 78 ++++++++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 15 deletions(-) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 0aa7dbb6b2fe..b122009f890c 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -97,6 +97,25 @@ int optee_from_msg_param(struct tee_param *params, size_t num_params, return rc; } break; + case OPTEE_MSG_ATTR_TYPE_RMEM_INPUT: + case OPTEE_MSG_ATTR_TYPE_RMEM_OUTPUT: + case OPTEE_MSG_ATTR_TYPE_RMEM_INOUT: + p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT + + attr - OPTEE_MSG_ATTR_TYPE_RMEM_INPUT; + p->u.memref.size = mp->u.rmem.size; + shm = (struct tee_shm *)(unsigned long) + mp->u.rmem.shm_ref; + + if (!shm) { + p->u.memref.shm_offs = 0; + p->u.memref.shm = NULL; + break; + } + p->u.memref.shm_offs = mp->u.rmem.offs; + p->u.memref.shm = shm; + + break; + default: return -EINVAL; } @@ -104,6 +123,46 @@ int optee_from_msg_param(struct tee_param *params, size_t num_params, return 0; } +static int to_msg_param_tmp_mem(struct optee_msg_param *mp, + const struct tee_param *p) +{ + int rc; + phys_addr_t pa; + + mp->attr = OPTEE_MSG_ATTR_TYPE_TMEM_INPUT + p->attr - + TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT; + + mp->u.tmem.shm_ref = (unsigned long)p->u.memref.shm; + mp->u.tmem.size = p->u.memref.size; + + if (!p->u.memref.shm) { + mp->u.tmem.buf_ptr = 0; + return 0; + } + + rc = tee_shm_get_pa(p->u.memref.shm, p->u.memref.shm_offs, &pa); + if (rc) + return rc; + + mp->u.tmem.buf_ptr = pa; + mp->attr |= OPTEE_MSG_ATTR_CACHE_PREDEFINED << + OPTEE_MSG_ATTR_CACHE_SHIFT; + + return 0; +} + +static int to_msg_param_reg_mem(struct optee_msg_param *mp, + const struct tee_param *p) +{ + mp->attr = OPTEE_MSG_ATTR_TYPE_RMEM_INPUT + p->attr - + TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT; + + mp->u.rmem.shm_ref = (unsigned long)p->u.memref.shm; + mp->u.rmem.size = p->u.memref.size; + mp->u.rmem.offs = p->u.memref.shm_offs; + return 0; +} + /** * optee_to_msg_param() - convert from struct tee_params to OPTEE_MSG parameters * @msg_params: OPTEE_MSG parameters @@ -116,7 +175,6 @@ int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params, { int rc; size_t n; - phys_addr_t pa; for (n = 0; n < num_params; n++) { const struct tee_param *p = params + n; @@ -139,22 +197,12 @@ int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params, case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: - mp->attr = OPTEE_MSG_ATTR_TYPE_TMEM_INPUT + - p->attr - - TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT; - mp->u.tmem.shm_ref = (unsigned long)p->u.memref.shm; - mp->u.tmem.size = p->u.memref.size; - if (!p->u.memref.shm) { - mp->u.tmem.buf_ptr = 0; - break; - } - rc = tee_shm_get_pa(p->u.memref.shm, - p->u.memref.shm_offs, &pa); + if (tee_shm_is_registered(p->u.memref.shm)) + rc = to_msg_param_reg_mem(mp, p); + else + rc = to_msg_param_tmp_mem(mp, p); if (rc) return rc; - mp->u.tmem.buf_ptr = pa; - mp->attr |= OPTEE_MSG_ATTR_CACHE_PREDEFINED << - OPTEE_MSG_ATTR_CACHE_SHIFT; break; default: return -EINVAL; -- GitLab From ca453513d201a78dd52af81abeccb9ad67473a35 Mon Sep 17 00:00:00 2001 From: Volodymyr Babchuk Date: Wed, 29 Nov 2017 14:48:33 +0200 Subject: [PATCH 1387/1398] BACKPORT: tee: optee: add registered buffers handling into RPC calls With latest changes to OP-TEE we can use any buffers as a shared memory. Thus, it is possible for supplicant to provide part of own memory when OP-TEE asks to allocate a shared buffer. This patch adds support for such feature into RPC handling code. Now when OP-TEE asks supplicant to allocate shared buffer, supplicant can use TEE_IOC_SHM_REGISTER to provide such buffer. RPC handler is aware of this, so it will pass list of allocated pages to OP-TEE. Change-Id: If7b7812ab4255ed3c1915ae16eafd5556947b936 Signed-off-by: Volodymyr Babchuk [jw: fix parenthesis alignment in free_pages_list()] Signed-off-by: Jens Wiklander (cherry picked from commit 53a107c812de3dd74707458aa751eb457718ff9e) Signed-off-by: Victor Chong --- drivers/tee/optee/call.c | 19 +++++++- drivers/tee/optee/core.c | 2 + drivers/tee/optee/optee_private.h | 15 +++++- drivers/tee/optee/rpc.c | 77 +++++++++++++++++++++++++++---- 4 files changed, 102 insertions(+), 11 deletions(-) diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index a05e9e61105f..e675e82ff095 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -136,6 +136,7 @@ u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg) struct optee *optee = tee_get_drvdata(ctx->teedev); struct optee_call_waiter w; struct optee_rpc_param param = { }; + struct optee_call_ctx call_ctx = { }; u32 ret; param.a0 = OPTEE_SMC_CALL_WITH_ARG; @@ -160,13 +161,14 @@ u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg) param.a1 = res.a1; param.a2 = res.a2; param.a3 = res.a3; - optee_handle_rpc(ctx, ¶m); + optee_handle_rpc(ctx, ¶m, &call_ctx); } else { ret = res.a0; break; } } + optee_rpc_finalize_call(&call_ctx); /* * We're done with our thread in secure world, if there's any * thread waiters wake up one. @@ -602,3 +604,18 @@ int optee_shm_unregister(struct tee_context *ctx, struct tee_shm *shm) tee_shm_free(shm_arg); return rc; } + +int optee_shm_register_supp(struct tee_context *ctx, struct tee_shm *shm, + struct page **pages, size_t num_pages) +{ + /* + * We don't want to register supplicant memory in OP-TEE. + * Instead information about it will be passed in RPC code. + */ + return 0; +} + +int optee_shm_unregister_supp(struct tee_context *ctx, struct tee_shm *shm) +{ + return 0; +} diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index b122009f890c..94453f0f152d 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -328,6 +328,8 @@ static const struct tee_driver_ops optee_supp_ops = { .release = optee_release, .supp_recv = optee_supp_recv, .supp_send = optee_supp_send, + .shm_register = optee_shm_register_supp, + .shm_unregister = optee_shm_unregister_supp, }; static const struct tee_desc optee_supp_desc = { diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index cdda1fa7241f..8f3b740a2b16 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -118,7 +118,16 @@ struct optee_rpc_param { u32 a7; }; -void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param); +/* Holds context that is preserved during one STD call */ +struct optee_call_ctx { + /* information about pages list used in last allocation */ + void *pages_list; + size_t num_entries; +}; + +void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param, + struct optee_call_ctx *call_ctx); +void optee_rpc_finalize_call(struct optee_call_ctx *call_ctx); void optee_wait_queue_init(struct optee_wait_queue *wq); void optee_wait_queue_exit(struct optee_wait_queue *wq); @@ -153,6 +162,10 @@ int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm, struct page **pages, size_t num_pages); int optee_shm_unregister(struct tee_context *ctx, struct tee_shm *shm); +int optee_shm_register_supp(struct tee_context *ctx, struct tee_shm *shm, + struct page **pages, size_t num_pages); +int optee_shm_unregister_supp(struct tee_context *ctx, struct tee_shm *shm); + int optee_from_msg_param(struct tee_param *params, size_t num_params, const struct optee_msg_param *msg_params); int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params, diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c index c6df4317ca9f..41aea12e2bcc 100644 --- a/drivers/tee/optee/rpc.c +++ b/drivers/tee/optee/rpc.c @@ -200,7 +200,8 @@ static struct tee_shm *cmd_alloc_suppl(struct tee_context *ctx, size_t sz) } static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx, - struct optee_msg_arg *arg) + struct optee_msg_arg *arg, + struct optee_call_ctx *call_ctx) { phys_addr_t pa; struct tee_shm *shm; @@ -245,10 +246,49 @@ static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx, goto bad; } - arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT; - arg->params[0].u.tmem.buf_ptr = pa; - arg->params[0].u.tmem.size = sz; - arg->params[0].u.tmem.shm_ref = (unsigned long)shm; + sz = tee_shm_get_size(shm); + + if (tee_shm_is_registered(shm)) { + struct page **pages; + u64 *pages_list; + size_t page_num; + + pages = tee_shm_get_pages(shm, &page_num); + if (!pages || !page_num) { + arg->ret = TEEC_ERROR_OUT_OF_MEMORY; + goto bad; + } + + pages_list = optee_allocate_pages_list(page_num); + if (!pages_list) { + arg->ret = TEEC_ERROR_OUT_OF_MEMORY; + goto bad; + } + + call_ctx->pages_list = pages_list; + call_ctx->num_entries = page_num; + + arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT | + OPTEE_MSG_ATTR_NONCONTIG; + /* + * In the least bits of u.tmem.buf_ptr we store buffer offset + * from 4k page, as described in OP-TEE ABI. + */ + arg->params[0].u.tmem.buf_ptr = virt_to_phys(pages_list) | + (tee_shm_get_page_offset(shm) & + (OPTEE_MSG_NONCONTIG_PAGE_SIZE - 1)); + arg->params[0].u.tmem.size = tee_shm_get_size(shm); + arg->params[0].u.tmem.shm_ref = (unsigned long)shm; + + optee_fill_pages_list(pages_list, pages, page_num, + tee_shm_get_page_offset(shm)); + } else { + arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT; + arg->params[0].u.tmem.buf_ptr = pa; + arg->params[0].u.tmem.size = sz; + arg->params[0].u.tmem.shm_ref = (unsigned long)shm; + } + arg->ret = TEEC_SUCCESS; return; bad: @@ -307,8 +347,24 @@ static void handle_rpc_func_cmd_shm_free(struct tee_context *ctx, arg->ret = TEEC_SUCCESS; } +static void free_pages_list(struct optee_call_ctx *call_ctx) +{ + if (call_ctx->pages_list) { + optee_free_pages_list(call_ctx->pages_list, + call_ctx->num_entries); + call_ctx->pages_list = NULL; + call_ctx->num_entries = 0; + } +} + +void optee_rpc_finalize_call(struct optee_call_ctx *call_ctx) +{ + free_pages_list(call_ctx); +} + static void handle_rpc_func_cmd(struct tee_context *ctx, struct optee *optee, - struct tee_shm *shm) + struct tee_shm *shm, + struct optee_call_ctx *call_ctx) { struct optee_msg_arg *arg; @@ -329,7 +385,8 @@ static void handle_rpc_func_cmd(struct tee_context *ctx, struct optee *optee, handle_rpc_func_cmd_wait(arg); break; case OPTEE_MSG_RPC_CMD_SHM_ALLOC: - handle_rpc_func_cmd_shm_alloc(ctx, arg); + free_pages_list(call_ctx); + handle_rpc_func_cmd_shm_alloc(ctx, arg, call_ctx); break; case OPTEE_MSG_RPC_CMD_SHM_FREE: handle_rpc_func_cmd_shm_free(ctx, arg); @@ -343,10 +400,12 @@ static void handle_rpc_func_cmd(struct tee_context *ctx, struct optee *optee, * optee_handle_rpc() - handle RPC from secure world * @ctx: context doing the RPC * @param: value of registers for the RPC + * @call_ctx: call context. Preserved during one OP-TEE invocation * * Result of RPC is written back into @param. */ -void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param) +void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param, + struct optee_call_ctx *call_ctx) { struct tee_device *teedev = ctx->teedev; struct optee *optee = tee_get_drvdata(teedev); @@ -381,7 +440,7 @@ void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param) break; case OPTEE_SMC_RPC_FUNC_CMD: shm = reg_pair_to_ptr(param->a1, param->a2); - handle_rpc_func_cmd(ctx, optee, shm); + handle_rpc_func_cmd(ctx, optee, shm, call_ctx); break; default: pr_warn("Unknown RPC func 0x%x\n", -- GitLab From 8fecf08ca8b2c7e77357e3f05a0a9766230dd6b1 Mon Sep 17 00:00:00 2001 From: Volodymyr Babchuk Date: Wed, 29 Nov 2017 14:48:34 +0200 Subject: [PATCH 1388/1398] BACKPORT: tee: optee: store OP-TEE capabilities in private data Those capabilities will be used in subsequent patches. Change-Id: I296269714687d346c7beaf30fa400d74c81e717a Signed-off-by: Volodymyr Babchuk Signed-off-by: Jens Wiklander (cherry picked from commit d885cc5e0759fc19badadddb60a64344b551469b) Signed-off-by: Victor Chong --- drivers/tee/optee/core.c | 1 + drivers/tee/optee/optee_private.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 94453f0f152d..e359dc5dc9c3 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -542,6 +542,7 @@ static struct optee *optee_probe(struct device_node *np) } optee->invoke_fn = invoke_fn; + optee->sec_caps = sec_caps; teedev = tee_device_alloc(&optee_desc, NULL, pool, optee); if (IS_ERR(teedev)) { diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index 8f3b740a2b16..a85a24725e31 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -84,6 +84,8 @@ struct optee_supp { * @supp: supplicant synchronization struct for RPC to supplicant * @pool: shared memory pool * @memremaped_shm virtual address of memory in shared memory pool + * @sec_caps: secure world capabilities defined by + * OPTEE_SMC_SEC_CAP_* in optee_smc.h */ struct optee { struct tee_device *supp_teedev; @@ -94,6 +96,7 @@ struct optee { struct optee_supp supp; struct tee_shm_pool *pool; void *memremaped_shm; + u32 sec_caps; }; struct optee_session { -- GitLab From 8d919d71068745b787f72d00e5f11d010fb69f03 Mon Sep 17 00:00:00 2001 From: Volodymyr Babchuk Date: Wed, 29 Nov 2017 14:48:35 +0200 Subject: [PATCH 1389/1398] BACKPORT: tee: optee: add optee-specific shared pool implementation This is simple pool that uses kernel page allocator. This pool can be used in case OP-TEE supports dynamic shared memory. Change-Id: Idd0b512ef577b38f61dabca60b86becf09a746cc Signed-off-by: Volodymyr Babchuk Signed-off-by: Jens Wiklander (cherry picked from commit abd135ba215c05ca84f9809e6047db25fc28b835) Signed-off-by: Victor Chong --- drivers/tee/optee/Makefile | 1 + drivers/tee/optee/shm_pool.c | 75 ++++++++++++++++++++++++++++++++++++ drivers/tee/optee/shm_pool.h | 23 +++++++++++ 3 files changed, 99 insertions(+) create mode 100644 drivers/tee/optee/shm_pool.c create mode 100644 drivers/tee/optee/shm_pool.h diff --git a/drivers/tee/optee/Makefile b/drivers/tee/optee/Makefile index 92fe5789bcce..220cf4298f0d 100644 --- a/drivers/tee/optee/Makefile +++ b/drivers/tee/optee/Makefile @@ -3,3 +3,4 @@ optee-objs += core.o optee-objs += call.o optee-objs += rpc.o optee-objs += supp.o +optee-objs += shm_pool.o diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c new file mode 100644 index 000000000000..49397813fff1 --- /dev/null +++ b/drivers/tee/optee/shm_pool.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2015, Linaro Limited + * Copyright (c) 2017, EPAM Systems + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include +#include +#include +#include +#include +#include "optee_private.h" +#include "optee_smc.h" +#include "shm_pool.h" + +static int pool_op_alloc(struct tee_shm_pool_mgr *poolm, + struct tee_shm *shm, size_t size) +{ + unsigned int order = get_order(size); + struct page *page; + + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!page) + return -ENOMEM; + + shm->kaddr = page_address(page); + shm->paddr = page_to_phys(page); + shm->size = PAGE_SIZE << order; + + return 0; +} + +static void pool_op_free(struct tee_shm_pool_mgr *poolm, + struct tee_shm *shm) +{ + free_pages((unsigned long)shm->kaddr, get_order(shm->size)); + shm->kaddr = NULL; +} + +static void pool_op_destroy_poolmgr(struct tee_shm_pool_mgr *poolm) +{ + kfree(poolm); +} + +static const struct tee_shm_pool_mgr_ops pool_ops = { + .alloc = pool_op_alloc, + .free = pool_op_free, + .destroy_poolmgr = pool_op_destroy_poolmgr, +}; + +/** + * optee_shm_pool_alloc_pages() - create page-based allocator pool + * + * This pool is used when OP-TEE supports dymanic SHM. In this case + * command buffers and such are allocated from kernel's own memory. + */ +struct tee_shm_pool_mgr *optee_shm_pool_alloc_pages(void) +{ + struct tee_shm_pool_mgr *mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); + + if (!mgr) + return ERR_PTR(-ENOMEM); + + mgr->ops = &pool_ops; + + return mgr; +} diff --git a/drivers/tee/optee/shm_pool.h b/drivers/tee/optee/shm_pool.h new file mode 100644 index 000000000000..4e753c3bf7ec --- /dev/null +++ b/drivers/tee/optee/shm_pool.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2015, Linaro Limited + * Copyright (c) 2016, EPAM Systems + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef SHM_POOL_H +#define SHM_POOL_H + +#include + +struct tee_shm_pool_mgr *optee_shm_pool_alloc_pages(void); + +#endif -- GitLab From 1e046f0c0df98ff95b2923eab89f6c7716512381 Mon Sep 17 00:00:00 2001 From: Volodymyr Babchuk Date: Wed, 29 Nov 2017 14:48:36 +0200 Subject: [PATCH 1390/1398] BACKPORT: tee: optee: enable dynamic SHM support Previous patches added various features that are needed for dynamic SHM. Dynamic SHM allows Normal World to share any buffers with OP-TEE. While original design suggested to use pre-allocated region (usually of 1M to 2M of size), this new approach allows to use all non-secure RAM for command buffers, RPC allocations and TA parameters. This patch checks capability OPTEE_SMC_SEC_CAP_DYNAMIC_SHM. If it was set by OP-TEE, then kernel part of OP-TEE will use kernel page allocator to allocate command buffers. Also it will set TEE_GEN_CAP_REG_MEM capability to tell userspace that it supports shared memory registration. Change-Id: I02b0a57e8c16d1905883b35511decb7426e76960 Signed-off-by: Volodymyr Babchuk Signed-off-by: Jens Wiklander (cherry picked from commit f58e236c9d665ad0af99c908de4a9b6f07e74dda) Signed-off-by: Victor Chong --- drivers/tee/optee/core.c | 69 +++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index e359dc5dc9c3..e9843c53fe31 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -28,6 +28,7 @@ #include #include "optee_private.h" #include "optee_smc.h" +#include "shm_pool.h" #define DRIVER_NAME "optee" @@ -219,6 +220,10 @@ static void optee_get_version(struct tee_device *teedev, .impl_caps = TEE_OPTEE_CAP_TZ, .gen_caps = TEE_GEN_CAP_GP, }; + struct optee *optee = tee_get_drvdata(teedev); + + if (optee->sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM) + v.gen_caps |= TEE_GEN_CAP_REG_MEM; *vers = v; } @@ -394,21 +399,22 @@ static bool optee_msg_exchange_capabilities(optee_invoke_fn *invoke_fn, } static struct tee_shm_pool * -optee_config_shm_memremap(optee_invoke_fn *invoke_fn, void **memremaped_shm) +optee_config_shm_memremap(optee_invoke_fn *invoke_fn, void **memremaped_shm, + u32 sec_caps) { union { struct arm_smccc_res smccc; struct optee_smc_get_shm_config_result result; } res; - struct tee_shm_pool *pool; unsigned long vaddr; phys_addr_t paddr; size_t size; phys_addr_t begin; phys_addr_t end; void *va; - struct tee_shm_pool_mem_info priv_info; - struct tee_shm_pool_mem_info dmabuf_info; + struct tee_shm_pool_mgr *priv_mgr; + struct tee_shm_pool_mgr *dmabuf_mgr; + void *rc; invoke_fn(OPTEE_SMC_GET_SHM_CONFIG, 0, 0, 0, 0, 0, 0, 0, &res.smccc); if (res.result.status != OPTEE_SMC_RETURN_OK) { @@ -438,22 +444,49 @@ optee_config_shm_memremap(optee_invoke_fn *invoke_fn, void **memremaped_shm) } vaddr = (unsigned long)va; - priv_info.vaddr = vaddr; - priv_info.paddr = paddr; - priv_info.size = OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE; - dmabuf_info.vaddr = vaddr + OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE; - dmabuf_info.paddr = paddr + OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE; - dmabuf_info.size = size - OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE; - - pool = tee_shm_pool_alloc_res_mem(&priv_info, &dmabuf_info); - if (IS_ERR(pool)) { - memunmap(va); - goto out; + /* + * If OP-TEE can work with unregistered SHM, we will use own pool + * for private shm + */ + if (sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM) { + rc = optee_shm_pool_alloc_pages(); + if (IS_ERR(rc)) + goto err_memunmap; + priv_mgr = rc; + } else { + const size_t sz = OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE; + + rc = tee_shm_pool_mgr_alloc_res_mem(vaddr, paddr, sz, + 3 /* 8 bytes aligned */); + if (IS_ERR(rc)) + goto err_memunmap; + priv_mgr = rc; + + vaddr += sz; + paddr += sz; + size -= sz; } + rc = tee_shm_pool_mgr_alloc_res_mem(vaddr, paddr, size, PAGE_SHIFT); + if (IS_ERR(rc)) + goto err_free_priv_mgr; + dmabuf_mgr = rc; + + rc = tee_shm_pool_alloc(priv_mgr, dmabuf_mgr); + if (IS_ERR(rc)) + goto err_free_dmabuf_mgr; + *memremaped_shm = va; -out: - return pool; + + return rc; + +err_free_dmabuf_mgr: + tee_shm_pool_mgr_destroy(dmabuf_mgr); +err_free_priv_mgr: + tee_shm_pool_mgr_destroy(priv_mgr); +err_memunmap: + memunmap(va); + return rc; } /* Simple wrapper functions to be able to use a function pointer */ @@ -531,7 +564,7 @@ static struct optee *optee_probe(struct device_node *np) if (!(sec_caps & OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM)) return ERR_PTR(-EINVAL); - pool = optee_config_shm_memremap(invoke_fn, &memremaped_shm); + pool = optee_config_shm_memremap(invoke_fn, &memremaped_shm, sec_caps); if (IS_ERR(pool)) return (void *)pool; -- GitLab From 4e64c305cb71d1d247db54958efe92dde2e9c365 Mon Sep 17 00:00:00 2001 From: Volodymyr Babchuk Date: Wed, 29 Nov 2017 14:48:37 +0200 Subject: [PATCH 1391/1398] BACKPORT: tee: use reference counting for tee_context We need to ensure that tee_context is present until last shared buffer will be freed. Change-Id: I0f16c080e0432d425496a7ceb11d13150daff00e Signed-off-by: Volodymyr Babchuk Signed-off-by: Jens Wiklander (cherry picked from commit 217e0250cccb9e54d457991446cd3fab413085e1) Signed-off-by: Victor Chong --- drivers/tee/tee_core.c | 40 ++++++++++++++++++++++++++++++--------- drivers/tee/tee_private.h | 3 +++ drivers/tee/tee_shm.c | 7 +++++++ include/linux/tee_drv.h | 7 +++++++ 4 files changed, 48 insertions(+), 9 deletions(-) diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index bb88298cea3a..6c4b200a4560 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -54,6 +54,7 @@ static int tee_open(struct inode *inode, struct file *filp) goto err; } + kref_init(&ctx->refcount); ctx->teedev = teedev; INIT_LIST_HEAD(&ctx->list_shm); filp->private_data = ctx; @@ -68,19 +69,40 @@ static int tee_open(struct inode *inode, struct file *filp) return rc; } -static int tee_release(struct inode *inode, struct file *filp) +void teedev_ctx_get(struct tee_context *ctx) { - struct tee_context *ctx = filp->private_data; - struct tee_device *teedev = ctx->teedev; - struct tee_shm *shm; + if (ctx->releasing) + return; + + kref_get(&ctx->refcount); +} +static void teedev_ctx_release(struct kref *ref) +{ + struct tee_context *ctx = container_of(ref, struct tee_context, + refcount); + ctx->releasing = true; ctx->teedev->desc->ops->release(ctx); - mutex_lock(&ctx->teedev->mutex); - list_for_each_entry(shm, &ctx->list_shm, link) - shm->ctx = NULL; - mutex_unlock(&ctx->teedev->mutex); kfree(ctx); - tee_device_put(teedev); +} + +void teedev_ctx_put(struct tee_context *ctx) +{ + if (ctx->releasing) + return; + + kref_put(&ctx->refcount, teedev_ctx_release); +} + +static void teedev_close_context(struct tee_context *ctx) +{ + tee_device_put(ctx->teedev); + teedev_ctx_put(ctx); +} + +static int tee_release(struct inode *inode, struct file *filp) +{ + teedev_close_context(filp->private_data); return 0; } diff --git a/drivers/tee/tee_private.h b/drivers/tee/tee_private.h index 2bc2b5ab1661..85d99d621603 100644 --- a/drivers/tee/tee_private.h +++ b/drivers/tee/tee_private.h @@ -73,4 +73,7 @@ int tee_shm_get_fd(struct tee_shm *shm); bool tee_device_get(struct tee_device *teedev); void tee_device_put(struct tee_device *teedev); +void teedev_ctx_get(struct tee_context *ctx); +void teedev_ctx_put(struct tee_context *ctx); + #endif /*TEE_PRIVATE_H*/ diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index ad06723c6dd2..e8d6de997838 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -53,6 +53,9 @@ static void tee_shm_release(struct tee_shm *shm) kfree(shm->pages); } + if (shm->ctx) + teedev_ctx_put(shm->ctx); + kfree(shm); tee_device_put(teedev); @@ -187,6 +190,7 @@ struct tee_shm *__tee_shm_alloc(struct tee_context *ctx, } if (ctx) { + teedev_ctx_get(ctx); mutex_lock(&teedev->mutex); list_add_tail(&shm->link, &ctx->list_shm); mutex_unlock(&teedev->mutex); @@ -253,6 +257,8 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, return ERR_PTR(-ENOTSUPP); } + teedev_ctx_get(ctx); + shm = kzalloc(sizeof(*shm), GFP_KERNEL); if (!shm) { ret = ERR_PTR(-ENOMEM); @@ -334,6 +340,7 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, kfree(shm->pages); } kfree(shm); + teedev_ctx_put(ctx); tee_device_put(teedev); return ret; } diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 80963f665db3..484e5058abbd 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -42,11 +43,17 @@ struct tee_shm_pool; * @teedev: pointer to this drivers struct tee_device * @list_shm: List of shared memory object owned by this context * @data: driver specific context data, managed by the driver + * @refcount: reference counter for this structure + * @releasing: flag that indicates if context is being released right now. + * It is needed to break circular dependency on context during + * shared memory release. */ struct tee_context { struct tee_device *teedev; struct list_head list_shm; void *data; + struct kref refcount; + bool releasing; }; struct tee_param_memref { -- GitLab From 70994c692bba5889bc8acaec341cfff71c8f9093 Mon Sep 17 00:00:00 2001 From: Volodymyr Babchuk Date: Wed, 29 Nov 2017 14:48:38 +0200 Subject: [PATCH 1392/1398] BACKPORT: tee: shm: inline tee_shm_get_id() Now, when struct tee_shm is defined in public header, we can inline small getter functions like this one. Change-Id: If5f8bd999aaf180dc6f431aa26ccabd0260a47e4 Signed-off-by: Volodymyr Babchuk Signed-off-by: Jens Wiklander (cherry picked from commit ef8e08d24ca84846ce639b835ebd2f15a943f42b) Signed-off-by: Victor Chong --- drivers/tee/tee_shm.c | 11 ----------- include/linux/tee_drv.h | 5 ++++- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index e8d6de997838..dd55e4735e3b 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -496,17 +496,6 @@ struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id) } EXPORT_SYMBOL_GPL(tee_shm_get_from_id); -/** - * tee_shm_get_id() - Get id of a shared memory object - * @shm: Shared memory handle - * @returns id - */ -int tee_shm_get_id(struct tee_shm *shm) -{ - return shm->id; -} -EXPORT_SYMBOL_GPL(tee_shm_get_id); - /** * tee_shm_put() - Decrease reference count on a shared memory handle * @shm: Shared memory handle diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 484e5058abbd..41bd4bded28c 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -438,7 +438,10 @@ static inline size_t tee_shm_get_page_offset(struct tee_shm *shm) * @shm: Shared memory handle * @returns id */ -int tee_shm_get_id(struct tee_shm *shm); +static inline int tee_shm_get_id(struct tee_shm *shm) +{ + return shm->id; +} /** * tee_shm_get_from_id() - Find shared memory object and increase reference -- GitLab From 451b1b7584d846b98431bed0e5932556d2c73ce0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 Dec 2017 21:18:25 +0100 Subject: [PATCH 1393/1398] BACKPORT: tee: optee: fix header dependencies The optee driver includes the header files in an unusual order, with asm/pgtable.h before the linux/*.h headers. For some reason this seems to trigger a build failure: drivers/tee/optee/call.c: In function 'optee_fill_pages_list': include/asm-generic/memory_model.h:64:14: error: implicit declaration of function 'page_to_section'; did you mean '__nr_to_section'? [-Werror=implicit-function-declaration] int __sec = page_to_section(__pg); \ drivers/tee/optee/call.c:494:15: note: in expansion of macro 'page_to_phys' optee_page = page_to_phys(*pages) + Let's just include linux/mm.h, which will then get the other header implicitly. Change-Id: I89668d35f8a9a8e27e77045e1f91934bb3d8cded Fixes: 3bb48ba5cd60 ("tee: optee: add page list manipulation functions") Signed-off-by: Arnd Bergmann (cherry picked from commit f681e08f671a8e68b085ba66190b8661deab4d85) Signed-off-by: Victor Chong --- drivers/tee/optee/call.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index e675e82ff095..0f38b3827457 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -11,11 +11,11 @@ * GNU General Public License for more details. * */ -#include #include #include #include #include +#include #include #include #include -- GitLab From ce42b2400a0cb07bc1e3b431351348661d225681 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Thu, 28 Dec 2017 10:08:00 +0100 Subject: [PATCH 1394/1398] BACKPORT: tee: add start argument to shm_register callback Adds a start argument to the shm_register callback to allow the callback to check memory type of the passed pages. Change-Id: Ic6062db0cfc7485adae4df949e87577f9d13e4d5 Signed-off-by: Jens Wiklander (cherry picked from commit 95ffe4ca43877eea176d7e95aa0d38bbdc3d2903) Signed-off-by: Victor Chong --- drivers/tee/optee/call.c | 6 ++++-- drivers/tee/optee/optee_private.h | 6 ++++-- drivers/tee/tee_shm.c | 2 +- include/linux/tee_drv.h | 3 ++- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index 0f38b3827457..411bfab58628 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -536,7 +536,8 @@ void optee_free_pages_list(void *list, size_t num_entries) } int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm, - struct page **pages, size_t num_pages) + struct page **pages, size_t num_pages, + unsigned long start) { struct tee_shm *shm_arg = NULL; struct optee_msg_arg *msg_arg; @@ -606,7 +607,8 @@ int optee_shm_unregister(struct tee_context *ctx, struct tee_shm *shm) } int optee_shm_register_supp(struct tee_context *ctx, struct tee_shm *shm, - struct page **pages, size_t num_pages) + struct page **pages, size_t num_pages, + unsigned long start) { /* * We don't want to register supplicant memory in OP-TEE. diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index a85a24725e31..35e79386c556 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -162,11 +162,13 @@ void optee_enable_shm_cache(struct optee *optee); void optee_disable_shm_cache(struct optee *optee); int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm, - struct page **pages, size_t num_pages); + struct page **pages, size_t num_pages, + unsigned long start); int optee_shm_unregister(struct tee_context *ctx, struct tee_shm *shm); int optee_shm_register_supp(struct tee_context *ctx, struct tee_shm *shm, - struct page **pages, size_t num_pages); + struct page **pages, size_t num_pages, + unsigned long start); int optee_shm_unregister_supp(struct tee_context *ctx, struct tee_shm *shm); int optee_from_msg_param(struct tee_param *params, size_t num_params, diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index dd55e4735e3b..56ed03dccca0 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -299,7 +299,7 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, } rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages, - shm->num_pages); + shm->num_pages, start); if (rc) { ret = ERR_PTR(rc); goto err; diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 41bd4bded28c..a2b3dfcee0b5 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -108,7 +108,8 @@ struct tee_driver_ops { int (*supp_send)(struct tee_context *ctx, u32 ret, u32 num_params, struct tee_param *param); int (*shm_register)(struct tee_context *ctx, struct tee_shm *shm, - struct page **pages, size_t num_pages); + struct page **pages, size_t num_pages, + unsigned long start); int (*shm_unregister)(struct tee_context *ctx, struct tee_shm *shm); }; -- GitLab From 61d36598e68072e8ba1ea87355ef63de31bd8e46 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Thu, 28 Dec 2017 11:14:05 +0100 Subject: [PATCH 1395/1398] BACKPORT: tee: optee: check type of registered shared memory Checks the memory type of the pages to be registered as shared memory. Only normal cached memory is allowed. Change-Id: I319740ef448e7d7ce31efd2b5456972da192fce3 Signed-off-by: Jens Wiklander (cherry picked from commit cdbcf83d29c1bf2aaa65260e74beaac1bcdc231c) Signed-off-by: Victor Chong --- drivers/tee/optee/call.c | 43 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index 411bfab58628..a5afbe6dee68 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -535,6 +535,41 @@ void optee_free_pages_list(void *list, size_t num_entries) free_pages_exact(list, get_pages_list_size(num_entries)); } +static bool is_normal_memory(pgprot_t p) +{ +#if defined(CONFIG_ARM) + return (pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEALLOC; +#elif defined(CONFIG_ARM64) + return (pgprot_val(p) & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL); +#else +#error "Unuspported architecture" +#endif +} + +static int __check_mem_type(struct vm_area_struct *vma, unsigned long end) +{ + while (vma && is_normal_memory(vma->vm_page_prot)) { + if (vma->vm_end >= end) + return 0; + vma = vma->vm_next; + } + + return -EINVAL; +} + +static int check_mem_type(unsigned long start, size_t num_pages) +{ + struct mm_struct *mm = current->mm; + int rc; + + down_read(&mm->mmap_sem); + rc = __check_mem_type(find_vma(mm, start), + start + num_pages * PAGE_SIZE); + up_read(&mm->mmap_sem); + + return rc; +} + int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm, struct page **pages, size_t num_pages, unsigned long start) @@ -543,11 +578,15 @@ int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm, struct optee_msg_arg *msg_arg; u64 *pages_list; phys_addr_t msg_parg; - int rc = 0; + int rc; if (!num_pages) return -EINVAL; + rc = check_mem_type(start, num_pages); + if (rc) + return rc; + pages_list = optee_allocate_pages_list(num_pages); if (!pages_list) return -ENOMEM; @@ -614,7 +653,7 @@ int optee_shm_register_supp(struct tee_context *ctx, struct tee_shm *shm, * We don't want to register supplicant memory in OP-TEE. * Instead information about it will be passed in RPC code. */ - return 0; + return check_mem_type(start, num_pages); } int optee_shm_unregister_supp(struct tee_context *ctx, struct tee_shm *shm) -- GitLab From 6e76ed56d81e69adf9c81e6f7e61d5e5778081de Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Dec 2017 17:01:22 +0000 Subject: [PATCH 1396/1398] BACKPORT: tee: shm: make function __tee_shm_alloc static The function __tee_shm_alloc is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: symbol '__tee_shm_alloc' was not declared. Should it be static? Change-Id: I6bbd0f64c3a334f63ec6a834f43c3bc5830d7335 Signed-off-by: Colin Ian King Signed-off-by: Jens Wiklander (cherry picked from commit 80ec6f5de60b6934f145b2f7e5369592bcab85f3) Signed-off-by: Victor Chong --- drivers/tee/tee_shm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 56ed03dccca0..fd3ff05eb8ca 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -112,9 +112,9 @@ static const struct dma_buf_ops tee_shm_dma_buf_ops = { .mmap = tee_shm_op_mmap, }; -struct tee_shm *__tee_shm_alloc(struct tee_context *ctx, - struct tee_device *teedev, - size_t size, u32 flags) +static struct tee_shm *__tee_shm_alloc(struct tee_context *ctx, + struct tee_device *teedev, + size_t size, u32 flags) { struct tee_shm_pool_mgr *poolm = NULL; struct tee_shm *shm; -- GitLab From 196cb35a3efaa6f669503fd80f1b30dede8869ba Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Dec 2017 17:51:50 +0000 Subject: [PATCH 1397/1398] BACKPORT: tee: shm: don't put_page on null shm->pages In the case that shm->pages fails to allocate, the current exit error path will try to put_page on a null shm->pages and cause a null pointer dereference when accessing shm->pages[n]. Fix this by only performing the put_page and kfree on shm->pages if it is not null. Detected by CoverityScan, CID#1463283 ("Dereference after null check") Change-Id: I9dbf1a729d784c1c13eabe0d70c48f1f12b2e5b3 Fixes: 033ddf12bcf5 ("tee: add register user memory") Signed-off-by: Colin Ian King Signed-off-by: Jens Wiklander (cherry picked from commit c94f31b526fe658c25dd2d07c90486a85437f01c) Signed-off-by: Victor Chong --- drivers/tee/tee_shm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index fd3ff05eb8ca..4f0ae2e98010 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -335,9 +335,11 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, idr_remove(&teedev->idr, shm->id); mutex_unlock(&teedev->mutex); } - for (n = 0; n < shm->num_pages; n++) - put_page(shm->pages[n]); - kfree(shm->pages); + if (shm->pages) { + for (n = 0; n < shm->num_pages; n++) + put_page(shm->pages[n]); + kfree(shm->pages); + } } kfree(shm); teedev_ctx_put(ctx); -- GitLab From 7595d550012d4a0b4b1fb40547495259a3674dc4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 6 Jan 2018 12:22:30 +0300 Subject: [PATCH 1398/1398] BACKPORT: tee: shm: Potential NULL dereference calling tee_shm_register() get_user_pages_fast() can return zero in certain error paths. We should handle that or else it means we accidentally return ERR_PTR(0) which is NULL instead of an error pointer. The callers are not expecting that and will crash with a NULL dereference. Change-Id: I5622affddeb1fc5e80ba6ccdbfdc77e129a2ca84 Fixes: 033ddf12bcf5 ("tee: add register user memory") Signed-off-by: Dan Carpenter Signed-off-by: Jens Wiklander (cherry picked from commit 2490cdf6435b1d3cac0dbf710cd752487c67c296) Signed-off-by: Victor Chong --- drivers/tee/tee_shm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 4f0ae2e98010..ed2d71c3337d 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -283,7 +283,7 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, if (rc > 0) shm->num_pages = rc; if (rc != num_pages) { - if (rc > 0) + if (rc >= 0) rc = -ENOMEM; ret = ERR_PTR(rc); goto err; -- GitLab