From 66ff25e225c39daba3923eccd8315e7c7f4bb0d5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Feb 2020 13:47:07 -0800 Subject: [PATCH 0001/1278] f2fs: fix build error on PAGE_KERNEL_RO This fixes build error reported by kbuild test robot. tree: https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-stable.git linux-4.14.y head: 2945d197414d9732c680ea0b709735d3b0d8ea57 commit: f6574fbf6578e47cfa3cace486ca852979a1e433 [868/885] f2fs: support data compression config: mips-allyesconfig (attached as .config) compiler: mips-linux-gcc (GCC) 7.5.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout f6574fbf6578e47cfa3cace486ca852979a1e433 # save the attached .config to linux build tree GCC_VERSION=7.5.0 make.cross ARCH=mips If you fix the issue, kindly add following tag Reported-by: kbuild test robot All errors (new ones prefixed by >>): fs/f2fs/compress.c: In function 'f2fs_compress_pages': >> fs/f2fs/compress.c:359:56: error: 'PAGE_KERNEL_RO' undeclared (first use in this function); did you mean +'PAGE_KERNEL_NC'? cc->rbuf = vmap(cc->rpages, cc->cluster_size, VM_MAP, PAGE_KERNEL_RO); ^~~~~~~~~~~~~~ PAGE_KERNEL_NC fs/f2fs/compress.c:359:56: note: each undeclared identifier is reported only once for each function it appears +in fs/f2fs/compress.c: In function 'f2fs_decompress_pages': fs/f2fs/compress.c:456:56: error: 'PAGE_KERNEL_RO' undeclared (first use in this function); did you mean +'PAGE_KERNEL_NC'? 
dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO); ^~~~~~~~~~~~~~ PAGE_KERNEL_NC vim +359 fs/f2fs/compress.c Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d8a64be90a50..17e10c4cd880 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -16,6 +16,11 @@ #include "node.h" #include +/* Some architectures don't have PAGE_KERNEL_RO */ +#ifndef PAGE_KERNEL_RO +#define PAGE_KERNEL_RO PAGE_KERNEL +#endif + struct f2fs_compress_ops { int (*init_compress_ctx)(struct compress_ctx *cc); void (*destroy_compress_ctx)(struct compress_ctx *cc); -- GitLab From d4fdc94421725fbc9f58ef8d426d667f1bb0ac7e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 4 Dec 2018 10:31:27 -0800 Subject: [PATCH 0002/1278] keys: Export lookup_user_key to external users Export lookup_user_key() symbol in order to allow nvdimm passphrase update to retrieve user injected keys. Signed-off-by: Dave Jiang Acked-by: David Howells Signed-off-by: Dan Williams --- include/linux/key.h | 3 +++ security/keys/internal.h | 2 -- security/keys/process_keys.c | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/key.h b/include/linux/key.h index 8a15cabe928d..afe4d6b90cad 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -345,6 +345,9 @@ static inline key_serial_t key_serial(const struct key *key) extern void key_set_timeout(struct key *, unsigned); +extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, + key_perm_t perm); + /* * The permissions required on a key that we're looking up. 
*/ diff --git a/security/keys/internal.h b/security/keys/internal.h index 503adbae7b0d..2bdc05bed4f0 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -158,8 +158,6 @@ extern struct key *request_key_and_link(struct key_type *type, extern bool lookup_user_key_possessed(const struct key *key, const struct key_match_data *match_data); -extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, - key_perm_t perm); #define KEY_LOOKUP_CREATE 0x01 #define KEY_LOOKUP_PARTIAL 0x02 #define KEY_LOOKUP_FOR_UNLINK 0x04 diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 740affd65ee9..0fac129b4fa2 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -755,6 +755,7 @@ key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags, put_cred(ctx.cred); goto try_again; } +EXPORT_SYMBOL(lookup_user_key); /* * Join the named keyring as the session keyring if possible else attempt to -- GitLab From 35290265028f070dc8e0a79e855f0384b99ff1cc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 19 Nov 2019 14:24:47 -0800 Subject: [PATCH 0003/1278] fscrypt: support passing a keyring key to FS_IOC_ADD_ENCRYPTION_KEY Extend the FS_IOC_ADD_ENCRYPTION_KEY ioctl to allow the raw key to be specified by a Linux keyring key, rather than specified directly. This is useful because fscrypt keys belong to a particular filesystem instance, so they are destroyed when that filesystem is unmounted. Usually this is desired. But in some cases, userspace may need to unmount and re-mount the filesystem while keeping the keys, e.g. during a system update. This requires keeping the keys somewhere else too. The keys could be kept in memory in a userspace daemon. But depending on the security architecture and assumptions, it can be preferable to keep them only in kernel memory, where they are unreadable by userspace. 
We also can't solve this by going back to the original fscrypt API (where for each file, the master key was looked up in the process's keyring hierarchy) because that caused lots of problems of its own. Therefore, add the ability for FS_IOC_ADD_ENCRYPTION_KEY to accept a Linux keyring key. This solves the problem by allowing userspace to (if needed) save the keys securely in a Linux keyring for re-provisioning, while still using the new fscrypt key management ioctls. This is analogous to how dm-crypt accepts a Linux keyring key, but the key is then stored internally in the dm-crypt data structures rather than being looked up again each time the dm-crypt device is accessed. Use a custom key type "fscrypt-provisioning" rather than one of the existing key types such as "logon". This is strongly desired because it enforces that these keys are only usable for a particular purpose: for fscrypt as input to a particular KDF. Otherwise, the keys could also be passed to any kernel API that accepts a "logon" key with any service prefix, e.g. dm-crypt, UBIFS, or (recently proposed) AF_ALG. This would risk leaking information about the raw key despite it ostensibly being unreadable. Of course, this mistake has already been made for multiple kernel APIs; but since this is a new API, let's do it right. This patch has been tested using an xfstest which I wrote to test it. 
Link: https://lore.kernel.org/r/20191119222447.226853-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 35 ++++++- fs/crypto/keyring.c | 132 ++++++++++++++++++++++++-- include/uapi/linux/fscrypt.h | 13 ++- 3 files changed, 168 insertions(+), 12 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 471a511c7508..4ed9d58ea0ab 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -638,7 +638,8 @@ follows:: struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; - __u32 __reserved[9]; + __u32 key_id; + __u32 __reserved[8]; __u8 raw[]; }; @@ -655,6 +656,12 @@ follows:: } u; }; + struct fscrypt_provisioning_key_payload { + __u32 type; + __u32 __reserved; + __u8 raw[]; + }; + :c:type:`struct fscrypt_add_key_arg` must be zeroed, then initialized as follows: @@ -677,9 +684,26 @@ as follows: ``Documentation/security/keys/core.rst``). - ``raw_size`` must be the size of the ``raw`` key provided, in bytes. + Alternatively, if ``key_id`` is nonzero, this field must be 0, since + in that case the size is implied by the specified Linux keyring key. + +- ``key_id`` is 0 if the raw key is given directly in the ``raw`` + field. Otherwise ``key_id`` is the ID of a Linux keyring key of + type "fscrypt-provisioning" whose payload is a :c:type:`struct + fscrypt_provisioning_key_payload` whose ``raw`` field contains the + raw key and whose ``type`` field matches ``key_spec.type``. Since + ``raw`` is variable-length, the total size of this key's payload + must be ``sizeof(struct fscrypt_provisioning_key_payload)`` plus the + raw key size. The process must have Search permission on this key. + + Most users should leave this 0 and specify the raw key directly. 
+ The support for specifying a Linux keyring key is intended mainly to + allow re-adding keys after a filesystem is unmounted and re-mounted, + without having to store the raw keys in userspace memory. - ``raw`` is a variable-length field which must contain the actual - key, ``raw_size`` bytes long. + key, ``raw_size`` bytes long. Alternatively, if ``key_id`` is + nonzero, then this field is unused. For v2 policy keys, the kernel keeps track of which user (identified by effective user ID) added the key, and only allows the key to be @@ -701,11 +725,16 @@ FS_IOC_ADD_ENCRYPTION_KEY can fail with the following errors: - ``EACCES``: FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR was specified, but the caller does not have the CAP_SYS_ADMIN capability in the initial - user namespace + user namespace; or the raw key was specified by Linux key ID but the + process lacks Search permission on the key. - ``EDQUOT``: the key quota for this user would be exceeded by adding the key - ``EINVAL``: invalid key size or key specifier type, or reserved bits were set +- ``EKEYREJECTED``: the raw key was specified by Linux key ID, but the + key has the wrong type +- ``ENOKEY``: the raw key was specified by Linux key ID, but no key + exists with that ID - ``ENOTTY``: this type of filesystem does not implement encryption - ``EOPNOTSUPP``: the kernel was not configured with encryption support for this filesystem, or the filesystem superblock has not diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index ecbebdc1b02a..395aee2e6e4c 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -465,6 +465,109 @@ static int add_master_key(struct super_block *sb, return err; } +static int fscrypt_provisioning_key_preparse(struct key_preparsed_payload *prep) +{ + const struct fscrypt_provisioning_key_payload *payload = prep->data; + + if (prep->datalen < sizeof(*payload) + FSCRYPT_MIN_KEY_SIZE || + prep->datalen > sizeof(*payload) + FSCRYPT_MAX_KEY_SIZE) + return -EINVAL; + + if (payload->type != 
FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR && + payload->type != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) + return -EINVAL; + + if (payload->__reserved) + return -EINVAL; + + prep->payload.data[0] = kmemdup(payload, prep->datalen, GFP_KERNEL); + if (!prep->payload.data[0]) + return -ENOMEM; + + prep->quotalen = prep->datalen; + return 0; +} + +static void fscrypt_provisioning_key_free_preparse( + struct key_preparsed_payload *prep) +{ + kzfree(prep->payload.data[0]); +} + +static void fscrypt_provisioning_key_describe(const struct key *key, + struct seq_file *m) +{ + seq_puts(m, key->description); + if (key_is_positive(key)) { + const struct fscrypt_provisioning_key_payload *payload = + key->payload.data[0]; + + seq_printf(m, ": %u [%u]", key->datalen, payload->type); + } +} + +static void fscrypt_provisioning_key_destroy(struct key *key) +{ + kzfree(key->payload.data[0]); +} + +static struct key_type key_type_fscrypt_provisioning = { + .name = "fscrypt-provisioning", + .preparse = fscrypt_provisioning_key_preparse, + .free_preparse = fscrypt_provisioning_key_free_preparse, + .instantiate = generic_key_instantiate, + .describe = fscrypt_provisioning_key_describe, + .destroy = fscrypt_provisioning_key_destroy, +}; + +/* + * Retrieve the raw key from the Linux keyring key specified by 'key_id', and + * store it into 'secret'. + * + * The key must be of type "fscrypt-provisioning" and must have the field + * fscrypt_provisioning_key_payload::type set to 'type', indicating that it's + * only usable with fscrypt with the particular KDF version identified by + * 'type'. We don't use the "logon" key type because there's no way to + * completely restrict the use of such keys; they can be used by any kernel API + * that accepts "logon" keys and doesn't require a specific service prefix. + * + * The ability to specify the key via Linux keyring key is intended for cases + * where userspace needs to re-add keys after the filesystem is unmounted and + * re-mounted. 
Most users should just provide the raw key directly instead. + */ +static int get_keyring_key(u32 key_id, u32 type, + struct fscrypt_master_key_secret *secret) +{ + key_ref_t ref; + struct key *key; + const struct fscrypt_provisioning_key_payload *payload; + int err; + + ref = lookup_user_key(key_id, 0, KEY_NEED_SEARCH); + if (IS_ERR(ref)) + return PTR_ERR(ref); + key = key_ref_to_ptr(ref); + + if (key->type != &key_type_fscrypt_provisioning) + goto bad_key; + payload = key->payload.data[0]; + + /* Don't allow fscrypt v1 keys to be used as v2 keys and vice versa. */ + if (payload->type != type) + goto bad_key; + + secret->size = key->datalen - sizeof(*payload); + memcpy(secret->raw, payload->raw, secret->size); + err = 0; + goto out_put; + +bad_key: + err = -EKEYREJECTED; +out_put: + key_ref_put(ref); + return err; +} + /* * Add a master encryption key to the filesystem, causing all files which were * encrypted with it to appear "unlocked" (decrypted) when accessed. @@ -503,18 +606,25 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg) if (!valid_key_spec(&arg.key_spec)) return -EINVAL; - if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE || - arg.raw_size > FSCRYPT_MAX_KEY_SIZE) - return -EINVAL; - if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved))) return -EINVAL; memset(&secret, 0, sizeof(secret)); - secret.size = arg.raw_size; - err = -EFAULT; - if (copy_from_user(secret.raw, uarg->raw, secret.size)) - goto out_wipe_secret; + if (arg.key_id) { + if (arg.raw_size != 0) + return -EINVAL; + err = get_keyring_key(arg.key_id, arg.key_spec.type, &secret); + if (err) + goto out_wipe_secret; + } else { + if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE || + arg.raw_size > FSCRYPT_MAX_KEY_SIZE) + return -EINVAL; + secret.size = arg.raw_size; + err = -EFAULT; + if (copy_from_user(secret.raw, uarg->raw, secret.size)) + goto out_wipe_secret; + } switch (arg.key_spec.type) { case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR: @@ -978,8 +1088,14 @@ int __init 
fscrypt_init_keyring(void) if (err) goto err_unregister_fscrypt; + err = register_key_type(&key_type_fscrypt_provisioning); + if (err) + goto err_unregister_fscrypt_user; + return 0; +err_unregister_fscrypt_user: + unregister_key_type(&key_type_fscrypt_user); err_unregister_fscrypt: unregister_key_type(&key_type_fscrypt); return err; diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index 1beb174ad950..d5112a24e8b9 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -109,11 +109,22 @@ struct fscrypt_key_specifier { } u; }; +/* + * Payload of Linux keyring key of type "fscrypt-provisioning", referenced by + * fscrypt_add_key_arg::key_id as an alternative to fscrypt_add_key_arg::raw. + */ +struct fscrypt_provisioning_key_payload { + __u32 type; + __u32 __reserved; + __u8 raw[]; +}; + /* Struct passed to FS_IOC_ADD_ENCRYPTION_KEY */ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; - __u32 __reserved[9]; + __u32 key_id; + __u32 __reserved[8]; __u8 raw[]; }; -- GitLab From 9c1b3af1a6869c1207e6941b5707ef2f3ff02d99 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:38:10 -0800 Subject: [PATCH 0004/1278] fscrypt: use crypto_skcipher_driver_name() Crypto API users shouldn't really be accessing struct skcipher_alg directly. <crypto/skcipher.h> already has a function crypto_skcipher_driver_name(), so use that instead. No change in behavior. Link: https://lore.kernel.org/r/20191209203810.225302-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keysetup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 2f926d3e6b5d..0db5130b70de 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -89,8 +89,7 @@ struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode, * first time a mode is used. 
*/ pr_info("fscrypt: %s using implementation \"%s\"\n", - mode->friendly_name, - crypto_skcipher_alg(tfm)->base.cra_driver_name); + mode->friendly_name, crypto_skcipher_driver_name(tfm)); } crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize); -- GitLab From a7b6398dee530e0efe68a83ef5c16d15b7b6c646 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:39:18 -0800 Subject: [PATCH 0005/1278] fscrypt: verify that the crypto_skcipher has the correct ivsize As a sanity check, verify that the allocated crypto_skcipher actually has the ivsize that fscrypt is assuming it has. This will always be the case unless there's a bug. But if there ever is such a bug (e.g. like there was in earlier versions of the ESSIV conversion patch [1]) it's preferable for it to be immediately obvious, and not rely on the ciphertext verification tests failing due to uninitialized IV bytes. [1] https://lkml.kernel.org/linux-crypto/20190702215517.GA69157@gmail.com/ Link: https://lore.kernel.org/r/20191209203918.225691-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keysetup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 0db5130b70de..9ced26a4a887 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -91,6 +91,10 @@ struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode, pr_info("fscrypt: %s using implementation \"%s\"\n", mode->friendly_name, crypto_skcipher_driver_name(tfm)); } + if (WARN_ON(crypto_skcipher_ivsize(tfm) != mode->ivsize)) { + err = -EINVAL; + goto err_free_tfm; + } crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize); if (err) -- GitLab From 394222909cb26d8b087904e32406c281d721f9f4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:40:54 -0800 Subject: [PATCH 0006/1278] fscrypt: constify struct fscrypt_hkdf parameter to 
fscrypt_hkdf_expand() Constify the struct fscrypt_hkdf parameter to fscrypt_hkdf_expand(). This makes it clearer that struct fscrypt_hkdf contains the key only, not any per-request state. Link: https://lore.kernel.org/r/20191209204054.227736-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 2 +- fs/crypto/hkdf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 130b50e5a011..23cef4d3793a 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -287,7 +287,7 @@ extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, #define HKDF_CONTEXT_DIRECT_KEY 3 #define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4 -extern int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context, +extern int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, u8 *okm, unsigned int okmlen); diff --git a/fs/crypto/hkdf.c b/fs/crypto/hkdf.c index 2c026009c6e7..fd7f67628561 100644 --- a/fs/crypto/hkdf.c +++ b/fs/crypto/hkdf.c @@ -113,7 +113,7 @@ int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, * adds to its application-specific info strings to guarantee that it doesn't * accidentally repeat an info string when using HKDF for different purposes.) 
*/ -int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context, +int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, u8 *okm, unsigned int okmlen) { -- GitLab From 39a0accbdfd3897b7b5f6a38eac2e0f2f6f06131 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 15 Dec 2019 13:39:47 -0800 Subject: [PATCH 0007/1278] fscrypt: constify inode parameter to filename encryption functions Constify the struct inode parameter to fscrypt_fname_disk_to_usr() and the other filename encryption functions so that users don't have to pass in a non-const inode when they are dealing with a const one, as in [1]. [1] https://lkml.kernel.org/linux-ext4/20191203051049.44573-6-drosen@google.com/ Cc: Daniel Rosenberg Link: https://lore.kernel.org/r/20191215213947.9521-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/fname.c | 20 ++++++++++---------- fs/crypto/fscrypt_private.h | 2 +- include/linux/fscrypt.h | 8 +++++--- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 3da3707c10e3..c87b71aa2353 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -34,12 +34,12 @@ static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) * * Return: 0 on success, -errno on failure */ -int fname_encrypt(struct inode *inode, const struct qstr *iname, +int fname_encrypt(const struct inode *inode, const struct qstr *iname, u8 *out, unsigned int olen) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); - struct fscrypt_info *ci = inode->i_crypt_info; + const struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; union fscrypt_iv iv; struct scatterlist sg; @@ -85,14 +85,14 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname, * * Return: 0 on success, -errno on failure */ -static int fname_decrypt(struct inode *inode, - const struct fscrypt_str *iname, - struct fscrypt_str *oname) +static int fname_decrypt(const 
struct inode *inode, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; - struct fscrypt_info *ci = inode->i_crypt_info; + const struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; union fscrypt_iv iv; int res; @@ -247,10 +247,10 @@ EXPORT_SYMBOL(fscrypt_fname_free_buffer); * * Return: 0 on success, -errno on failure */ -int fscrypt_fname_disk_to_usr(struct inode *inode, - u32 hash, u32 minor_hash, - const struct fscrypt_str *iname, - struct fscrypt_str *oname) +int fscrypt_fname_disk_to_usr(const struct inode *inode, + u32 hash, u32 minor_hash, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) { const struct qstr qname = FSTR_TO_QSTR(iname); struct fscrypt_digested_name digested_name; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 23cef4d3793a..5792ecbd4d24 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -260,7 +260,7 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, const struct fscrypt_info *ci); /* fname.c */ -extern int fname_encrypt(struct inode *inode, const struct qstr *iname, +extern int fname_encrypt(const struct inode *inode, const struct qstr *iname, u8 *out, unsigned int olen); extern bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 7f302250e52e..759e75364da3 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -153,8 +153,10 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, struct fscrypt_str *); extern void fscrypt_fname_free_buffer(struct fscrypt_str *); -extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, - const struct fscrypt_str *, struct fscrypt_str *); +extern int 
fscrypt_fname_disk_to_usr(const struct inode *inode, + u32 hash, u32 minor_hash, + const struct fscrypt_str *iname, + struct fscrypt_str *oname); #define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 @@ -433,7 +435,7 @@ static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) return; } -static inline int fscrypt_fname_disk_to_usr(struct inode *inode, +static inline int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname) -- GitLab From 387197777f2154d0299599f582c45416c661a0ad Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:43:59 -0800 Subject: [PATCH 0008/1278] fscrypt: move fscrypt_d_revalidate() to fname.c fscrypt_d_revalidate() and fscrypt_d_ops really belong in fname.c, since they're specific to filenames encryption. crypto.c is for contents encryption and general fs/crypto/ initialization and utilities. Link: https://lore.kernel.org/r/20191209204359.228544-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/crypto.c | 50 ------------------------------------- fs/crypto/fname.c | 49 ++++++++++++++++++++++++++++++++++++ fs/crypto/fscrypt_private.h | 2 +- 3 files changed, 50 insertions(+), 51 deletions(-) diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 6e6f39ea18a7..8a783d74137b 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -24,8 +24,6 @@ #include #include #include -#include -#include #include #include "fscrypt_private.h" @@ -285,54 +283,6 @@ int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, } EXPORT_SYMBOL(fscrypt_decrypt_block_inplace); -/* - * Validate dentries in encrypted directories to make sure we aren't potentially - * caching stale dentries after a key has been added. 
- */ -static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) -{ - struct dentry *dir; - int err; - int valid; - - /* - * Plaintext names are always valid, since fscrypt doesn't support - * reverting to ciphertext names without evicting the directory's inode - * -- which implies eviction of the dentries in the directory. - */ - if (!(dentry->d_flags & DCACHE_ENCRYPTED_NAME)) - return 1; - - /* - * Ciphertext name; valid if the directory's key is still unavailable. - * - * Although fscrypt forbids rename() on ciphertext names, we still must - * use dget_parent() here rather than use ->d_parent directly. That's - * because a corrupted fs image may contain directory hard links, which - * the VFS handles by moving the directory's dentry tree in the dcache - * each time ->lookup() finds the directory and it already has a dentry - * elsewhere. Thus ->d_parent can be changing, and we must safely grab - * a reference to some ->d_parent to prevent it from being freed. - */ - - if (flags & LOOKUP_RCU) - return -ECHILD; - - dir = dget_parent(dentry); - err = fscrypt_get_encryption_info(d_inode(dir)); - valid = !fscrypt_has_encryption_key(d_inode(dir)); - dput(dir); - - if (err < 0) - return err; - - return valid; -} - -const struct dentry_operations fscrypt_d_ops = { - .d_revalidate = fscrypt_d_revalidate, -}; - /** * fscrypt_initialize() - allocate major buffers for fs encryption. * @cop_flags: fscrypt operations flags diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index c87b71aa2353..3fd27e14ebdd 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -11,6 +11,7 @@ * This has not yet undergone a rigorous security audit. 
*/ +#include #include #include #include "fscrypt_private.h" @@ -400,3 +401,51 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, return ret; } EXPORT_SYMBOL(fscrypt_setup_filename); + +/* + * Validate dentries in encrypted directories to make sure we aren't potentially + * caching stale dentries after a key has been added. + */ +static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct dentry *dir; + int err; + int valid; + + /* + * Plaintext names are always valid, since fscrypt doesn't support + * reverting to ciphertext names without evicting the directory's inode + * -- which implies eviction of the dentries in the directory. + */ + if (!(dentry->d_flags & DCACHE_ENCRYPTED_NAME)) + return 1; + + /* + * Ciphertext name; valid if the directory's key is still unavailable. + * + * Although fscrypt forbids rename() on ciphertext names, we still must + * use dget_parent() here rather than use ->d_parent directly. That's + * because a corrupted fs image may contain directory hard links, which + * the VFS handles by moving the directory's dentry tree in the dcache + * each time ->lookup() finds the directory and it already has a dentry + * elsewhere. Thus ->d_parent can be changing, and we must safely grab + * a reference to some ->d_parent to prevent it from being freed. 
+ */ + + if (flags & LOOKUP_RCU) + return -ECHILD; + + dir = dget_parent(dentry); + err = fscrypt_get_encryption_info(d_inode(dir)); + valid = !fscrypt_has_encryption_key(d_inode(dir)); + dput(dir); + + if (err < 0) + return err; + + return valid; +} + +const struct dentry_operations fscrypt_d_ops = { + .d_revalidate = fscrypt_d_revalidate, +}; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 5792ecbd4d24..37c418d23962 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -233,7 +233,6 @@ extern int fscrypt_crypt_block(const struct inode *inode, unsigned int len, unsigned int offs, gfp_t gfp_flags); extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags); -extern const struct dentry_operations fscrypt_d_ops; extern void __printf(3, 4) __cold fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...); @@ -265,6 +264,7 @@ extern int fname_encrypt(const struct inode *inode, const struct qstr *iname, extern bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, u32 *encrypted_len_ret); +extern const struct dentry_operations fscrypt_d_ops; /* hkdf.c */ -- GitLab From bfa4ca6ee85a82274f5b0ed4782e95fa93f6a315 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:50:21 -0800 Subject: [PATCH 0009/1278] fscrypt: introduce fscrypt_needs_contents_encryption() Add a function fscrypt_needs_contents_encryption() which takes an inode and returns true if it's an encrypted regular file and the kernel was built with fscrypt support. This will allow replacing duplicated checks of IS_ENCRYPTED() && S_ISREG() on the I/O paths in ext4 and f2fs, while also optimizing out unneeded code when !CONFIG_FS_ENCRYPTION. 
Link: https://lore.kernel.org/r/20191209205021.231767-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 759e75364da3..dd67e7aa148f 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -72,6 +72,21 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return READ_ONCE(inode->i_crypt_info) != NULL; } +/** + * fscrypt_needs_contents_encryption() - check whether an inode needs + * contents encryption + * + * Return: %true iff the inode is an encrypted regular file and the kernel was + * built with fscrypt support. + * + * If you need to know whether the encrypt bit is set even when the kernel was + * built without fscrypt support, you must use IS_ENCRYPTED() directly instead. + */ +static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) +{ + return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); +} + static inline bool fscrypt_dummy_context_enabled(struct inode *inode) { return inode->i_sb->s_cop->dummy_context && @@ -264,6 +279,11 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return false; } +static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) +{ + return false; +} + static inline bool fscrypt_dummy_context_enabled(struct inode *inode) { return false; -- GitLab From 2454b5bb0df7a874c1bc849c33eab54ca05bfa48 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 13:18:26 -0800 Subject: [PATCH 0010/1278] fscrypt: split up fscrypt_supported_policy() by policy version Make fscrypt_supported_policy() call new functions fscrypt_supported_v1_policy() and fscrypt_supported_v2_policy(), to reduce the indentation level and make the code easier to read. Also adjust the function comment to mention that whether the encryption policy is supported can also depend on the inode. No change in behavior. 
Link: https://lore.kernel.org/r/20191209211829.239800-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/policy.c | 116 +++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 57 deletions(-) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 96f528071bed..fdb13ce69cd2 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -63,13 +63,65 @@ static bool supported_iv_ino_lblk_64_policy( return true; } +static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy, + const struct inode *inode) +{ + if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, + policy->filenames_encryption_mode)) { + fscrypt_warn(inode, + "Unsupported encryption modes (contents %d, filenames %d)", + policy->contents_encryption_mode, + policy->filenames_encryption_mode); + return false; + } + + if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK | + FSCRYPT_POLICY_FLAG_DIRECT_KEY)) { + fscrypt_warn(inode, "Unsupported encryption flags (0x%02x)", + policy->flags); + return false; + } + + return true; +} + +static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy, + const struct inode *inode) +{ + if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, + policy->filenames_encryption_mode)) { + fscrypt_warn(inode, + "Unsupported encryption modes (contents %d, filenames %d)", + policy->contents_encryption_mode, + policy->filenames_encryption_mode); + return false; + } + + if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) { + fscrypt_warn(inode, "Unsupported encryption flags (0x%02x)", + policy->flags); + return false; + } + + if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) && + !supported_iv_ino_lblk_64_policy(policy, inode)) + return false; + + if (memchr_inv(policy->__reserved, 0, sizeof(policy->__reserved))) { + fscrypt_warn(inode, "Reserved bits set in encryption policy"); + return false; + } + + return true; +} + /** * fscrypt_supported_policy - check whether an encryption policy is 
supported * * Given an encryption policy, check whether all its encryption modes and other - * settings are supported by this kernel. (But we don't currently don't check - * for crypto API support here, so attempting to use an algorithm not configured - * into the crypto API will still fail later.) + * settings are supported by this kernel on the given inode. (But we don't + * currently don't check for crypto API support here, so attempting to use an + * algorithm not configured into the crypto API will still fail later.) * * Return: %true if supported, else %false */ @@ -77,60 +129,10 @@ bool fscrypt_supported_policy(const union fscrypt_policy *policy_u, const struct inode *inode) { switch (policy_u->version) { - case FSCRYPT_POLICY_V1: { - const struct fscrypt_policy_v1 *policy = &policy_u->v1; - - if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, - policy->filenames_encryption_mode)) { - fscrypt_warn(inode, - "Unsupported encryption modes (contents %d, filenames %d)", - policy->contents_encryption_mode, - policy->filenames_encryption_mode); - return false; - } - - if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK | - FSCRYPT_POLICY_FLAG_DIRECT_KEY)) { - fscrypt_warn(inode, - "Unsupported encryption flags (0x%02x)", - policy->flags); - return false; - } - - return true; - } - case FSCRYPT_POLICY_V2: { - const struct fscrypt_policy_v2 *policy = &policy_u->v2; - - if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, - policy->filenames_encryption_mode)) { - fscrypt_warn(inode, - "Unsupported encryption modes (contents %d, filenames %d)", - policy->contents_encryption_mode, - policy->filenames_encryption_mode); - return false; - } - - if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) { - fscrypt_warn(inode, - "Unsupported encryption flags (0x%02x)", - policy->flags); - return false; - } - - if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) && - !supported_iv_ino_lblk_64_policy(policy, inode)) - return false; - - if 
(memchr_inv(policy->__reserved, 0, - sizeof(policy->__reserved))) { - fscrypt_warn(inode, - "Reserved bits set in encryption policy"); - return false; - } - - return true; - } + case FSCRYPT_POLICY_V1: + return fscrypt_supported_v1_policy(&policy_u->v1, inode); + case FSCRYPT_POLICY_V2: + return fscrypt_supported_v2_policy(&policy_u->v2, inode); } return false; } -- GitLab From add6ac48dd8504a511a80195b96561454e1df784 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 13:18:27 -0800 Subject: [PATCH 0011/1278] fscrypt: check for appropriate use of DIRECT_KEY flag earlier FSCRYPT_POLICY_FLAG_DIRECT_KEY is currently only allowed with Adiantum encryption. But FS_IOC_SET_ENCRYPTION_POLICY allowed it in combination with other encryption modes, and an error wasn't reported until later when the encrypted directory was actually used. Fix it to report the error earlier by validating the correct use of the DIRECT_KEY flag in fscrypt_supported_policy(), similar to how we validate the IV_INO_LBLK_64 flag. 
Link: https://lore.kernel.org/r/20191209211829.239800-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 6 +----- fs/crypto/keysetup.c | 14 ++++---------- fs/crypto/keysetup_v1.c | 15 --------------- fs/crypto/policy.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 37c418d23962..41b061cdf06e 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -448,11 +448,7 @@ struct fscrypt_mode { int logged_impl_name; }; -static inline bool -fscrypt_mode_supports_direct_key(const struct fscrypt_mode *mode) -{ - return mode->ivsize >= offsetofend(union fscrypt_iv, nonce); -} +extern struct fscrypt_mode fscrypt_modes[]; extern struct crypto_skcipher * fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 9ced26a4a887..573fdc0f480b 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -13,7 +13,7 @@ #include "fscrypt_private.h" -static struct fscrypt_mode available_modes[] = { +struct fscrypt_mode fscrypt_modes[] = { [FSCRYPT_MODE_AES_256_XTS] = { .friendly_name = "AES-256-XTS", .cipher_str = "xts(aes)", @@ -51,10 +51,10 @@ select_encryption_mode(const union fscrypt_policy *policy, const struct inode *inode) { if (S_ISREG(inode->i_mode)) - return &available_modes[fscrypt_policy_contents_mode(policy)]; + return &fscrypt_modes[fscrypt_policy_contents_mode(policy)]; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - return &available_modes[fscrypt_policy_fnames_mode(policy)]; + return &fscrypt_modes[fscrypt_policy_fnames_mode(policy)]; WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n", inode->i_ino, (inode->i_mode & S_IFMT)); @@ -129,7 +129,7 @@ static int setup_per_mode_key(struct fscrypt_info *ci, const struct inode *inode = ci->ci_inode; const 
struct super_block *sb = inode->i_sb; struct fscrypt_mode *mode = ci->ci_mode; - u8 mode_num = mode - available_modes; + const u8 mode_num = mode - fscrypt_modes; struct crypto_skcipher *tfm, *prev_tfm; u8 mode_key[FSCRYPT_MAX_KEY_SIZE]; u8 hkdf_info[sizeof(mode_num) + sizeof(sb->s_uuid)]; @@ -189,12 +189,6 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, * This ensures that the master key is consistently used only * for HKDF, avoiding key reuse issues. */ - if (!fscrypt_mode_supports_direct_key(ci->ci_mode)) { - fscrypt_warn(ci->ci_inode, - "Direct key flag not allowed with %s", - ci->ci_mode->friendly_name); - return -EINVAL; - } return setup_per_mode_key(ci, mk, mk->mk_direct_tfms, HKDF_CONTEXT_DIRECT_KEY, false); } else if (ci->ci_policy.v2.flags & diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 454fb03fc30e..6b8815d7448d 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -253,23 +253,8 @@ fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key) static int setup_v1_file_key_direct(struct fscrypt_info *ci, const u8 *raw_master_key) { - const struct fscrypt_mode *mode = ci->ci_mode; struct fscrypt_direct_key *dk; - if (!fscrypt_mode_supports_direct_key(mode)) { - fscrypt_warn(ci->ci_inode, - "Direct key mode not allowed with %s", - mode->friendly_name); - return -EINVAL; - } - - if (ci->ci_policy.v1.contents_encryption_mode != - ci->ci_policy.v1.filenames_encryption_mode) { - fscrypt_warn(ci->ci_inode, - "Direct key mode not allowed with different contents and filenames modes"); - return -EINVAL; - } - dk = fscrypt_get_direct_key(ci, raw_master_key); if (IS_ERR(dk)) return PTR_ERR(dk); diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index fdb13ce69cd2..e785b00f19b3 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -29,6 +29,26 @@ bool fscrypt_policies_equal(const union fscrypt_policy *policy1, return !memcmp(policy1, policy2, fscrypt_policy_size(policy1)); } +static bool 
supported_direct_key_modes(const struct inode *inode, + u32 contents_mode, u32 filenames_mode) +{ + const struct fscrypt_mode *mode; + + if (contents_mode != filenames_mode) { + fscrypt_warn(inode, + "Direct key flag not allowed with different contents and filenames modes"); + return false; + } + mode = &fscrypt_modes[contents_mode]; + + if (mode->ivsize < offsetofend(union fscrypt_iv, nonce)) { + fscrypt_warn(inode, "Direct key flag not allowed with %s", + mode->friendly_name); + return false; + } + return true; +} + static bool supported_iv_ino_lblk_64_policy( const struct fscrypt_policy_v2 *policy, const struct inode *inode) @@ -82,6 +102,11 @@ static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy, return false; } + if ((policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) && + !supported_direct_key_modes(inode, policy->contents_encryption_mode, + policy->filenames_encryption_mode)) + return false; + return true; } @@ -103,6 +128,11 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy, return false; } + if ((policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) && + !supported_direct_key_modes(inode, policy->contents_encryption_mode, + policy->filenames_encryption_mode)) + return false; + if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) && !supported_iv_ino_lblk_64_policy(policy, inode)) return false; -- GitLab From 19b132bac6d38d2cc164730fe57bf6fe8a3d92ff Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 13:18:28 -0800 Subject: [PATCH 0012/1278] fscrypt: move fscrypt_valid_enc_modes() to policy.c fscrypt_valid_enc_modes() is only used by policy.c, so move it to there. Also adjust the order of the checks to be more natural, matching the numerical order of the constants and also keeping AES-256 (the recommended default) first in the list. No change in behavior. 
Link: https://lore.kernel.org/r/20191209211829.239800-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 18 ------------------ fs/crypto/policy.c | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 41b061cdf06e..71f496fe7173 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -206,24 +206,6 @@ typedef enum { FS_ENCRYPT, } fscrypt_direction_t; -static inline bool fscrypt_valid_enc_modes(u32 contents_mode, - u32 filenames_mode) -{ - if (contents_mode == FSCRYPT_MODE_AES_128_CBC && - filenames_mode == FSCRYPT_MODE_AES_128_CTS) - return true; - - if (contents_mode == FSCRYPT_MODE_AES_256_XTS && - filenames_mode == FSCRYPT_MODE_AES_256_CTS) - return true; - - if (contents_mode == FSCRYPT_MODE_ADIANTUM && - filenames_mode == FSCRYPT_MODE_ADIANTUM) - return true; - - return false; -} - /* crypto.c */ extern struct kmem_cache *fscrypt_info_cachep; extern int fscrypt_initialize(unsigned int cop_flags); diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index e785b00f19b3..f1cff83c151a 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -29,6 +29,23 @@ bool fscrypt_policies_equal(const union fscrypt_policy *policy1, return !memcmp(policy1, policy2, fscrypt_policy_size(policy1)); } +static bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode) +{ + if (contents_mode == FSCRYPT_MODE_AES_256_XTS && + filenames_mode == FSCRYPT_MODE_AES_256_CTS) + return true; + + if (contents_mode == FSCRYPT_MODE_AES_128_CBC && + filenames_mode == FSCRYPT_MODE_AES_128_CTS) + return true; + + if (contents_mode == FSCRYPT_MODE_ADIANTUM && + filenames_mode == FSCRYPT_MODE_ADIANTUM) + return true; + + return false; +} + static bool supported_direct_key_modes(const struct inode *inode, u32 contents_mode, u32 filenames_mode) { -- GitLab From b21b79d7fe40880451189826917daecd3402f6b3 Mon Sep 17 00:00:00 2001 From: Eric 
Biggers Date: Mon, 9 Dec 2019 13:18:29 -0800 Subject: [PATCH 0013/1278] fscrypt: remove fscrypt_is_direct_key_policy() fscrypt_is_direct_key_policy() is no longer used, so remove it. Link: https://lore.kernel.org/r/20191209211829.239800-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 71f496fe7173..b22e8decebed 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -136,12 +136,6 @@ fscrypt_policy_flags(const union fscrypt_policy *policy) BUG(); } -static inline bool -fscrypt_is_direct_key_policy(const union fscrypt_policy *policy) -{ - return fscrypt_policy_flags(policy) & FSCRYPT_POLICY_FLAG_DIRECT_KEY; -} - /** * For encrypted symlinks, the ciphertext length is stored at the beginning * of the string in little-endian format. -- GitLab From 8842133ff32ea9fbd138aecebfc45e27102ea066 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 13:23:48 -0800 Subject: [PATCH 0014/1278] fscrypt: don't check for ENOKEY from fscrypt_get_encryption_info() fscrypt_get_encryption_info() returns 0 if the encryption key is unavailable; it never returns ENOKEY. So remove checks for ENOKEY. 
Link: https://lore.kernel.org/r/20191209212348.243331-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/ext4/dir.c | 2 +- fs/f2fs/dir.c | 2 +- fs/ubifs/dir.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 7219f19710c2..b30052b61c8f 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -115,7 +115,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) if (IS_ENCRYPTED(inode)) { err = fscrypt_get_encryption_info(inode); - if (err && err != -ENOKEY) + if (err) return err; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1972638165fd..331c90556a0f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1001,7 +1001,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) if (IS_ENCRYPTED(inode)) { err = fscrypt_get_encryption_info(inode); - if (err && err != -ENOKEY) + if (err) goto out; err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 9cbce7a9c31c..f7057c320a33 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -537,7 +537,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) if (encrypted) { err = fscrypt_get_encryption_info(dir); - if (err && err != -ENOKEY) + if (err) return err; err = fscrypt_fname_alloc_buffer(dir, UBIFS_MAX_NLEN, &fstr); -- GitLab From 737ae902586bbb09549e4604e8c87bee5a797622 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 19 Dec 2019 10:56:24 -0800 Subject: [PATCH 0015/1278] fscrypt: include <linux/ioctl.h> in UAPI header <linux/fscrypt.h> defines ioctl numbers using the macros like _IOWR() which are defined in <linux/ioctl.h>, so <linux/ioctl.h> should be included as a prerequisite, like it is in many other kernel headers. In practice this doesn't really matter since anyone referencing these ioctl numbers will almost certainly include <sys/ioctl.h> too in order to actually call ioctl(). But we might as well fix this.
Link: https://lore.kernel.org/r/20191219185624.21251-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/uapi/linux/fscrypt.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index d5112a24e8b9..0d8a6f47711c 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -8,6 +8,7 @@ #ifndef _UAPI_LINUX_FSCRYPT_H #define _UAPI_LINUX_FSCRYPT_H +#include <linux/ioctl.h> #include <linux/types.h> /* Encryption policy flags */ -- GitLab From 04f51847e5ca898e84a433da5576a5746d0592ae Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 27 Dec 2019 10:47:00 +0800 Subject: [PATCH 0016/1278] fscrypt: Allow modular crypto algorithms The commit 643fa9612bf1 ("fscrypt: remove filesystem specific build config option") removed modular support for fs/crypto. This causes the Crypto API to be built-in whenever fscrypt is enabled. This makes it very difficult for me to test modular builds of the Crypto API without disabling fscrypt which is a pain. As fscrypt is still evolving and it's developing new ties with the fs layer, it's hard to build it as a module for now. However, the actual algorithms are not required until a filesystem is mounted. Therefore we can allow them to be built as modules.
Signed-off-by: Herbert Xu Link: https://lore.kernel.org/r/20191227024700.7vrzuux32uyfdgum@gondor.apana.org.au Signed-off-by: Eric Biggers --- fs/crypto/Kconfig | 21 ++++++++++++++------- fs/ext4/Kconfig | 1 + fs/f2fs/Kconfig | 1 + fs/ubifs/Kconfig | 1 + 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index 4bc66f2c571e..d0a0238e37dd 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -1,13 +1,8 @@ config FS_ENCRYPTION bool "FS Encryption (Per-file encryption)" select CRYPTO - select CRYPTO_AES - select CRYPTO_CBC - select CRYPTO_ECB - select CRYPTO_XTS - select CRYPTO_CTS - select CRYPTO_SHA512 - select CRYPTO_HMAC + select CRYPTO_HASH + select CRYPTO_BLKCIPHER select KEYS help Enable encryption of files and directories. This @@ -15,3 +10,15 @@ config FS_ENCRYPTION efficient since it avoids caching the encrypted and decrypted pages in the page cache. Currently Ext4, F2FS and UBIFS make use of this feature. + +# Filesystems supporting encryption must select this if FS_ENCRYPTION. This +# allows the algorithms to be built as modules when all the filesystems are. +config FS_ENCRYPTION_ALGS + tristate + select CRYPTO_AES + select CRYPTO_CBC + select CRYPTO_CTS + select CRYPTO_ECB + select CRYPTO_HMAC + select CRYPTO_SHA512 + select CRYPTO_XTS diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index e2cfd33fd759..39cd2c054339 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -37,6 +37,7 @@ config EXT4_FS select CRC16 select CRYPTO select CRYPTO_CRC32C + select FS_ENCRYPTION_ALGS if FS_ENCRYPTION help This is the next generation of the ext3 filesystem. 
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 9bb02d446d44..8f2019d17f34 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -5,6 +5,7 @@ config F2FS_FS select CRYPTO select CRYPTO_CRC32 select F2FS_FS_XATTR if FS_ENCRYPTION + select FS_ENCRYPTION_ALGS if FS_ENCRYPTION help F2FS is based on Log-structured File System (LFS), which supports versatile "flash-friendly" features. The design has been focused on diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index dfc6fdf019d7..fe221d7d99d6 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -7,6 +7,7 @@ config UBIFS_FS select CRYPTO if UBIFS_FS_ZLIB select CRYPTO_LZO if UBIFS_FS_LZO select CRYPTO_DEFLATE if UBIFS_FS_ZLIB + select FS_ENCRYPTION_ALGS if FS_ENCRYPTION depends on MTD_UBI help UBIFS is a file system for flash devices which works on top of UBI. -- GitLab From 1c88eea96e762f274921259e3438c003d6cf26bb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:45:09 -0800 Subject: [PATCH 0017/1278] fscrypt: remove redundant bi_status check submit_bio_wait() already returns bi_status translated to an errno. So the additional check of bi_status is redundant and can be removed. Link: https://lore.kernel.org/r/20191209204509.228942-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/bio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 4a7f4d78ef90..3548ab118cf9 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -77,8 +77,6 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, goto errout; } err = submit_bio_wait(bio); - if (err == 0 && bio->bi_status) - err = -EIO; bio_put(bio); if (err) goto errout; -- GitLab From bee5bd5b8f2ec1010f53d66aa26ac9b23dcdf271 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 26 Dec 2019 10:08:13 -0600 Subject: [PATCH 0018/1278] fscrypt: optimize fscrypt_zeroout_range() Currently fscrypt_zeroout_range() issues and waits on a bio for each block it writes, which makes it very slow. 
Optimize it to write up to 16 pages at a time instead. Also add a function comment, and improve reliability by allowing the allocations of the bio and the first ciphertext page to wait on the corresponding mempools. Link: https://lore.kernel.org/r/20191226160813.53182-1-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- fs/crypto/bio.c | 112 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 31 deletions(-) diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 3548ab118cf9..d7b1ce2aa307 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -41,51 +41,101 @@ void fscrypt_decrypt_bio(struct bio *bio) } EXPORT_SYMBOL(fscrypt_decrypt_bio); +/** + * fscrypt_zeroout_range() - zero out a range of blocks in an encrypted file + * @inode: the file's inode + * @lblk: the first file logical block to zero out + * @pblk: the first filesystem physical block to zero out + * @len: number of blocks to zero out + * + * Zero out filesystem blocks in an encrypted regular file on-disk, i.e. write + * ciphertext blocks which decrypt to the all-zeroes block. The blocks must be + * both logically and physically contiguous. It's also assumed that the + * filesystem only uses a single block device, ->s_bdev. + * + * Note that since each block uses a different IV, this involves writing a + * different ciphertext to each block; we can't simply reuse the same one. + * + * Return: 0 on success; -errno on failure. 
+ */ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, - sector_t pblk, unsigned int len) + sector_t pblk, unsigned int len) { const unsigned int blockbits = inode->i_blkbits; const unsigned int blocksize = 1 << blockbits; - struct page *ciphertext_page; + const unsigned int blocks_per_page_bits = PAGE_SHIFT - blockbits; + const unsigned int blocks_per_page = 1 << blocks_per_page_bits; + struct page *pages[16]; /* write up to 16 pages at a time */ + unsigned int nr_pages; + unsigned int i; + unsigned int offset; struct bio *bio; - int ret, err = 0; + int ret, err; - ciphertext_page = fscrypt_alloc_bounce_page(GFP_NOWAIT); - if (!ciphertext_page) - return -ENOMEM; + if (len == 0) + return 0; - while (len--) { - err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk, - ZERO_PAGE(0), ciphertext_page, - blocksize, 0, GFP_NOFS); - if (err) - goto errout; + BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_PAGES); + nr_pages = min_t(unsigned int, ARRAY_SIZE(pages), + (len + blocks_per_page - 1) >> blocks_per_page_bits); - bio = bio_alloc(GFP_NOWAIT, 1); - if (!bio) { - err = -ENOMEM; - goto errout; - } + /* + * We need at least one page for ciphertext. Allocate the first one + * from a mempool, with __GFP_DIRECT_RECLAIM set so that it can't fail. + * + * Any additional page allocations are allowed to fail, as they only + * help performance, and waiting on the mempool for them could deadlock. + */ + for (i = 0; i < nr_pages; i++) { + pages[i] = fscrypt_alloc_bounce_page(i == 0 ? GFP_NOFS : + GFP_NOWAIT | __GFP_NOWARN); + if (!pages[i]) + break; + } + nr_pages = i; + if (WARN_ON(nr_pages <= 0)) + return -EINVAL; + + /* This always succeeds since __GFP_DIRECT_RECLAIM is set. */ + bio = bio_alloc(GFP_NOFS, nr_pages); + + do { bio_set_dev(bio, inode->i_sb->s_bdev); bio->bi_iter.bi_sector = pblk << (blockbits - 9); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - ret = bio_add_page(bio, ciphertext_page, blocksize, 0); - if (WARN_ON(ret != blocksize)) { - /* should never happen! 
*/ - bio_put(bio); - err = -EIO; - goto errout; - } + + i = 0; + offset = 0; + do { + err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk, + ZERO_PAGE(0), pages[i], + blocksize, offset, GFP_NOFS); + if (err) + goto out; + lblk++; + pblk++; + len--; + offset += blocksize; + if (offset == PAGE_SIZE || len == 0) { + ret = bio_add_page(bio, pages[i++], offset, 0); + if (WARN_ON(ret != offset)) { + err = -EIO; + goto out; + } + offset = 0; + } + } while (i != nr_pages && len != 0); + err = submit_bio_wait(bio); - bio_put(bio); if (err) - goto errout; - lblk++; - pblk++; - } + goto out; + bio_reset(bio); + } while (len != 0); err = 0; -errout: - fscrypt_free_bounce_page(ciphertext_page); +out: + bio_put(bio); + for (i = 0; i < nr_pages; i++) + fscrypt_free_bounce_page(pages[i]); return err; } EXPORT_SYMBOL(fscrypt_zeroout_range); -- GitLab From 9c5c8c523222e52b2f2b8fce4dabf8a64da671c3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 31 Dec 2019 12:10:26 -0600 Subject: [PATCH 0019/1278] fscrypt: document gfp_flags for bounce page allocation Document that fscrypt_encrypt_pagecache_blocks() allocates the bounce page from a mempool, and document what this means for the @gfp_flags argument. Link: https://lore.kernel.org/r/20191231181026.47400-1-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- fs/crypto/crypto.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 8a783d74137b..dcbd507824b7 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -137,7 +137,7 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, * multiple of the filesystem's block size. * @offs: Byte offset within @page of the first block to encrypt. Must be * a multiple of the filesystem's block size. - * @gfp_flags: Memory allocation flags + * @gfp_flags: Memory allocation flags. See details below. 
* * A new bounce page is allocated, and the specified block(s) are encrypted into * it. In the bounce page, the ciphertext block(s) will be located at the same @@ -147,6 +147,11 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, * * This is for use by the filesystem's ->writepages() method. * + * The bounce page allocation is mempool-backed, so it will always succeed when + * @gfp_flags includes __GFP_DIRECT_RECLAIM, e.g. when it's GFP_NOFS. However, + * only the first page of each bio can be allocated this way. To prevent + * deadlocks, for any additional pages a mask like GFP_NOWAIT must be used. + * * Return: the new encrypted bounce page on success; an ERR_PTR() on failure */ struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, -- GitLab From 85b9c3e49199fe0d307cfba9f59df391e0a8510f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 19 Jan 2020 22:07:32 -0800 Subject: [PATCH 0020/1278] fscrypt: don't print name of busy file when removing key When an encryption key can't be fully removed due to file(s) protected by it still being in-use, we shouldn't really print the path to one of these files to the kernel log, since parts of this path are likely to be encrypted on-disk, and (depending on how the system is set up) the confidentiality of this path might be lost by printing it to the log. This is a trade-off: a single file path often doesn't matter at all, especially if it's a directory; the kernel log might still be protected in some way; and I had originally hoped that any "inode(s) still busy" bugs (which are security weaknesses in their own right) would be quickly fixed and that to do so it would be super helpful to always know the file path and not have to run 'find dir -inum $inum' after the fact. But in practice, these bugs can be hard to fix (e.g. 
due to asynchronous process killing that is difficult to eliminate, for performance reasons), and also not tied to specific files, so knowing a file path doesn't necessarily help. So to be safe, for now let's just show the inode number, not the path. If someone really wants to know a path they can use 'find -inum'. Fixes: b1c0ec3599f4 ("fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY ioctl") Cc: <stable@vger.kernel.org> # v5.4+ Link: https://lore.kernel.org/r/20200120060732.390362-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keyring.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 395aee2e6e4c..35ffabfffbf8 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -776,9 +776,6 @@ static int check_for_busy_inodes(struct super_block *sb, struct list_head *pos; size_t busy_count = 0; unsigned long ino; - struct dentry *dentry; - char _path[256]; - char *path = NULL; spin_lock(&mk->mk_decrypted_inodes_lock); @@ -797,22 +794,14 @@ static int check_for_busy_inodes(struct super_block *sb, struct fscrypt_info, ci_master_key_link)->ci_inode; ino = inode->i_ino; - dentry = d_find_alias(inode); } spin_unlock(&mk->mk_decrypted_inodes_lock); - if (dentry) { - path = dentry_path(dentry, _path, sizeof(_path)); - dput(dentry); - } - if (IS_ERR_OR_NULL(path)) - path = "(unknown)"; - fscrypt_warn(NULL, - "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu (%s)", + "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu", sb->s_id, busy_count, master_key_spec_type(&mk->mk_spec), master_key_spec_len(&mk->mk_spec), (u8 *)&mk->mk_spec.u, - ino, path); + ino); return -EBUSY; } -- GitLab From 0bc68c180e8e56afe74a5c9ae455165414d61373 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 19 Jan 2020 23:17:36 -0800 Subject: [PATCH 0021/1278] fscrypt: add "fscrypt_" prefix to fname_encrypt() fname_encrypt() is a global function, due to being used in both fname.c
and hooks.c. So it should be prefixed with "fscrypt_", like all the other global functions in fs/crypto/. Link: https://lore.kernel.org/r/20200120071736.45915-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/fname.c | 10 +++++----- fs/crypto/fscrypt_private.h | 5 +++-- fs/crypto/hooks.c | 3 ++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 3fd27e14ebdd..4614e4969736 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -28,15 +28,15 @@ static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) } /** - * fname_encrypt() - encrypt a filename + * fscrypt_fname_encrypt() - encrypt a filename * * The output buffer must be at least as large as the input buffer. * Any extra space is filled with NUL padding before encryption. * * Return: 0 on success, -errno on failure */ -int fname_encrypt(const struct inode *inode, const struct qstr *iname, - u8 *out, unsigned int olen) +int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); @@ -343,8 +343,8 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, if (!fname->crypto_buf.name) return -ENOMEM; - ret = fname_encrypt(dir, iname, fname->crypto_buf.name, - fname->crypto_buf.len); + ret = fscrypt_fname_encrypt(dir, iname, fname->crypto_buf.name, + fname->crypto_buf.len); if (ret) goto errout; fname->disk_name.name = fname->crypto_buf.name; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index b22e8decebed..fea7f5547428 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -235,8 +235,9 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, const struct fscrypt_info *ci); /* fname.c */ -extern int fname_encrypt(const struct inode *inode, const struct qstr *iname, - u8 *out, unsigned int olen); +extern int fscrypt_fname_encrypt(const struct inode *inode, 
+ const struct qstr *iname, + u8 *out, unsigned int olen); extern bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, u32 *encrypted_len_ret); diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 30b1ca661249..a0723fcd77a3 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -187,7 +187,8 @@ int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, ciphertext_len = disk_link->len - sizeof(*sd); sd->len = cpu_to_le16(ciphertext_len); - err = fname_encrypt(inode, &iname, sd->encrypted_path, ciphertext_len); + err = fscrypt_fname_encrypt(inode, &iname, sd->encrypted_path, + ciphertext_len); if (err) goto err_free_sd; -- GitLab From e16d8494ecc69348badaaef8631040d40ef1e91d Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 20 Jan 2020 14:31:56 -0800 Subject: [PATCH 0022/1278] fscrypt: don't allow v1 policies with casefolding Casefolded encrypted directories will use a new dirhash method that requires a secret key. If the directory uses a v2 encryption policy, it's easy to derive this key from the master key using HKDF. However, v1 encryption policies don't provide a way to derive additional keys. Therefore, don't allow casefolding on directories that use a v1 policy. Specifically, make it so that trying to enable casefolding on a directory that has a v1 policy fails, trying to set a v1 policy on a casefolded directory fails, and trying to open a casefolded directory that has a v1 policy (if one somehow exists on-disk) fails. 
Signed-off-by: Daniel Rosenberg [EB: improved commit message, updated fscrypt.rst, and other cleanups] Link: https://lore.kernel.org/r/20200120223201.241390-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 4 +++- fs/crypto/hooks.c | 28 +++++++++++++++++++++++++++ fs/crypto/policy.c | 7 +++++++ fs/inode.c | 3 ++- include/linux/fscrypt.h | 9 +++++++++ 5 files changed, 49 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 4ed9d58ea0ab..9514bef7e99e 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -513,7 +513,9 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors: - ``EEXIST``: the file is already encrypted with an encryption policy different from the one specified - ``EINVAL``: an invalid encryption policy was specified (invalid - version, mode(s), or flags; or reserved bits were set) + version, mode(s), or flags; or reserved bits were set); or a v1 + encryption policy was specified but the directory has the casefold + flag enabled (casefolding is incompatible with v1 policies). - ``ENOKEY``: a v2 encryption policy was specified, but the key with the specified ``master_key_identifier`` has not been added, nor does the process have the CAP_FOWNER capability in the initial user diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index a0723fcd77a3..3312d9ac1143 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -121,6 +121,34 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, } EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); +/** + * fscrypt_prepare_setflags() - prepare to change flags with FS_IOC_SETFLAGS + * @inode: the inode on which flags are being changed + * @oldflags: the old flags + * @flags: the new flags + * + * The caller should be holding i_rwsem for write. 
+ * + * Return: 0 on success; -errno if the flags change isn't allowed or if + * another error occurs. + */ +int fscrypt_prepare_setflags(struct inode *inode, + unsigned int oldflags, unsigned int flags) +{ + struct fscrypt_info *ci; + int err; + + if (IS_ENCRYPTED(inode) && (flags & ~oldflags & FS_CASEFOLD_FL)) { + err = fscrypt_require_key(inode); + if (err) + return err; + ci = inode->i_crypt_info; + if (ci->ci_policy.version != FSCRYPT_POLICY_V2) + return -EINVAL; + } + return 0; +} + int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index f1cff83c151a..cf2a9d26ef7d 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -124,6 +124,13 @@ static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy, policy->filenames_encryption_mode)) return false; + if (IS_CASEFOLDED(inode)) { + /* With v1, there's no way to derive dirhash keys. */ + fscrypt_warn(inode, + "v1 policies can't be used on casefolded directories"); + return false; + } + return true; } diff --git a/fs/inode.c b/fs/inode.c index 95fece639652..01ed6d0a4a5c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -2146,7 +2147,7 @@ int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; - return 0; + return fscrypt_prepare_setflags(inode, oldflags, flags); } EXPORT_SYMBOL(vfs_ioc_setflags_prepare); diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index dd67e7aa148f..f5a8293964bf 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -263,6 +263,8 @@ extern int __fscrypt_prepare_rename(struct inode *old_dir, unsigned int flags); extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname); +extern int fscrypt_prepare_setflags(struct inode *inode, + unsigned 
int oldflags, unsigned int flags); extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link); @@ -514,6 +516,13 @@ static inline int __fscrypt_prepare_lookup(struct inode *dir, return -EOPNOTSUPP; } +static inline int fscrypt_prepare_setflags(struct inode *inode, + unsigned int oldflags, + unsigned int flags) +{ + return 0; +} + static inline int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, -- GitLab From 7e2503236b61e3005c4c453b9dd22f05f517956f Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 20 Jan 2020 14:31:57 -0800 Subject: [PATCH 0023/1278] fscrypt: derive dirhash key for casefolded directories When we allow indexed directories to use both encryption and casefolding, for the dirhash we can't just hash the ciphertext filenames that are stored on-disk (as is done currently) because the dirhash must be case insensitive, but the stored names are case-preserving. Nor can we hash the plaintext names with an unkeyed hash (or a hash keyed with a value stored on-disk like ext4's s_hash_seed), since that would leak information about the names that encryption is meant to protect. Instead, if we can accept a dirhash that's only computable when the fscrypt key is available, we can hash the plaintext names with a keyed hash using a secret key derived from the directory's fscrypt master key. We'll use SipHash-2-4 for this purpose. Prepare for this by deriving a SipHash key for each casefolded encrypted directory. Make sure to handle deriving the key not only when setting up the directory's fscrypt_info, but also in the case where the casefold flag is enabled after the fscrypt_info was already set up. (We could just always derive the key regardless of casefolding, but that would introduce unnecessary overhead for people not using casefolding.) 
Signed-off-by: Daniel Rosenberg [EB: improved commit message, updated fscrypt.rst, squashed with change that avoids unnecessarily deriving the key, and many other cleanups] Link: https://lore.kernel.org/r/20200120223201.241390-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 10 +++++ fs/crypto/fname.c | 21 +++++++++++ fs/crypto/fscrypt_private.h | 13 +++++++ fs/crypto/hooks.c | 16 ++++++++ fs/crypto/keysetup.c | 54 ++++++++++++++++++++------- include/linux/fscrypt.h | 10 +++++ 6 files changed, 110 insertions(+), 14 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 9514bef7e99e..a737503f8062 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -302,6 +302,16 @@ For master keys used for v2 encryption policies, a unique 16-byte "key identifier" is also derived using the KDF. This value is stored in the clear, since it is needed to reliably identify the key itself. +Dirhash keys +------------ + +For directories that are indexed using a secret-keyed dirhash over the +plaintext filenames, the KDF is also used to derive a 128-bit +SipHash-2-4 key per directory in order to hash filenames. This works +just like deriving a per-file encryption key, except that a different +KDF context is used. Currently, only casefolded ("case-insensitive") +encrypted directories use this style of hashing. 
+ Encryption modes and usage ========================== diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 4614e4969736..851d2082ecfe 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -402,6 +402,27 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, } EXPORT_SYMBOL(fscrypt_setup_filename); +/** + * fscrypt_fname_siphash() - calculate the SipHash of a filename + * @dir: the parent directory + * @name: the filename to calculate the SipHash of + * + * Given a plaintext filename @name and a directory @dir which uses SipHash as + * its dirhash method and has had its fscrypt key set up, this function + * calculates the SipHash of that name using the directory's secret dirhash key. + * + * Return: the SipHash of @name using the hash key of @dir + */ +u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name) +{ + const struct fscrypt_info *ci = dir->i_crypt_info; + + WARN_ON(!ci->ci_dirhash_key_initialized); + + return siphash(name->name, name->len, &ci->ci_dirhash_key); +} +EXPORT_SYMBOL_GPL(fscrypt_fname_siphash); + /* * Validate dentries in encrypted directories to make sure we aren't potentially * caching stale dentries after a key has been added. diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index fea7f5547428..81dbb2befe81 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -12,6 +12,7 @@ #define _FSCRYPT_PRIVATE_H #include +#include #include #define CONST_STRLEN(str) (sizeof(str) - 1) @@ -188,6 +189,14 @@ struct fscrypt_info { */ struct fscrypt_direct_key *ci_direct_key; + /* + * This inode's hash key for filenames. This is a 128-bit SipHash-2-4 + * key. This is only set for directories that use a keyed dirhash over + * the plaintext filenames -- currently just casefolded directories. 
+ */ + siphash_key_t ci_dirhash_key; + bool ci_dirhash_key_initialized; + /* The encryption policy used by this inode */ union fscrypt_policy ci_policy; @@ -263,6 +272,7 @@ extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, #define HKDF_CONTEXT_PER_FILE_KEY 2 #define HKDF_CONTEXT_DIRECT_KEY 3 #define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4 +#define HKDF_CONTEXT_DIRHASH_KEY 5 extern int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, @@ -434,6 +444,9 @@ fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, extern int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key); +extern int fscrypt_derive_dirhash_key(struct fscrypt_info *ci, + const struct fscrypt_master_key *mk); + /* keysetup_v1.c */ extern void fscrypt_put_direct_key(struct fscrypt_direct_key *dk); diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 3312d9ac1143..4ca167017d67 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -4,6 +4,8 @@ * Encryption hooks for higher-level filesystem operations. */ +#include + #include "fscrypt_private.h" /** @@ -136,8 +138,14 @@ int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) { struct fscrypt_info *ci; + struct fscrypt_master_key *mk; int err; + /* + * When the CASEFOLD flag is set on an encrypted directory, we must + * derive the secret key needed for the dirhash. This is only possible + * if the directory uses a v2 encryption policy. 
+ */ if (IS_ENCRYPTED(inode) && (flags & ~oldflags & FS_CASEFOLD_FL)) { err = fscrypt_require_key(inode); if (err) @@ -145,6 +153,14 @@ int fscrypt_prepare_setflags(struct inode *inode, ci = inode->i_crypt_info; if (ci->ci_policy.version != FSCRYPT_POLICY_V2) return -EINVAL; + mk = ci->ci_master_key->payload.data[0]; + down_read(&mk->mk_secret_sem); + if (is_master_key_secret_present(&mk->mk_secret)) + err = fscrypt_derive_dirhash_key(ci, mk); + else + err = -ENOKEY; + up_read(&mk->mk_secret_sem); + return err; } return 0; } diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 573fdc0f480b..258c060b37e2 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -174,10 +174,24 @@ static int setup_per_mode_key(struct fscrypt_info *ci, return 0; } +int fscrypt_derive_dirhash_key(struct fscrypt_info *ci, + const struct fscrypt_master_key *mk) +{ + int err; + + err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, HKDF_CONTEXT_DIRHASH_KEY, + ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE, + (u8 *)&ci->ci_dirhash_key, + sizeof(ci->ci_dirhash_key)); + if (err) + return err; + ci->ci_dirhash_key_initialized = true; + return 0; +} + static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, struct fscrypt_master_key *mk) { - u8 derived_key[FSCRYPT_MAX_KEY_SIZE]; int err; if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { @@ -189,8 +203,8 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, * This ensures that the master key is consistently used only * for HKDF, avoiding key reuse issues. */ - return setup_per_mode_key(ci, mk, mk->mk_direct_tfms, - HKDF_CONTEXT_DIRECT_KEY, false); + err = setup_per_mode_key(ci, mk, mk->mk_direct_tfms, + HKDF_CONTEXT_DIRECT_KEY, false); } else if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) { /* @@ -199,21 +213,33 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, * the IVs. 
This format is optimized for use with inline * encryption hardware compliant with the UFS or eMMC standards. */ - return setup_per_mode_key(ci, mk, mk->mk_iv_ino_lblk_64_tfms, - HKDF_CONTEXT_IV_INO_LBLK_64_KEY, - true); + err = setup_per_mode_key(ci, mk, mk->mk_iv_ino_lblk_64_tfms, + HKDF_CONTEXT_IV_INO_LBLK_64_KEY, true); + } else { + u8 derived_key[FSCRYPT_MAX_KEY_SIZE]; + + err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, + HKDF_CONTEXT_PER_FILE_KEY, + ci->ci_nonce, + FS_KEY_DERIVATION_NONCE_SIZE, + derived_key, ci->ci_mode->keysize); + if (err) + return err; + + err = fscrypt_set_derived_key(ci, derived_key); + memzero_explicit(derived_key, ci->ci_mode->keysize); } - - err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, - HKDF_CONTEXT_PER_FILE_KEY, - ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE, - derived_key, ci->ci_mode->keysize); if (err) return err; - err = fscrypt_set_derived_key(ci, derived_key); - memzero_explicit(derived_key, ci->ci_mode->keysize); - return err; + /* Derive a secret dirhash key for directories that need it. 
*/ + if (S_ISDIR(ci->ci_inode->i_mode) && IS_CASEFOLDED(ci->ci_inode)) { + err = fscrypt_derive_dirhash_key(ci, mk); + if (err) + return err; + } + + return 0; } /* diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index f5a8293964bf..ccdee616e7e5 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -247,6 +247,9 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); } +extern u64 fscrypt_fname_siphash(const struct inode *dir, + const struct qstr *name); + /* bio.c */ extern void fscrypt_decrypt_bio(struct bio *); extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, @@ -474,6 +477,13 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); } +static inline u64 fscrypt_fname_siphash(const struct inode *dir, + const struct qstr *name) +{ + WARN_ON_ONCE(1); + return 0; +} + /* bio.c */ static inline void fscrypt_decrypt_bio(struct bio *bio) { -- GitLab From 216d8cabb6b8482796ceb2878e803c92e6df8fb8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 Jan 2020 14:31:58 -0800 Subject: [PATCH 0024/1278] fscrypt: clarify what is meant by a per-file key Now that there's sometimes a second type of per-file key (the dirhash key), clarify some function names, macros, and documentation that specifically deal with per-file *encryption* keys. 
Link: https://lore.kernel.org/r/20200120223201.241390-4-ebiggers@kernel.org Reviewed-by: Daniel Rosenberg Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 24 ++++++++--------- fs/crypto/fscrypt_private.h | 6 ++--- fs/crypto/keysetup.c | 39 ++++++++++++++------------- fs/crypto/keysetup_v1.c | 4 +-- 4 files changed, 37 insertions(+), 36 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index a737503f8062..28524dcc51c9 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -234,8 +234,8 @@ HKDF is more flexible, is nonreversible, and evenly distributes entropy from the master key. HKDF is also standardized and widely used by other software, whereas the AES-128-ECB based KDF is ad-hoc. -Per-file keys -------------- +Per-file encryption keys +------------------------ Since each master key can protect many files, it is necessary to "tweak" the encryption of each file so that the same plaintext in two @@ -268,9 +268,9 @@ is greater than that of an AES-256-XTS key. Therefore, to improve performance and save memory, for Adiantum a "direct key" configuration is supported. When the user has enabled this by setting FSCRYPT_POLICY_FLAG_DIRECT_KEY in the fscrypt policy, -per-file keys are not used. Instead, whenever any data (contents or -filenames) is encrypted, the file's 16-byte nonce is included in the -IV. Moreover: +per-file encryption keys are not used. Instead, whenever any data +(contents or filenames) is encrypted, the file's 16-byte nonce is +included in the IV. Moreover: - For v1 encryption policies, the encryption is done directly with the master key. Because of this, users **must not** use the same master @@ -335,11 +335,11 @@ used. Adiantum is a (primarily) stream cipher-based mode that is fast even on CPUs without dedicated crypto instructions. It's also a true wide-block mode, unlike XTS. 
It can also eliminate the need to derive -per-file keys. However, it depends on the security of two primitives, -XChaCha12 and AES-256, rather than just one. See the paper -"Adiantum: length-preserving encryption for entry-level processors" -(https://eprint.iacr.org/2018/720.pdf) for more details. To use -Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast +per-file encryption keys. However, it depends on the security of two +primitives, XChaCha12 and AES-256, rather than just one. See the +paper "Adiantum: length-preserving encryption for entry-level +processors" (https://eprint.iacr.org/2018/720.pdf) for more details. +To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast implementations of ChaCha and NHPoly1305 should be enabled, e.g. CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM. @@ -1149,8 +1149,8 @@ The context structs contain the same information as the corresponding policy structs (see `Setting an encryption policy`_), except that the context structs also contain a nonce. The nonce is randomly generated by the kernel and is used as KDF input or as a tweak to cause -different files to be encrypted differently; see `Per-file keys`_ and -`DIRECT_KEY policies`_. +different files to be encrypted differently; see `Per-file encryption +keys`_ and `DIRECT_KEY policies`_. Data path changes ----------------- diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 81dbb2befe81..9aae851409e5 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -269,7 +269,7 @@ extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, * output doesn't reveal another. 
*/ #define HKDF_CONTEXT_KEY_IDENTIFIER 1 -#define HKDF_CONTEXT_PER_FILE_KEY 2 +#define HKDF_CONTEXT_PER_FILE_ENC_KEY 2 #define HKDF_CONTEXT_DIRECT_KEY 3 #define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4 #define HKDF_CONTEXT_DIRHASH_KEY 5 @@ -441,8 +441,8 @@ extern struct crypto_skcipher * fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, const struct inode *inode); -extern int fscrypt_set_derived_key(struct fscrypt_info *ci, - const u8 *derived_key); +extern int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, + const u8 *raw_key); extern int fscrypt_derive_dirhash_key(struct fscrypt_info *ci, const struct fscrypt_master_key *mk); diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 258c060b37e2..f9ab21c778e1 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -107,12 +107,12 @@ struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode, return ERR_PTR(err); } -/* Given the per-file key, set up the file's crypto transform object */ -int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key) +/* Given a per-file encryption key, set up the file's crypto transform object */ +int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key) { struct crypto_skcipher *tfm; - tfm = fscrypt_allocate_skcipher(ci->ci_mode, derived_key, ci->ci_inode); + tfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, ci->ci_inode); if (IS_ERR(tfm)) return PTR_ERR(tfm); @@ -121,10 +121,10 @@ int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key) return 0; } -static int setup_per_mode_key(struct fscrypt_info *ci, - struct fscrypt_master_key *mk, - struct crypto_skcipher **tfms, - u8 hkdf_context, bool include_fs_uuid) +static int setup_per_mode_enc_key(struct fscrypt_info *ci, + struct fscrypt_master_key *mk, + struct crypto_skcipher **tfms, + u8 hkdf_context, bool include_fs_uuid) { const struct inode *inode = ci->ci_inode; const struct super_block *sb = inode->i_sb; @@ -196,15 
+196,15 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { /* - * DIRECT_KEY: instead of deriving per-file keys, the per-file - * nonce will be included in all the IVs. But unlike v1 - * policies, for v2 policies in this case we don't encrypt with - * the master key directly but rather derive a per-mode key. - * This ensures that the master key is consistently used only - * for HKDF, avoiding key reuse issues. + * DIRECT_KEY: instead of deriving per-file encryption keys, the + * per-file nonce will be included in all the IVs. But unlike + * v1 policies, for v2 policies in this case we don't encrypt + * with the master key directly but rather derive a per-mode + * encryption key. This ensures that the master key is + * consistently used only for HKDF, avoiding key reuse issues. */ - err = setup_per_mode_key(ci, mk, mk->mk_direct_tfms, - HKDF_CONTEXT_DIRECT_KEY, false); + err = setup_per_mode_enc_key(ci, mk, mk->mk_direct_tfms, + HKDF_CONTEXT_DIRECT_KEY, false); } else if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) { /* @@ -213,20 +213,21 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, * the IVs. This format is optimized for use with inline * encryption hardware compliant with the UFS or eMMC standards. 
*/ - err = setup_per_mode_key(ci, mk, mk->mk_iv_ino_lblk_64_tfms, - HKDF_CONTEXT_IV_INO_LBLK_64_KEY, true); + err = setup_per_mode_enc_key(ci, mk, mk->mk_iv_ino_lblk_64_tfms, + HKDF_CONTEXT_IV_INO_LBLK_64_KEY, + true); } else { u8 derived_key[FSCRYPT_MAX_KEY_SIZE]; err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, - HKDF_CONTEXT_PER_FILE_KEY, + HKDF_CONTEXT_PER_FILE_ENC_KEY, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE, derived_key, ci->ci_mode->keysize); if (err) return err; - err = fscrypt_set_derived_key(ci, derived_key); + err = fscrypt_set_per_file_enc_key(ci, derived_key); memzero_explicit(derived_key, ci->ci_mode->keysize); } if (err) diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 6b8815d7448d..8a97a8dd8ebb 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -9,7 +9,7 @@ * This file implements compatibility functions for the original encryption * policy version ("v1"), including: * - * - Deriving per-file keys using the AES-128-ECB based KDF + * - Deriving per-file encryption keys using the AES-128-ECB based KDF * (rather than the new method of using HKDF-SHA512) * * - Retrieving fscrypt master keys from process-subscribed keyrings @@ -283,7 +283,7 @@ static int setup_v1_file_key_derived(struct fscrypt_info *ci, if (err) goto out; - err = fscrypt_set_derived_key(ci, derived_key); + err = fscrypt_set_per_file_enc_key(ci, derived_key); out: kzfree(derived_key); return err; -- GitLab From 89aca68d2fbac03d49bf33d1036dc04e769594d5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 Jan 2020 14:31:59 -0800 Subject: [PATCH 0025/1278] ubifs: don't trigger assertion on invalid no-key filename If userspace provides an invalid fscrypt no-key filename which encodes a hash value with any of the UBIFS node type bits set (i.e. the high 3 bits), gracefully report ENOENT rather than triggering ubifs_assert(). Test case with kvm-xfstests shell: . fs/ubifs/config . 
~/xfstests/common/encrypt dev=$(__blkdev_to_ubi_volume /dev/vdc) ubiupdatevol $dev -t mount $dev /mnt -t ubifs mkdir /mnt/edir xfs_io -c set_encpolicy /mnt/edir rm /mnt/edir/_,,,,,DAAAAAAAAAAAAAAAAAAAAAAAAAA With the bug, the following assertion fails on the 'rm' command: [ 19.066048] UBIFS error (ubi0:0 pid 379): ubifs_assert_failed: UBIFS assert failed: !(hash & ~UBIFS_S_KEY_HASH_MASK), in fs/ubifs/key.h:170 Fixes: f4f61d2cc6d8 ("ubifs: Implement encrypted filenames") Cc: stable@vger.kernel.org # v4.10+ Link: https://lore.kernel.org/r/20200120223201.241390-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/ubifs/dir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index f7057c320a33..6bba57bbf1ae 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -240,6 +240,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, if (nm.hash) { ubifs_assert(fname_len(&nm) == 0); ubifs_assert(fname_name(&nm) == NULL); + if (nm.hash & ~UBIFS_S_KEY_HASH_MASK) + goto done; /* ENOENT */ dent_key_init_hash(c, &key, dir->i_ino, nm.hash); err = ubifs_tnc_lookup_dh(c, &key, dent, nm.minor_hash); } else { -- GitLab From 7fd1c005ecf02122fe5026a1e5d0d6e23e298f72 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 Jan 2020 14:32:00 -0800 Subject: [PATCH 0026/1278] ubifs: allow both hash and disk name to be provided in no-key names In order to support a new dirhash method that is a secret-keyed hash over the plaintext filenames (which will be used by encrypted+casefolded directories on ext4 and f2fs), fscrypt will be switching to a new no-key name format that always encodes the dirhash in the name. UBIFS isn't happy with this because it has assertions that verify that either the hash or the disk name is provided, not both. Change it to use the disk name if one is provided, even if a hash is available too; else use the hash. 
Link: https://lore.kernel.org/r/20200120223201.241390-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/ubifs/dir.c | 4 +--- fs/ubifs/journal.c | 4 ++-- fs/ubifs/key.h | 1 - 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 6bba57bbf1ae..0dc907a67889 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -237,9 +237,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, goto out_fname; } - if (nm.hash) { - ubifs_assert(fname_len(&nm) == 0); - ubifs_assert(fname_name(&nm) == NULL); + if (fname_name(&nm) == NULL) { if (nm.hash & ~UBIFS_S_KEY_HASH_MASK) goto done; /* ENOENT */ dent_key_init_hash(c, &key, dir->i_ino, nm.hash); diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 04c4ec6483e5..708bd86cf020 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -583,7 +583,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (!xent) { dent->ch.node_type = UBIFS_DENT_NODE; - if (nm->hash) + if (fname_name(nm) == NULL) dent_key_init_hash(c, &dent_key, dir->i_ino, nm->hash); else dent_key_init(c, &dent_key, dir->i_ino, nm); @@ -630,7 +630,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, kfree(dent); if (deletion) { - if (nm->hash) + if (fname_name(nm) == NULL) err = ubifs_tnc_remove_dh(c, &dent_key, nm->minor_hash); else err = ubifs_tnc_remove_nm(c, &dent_key, nm); diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index b1f7c0caa3ac..7547be512db2 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -162,7 +162,6 @@ static inline void dent_key_init(const struct ubifs_info *c, uint32_t hash = c->key_hash(fname_name(nm), fname_len(nm)); ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); - ubifs_assert(!nm->hash && !nm->minor_hash); key->u32[0] = inum; key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); } -- GitLab From fe6e85580b05e9ead1b6dd9fe10201555130c838 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 20 Jan 2020 14:32:01 
-0800 Subject: [PATCH 0027/1278] fscrypt: improve format of no-key names When an encrypted directory is listed without the key, the filesystem must show "no-key names" that uniquely identify directory entries, are at most 255 (NAME_MAX) bytes long, and don't contain '/' or '\0'. Currently, for short names the no-key name is the base64 encoding of the ciphertext filename, while for long names it's the base64 encoding of the ciphertext filename's dirhash and second-to-last 16-byte block. This format has the following problems: - Since it doesn't always include the dirhash, it's incompatible with directories that will use a secret-keyed dirhash over the plaintext filenames. In this case, the dirhash won't be computable from the ciphertext name without the key, so it instead must be retrieved from the directory entry and always included in the no-key name. Casefolded encrypted directories will use this type of dirhash. - It's ambiguous: it's possible to craft two filenames that map to the same no-key name, since the method used to abbreviate long filenames doesn't use a proper cryptographic hash function. Solve both these problems by switching to a new no-key name format that is the base64 encoding of a variable-length structure that contains the dirhash, up to 149 bytes of the ciphertext filename, and (if any bytes remain) the SHA-256 of the remaining bytes of the ciphertext filename. This ensures that each no-key name contains everything needed to find the directory entry again, contains only legal characters, doesn't exceed NAME_MAX, is unambiguous unless there's a SHA-256 collision, and that we only take the performance hit of SHA-256 on very long filenames. Note: this change does *not* address the existing issue where users can modify the 'dirhash' part of a no-key name and the filesystem may still accept the name. 
Signed-off-by: Daniel Rosenberg [EB: improved comments and commit message, fixed checking return value of base64_decode(), check for SHA-256 error, continue to set disk_name for short names to keep matching simpler, and many other cleanups] Link: https://lore.kernel.org/r/20200120223201.241390-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 2 +- fs/crypto/Kconfig | 1 + fs/crypto/fname.c | 219 ++++++++++++++++++++------ include/linux/fscrypt.h | 77 +-------- 4 files changed, 172 insertions(+), 127 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 28524dcc51c9..fbcd185d15da 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -1202,7 +1202,7 @@ filesystem-specific hash(es) needed for directory lookups. This allows the filesystem to still, with a high degree of confidence, map the filename given in ->lookup() back to a particular directory entry that was previously listed by readdir(). See :c:type:`struct -fscrypt_digested_name` in the source for more details. +fscrypt_nokey_name` in the source for more details. Note that the precise way that filenames are presented to userspace without the key is subject to change in the future. 
It is only meant diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index d0a0238e37dd..a7cc1f9808d1 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -20,5 +20,6 @@ config FS_ENCRYPTION_ALGS select CRYPTO_CTS select CRYPTO_ECB select CRYPTO_HMAC + select CRYPTO_SHA256 select CRYPTO_SHA512 select CRYPTO_XTS diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 851d2082ecfe..5db3cc8c07e3 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -13,9 +13,86 @@ #include #include +#include +#include #include #include "fscrypt_private.h" +/** + * struct fscrypt_nokey_name - identifier for directory entry when key is absent + * + * When userspace lists an encrypted directory without access to the key, the + * filesystem must present a unique "no-key name" for each filename that allows + * it to find the directory entry again if requested. Naively, that would just + * mean using the ciphertext filenames. However, since the ciphertext filenames + * can contain illegal characters ('\0' and '/'), they must be encoded in some + * way. We use base64. But that can cause names to exceed NAME_MAX (255 + * bytes), so we also need to use a strong hash to abbreviate long names. + * + * The filesystem may also need another kind of hash, the "dirhash", to quickly + * find the directory entry. Since filesystems normally compute the dirhash + * over the on-disk filename (i.e. the ciphertext), it's not computable from + * no-key names that abbreviate the ciphertext using the strong hash to fit in + * NAME_MAX. It's also not computable if it's a keyed hash taken over the + * plaintext (but it may still be available in the on-disk directory entry); + * casefolded directories use this type of dirhash. At least in these cases, + * each no-key name must include the name's dirhash too. + * + * To meet all these requirements, we base64-encode the following + * variable-length structure. 
It contains the dirhash, or 0's if the filesystem + * didn't provide one; up to 149 bytes of the ciphertext name; and for + * ciphertexts longer than 149 bytes, also the SHA-256 of the remaining bytes. + * + * This ensures that each no-key name contains everything needed to find the + * directory entry again, contains only legal characters, doesn't exceed + * NAME_MAX, is unambiguous unless there's a SHA-256 collision, and that we only + * take the performance hit of SHA-256 on very long filenames (which are rare). + */ +struct fscrypt_nokey_name { + u32 dirhash[2]; + u8 bytes[149]; + u8 sha256[SHA256_DIGEST_SIZE]; +}; /* 189 bytes => 252 bytes base64-encoded, which is <= NAME_MAX (255) */ + +/* + * Decoded size of max-size nokey name, i.e. a name that was abbreviated using + * the strong hash and thus includes the 'sha256' field. This isn't simply + * sizeof(struct fscrypt_nokey_name), as the padding at the end isn't included. + */ +#define FSCRYPT_NOKEY_NAME_MAX offsetofend(struct fscrypt_nokey_name, sha256) + +static struct crypto_shash *sha256_hash_tfm; + +static int fscrypt_do_sha256(const u8 *data, unsigned int data_len, u8 *result) +{ + struct crypto_shash *tfm = READ_ONCE(sha256_hash_tfm); + + if (unlikely(!tfm)) { + struct crypto_shash *prev_tfm; + + tfm = crypto_alloc_shash("sha256", 0, 0); + if (IS_ERR(tfm)) { + fscrypt_err(NULL, + "Error allocating SHA-256 transform: %ld", + PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + prev_tfm = cmpxchg(&sha256_hash_tfm, NULL, tfm); + if (prev_tfm) { + crypto_free_shash(tfm); + tfm = prev_tfm; + } + } + { + SHASH_DESC_ON_STACK(desc, tfm); + + desc->tfm = tfm; + desc->flags = 0; + + return crypto_shash_digest(desc, data, data_len, result); + } +} + static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) { if (str->len == 1 && str->name[0] == '.') @@ -207,9 +284,7 @@ int fscrypt_fname_alloc_buffer(const struct inode *inode, u32 max_encrypted_len, struct fscrypt_str *crypto_str) { - const u32 max_encoded_len = - 
max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE), - 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))); + const u32 max_encoded_len = BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX); u32 max_presented_len; max_presented_len = max(max_encoded_len, max_encrypted_len); @@ -242,9 +317,9 @@ EXPORT_SYMBOL(fscrypt_fname_free_buffer); * * The caller must have allocated sufficient memory for the @oname string. * - * If the key is available, we'll decrypt the disk name; otherwise, we'll encode - * it for presentation. Short names are directly base64-encoded, while long - * names are encoded in fscrypt_digested_name format. + * If the key is available, we'll decrypt the disk name. Otherwise, we'll + * encode it for presentation in fscrypt_nokey_name format. + * See struct fscrypt_nokey_name for details. * * Return: 0 on success, -errno on failure */ @@ -254,7 +329,9 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode, struct fscrypt_str *oname) { const struct qstr qname = FSTR_TO_QSTR(iname); - struct fscrypt_digested_name digested_name; + struct fscrypt_nokey_name nokey_name; + u32 size; /* size of the unencoded no-key name */ + int err; if (fscrypt_is_dot_dotdot(&qname)) { oname->name[0] = '.'; @@ -269,24 +346,37 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode, if (fscrypt_has_encryption_key(inode)) return fname_decrypt(inode, iname, oname); - if (iname->len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) { - oname->len = base64_encode(iname->name, iname->len, - oname->name); - return 0; - } + /* + * Sanity check that struct fscrypt_nokey_name doesn't have padding + * between fields and that its encoded size never exceeds NAME_MAX. 
+ */ + BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, dirhash) != + offsetof(struct fscrypt_nokey_name, bytes)); + BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, bytes) != + offsetof(struct fscrypt_nokey_name, sha256)); + BUILD_BUG_ON(BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX) > NAME_MAX); + if (hash) { - digested_name.hash = hash; - digested_name.minor_hash = minor_hash; + nokey_name.dirhash[0] = hash; + nokey_name.dirhash[1] = minor_hash; + } else { + nokey_name.dirhash[0] = 0; + nokey_name.dirhash[1] = 0; + } + if (iname->len <= sizeof(nokey_name.bytes)) { + memcpy(nokey_name.bytes, iname->name, iname->len); + size = offsetof(struct fscrypt_nokey_name, bytes[iname->len]); } else { - digested_name.hash = 0; - digested_name.minor_hash = 0; + memcpy(nokey_name.bytes, iname->name, sizeof(nokey_name.bytes)); + /* Compute strong hash of remaining part of name. */ + err = fscrypt_do_sha256(&iname->name[sizeof(nokey_name.bytes)], + iname->len - sizeof(nokey_name.bytes), + nokey_name.sha256); + if (err) + return err; + size = FSCRYPT_NOKEY_NAME_MAX; } - memcpy(digested_name.digest, - FSCRYPT_FNAME_DIGEST(iname->name, iname->len), - FSCRYPT_FNAME_DIGEST_SIZE); - oname->name[0] = '_'; - oname->len = 1 + base64_encode((const u8 *)&digested_name, - sizeof(digested_name), oname->name + 1); + oname->len = base64_encode((const u8 *)&nokey_name, size, oname->name); return 0; } EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); @@ -307,8 +397,7 @@ EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); * get the disk_name. * * Else, for keyless @lookup operations, @iname is the presented ciphertext, so - * we decode it to get either the ciphertext disk_name (for short names) or the - * fscrypt_digested_name (for long names). Non-@lookup operations will be + * we decode it to get the fscrypt_nokey_name. Non-@lookup operations will be * impossible in this case, so we fail them with ENOKEY. * * If successful, fscrypt_free_filename() must be called later to clean up. 
@@ -318,8 +407,8 @@ EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { + struct fscrypt_nokey_name *nokey_name; int ret; - int digested; memset(fname, 0, sizeof(struct fscrypt_name)); fname->usr_fname = iname; @@ -359,40 +448,31 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, * We don't have the key and we are doing a lookup; decode the * user-supplied name */ - if (iname->name[0] == '_') { - if (iname->len != - 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))) - return -ENOENT; - digested = 1; - } else { - if (iname->len > - BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE)) - return -ENOENT; - digested = 0; - } - fname->crypto_buf.name = - kmalloc(max_t(size_t, FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE, - sizeof(struct fscrypt_digested_name)), - GFP_KERNEL); + if (iname->len > BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX)) + return -ENOENT; + + fname->crypto_buf.name = kmalloc(FSCRYPT_NOKEY_NAME_MAX, GFP_KERNEL); if (fname->crypto_buf.name == NULL) return -ENOMEM; - ret = base64_decode(iname->name + digested, iname->len - digested, - fname->crypto_buf.name); - if (ret < 0) { + ret = base64_decode(iname->name, iname->len, fname->crypto_buf.name); + if (ret < (int)offsetof(struct fscrypt_nokey_name, bytes[1]) || + (ret > offsetof(struct fscrypt_nokey_name, sha256) && + ret != FSCRYPT_NOKEY_NAME_MAX)) { ret = -ENOENT; goto errout; } fname->crypto_buf.len = ret; - if (digested) { - const struct fscrypt_digested_name *n = - (const void *)fname->crypto_buf.name; - fname->hash = n->hash; - fname->minor_hash = n->minor_hash; - } else { - fname->disk_name.name = fname->crypto_buf.name; - fname->disk_name.len = fname->crypto_buf.len; + + nokey_name = (void *)fname->crypto_buf.name; + fname->hash = nokey_name->dirhash[0]; + fname->minor_hash = nokey_name->dirhash[1]; + if (ret != FSCRYPT_NOKEY_NAME_MAX) { + /* The full ciphertext filename is available. 
*/ + fname->disk_name.name = nokey_name->bytes; + fname->disk_name.len = + ret - offsetof(struct fscrypt_nokey_name, bytes); } return 0; @@ -402,6 +482,43 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, } EXPORT_SYMBOL(fscrypt_setup_filename); +/** + * fscrypt_match_name() - test whether the given name matches a directory entry + * @fname: the name being searched for + * @de_name: the name from the directory entry + * @de_name_len: the length of @de_name in bytes + * + * Normally @fname->disk_name will be set, and in that case we simply compare + * that to the name stored in the directory entry. The only exception is that + * if we don't have the key for an encrypted directory and the name we're + * looking for is very long, then we won't have the full disk_name and instead + * we'll need to match against a fscrypt_nokey_name that includes a strong hash. + * + * Return: %true if the name matches, otherwise %false. + */ +bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + const struct fscrypt_nokey_name *nokey_name = + (const void *)fname->crypto_buf.name; + u8 sha256[SHA256_DIGEST_SIZE]; + + if (likely(fname->disk_name.name)) { + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, de_name_len); + } + if (de_name_len <= sizeof(nokey_name->bytes)) + return false; + if (memcmp(de_name, nokey_name->bytes, sizeof(nokey_name->bytes))) + return false; + if (fscrypt_do_sha256(&de_name[sizeof(nokey_name->bytes)], + de_name_len - sizeof(nokey_name->bytes), sha256)) + return false; + return !memcmp(sha256, nokey_name->sha256, sizeof(sha256)); +} +EXPORT_SYMBOL_GPL(fscrypt_match_name); + /** * fscrypt_fname_siphash() - calculate the SipHash of a filename * @dir: the parent directory diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index ccdee616e7e5..fd1bc965a3e3 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ 
-172,81 +172,8 @@ extern int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname); - -#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 - -/* Extracts the second-to-last ciphertext block; see explanation below */ -#define FSCRYPT_FNAME_DIGEST(name, len) \ - ((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \ - FS_CRYPTO_BLOCK_SIZE)) - -#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE - -/** - * fscrypt_digested_name - alternate identifier for an on-disk filename - * - * When userspace lists an encrypted directory without access to the key, - * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE - * bytes are shown in this abbreviated form (base64-encoded) rather than as the - * full ciphertext (base64-encoded). This is necessary to allow supporting - * filenames up to NAME_MAX bytes, since base64 encoding expands the length. - * - * To make it possible for filesystems to still find the correct directory entry - * despite not knowing the full on-disk name, we encode any filesystem-specific - * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups, - * followed by the second-to-last ciphertext block of the filename. Due to the - * use of the CBC-CTS encryption mode, the second-to-last ciphertext block - * depends on the full plaintext. (Note that ciphertext stealing causes the - * last two blocks to appear "flipped".) This makes accidental collisions very - * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they - * share the same filesystem-specific hashes. - * - * However, this scheme isn't immune to intentional collisions, which can be - * created by anyone able to create arbitrary plaintext filenames and view them - * without the key. 
Making the "digest" be a real cryptographic hash like - * SHA-256 over the full ciphertext would prevent this, although it would be - * less efficient and harder to implement, especially since the filesystem would - * need to calculate it for each directory entry examined during a search. - */ -struct fscrypt_digested_name { - u32 hash; - u32 minor_hash; - u8 digest[FSCRYPT_FNAME_DIGEST_SIZE]; -}; - -/** - * fscrypt_match_name() - test whether the given name matches a directory entry - * @fname: the name being searched for - * @de_name: the name from the directory entry - * @de_name_len: the length of @de_name in bytes - * - * Normally @fname->disk_name will be set, and in that case we simply compare - * that to the name stored in the directory entry. The only exception is that - * if we don't have the key for an encrypted directory and a filename in it is - * very long, then we won't have the full disk_name and we'll instead need to - * match against the fscrypt_digested_name. - * - * Return: %true if the name matches, otherwise %false. 
- */ -static inline bool fscrypt_match_name(const struct fscrypt_name *fname, - const u8 *de_name, u32 de_name_len) -{ - if (unlikely(!fname->disk_name.name)) { - const struct fscrypt_digested_name *n = - (const void *)fname->crypto_buf.name; - if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_')) - return false; - if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) - return false; - return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len), - n->digest, FSCRYPT_FNAME_DIGEST_SIZE); - } - - if (de_name_len != fname->disk_name.len) - return false; - return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); -} - +extern bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len); extern u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); -- GitLab From 693ad1e5b397bbc6c7fb971c8406426a5f21c8c1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 6 Jan 2020 12:54:10 -0800 Subject: [PATCH 0028/1278] fs-verity: implement readahead for FS_IOC_ENABLE_VERITY When it builds the first level of the Merkle tree, FS_IOC_ENABLE_VERITY sequentially reads each page of the file using read_mapping_page(). This works fine if the file's data is already in pagecache, which should normally be the case, since this ioctl is normally used immediately after writing out the file. But in any other case this implementation performs very poorly, since only one page is read at a time. Fix this by implementing readahead using the functions from mm/readahead.c. This improves performance in the uncached case by about 20x, as seen in the following benchmarks done on a 250MB file (on x86_64 with SHA-NI): FS_IOC_ENABLE_VERITY uncached (before) 3.299s FS_IOC_ENABLE_VERITY uncached (after) 0.160s FS_IOC_ENABLE_VERITY cached 0.147s sha256sum uncached 0.191s sha256sum cached 0.145s Note: we could instead switch to kernel_read(). 
But that would mean we'd no longer be hashing the data directly from the pagecache, which is a nice optimization of its own. And using kernel_read() would require allocating another temporary buffer, hashing the data and tree pages separately, and explicitly zero-padding the last page -- so it wouldn't really be any simpler than direct pagecache access, at least for now. Link: https://lore.kernel.org/r/20200106205410.136707-1-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- fs/verity/enable.c | 45 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/fs/verity/enable.c b/fs/verity/enable.c index eabc6ac19906..1f05f7319377 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -13,13 +13,42 @@ #include #include -static int build_merkle_tree_level(struct inode *inode, unsigned int level, +/* + * Read a file data page for Merkle tree construction. Do aggressive readahead, + * since we're sequentially reading the entire file. 
+ */ +static struct page *read_file_data_page(struct file *filp, pgoff_t index, + struct file_ra_state *ra, + unsigned long remaining_pages) +{ + struct page *page; + + page = find_get_page_flags(filp->f_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { + if (page) + put_page(page); + else + page_cache_sync_readahead(filp->f_mapping, ra, filp, + index, remaining_pages); + page = read_mapping_page(filp->f_mapping, index, NULL); + if (IS_ERR(page)) + return page; + } + if (PageReadahead(page)) + page_cache_async_readahead(filp->f_mapping, ra, filp, page, + index, remaining_pages); + return page; +} + +static int build_merkle_tree_level(struct file *filp, unsigned int level, u64 num_blocks_to_hash, const struct merkle_tree_params *params, u8 *pending_hashes, struct ahash_request *req) { + struct inode *inode = file_inode(filp); const struct fsverity_operations *vops = inode->i_sb->s_vop; + struct file_ra_state ra = { 0 }; unsigned int pending_size = 0; u64 dst_block_num; u64 i; @@ -36,6 +65,8 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, dst_block_num = 0; /* unused */ } + file_ra_state_init(&ra, filp->f_mapping); + for (i = 0; i < num_blocks_to_hash; i++) { struct page *src_page; @@ -45,7 +76,8 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, if (level == 0) { /* Leaf: hashing a data block */ - src_page = read_mapping_page(inode->i_mapping, i, NULL); + src_page = read_file_data_page(filp, i, &ra, + num_blocks_to_hash - i); if (IS_ERR(src_page)) { err = PTR_ERR(src_page); fsverity_err(inode, @@ -103,17 +135,18 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, } /* - * Build the Merkle tree for the given inode using the given parameters, and + * Build the Merkle tree for the given file using the given parameters, and * return the root hash in @root_hash. 
* * The tree is written to a filesystem-specific location as determined by the * ->write_merkle_tree_block() method. However, the blocks that comprise the * tree are the same for all filesystems. */ -static int build_merkle_tree(struct inode *inode, +static int build_merkle_tree(struct file *filp, const struct merkle_tree_params *params, u8 *root_hash) { + struct inode *inode = file_inode(filp); u8 *pending_hashes; struct ahash_request *req; u64 blocks; @@ -139,7 +172,7 @@ static int build_merkle_tree(struct inode *inode, blocks = (inode->i_size + params->block_size - 1) >> params->log_blocksize; for (level = 0; level <= params->num_levels; level++) { - err = build_merkle_tree_level(inode, level, blocks, params, + err = build_merkle_tree_level(filp, level, blocks, params, pending_hashes, req); if (err) goto out; @@ -227,7 +260,7 @@ static int enable_verity(struct file *filp, */ pr_debug("Building Merkle tree...\n"); BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE); - err = build_merkle_tree(inode, &params, desc->root_hash); + err = build_merkle_tree(filp, &params, desc->root_hash); if (err) { fsverity_err(inode, "Error %d building Merkle tree", err); goto rollback; -- GitLab From 2ff972ed7e88ba838552d9c5569c699f18244b37 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Aug 2018 15:45:42 -0700 Subject: [PATCH 0029/1278] ext4: readpages() should submit IO as read-ahead a_ops->readpages() is only ever used for read-ahead. Ensure that we pass this information down to the block layer.
Link: http://lkml.kernel.org/r/20180621010725.17813-5-axboe@kernel.dk Signed-off-by: Jens Axboe Reviewed-by: Andrew Morton Cc: Al Viro Cc: Chris Mason Cc: Christoph Hellwig Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 5 +++-- fs/ext4/readpage.c | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8d6accd3f763..f933c35fdcc1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3201,7 +3201,7 @@ static inline void ext4_set_de_type(struct super_block *sb, /* readpages.c */ extern int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages); + unsigned nr_pages, bool is_readahead); extern int __init ext4_init_post_read_processing(void); extern void ext4_exit_post_read_processing(void); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4f2c73b52c83..4b68bfe9e5cb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3328,7 +3328,8 @@ static int ext4_readpage(struct file *file, struct page *page) ret = ext4_readpage_inline(inode, page); if (ret == -EAGAIN) - return ext4_mpage_readpages(page->mapping, NULL, page, 1); + return ext4_mpage_readpages(page->mapping, NULL, page, 1, + false); return ret; } @@ -3343,7 +3344,7 @@ ext4_readpages(struct file *file, struct address_space *mapping, if (ext4_has_inline_data(inode)) return 0; - return ext4_mpage_readpages(mapping, pages, NULL, nr_pages); + return ext4_mpage_readpages(mapping, pages, NULL, nr_pages, true); } static void ext4_invalidatepage(struct page *page, unsigned int offset, diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 49b8af055fa7..b82456f0f691 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -211,7 +211,7 @@ static inline loff_t ext4_readpage_limit(struct inode *inode) int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages) + unsigned 
nr_pages, bool is_readahead) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; @@ -373,7 +373,8 @@ int ext4_mpage_readpages(struct address_space *mapping, bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_end_io = mpage_end_io; bio->bi_private = ctx; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio_set_op_attrs(bio, REQ_OP_READ, + is_readahead ? REQ_RAHEAD : 0); } length = first_hole << blkbits; -- GitLab From 18ddf07481a4bcd0520abd8c8ad92f8bd6db2af2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 6 Jan 2020 12:55:33 -0800 Subject: [PATCH 0030/1278] fs-verity: implement readahead of Merkle tree pages When fs-verity verifies data pages, currently it reads each Merkle tree page synchronously using read_mapping_page(). Therefore, when the Merkle tree pages aren't already cached, fs-verity causes an extra 4 KiB I/O request for every 512 KiB of data (assuming that the Merkle tree uses SHA-256 and 4 KiB blocks). This results in more I/O requests and performance loss than is strictly necessary. Therefore, implement readahead of the Merkle tree pages. For simplicity, we take advantage of the fact that the kernel already does readahead of the file's *data*, just like it does for any other file. Due to this, we don't really need a separate readahead state (struct file_ra_state) just for the Merkle tree, but rather we just need to piggy-back on the existing data readahead requests. We also only really need to bother with the first level of the Merkle tree, since the usual fan-out factor is 128, so normally over 99% of Merkle tree I/O requests are for the first level. Therefore, make fsverity_verify_bio() enable readahead of the first Merkle tree level, for up to 1/4 the number of pages in the bio, when it sees that the REQ_RAHEAD flag is set on the bio. The readahead size is then passed down to ->read_merkle_tree_page() for the filesystem to (optionally) implement if it sees that the requested page is uncached. 
While we're at it, also make build_merkle_tree_level() set the Merkle tree readahead size, since it's easy to do there. However, for now don't set the readahead size in fsverity_verify_page(), since currently it's only used to verify holes on ext4 and f2fs, and it would need parameters added to know how much to read ahead. This patch significantly improves fs-verity sequential read performance. Some quick benchmarks with 'cat'-ing a 250MB file after dropping caches: On an ARM64 phone (using sha256-ce): Before: 217 MB/s After: 263 MB/s (compare to sha256sum of non-verity file: 357 MB/s) In an x86_64 VM (using sha256-avx2): Before: 173 MB/s After: 215 MB/s (compare to sha256sum of non-verity file: 223 MB/s) Link: https://lore.kernel.org/r/20200106205533.137005-1-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- fs/ext4/verity.c | 49 ++++++++++++++++++++++++++++++++++-- fs/f2fs/verity.c | 49 ++++++++++++++++++++++++++++++++++-- fs/verity/enable.c | 8 +++++- fs/verity/fsverity_private.h | 1 + fs/verity/open.c | 1 + fs/verity/verify.c | 34 ++++++++++++++++++++----- include/linux/fsverity.h | 7 +++++- 7 files changed, 137 insertions(+), 12 deletions(-) diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index d0d8a9795dd6..bd717248a4bc 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -342,12 +342,57 @@ static int ext4_get_verity_descriptor(struct inode *inode, void *buf, return desc_size; } +/* + * Prefetch some pages from the file's Merkle tree. + * + * This is basically a stripped-down version of __do_page_cache_readahead() + * which works on pages past i_size. 
+ */ +static void ext4_merkle_tree_readahead(struct address_space *mapping, + pgoff_t start_index, unsigned long count) +{ + LIST_HEAD(pages); + unsigned int nr_pages = 0; + struct page *page; + pgoff_t index; + struct blk_plug plug; + + for (index = start_index; index < start_index + count; index++) { + rcu_read_lock(); + page = radix_tree_lookup(&mapping->page_tree, index); + rcu_read_unlock(); + if (!page || radix_tree_exceptional_entry(page)) { + page = __page_cache_alloc(readahead_gfp_mask(mapping)); + if (!page) + break; + page->index = index; + list_add(&page->lru, &pages); + nr_pages++; + } + } + blk_start_plug(&plug); + ext4_mpage_readpages(mapping, &pages, NULL, nr_pages, true); + blk_finish_plug(&plug); +} + static struct page *ext4_read_merkle_tree_page(struct inode *inode, - pgoff_t index) + pgoff_t index, + unsigned long num_ra_pages) { + struct page *page; + index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; - return read_mapping_page(inode->i_mapping, index, NULL); + page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { + if (page) + put_page(page); + else if (num_ra_pages > 1) + ext4_merkle_tree_readahead(inode->i_mapping, index, + num_ra_pages); + page = read_mapping_page(inode->i_mapping, index, NULL); + } + return page; } static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf, diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index a401ef72bc82..5905050f7fb8 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -222,12 +222,57 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, return size; } +/* + * Prefetch some pages from the file's Merkle tree. + * + * This is basically a stripped-down version of __do_page_cache_readahead() + * which works on pages past i_size. 
+ */ +static void f2fs_merkle_tree_readahead(struct address_space *mapping, + pgoff_t start_index, unsigned long count) +{ + LIST_HEAD(pages); + unsigned int nr_pages = 0; + struct page *page; + pgoff_t index; + struct blk_plug plug; + + for (index = start_index; index < start_index + count; index++) { + rcu_read_lock(); + page = radix_tree_lookup(&mapping->page_tree, index); + rcu_read_unlock(); + if (!page || radix_tree_exceptional_entry(page)) { + page = __page_cache_alloc(readahead_gfp_mask(mapping)); + if (!page) + break; + page->index = index; + list_add(&page->lru, &pages); + nr_pages++; + } + } + blk_start_plug(&plug); + f2fs_mpage_readpages(mapping, &pages, NULL, nr_pages, true); + blk_finish_plug(&plug); +} + static struct page *f2fs_read_merkle_tree_page(struct inode *inode, - pgoff_t index) + pgoff_t index, + unsigned long num_ra_pages) { + struct page *page; + index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; - return read_mapping_page(inode->i_mapping, index, NULL); + page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { + if (page) + put_page(page); + else if (num_ra_pages > 1) + f2fs_merkle_tree_readahead(inode->i_mapping, index, + num_ra_pages); + page = read_mapping_page(inode->i_mapping, index, NULL); + } + return page; } static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf, diff --git a/fs/verity/enable.c b/fs/verity/enable.c index 1f05f7319377..9d30708d963d 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -8,6 +8,7 @@ #include "fsverity_private.h" #include +#include #include #include #include @@ -86,9 +87,14 @@ static int build_merkle_tree_level(struct file *filp, unsigned int level, return err; } } else { + unsigned long num_ra_pages = + min_t(unsigned long, num_blocks_to_hash - i, + inode->i_sb->s_bdi->io_pages); + /* Non-leaf: hashing hash block from level below */ src_page = vops->read_merkle_tree_page(inode, - params->level_start[level - 1] + i); + 
params->level_start[level - 1] + i, + num_ra_pages); if (IS_ERR(src_page)) { err = PTR_ERR(src_page); fsverity_err(inode, diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index e74c79b64d88..ab9cfdd8f965 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -50,6 +50,7 @@ struct merkle_tree_params { unsigned int log_arity; /* log2(hashes_per_block) */ unsigned int num_levels; /* number of levels in Merkle tree */ u64 tree_size; /* Merkle tree size in bytes */ + unsigned long level0_blocks; /* number of blocks in tree level 0 */ /* * Starting block index for each tree level, ordered from leaf level (0) diff --git a/fs/verity/open.c b/fs/verity/open.c index 4cdd75acbc97..b7b0a5479c6f 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -102,6 +102,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, /* temporarily using level_start[] to store blocks in level */ params->level_start[params->num_levels++] = blocks; } + params->level0_blocks = params->level_start[0]; /* Compute the starting block of each level */ offset = 0; diff --git a/fs/verity/verify.c b/fs/verity/verify.c index cf09852e5227..461789903709 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -84,7 +84,8 @@ static inline int cmp_hashes(const struct fsverity_info *vi, * Return: true if the page is valid, else false. 
*/ static bool verify_page(struct inode *inode, const struct fsverity_info *vi, - struct ahash_request *req, struct page *data_page) + struct ahash_request *req, struct page *data_page, + unsigned long level0_ra_pages) { const struct merkle_tree_params *params = &vi->tree_params; const unsigned int hsize = params->digest_size; @@ -117,8 +118,8 @@ static bool verify_page(struct inode *inode, const struct fsverity_info *vi, pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n", level, hindex, hoffset); - hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, - hindex); + hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, hindex, + level == 0 ? level0_ra_pages : 0); if (IS_ERR(hpage)) { err = PTR_ERR(hpage); fsverity_err(inode, @@ -195,7 +196,7 @@ bool fsverity_verify_page(struct page *page) if (unlikely(!req)) return false; - valid = verify_page(inode, vi, req, page); + valid = verify_page(inode, vi, req, page, 0); ahash_request_free(req); @@ -222,21 +223,42 @@ void fsverity_verify_bio(struct bio *bio) { struct inode *inode = bio->bi_io_vec->bv_page->mapping->host; const struct fsverity_info *vi = inode->i_verity_info; + const struct merkle_tree_params *params = &vi->tree_params; struct ahash_request *req; struct bio_vec *bv; int i; + unsigned long max_ra_pages = 0; - req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS); + req = ahash_request_alloc(params->hash_alg->tfm, GFP_NOFS); if (unlikely(!req)) { bio_for_each_segment_all(bv, bio, i) SetPageError(bv->bv_page); return; } + if (bio->bi_opf & REQ_RAHEAD) { + /* + * If this bio is for data readahead, then we also do readahead + * of the first (largest) level of the Merkle tree. Namely, + * when a Merkle tree page is read, we also try to piggy-back on + * some additional pages -- up to 1/4 the number of data pages. + * + * This improves sequential read performance, as it greatly + * reduces the number of I/O requests made to the Merkle tree. 
+ */ + bio_for_each_segment_all(bv, bio, i) + max_ra_pages++; + max_ra_pages /= 4; + } + bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; + unsigned long level0_index = page->index >> params->log_arity; + unsigned long level0_ra_pages = + min(max_ra_pages, params->level0_blocks - level0_index); - if (!PageError(page) && !verify_page(inode, vi, req, page)) + if (!PageError(page) && + !verify_page(inode, vi, req, page, level0_ra_pages)) SetPageError(page); } diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 3b6b8ccebe7d..ecc604e61d61 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -77,6 +77,10 @@ struct fsverity_operations { * * @inode: the inode * @index: 0-based index of the page within the Merkle tree + * @num_ra_pages: The number of Merkle tree pages that should be + * prefetched starting at @index if the page at @index + * isn't already cached. Implementations may ignore this + * argument; it's only a performance optimization. * * This can be called at any time on an open verity file, as well as * between ->begin_enable_verity() and ->end_enable_verity(). It may be @@ -87,7 +91,8 @@ struct fsverity_operations { * Return: the page on success, ERR_PTR() on failure */ struct page *(*read_merkle_tree_page)(struct inode *inode, - pgoff_t index); + pgoff_t index, + unsigned long num_ra_pages); /** * Write a Merkle tree block to the given inode. -- GitLab From 4d9ce1827aa2627bb9e08e396185ebb994dccf80 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 31 Dec 2019 11:55:45 -0600 Subject: [PATCH 0031/1278] fs-verity: use mempool for hash requests When initializing an fs-verity hash algorithm, also initialize a mempool that contains a single preallocated hash request object. Then replace the direct calls to ahash_request_alloc() and ahash_request_free() with allocating and freeing from this mempool. This eliminates the possibility of the allocation failing, which is desirable for the I/O path. 
This doesn't cause deadlocks because there's no case where multiple hash requests are needed at a time to make forward progress. Link: https://lore.kernel.org/r/20191231175545.20709-1-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- fs/verity/enable.c | 8 +-- fs/verity/fsverity_private.h | 16 ++++-- fs/verity/hash_algs.c | 100 ++++++++++++++++++++++++++--------- fs/verity/open.c | 4 +- fs/verity/verify.c | 17 +++--- 5 files changed, 99 insertions(+), 46 deletions(-) diff --git a/fs/verity/enable.c b/fs/verity/enable.c index 9d30708d963d..d22f5161ff9a 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -165,9 +165,11 @@ static int build_merkle_tree(struct file *filp, return 0; } + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(params->hash_alg, GFP_KERNEL); + pending_hashes = kmalloc(params->block_size, GFP_KERNEL); - req = ahash_request_alloc(params->hash_alg->tfm, GFP_KERNEL); - if (!pending_hashes || !req) + if (!pending_hashes) goto out; /* @@ -189,7 +191,7 @@ static int build_merkle_tree(struct file *filp, err = 0; out: kfree(pending_hashes); - ahash_request_free(req); + fsverity_free_hash_request(params->hash_alg, req); return err; } diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index ab9cfdd8f965..4b2c8aed0563 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -16,6 +16,7 @@ #include #include +#include struct ahash_request; @@ -37,11 +38,12 @@ struct fsverity_hash_alg { const char *name; /* crypto API name, e.g. sha256 */ unsigned int digest_size; /* digest size in bytes, e.g. 32 for SHA-256 */ unsigned int block_size; /* block size in bytes, e.g. 
64 for SHA-256 */ + mempool_t *req_pool; /* mempool with a preallocated hash request */ }; /* Merkle tree parameters: hash algorithm, initial hash state, and topology */ struct merkle_tree_params { - const struct fsverity_hash_alg *hash_alg; /* the hash algorithm */ + struct fsverity_hash_alg *hash_alg; /* the hash algorithm */ const u8 *hashstate; /* initial hash state or NULL */ unsigned int digest_size; /* same as hash_alg->digest_size */ unsigned int block_size; /* size of data and tree blocks */ @@ -115,14 +117,18 @@ struct fsverity_signed_digest { extern struct fsverity_hash_alg fsverity_hash_algs[]; -const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, - unsigned int num); -const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, +struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num); +struct ahash_request *fsverity_alloc_hash_request(struct fsverity_hash_alg *alg, + gfp_t gfp_flags); +void fsverity_free_hash_request(struct fsverity_hash_alg *alg, + struct ahash_request *req); +const u8 *fsverity_prepare_hash_state(struct fsverity_hash_alg *alg, const u8 *salt, size_t salt_size); int fsverity_hash_page(const struct merkle_tree_params *params, const struct inode *inode, struct ahash_request *req, struct page *page, u8 *out); -int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, +int fsverity_hash_buffer(struct fsverity_hash_alg *alg, const void *data, size_t size, u8 *out); void __init fsverity_check_hash_algs(void); diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c index 31e6d7d2389a..6682e4e6b601 100644 --- a/fs/verity/hash_algs.c +++ b/fs/verity/hash_algs.c @@ -24,6 +24,8 @@ struct fsverity_hash_alg fsverity_hash_algs[] = { }, }; +static DEFINE_MUTEX(fsverity_hash_alg_init_mutex); + /** * fsverity_get_hash_alg() - validate and prepare a hash algorithm * @inode: optional inode for logging purposes @@ -36,8 +38,8 @@ struct fsverity_hash_alg 
fsverity_hash_algs[] = { * * Return: pointer to the hash alg on success, else an ERR_PTR() */ -const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, - unsigned int num) +struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num) { struct fsverity_hash_alg *alg; struct crypto_ahash *tfm; @@ -50,10 +52,15 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, } alg = &fsverity_hash_algs[num]; - /* pairs with cmpxchg() below */ - tfm = READ_ONCE(alg->tfm); - if (likely(tfm != NULL)) + /* pairs with smp_store_release() below */ + if (likely(smp_load_acquire(&alg->tfm) != NULL)) return alg; + + mutex_lock(&fsverity_hash_alg_init_mutex); + + if (alg->tfm != NULL) + goto out_unlock; + /* * Using the shash API would make things a bit simpler, but the ahash * API is preferable as it allows the use of crypto accelerators. @@ -64,12 +71,14 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, fsverity_warn(inode, "Missing crypto API support for hash algorithm \"%s\"", alg->name); - return ERR_PTR(-ENOPKG); + alg = ERR_PTR(-ENOPKG); + goto out_unlock; } fsverity_err(inode, "Error allocating hash algorithm \"%s\": %ld", alg->name, PTR_ERR(tfm)); - return ERR_CAST(tfm); + alg = ERR_CAST(tfm); + goto out_unlock; } err = -EINVAL; @@ -78,18 +87,63 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, if (WARN_ON(alg->block_size != crypto_ahash_blocksize(tfm))) goto err_free_tfm; + alg->req_pool = mempool_create_kmalloc_pool(1, + sizeof(struct ahash_request) + + crypto_ahash_reqsize(tfm)); + if (!alg->req_pool) { + err = -ENOMEM; + goto err_free_tfm; + } + pr_info("%s using implementation \"%s\"\n", alg->name, crypto_ahash_driver_name(tfm)); - /* pairs with READ_ONCE() above */ - if (cmpxchg(&alg->tfm, NULL, tfm) != NULL) - crypto_free_ahash(tfm); - - return alg; + /* pairs with smp_load_acquire() above */ + 
smp_store_release(&alg->tfm, tfm); + goto out_unlock; err_free_tfm: crypto_free_ahash(tfm); - return ERR_PTR(err); + alg = ERR_PTR(err); +out_unlock: + mutex_unlock(&fsverity_hash_alg_init_mutex); + return alg; +} + +/** + * fsverity_alloc_hash_request() - allocate a hash request object + * @alg: the hash algorithm for which to allocate the request + * @gfp_flags: memory allocation flags + * + * This is mempool-backed, so this never fails if __GFP_DIRECT_RECLAIM is set in + * @gfp_flags. However, in that case this might need to wait for all + * previously-allocated requests to be freed. So to avoid deadlocks, callers + * must never need multiple requests at a time to make forward progress. + * + * Return: the request object on success; NULL on failure (but see above) + */ +struct ahash_request *fsverity_alloc_hash_request(struct fsverity_hash_alg *alg, + gfp_t gfp_flags) +{ + struct ahash_request *req = mempool_alloc(alg->req_pool, gfp_flags); + + if (req) + ahash_request_set_tfm(req, alg->tfm); + return req; +} + +/** + * fsverity_free_hash_request() - free a hash request object + * @alg: the hash algorithm + * @req: the hash request object to free + */ +void fsverity_free_hash_request(struct fsverity_hash_alg *alg, + struct ahash_request *req) +{ + if (req) { + ahash_request_zero(req); + mempool_free(req, alg->req_pool); + } } /** @@ -101,7 +155,7 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, * Return: NULL if the salt is empty, otherwise the kmalloc()'ed precomputed * initial hash state on success or an ERR_PTR() on failure. 
*/ -const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, +const u8 *fsverity_prepare_hash_state(struct fsverity_hash_alg *alg, const u8 *salt, size_t salt_size) { u8 *hashstate = NULL; @@ -119,11 +173,8 @@ const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, if (!hashstate) return ERR_PTR(-ENOMEM); - req = ahash_request_alloc(alg->tfm, GFP_KERNEL); - if (!req) { - err = -ENOMEM; - goto err_free; - } + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(alg, GFP_KERNEL); /* * Zero-pad the salt to the next multiple of the input size of the hash @@ -158,7 +209,7 @@ const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, if (err) goto err_free; out: - ahash_request_free(req); + fsverity_free_hash_request(alg, req); kfree(padded_salt); return hashstate; @@ -229,7 +280,7 @@ int fsverity_hash_page(const struct merkle_tree_params *params, * * Return: 0 on success, -errno on failure */ -int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, +int fsverity_hash_buffer(struct fsverity_hash_alg *alg, const void *data, size_t size, u8 *out) { struct ahash_request *req; @@ -237,9 +288,8 @@ int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, DECLARE_CRYPTO_WAIT(wait); int err; - req = ahash_request_alloc(alg->tfm, GFP_KERNEL); - if (!req) - return -ENOMEM; + /* This allocation never fails, since it's mempool-backed. 
*/ + req = fsverity_alloc_hash_request(alg, GFP_KERNEL); sg_init_one(&sg, data, size); ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | @@ -249,7 +299,7 @@ int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, err = crypto_wait_req(crypto_ahash_digest(req), &wait); - ahash_request_free(req); + fsverity_free_hash_request(alg, req); return err; } diff --git a/fs/verity/open.c b/fs/verity/open.c index b7b0a5479c6f..25b29065d897 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -31,7 +31,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, unsigned int log_blocksize, const u8 *salt, size_t salt_size) { - const struct fsverity_hash_alg *hash_alg; + struct fsverity_hash_alg *hash_alg; int err; u64 blocks; u64 offset; @@ -127,7 +127,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, * Compute the file measurement by hashing the fsverity_descriptor excluding the * signature and with the sig_size field set to 0. */ -static int compute_file_measurement(const struct fsverity_hash_alg *hash_alg, +static int compute_file_measurement(struct fsverity_hash_alg *hash_alg, struct fsverity_descriptor *desc, u8 *measurement) { diff --git a/fs/verity/verify.c b/fs/verity/verify.c index 461789903709..5324270cd7d4 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -192,13 +192,12 @@ bool fsverity_verify_page(struct page *page) struct ahash_request *req; bool valid; - req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS); - if (unlikely(!req)) - return false; + /* This allocation never fails, since it's mempool-backed. 
*/ + req = fsverity_alloc_hash_request(vi->tree_params.hash_alg, GFP_NOFS); valid = verify_page(inode, vi, req, page, 0); - ahash_request_free(req); + fsverity_free_hash_request(vi->tree_params.hash_alg, req); return valid; } @@ -229,12 +228,8 @@ void fsverity_verify_bio(struct bio *bio) int i; unsigned long max_ra_pages = 0; - req = ahash_request_alloc(params->hash_alg->tfm, GFP_NOFS); - if (unlikely(!req)) { - bio_for_each_segment_all(bv, bio, i) - SetPageError(bv->bv_page); - return; - } + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(params->hash_alg, GFP_NOFS); if (bio->bi_opf & REQ_RAHEAD) { /* @@ -262,7 +257,7 @@ void fsverity_verify_bio(struct bio *bio) SetPageError(page); } - ahash_request_free(req); + fsverity_free_hash_request(params->hash_alg, req); } EXPORT_SYMBOL_GPL(fsverity_verify_bio); #endif /* CONFIG_BLOCK */ -- GitLab From 7c18ae38056922b251fa24a6af4c73992ee291d7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 31 Dec 2019 11:54:08 -0600 Subject: [PATCH 0032/1278] fs-verity: use u64_to_user_ptr() <linux/kernel.h> already provides a macro u64_to_user_ptr(). Use it instead of open-coding the two casts. No change in behavior.
Link: https://lore.kernel.org/r/20191231175408.20524-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/verity/enable.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/verity/enable.c b/fs/verity/enable.c index d22f5161ff9a..15e7d14ec2ff 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -216,8 +216,7 @@ static int enable_verity(struct file *filp, /* Get the salt if the user provided one */ if (arg->salt_size && - copy_from_user(desc->salt, - (const u8 __user *)(uintptr_t)arg->salt_ptr, + copy_from_user(desc->salt, u64_to_user_ptr(arg->salt_ptr), arg->salt_size)) { err = -EFAULT; goto out; @@ -226,8 +225,7 @@ static int enable_verity(struct file *filp, /* Get the signature if the user provided one */ if (arg->sig_size && - copy_from_user(desc->signature, - (const u8 __user *)(uintptr_t)arg->sig_ptr, + copy_from_user(desc->signature, u64_to_user_ptr(arg->sig_ptr), arg->sig_size)) { err = -EFAULT; goto out; -- GitLab From 4c08606a4abe409c5a217f60478c85764460f12c Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Tue, 18 Feb 2020 21:51:08 -0800 Subject: [PATCH 0033/1278] ANDROID: clang: update to 10.0.4 Bug: 149785865 Change-Id: I299263190154da05d8b8db8da1beea3ddb764d8c Signed-off-by: Alistair Delva --- build.config.common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.config.common b/build.config.common index d4754486cd82..be3afbc1c6b5 100644 --- a/build.config.common +++ b/build.config.common @@ -3,7 +3,7 @@ KERNEL_DIR=common CC=clang LD=ld.lld -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r370808/bin +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r377782b/bin BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 EXTRA_CMDS='' -- GitLab From 23b0338e20577fac9aeef7d4f05f44081b37ade5 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 14 Feb 2020 23:08:43 +0100 Subject: [PATCH 0034/1278] ANDROID: test_stackinit: work around LLVM PR44916 Temporarily move 
the local outside of switch statement to work around https://bugs.llvm.org/show_bug.cgi?id=44916 and unbreak the ClangBuiltLinux project Travis build. This patch has been tested according to the instructions at https://github.com/ClangBuiltLinux/continuous-integration Bug: 149581678 Test: ./driver.sh REPO=android-4.14 Change-Id: Iea85ee09d8f0e6d698b5f23a02fe23a6002ad5e7 Signed-off-by: Alexander Potapenko --- lib/test_stackinit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c index 7c5f9fb49e58..b7e586d559ee 100644 --- a/lib/test_stackinit.c +++ b/lib/test_stackinit.c @@ -282,8 +282,9 @@ DEFINE_TEST(user, struct test_user, STRUCT, none); */ static int noinline __leaf_switch_none(int path, bool fill) { + uint64_t var; + switch (path) { - uint64_t var; case 1: target_start = &var; -- GitLab From ee1d24d6af5e2cea35022a8f95371974c175415b Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 13 Feb 2020 08:09:57 -0800 Subject: [PATCH 0035/1278] ANDROID: Incremental fs: Support xattrs To make selinux work, add xattr support. This is a bit clunky - it seems like it would be better for the log and pending read functionality to be ioctls rather than this mixture of real and virtual files. 
Bug: 133435829 Change-Id: I56579fabe2ae7efb88f0344553948dc9573299aa Signed-off-by: Paul Lawrence --- fs/incfs/data_mgmt.c | 2 ++ fs/incfs/data_mgmt.h | 6 ++++ fs/incfs/vfs.c | 78 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 4698f14bbdf7..90bf9e37d236 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -68,6 +68,8 @@ void incfs_free_mount_info(struct mount_info *mi) mutex_destroy(&mi->mi_pending_reads_mutex); put_cred(mi->mi_owner); kfree(mi->mi_log.rl_ring_buf); + kfree(mi->log_xattr); + kfree(mi->pending_read_xattr); kfree(mi); } diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 6722cef1608c..8b62b0348d51 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -131,6 +131,12 @@ struct mount_info { /* Temporary buffer for read logger. */ struct read_log mi_log; + + void *log_xattr; + size_t log_xattr_size; + + void *pending_read_xattr; + size_t pending_read_xattr_size; }; struct data_file_block { diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index aebd2b02bd83..22edaeeaa613 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -75,6 +75,8 @@ static void evict_inode(struct inode *inode); static ssize_t incfs_getxattr(struct dentry *d, const char *name, void *value, size_t size); +static ssize_t incfs_setxattr(struct dentry *d, const char *name, + const void *value, size_t size, int flags); static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size); static int show_options(struct seq_file *, struct dentry *); @@ -169,9 +171,18 @@ static int incfs_handler_getxattr(const struct xattr_handler *xh, return incfs_getxattr(d, name, buffer, size); } +static int incfs_handler_setxattr(const struct xattr_handler *xh, + struct dentry *d, struct inode *inode, + const char *name, const void *buffer, + size_t size, int flags) +{ + return incfs_setxattr(d, name, buffer, size, flags); +} + static const struct xattr_handler 
incfs_xattr_handler = { .prefix = "", /* AKA all attributes */ .get = incfs_handler_getxattr, + .set = incfs_handler_setxattr, }; static const struct xattr_handler *incfs_xattr_ops[] = { @@ -2048,11 +2059,74 @@ static ssize_t incfs_getxattr(struct dentry *d, const char *name, void *value, size_t size) { struct dentry_info *di = get_incfs_dentry(d); + struct mount_info *mi = get_mount_info(d->d_sb); + char *stored_value; + size_t stored_size; - if (!di || !di->backing_path.dentry) + if (di && di->backing_path.dentry) + return vfs_getxattr(di->backing_path.dentry, name, value, size); + + if (strcmp(name, "security.selinux")) + return -ENODATA; + + if (!strcmp(d->d_iname, INCFS_PENDING_READS_FILENAME)) { + stored_value = mi->pending_read_xattr; + stored_size = mi->pending_read_xattr_size; + } else if (!strcmp(d->d_iname, INCFS_LOG_FILENAME)) { + stored_value = mi->log_xattr; + stored_size = mi->log_xattr_size; + } else { return -ENODATA; + } + + if (!stored_value) + return -ENODATA; + + if (stored_size > size) + return -E2BIG; + + memcpy(value, stored_value, stored_size); + return stored_size; + +} - return vfs_getxattr(di->backing_path.dentry, name, value, size); + +static ssize_t incfs_setxattr(struct dentry *d, const char *name, + const void *value, size_t size, int flags) +{ + struct dentry_info *di = get_incfs_dentry(d); + struct mount_info *mi = get_mount_info(d->d_sb); + void **stored_value; + size_t *stored_size; + + if (di && di->backing_path.dentry) + return vfs_setxattr(di->backing_path.dentry, name, value, size, + flags); + + if (strcmp(name, "security.selinux")) + return -ENODATA; + + if (size > INCFS_MAX_FILE_ATTR_SIZE) + return -E2BIG; + + if (!strcmp(d->d_iname, INCFS_PENDING_READS_FILENAME)) { + stored_value = &mi->pending_read_xattr; + stored_size = &mi->pending_read_xattr_size; + } else if (!strcmp(d->d_iname, INCFS_LOG_FILENAME)) { + stored_value = &mi->log_xattr; + stored_size = &mi->log_xattr_size; + } else { + return -ENODATA; + } + + kfree 
(*stored_value); + *stored_value = kzalloc(size, GFP_NOFS); + if (!*stored_value) + return -ENOMEM; + + memcpy(*stored_value, value, size); + *stored_size = size; + return 0; } static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size) -- GitLab From 8d97219e60d4b11b9592b178627a557a0683a841 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 13 Feb 2020 15:08:24 -0800 Subject: [PATCH 0036/1278] ANDROID: ufs, block: fix crypto power management and move into block layer The call to pm_runtime_get_sync() in ufshcd_program_key() can deadlock because it waits for the UFS controller to be resumed, but it can itself be reached while resuming the UFS controller via: - ufshcd_runtime_resume() - ufshcd_resume() - ufshcd_reset_and_restore() - ufshcd_host_reset_and_restore() - ufshcd_hba_enable() - ufshcd_hba_execute_hce() - ufshcd_hba_start() - ufshcd_crypto_enable() - keyslot_manager_reprogram_all_keys() - ufshcd_crypto_keyslot_program() - ufshcd_program_key() But pm_runtime_get_sync() *is* needed when evicting a key. Also, on pre-4.20 kernels it's needed when programming a keyslot for a bio since the block layer used to resume the device in a different place. Thus, it's hard for drivers to know what to do in .keyslot_program() and .keyslot_evict(). In old kernels it may even be impossible unless we were to pass more information down from the keyslot_manager. There's also another possible deadlock: keyslot programming and eviction take ksm->lock for write and then resume the device, which may result in ksm->lock being taken again via the above call stack. To fix this, we should resume the device before taking ksm->lock. Fix these problems by moving to a better design where the block layer (namely, the keyslot manager) handles runtime power management instead of drivers. 
This is analogous to the block layer's existing runtime power management support (blk-pm), which handles resuming devices when bios are submitted to them so that drivers don't need to handle it. Test: Tested on coral with: echo 5 > /sys/bus/platform/devices/1d84000.ufshc/rpm_lvl sleep 30 touch /data && sync # hangs before this fix Also verified via kvm-xfstests that blk-crypto-fallback continues to work both with and without CONFIG_PM=y. Bug: 137270441 Bug: 149368295 Change-Id: I6bc9fb81854afe7edf490d71796ee68a61f7cbc8 Signed-off-by: Eric Biggers --- block/blk-crypto-fallback.c | 2 +- block/keyslot-manager.c | 90 +++++++++++++++++++++++++++----- drivers/md/dm.c | 3 +- drivers/scsi/ufs/ufshcd-crypto.c | 6 +-- include/linux/keyslot-manager.h | 5 +- 5 files changed, 87 insertions(+), 19 deletions(-) diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index cce3317cba80..b8e9ae1c1d5b 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -608,7 +608,7 @@ int __init blk_crypto_fallback_init(void) crypto_mode_supported[i] = 0xFFFFFFFF; crypto_mode_supported[BLK_ENCRYPTION_MODE_INVALID] = 0; - blk_crypto_ksm = keyslot_manager_create(blk_crypto_num_keyslots, + blk_crypto_ksm = keyslot_manager_create(NULL, blk_crypto_num_keyslots, &blk_crypto_ksm_ll_ops, crypto_mode_supported, NULL); if (!blk_crypto_ksm) diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c index 7e42813c9de0..0b6dd460645e 100644 --- a/block/keyslot-manager.c +++ b/block/keyslot-manager.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,11 @@ struct keyslot_manager { unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX]; void *ll_priv_data; +#ifdef CONFIG_PM + /* Device for runtime power management (NULL if none) */ + struct device *dev; +#endif + /* Protects programming and evicting keys from the device */ struct rw_semaphore lock; @@ -72,8 +78,60 @@ static inline bool 
keyslot_manager_is_passthrough(struct keyslot_manager *ksm) return ksm->num_slots == 0; } +#ifdef CONFIG_PM +static inline void keyslot_manager_set_dev(struct keyslot_manager *ksm, + struct device *dev) +{ + ksm->dev = dev; +} + +/* If there's an underlying device and it's suspended, resume it. */ +static inline void keyslot_manager_pm_get(struct keyslot_manager *ksm) +{ + if (ksm->dev) + pm_runtime_get_sync(ksm->dev); +} + +static inline void keyslot_manager_pm_put(struct keyslot_manager *ksm) +{ + if (ksm->dev) + pm_runtime_put_sync(ksm->dev); +} +#else /* CONFIG_PM */ +static inline void keyslot_manager_set_dev(struct keyslot_manager *ksm, + struct device *dev) +{ +} + +static inline void keyslot_manager_pm_get(struct keyslot_manager *ksm) +{ +} + +static inline void keyslot_manager_pm_put(struct keyslot_manager *ksm) +{ +} +#endif /* !CONFIG_PM */ + +static inline void keyslot_manager_hw_enter(struct keyslot_manager *ksm) +{ + /* + * Calling into the driver requires ksm->lock held and the device + * resumed. But we must resume the device first, since that can acquire + * and release ksm->lock via keyslot_manager_reprogram_all_keys(). + */ + keyslot_manager_pm_get(ksm); + down_write(&ksm->lock); +} + +static inline void keyslot_manager_hw_exit(struct keyslot_manager *ksm) +{ + up_write(&ksm->lock); + keyslot_manager_pm_put(ksm); +} + /** * keyslot_manager_create() - Create a keyslot manager + * @dev: Device for runtime power management (NULL if none) * @num_slots: The number of key slots to manage. * @ksm_ll_ops: The struct keyslot_mgmt_ll_ops for the device that this keyslot * manager will use to perform operations like programming and @@ -93,7 +151,9 @@ static inline bool keyslot_manager_is_passthrough(struct keyslot_manager *ksm) * Context: May sleep * Return: Pointer to constructed keyslot manager or NULL on error. 
*/ -struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, +struct keyslot_manager *keyslot_manager_create( + struct device *dev, + unsigned int num_slots, const struct keyslot_mgmt_ll_ops *ksm_ll_ops, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data) @@ -119,6 +179,7 @@ struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, memcpy(ksm->crypto_mode_supported, crypto_mode_supported, sizeof(ksm->crypto_mode_supported)); ksm->ll_priv_data = ll_priv_data; + keyslot_manager_set_dev(ksm, dev); init_rwsem(&ksm->lock); @@ -227,10 +288,10 @@ int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, return slot; for (;;) { - down_write(&ksm->lock); + keyslot_manager_hw_enter(ksm); slot = find_and_grab_keyslot(ksm, key); if (slot != -ENOKEY) { - up_write(&ksm->lock); + keyslot_manager_hw_exit(ksm); return slot; } @@ -241,7 +302,7 @@ int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, if (!list_empty(&ksm->idle_slots)) break; - up_write(&ksm->lock); + keyslot_manager_hw_exit(ksm); wait_event(ksm->idle_slots_wait_queue, !list_empty(&ksm->idle_slots)); } @@ -253,7 +314,7 @@ int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot); if (err) { wake_up(&ksm->idle_slots_wait_queue); - up_write(&ksm->lock); + keyslot_manager_hw_exit(ksm); return err; } @@ -267,7 +328,7 @@ int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, remove_slot_from_lru_list(ksm, slot); - up_write(&ksm->lock); + keyslot_manager_hw_exit(ksm); return slot; } @@ -369,15 +430,16 @@ int keyslot_manager_evict_key(struct keyslot_manager *ksm, if (keyslot_manager_is_passthrough(ksm)) { if (ksm->ksm_ll_ops.keyslot_evict) { - down_write(&ksm->lock); + keyslot_manager_hw_enter(ksm); err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, -1); - up_write(&ksm->lock); + keyslot_manager_hw_exit(ksm); return err; } return 0; } - down_write(&ksm->lock); + 
keyslot_manager_hw_enter(ksm); + slot = find_keyslot(ksm, key); if (slot < 0) { err = slot; @@ -397,7 +459,7 @@ int keyslot_manager_evict_key(struct keyslot_manager *ksm, memzero_explicit(&slotp->key, sizeof(slotp->key)); err = 0; out_unlock: - up_write(&ksm->lock); + keyslot_manager_hw_exit(ksm); return err; } @@ -417,6 +479,7 @@ void keyslot_manager_reprogram_all_keys(struct keyslot_manager *ksm) if (WARN_ON(keyslot_manager_is_passthrough(ksm))) return; + /* This is for device initialization, so don't resume the device */ down_write(&ksm->lock); for (slot = 0; slot < ksm->num_slots; slot++) { const struct keyslot *slotp = &ksm->slots[slot]; @@ -456,6 +519,7 @@ EXPORT_SYMBOL_GPL(keyslot_manager_destroy); /** * keyslot_manager_create_passthrough() - Create a passthrough keyslot manager + * @dev: Device for runtime power management (NULL if none) * @ksm_ll_ops: The struct keyslot_mgmt_ll_ops * @crypto_mode_supported: Bitmasks for supported encryption modes * @ll_priv_data: Private data passed as is to the functions in ksm_ll_ops. @@ -472,6 +536,7 @@ EXPORT_SYMBOL_GPL(keyslot_manager_destroy); * Return: Pointer to constructed keyslot manager or NULL on error. 
*/ struct keyslot_manager *keyslot_manager_create_passthrough( + struct device *dev, const struct keyslot_mgmt_ll_ops *ksm_ll_ops, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data) @@ -486,6 +551,7 @@ struct keyslot_manager *keyslot_manager_create_passthrough( memcpy(ksm->crypto_mode_supported, crypto_mode_supported, sizeof(ksm->crypto_mode_supported)); ksm->ll_priv_data = ll_priv_data; + keyslot_manager_set_dev(ksm, dev); init_rwsem(&ksm->lock); @@ -545,15 +611,15 @@ int keyslot_manager_derive_raw_secret(struct keyslot_manager *ksm, { int err; - down_write(&ksm->lock); if (ksm->ksm_ll_ops.derive_raw_secret) { + keyslot_manager_hw_enter(ksm); err = ksm->ksm_ll_ops.derive_raw_secret(ksm, wrapped_key, wrapped_key_size, secret, secret_size); + keyslot_manager_hw_exit(ksm); } else { err = -EOPNOTSUPP; } - up_write(&ksm->lock); return err; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0189f70e87a0..0271ca072453 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2102,7 +2102,8 @@ static int dm_init_inline_encryption(struct mapped_device *md) */ memset(mode_masks, 0xFF, sizeof(mode_masks)); - md->queue->ksm = keyslot_manager_create_passthrough(&dm_ksm_ll_ops, + md->queue->ksm = keyslot_manager_create_passthrough(NULL, + &dm_ksm_ll_ops, mode_masks, md); if (!md->queue->ksm) return -ENOMEM; diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c index 276b49ad13be..e3de448c9bbe 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.c +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -125,7 +125,6 @@ static int ufshcd_program_key(struct ufs_hba *hba, u32 slot_offset = hba->crypto_cfg_register + slot * sizeof(*cfg); int err; - pm_runtime_get_sync(hba->dev); ufshcd_hold(hba, false); if (hba->vops->program_key) { @@ -155,7 +154,6 @@ static int ufshcd_program_key(struct ufs_hba *hba, err = 0; out: ufshcd_release(hba); - pm_runtime_put_sync(hba->dev); return err; } @@ -337,8 +335,8 @@ int ufshcd_hba_init_crypto_spec(struct 
ufs_hba *hba, ufshcd_clear_all_keyslots(hba); - hba->ksm = keyslot_manager_create(ufshcd_num_keyslots(hba), ksm_ops, - crypto_modes_supported, hba); + hba->ksm = keyslot_manager_create(hba->dev, ufshcd_num_keyslots(hba), + ksm_ops, crypto_modes_supported, hba); if (!hba->ksm) { err = -ENOMEM; diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h index 6d32a031218e..2f4aac2851bf 100644 --- a/include/linux/keyslot-manager.h +++ b/include/linux/keyslot-manager.h @@ -41,7 +41,9 @@ struct keyslot_mgmt_ll_ops { u8 *secret, unsigned int secret_size); }; -struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, +struct keyslot_manager *keyslot_manager_create( + struct device *dev, + unsigned int num_slots, const struct keyslot_mgmt_ll_ops *ksm_ops, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data); @@ -67,6 +69,7 @@ void *keyslot_manager_private(struct keyslot_manager *ksm); void keyslot_manager_destroy(struct keyslot_manager *ksm); struct keyslot_manager *keyslot_manager_create_passthrough( + struct device *dev, const struct keyslot_mgmt_ll_ops *ksm_ops, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data); -- GitLab From e723e513c2fc9cdca3974dd811431ea8d6d13d64 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 26 Feb 2020 13:25:24 -0800 Subject: [PATCH 0037/1278] ANDROID: increase limit on sched-tune boost groups Some devices need an additional sched-tune boost group to optimize performance for key tasks Bug: 150302001 Change-Id: I392c8cc05a8851f1d416c381b4a27242924c2c27 Signed-off-by: Todd Kjos --- kernel/sched/tune.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 74a45606dc8c..b420258f9732 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -78,7 +78,7 @@ root_schedtune = { * implementation especially for the computation of the per-CPU boost * value */ -#define BOOSTGROUPS_COUNT 5 
+#define BOOSTGROUPS_COUNT 6 /* Array of configured boostgroups */ static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = { -- GitLab From 304407a616abe4ce26cf5c234de245a651b35f4a Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 26 Feb 2020 15:28:50 -0800 Subject: [PATCH 0038/1278] ANDROID: Disable wq fp check in CFI builds With non-canonical CFI, LLVM generates jump table entries for external symbols in modules and as a result, a function pointer passed from a module to the core kernel will have a different address. Disable the warning for now. Bug: 145210207 Change-Id: Ifdcee3479280f7b97abdee6b4c746f447e0944e6 Signed-off-by: Sami Tolvanen Signed-off-by: Alistair Delva --- kernel/workqueue.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index fa4a72c6d5f5..fce73f9a2742 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1530,8 +1530,10 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct work_struct *work = &dwork->work; WARN_ON_ONCE(!wq); - WARN_ON_ONCE(timer->function != delayed_work_timer_fn || - timer->data != (unsigned long)dwork); +#ifndef CONFIG_CFI + WARN_ON_ONCE(timer->function != delayed_work_timer_fn); +#endif + WARN_ON_ONCE(timer->data != (unsigned long)dwork); WARN_ON_ONCE(timer_pending(timer)); WARN_ON_ONCE(!list_empty(&work->entry)); -- GitLab From 166fda7a7ed0e60e05fb4cfdc1f25fd59ff198ba Mon Sep 17 00:00:00 2001 From: Barani Muthukumaran Date: Thu, 6 Feb 2020 18:01:20 -0800 Subject: [PATCH 0039/1278] ANDROID: block: Prevent crypto fallback for wrapped keys blk-crypto-fallback does not support wrapped keys, hence prevent falling back when program_key fails. Add 'is_hw_wrapped' flag to blk-crypto-key to mention if the key is wrapped when the key is initialized. Bug: 147209885 Test: Validate FBE, simulate a failure in the underlying blk device and ensure the call fails without falling back to blk-crypto-fallback. 
Change-Id: I8bc301ca1ac9e55ba6ab622e8325486916b45c56 Signed-off-by: Barani Muthukumaran --- block/blk-crypto-fallback.c | 6 ++++++ block/blk-crypto.c | 17 +++++++++++++---- drivers/md/dm-default-key.c | 2 +- fs/crypto/fscrypt_private.h | 3 +++ fs/crypto/inline_crypt.c | 3 ++- fs/crypto/keysetup.c | 14 +++++++------- fs/crypto/keysetup_v1.c | 2 +- include/linux/bio-crypt-ctx.h | 3 +++ include/linux/blk-crypto.h | 1 + 9 files changed, 37 insertions(+), 14 deletions(-) diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index b8e9ae1c1d5b..195b04b5df0d 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -571,6 +571,12 @@ int blk_crypto_fallback_submit_bio(struct bio **bio_ptr) struct bio_crypt_ctx *bc = bio->bi_crypt_context; struct bio_fallback_crypt_ctx *f_ctx; + if (bc->bc_key->is_hw_wrapped) { + pr_warn_once("HW wrapped key cannot be used with fallback.\n"); + bio->bi_status = BLK_STS_NOTSUPP; + return -EOPNOTSUPP; + } + if (!tfms_inited[bc->bc_key->crypto_mode]) { bio->bi_status = BLK_STS_IOERR; return -EIO; diff --git a/block/blk-crypto.c b/block/blk-crypto.c index a8de0d9680e0..88df1c0e7e5f 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -175,7 +175,9 @@ bool blk_crypto_endio(struct bio *bio) * @raw_key_size: Size of raw key. Must be at least the required size for the * chosen @crypto_mode; see blk_crypto_modes[]. (It's allowed * to be longer than the mode's actual key size, in order to - * support inline encryption hardware that accepts wrapped keys.) + * support inline encryption hardware that accepts wrapped keys. + * @is_hw_wrapped has to be set for such keys) + * @is_hw_wrapped: Denotes @raw_key is wrapped. 
* @crypto_mode: identifier for the encryption algorithm to use * @data_unit_size: the data unit size to use for en/decryption * @@ -184,6 +186,7 @@ bool blk_crypto_endio(struct bio *bio) */ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, enum blk_crypto_mode_num crypto_mode, unsigned int data_unit_size) { @@ -198,9 +201,14 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, BUILD_BUG_ON(BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE < BLK_CRYPTO_MAX_KEY_SIZE); mode = &blk_crypto_modes[crypto_mode]; - if (raw_key_size < mode->keysize || - raw_key_size > BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE) - return -EINVAL; + if (is_hw_wrapped) { + if (raw_key_size < mode->keysize || + raw_key_size > BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE) + return -EINVAL; + } else { + if (raw_key_size != mode->keysize) + return -EINVAL; + } if (!is_power_of_2(data_unit_size)) return -EINVAL; @@ -209,6 +217,7 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, blk_key->data_unit_size = data_unit_size; blk_key->data_unit_size_bits = ilog2(data_unit_size); blk_key->size = raw_key_size; + blk_key->is_hw_wrapped = is_hw_wrapped; memcpy(blk_key->raw, raw_key, raw_key_size); /* diff --git a/drivers/md/dm-default-key.c b/drivers/md/dm-default-key.c index 43a30c076aa6..e23ceb1390c8 100644 --- a/drivers/md/dm-default-key.c +++ b/drivers/md/dm-default-key.c @@ -226,7 +226,7 @@ static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) } err = blk_crypto_init_key(&dkc->key, raw_key, cipher->key_size, - cipher->mode_num, dkc->sector_size); + false, cipher->mode_num, dkc->sector_size); if (err) { ti->error = "Error initializing blk-crypto key"; goto bad; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 2e7a7a8171a1..bc53caf70497 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -319,6 +319,7 @@ extern int fscrypt_prepare_inline_crypt_key( struct fscrypt_prepared_key 
*prep_key, const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, const struct fscrypt_info *ci); extern void fscrypt_destroy_inline_crypt_key( @@ -363,6 +364,7 @@ static inline bool fscrypt_using_inline_encryption( static inline int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, const struct fscrypt_info *ci) { WARN_ON(1); @@ -557,6 +559,7 @@ extern struct fscrypt_mode fscrypt_modes[]; extern int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, const struct fscrypt_info *ci); extern void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key); diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 92c471d3db73..cd901697d62a 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -51,6 +51,7 @@ void fscrypt_select_encryption_impl(struct fscrypt_info *ci) int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, const struct fscrypt_info *ci) { const struct inode *inode = ci->ci_inode; @@ -81,7 +82,7 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE); err = blk_crypto_init_key(&blk_key->base, raw_key, raw_key_size, - crypto_mode, sb->s_blocksize); + is_hw_wrapped, crypto_mode, sb->s_blocksize); if (err) { fscrypt_err(inode, "error %d initializing blk-crypto key", err); goto fail; diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index e474d6c09cbd..c289f4e32186 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -117,15 +117,15 @@ fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, */ int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, - const struct fscrypt_info *ci) + bool is_hw_wrapped, const struct 
fscrypt_info *ci) { struct crypto_skcipher *tfm; if (fscrypt_using_inline_encryption(ci)) return fscrypt_prepare_inline_crypt_key(prep_key, - raw_key, raw_key_size, ci); + raw_key, raw_key_size, is_hw_wrapped, ci); - if (WARN_ON(raw_key_size != ci->ci_mode->keysize)) + if (WARN_ON(is_hw_wrapped || raw_key_size != ci->ci_mode->keysize)) return -EINVAL; tfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, ci->ci_inode); @@ -150,8 +150,8 @@ void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key) int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key) { ci->ci_owns_key = true; - return fscrypt_prepare_key(&ci->ci_key, raw_key, - ci->ci_mode->keysize, ci); + return fscrypt_prepare_key(&ci->ci_key, raw_key, ci->ci_mode->keysize, + false /*is_hw_wrapped*/, ci); } static int setup_per_mode_enc_key(struct fscrypt_info *ci, @@ -202,7 +202,7 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci, } } err = fscrypt_prepare_key(prep_key, mk->mk_secret.raw, - mk->mk_secret.size, ci); + mk->mk_secret.size, true, ci); if (err) goto out_unlock; } else { @@ -221,7 +221,7 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci, if (err) goto out_unlock; err = fscrypt_prepare_key(prep_key, mode_key, mode->keysize, - ci); + false /*is_hw_wrapped*/, ci); memzero_explicit(mode_key, mode->keysize); if (err) goto out_unlock; diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 29fe4df6ec75..3f7bb48f7317 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -234,7 +234,7 @@ fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key) refcount_set(&dk->dk_refcount, 1); dk->dk_mode = ci->ci_mode; err = fscrypt_prepare_key(&dk->dk_key, raw_key, ci->ci_mode->keysize, - ci); + false /*is_hw_wrapped*/, ci); if (err) goto err_free_dk; memcpy(dk->dk_descriptor, ci->ci_policy.v1.master_key_descriptor, diff --git a/include/linux/bio-crypt-ctx.h b/include/linux/bio-crypt-ctx.h index ab22dbe7b880..8456a409fc21 
100644 --- a/include/linux/bio-crypt-ctx.h +++ b/include/linux/bio-crypt-ctx.h @@ -31,6 +31,8 @@ enum blk_crypto_mode_num { * @data_unit_size_bits: log2 of data_unit_size * @size: size of this key in bytes (determined by @crypto_mode) * @hash: hash of this key, for keyslot manager use only + * @is_hw_wrapped: @raw points to a wrapped key to be used by an inline + * encryption hardware that accepts wrapped keys. * @raw: the raw bytes of this key. Only the first @size bytes are used. * * A blk_crypto_key is immutable once created, and many bios can reference it at @@ -42,6 +44,7 @@ struct blk_crypto_key { unsigned int data_unit_size_bits; unsigned int size; unsigned int hash; + bool is_hw_wrapped; u8 raw[BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE]; }; diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 485cee0b92dd..913b367d42bd 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -18,6 +18,7 @@ bool blk_crypto_endio(struct bio *bio); int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, enum blk_crypto_mode_num crypto_mode, unsigned int data_unit_size); -- GitLab From 23e670a1b851251c0144722352e649e61e29ed8e Mon Sep 17 00:00:00 2001 From: Barani Muthukumaran Date: Thu, 6 Feb 2020 18:01:29 -0800 Subject: [PATCH 0040/1278] ANDROID: dm: add support for passing through derive_raw_secret Update the device-mapper core to support exposing the inline crypto support of wrapped keys through the device-mapper device. derive_raw_secret in keyslot manager is used to derive the software raw secret from the given wrapped keyblob using the underlying blk device. Given that the raw_secret is the same for a given wrapped keyblob the call exits when the first underlying blk-device succeeds. Bug: 147209885 Test: Validated FBE with wrappedkey_v0 when /data is mounted on a dm device. 
Change-Id: Ia49ed61613607f8b82f2be0615e5b6d2f7533859 Signed-off-by: Barani Muthukumaran --- drivers/md/dm.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0271ca072453..c41c2eddba31 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2088,8 +2088,79 @@ static int dm_keyslot_evict(struct keyslot_manager *ksm, return args.err; } +struct dm_derive_raw_secret_args { + const u8 *wrapped_key; + unsigned int wrapped_key_size; + u8 *secret; + unsigned int secret_size; + int err; +}; + +static int dm_derive_raw_secret_callback(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct dm_derive_raw_secret_args *args = data; + struct request_queue *q = dev->bdev->bd_queue; + + if (!args->err) + return 0; + + if (!q->ksm) { + args->err = -EOPNOTSUPP; + return 0; + } + + args->err = keyslot_manager_derive_raw_secret(q->ksm, args->wrapped_key, + args->wrapped_key_size, + args->secret, + args->secret_size); + /* Try another device in case this fails. */ + return 0; +} + +/* + * Retrieve the raw_secret from the underlying device. 
Given that + * only one raw_secret can exist for a particular wrappedkey, + * retrieve it only from the first device that supports derive_raw_secret() + */ +static int dm_derive_raw_secret(struct keyslot_manager *ksm, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *secret, unsigned int secret_size) +{ + struct mapped_device *md = keyslot_manager_private(ksm); + struct dm_derive_raw_secret_args args = { + .wrapped_key = wrapped_key, + .wrapped_key_size = wrapped_key_size, + .secret = secret, + .secret_size = secret_size, + .err = -EOPNOTSUPP, + }; + struct dm_table *t; + int srcu_idx; + int i; + struct dm_target *ti; + + t = dm_get_live_table(md, &srcu_idx); + if (!t) + return -EOPNOTSUPP; + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + if (!ti->type->iterate_devices) + continue; + ti->type->iterate_devices(ti, dm_derive_raw_secret_callback, + &args); + if (!args.err) + break; + } + dm_put_live_table(md, srcu_idx); + return args.err; +} + static struct keyslot_mgmt_ll_ops dm_ksm_ll_ops = { .keyslot_evict = dm_keyslot_evict, + .derive_raw_secret = dm_derive_raw_secret, }; static int dm_init_inline_encryption(struct mapped_device *md) -- GitLab From 06ab740983752ed19ad995b77409b17b3b56db11 Mon Sep 17 00:00:00 2001 From: Barani Muthukumaran Date: Thu, 6 Feb 2020 18:01:33 -0800 Subject: [PATCH 0041/1278] ANDROID: dm: Add wrapped key support in dm-default-key To prevent keys from being compromised if an attacker acquires read access to kernel memory, some inline encryption hardware supports protecting the keys in hardware without software having access to or the ability to set the plaintext keys. Instead, software only sees "wrapped keys", which may differ on every boot. The keys can be initially generated either by software (in which case they need to be imported to hardware to be wrapped), or directly by the hardware. Add support for this type of hardware by allowing keys to be flagged as hardware-wrapped. 
When used, dm-default-key will pass the wrapped key to the inline encryption hardware to encrypt metadata. The hardware will internally unwrap the key and derive the metadata encryption key. Bug: 147209885 Test: Validate metadata encryption & FBE with wrapped keys. Change-Id: I8078b116dab9e04d7f3f15f29f11823185ea5d50 Signed-off-by: Barani Muthukumaran --- drivers/md/dm-default-key.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-default-key.c b/drivers/md/dm-default-key.c index e23ceb1390c8..b3da6afa6927 100644 --- a/drivers/md/dm-default-key.c +++ b/drivers/md/dm-default-key.c @@ -9,7 +9,7 @@ #define DM_MSG_PREFIX "default-key" -#define DM_DEFAULT_KEY_MAX_KEY_SIZE 64 +#define DM_DEFAULT_KEY_MAX_WRAPPED_KEY_SIZE 128 #define SECTOR_SIZE (1 << SECTOR_SHIFT) @@ -49,6 +49,7 @@ struct default_key_c { unsigned int sector_size; unsigned int sector_bits; struct blk_crypto_key key; + bool is_hw_wrapped; }; static const struct dm_default_key_cipher * @@ -84,7 +85,7 @@ static int default_key_ctr_optional(struct dm_target *ti, struct default_key_c *dkc = ti->private; struct dm_arg_set as; static const struct dm_arg _args[] = { - {0, 3, "Invalid number of feature args"}, + {0, 4, "Invalid number of feature args"}, }; unsigned int opt_params; const char *opt_string; @@ -117,6 +118,8 @@ static int default_key_ctr_optional(struct dm_target *ti, } } else if (!strcmp(opt_string, "iv_large_sectors")) { iv_large_sectors = true; + } else if (!strcmp(opt_string, "wrappedkey_v0")) { + dkc->is_hw_wrapped = true; } else { ti->error = "Invalid feature arguments"; return -EINVAL; @@ -144,7 +147,8 @@ static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct default_key_c *dkc; const struct dm_default_key_cipher *cipher; - u8 raw_key[DM_DEFAULT_KEY_MAX_KEY_SIZE]; + u8 raw_key[DM_DEFAULT_KEY_MAX_WRAPPED_KEY_SIZE]; + unsigned int raw_key_size; unsigned long long tmpll; char dummy; int err; @@ 
-176,12 +180,15 @@ static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) } /* */ - if (strlen(argv[1]) != 2 * cipher->key_size) { - ti->error = "Incorrect key size for cipher"; + raw_key_size = strlen(argv[1]); + if (raw_key_size > 2 * DM_DEFAULT_KEY_MAX_WRAPPED_KEY_SIZE || + raw_key_size % 2) { + ti->error = "Invalid keysize"; err = -EINVAL; goto bad; } - if (hex2bin(raw_key, argv[1], cipher->key_size) != 0) { + raw_key_size /= 2; + if (hex2bin(raw_key, argv[1], raw_key_size) != 0) { ti->error = "Malformed key string"; err = -EINVAL; goto bad; @@ -226,7 +233,8 @@ static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) } err = blk_crypto_init_key(&dkc->key, raw_key, cipher->key_size, - false, cipher->mode_num, dkc->sector_size); + dkc->is_hw_wrapped, cipher->mode_num, + dkc->sector_size); if (err) { ti->error = "Error initializing blk-crypto key"; goto bad; @@ -319,6 +327,8 @@ static void default_key_status(struct dm_target *ti, status_type_t type, num_feature_args += !!ti->num_discard_bios; if (dkc->sector_size != SECTOR_SIZE) num_feature_args += 2; + if (dkc->is_hw_wrapped) + num_feature_args += 1; if (num_feature_args != 0) { DMEMIT(" %d", num_feature_args); if (ti->num_discard_bios) @@ -327,6 +337,8 @@ static void default_key_status(struct dm_target *ti, status_type_t type, DMEMIT(" sector_size:%u", dkc->sector_size); DMEMIT(" iv_large_sectors"); } + if (dkc->is_hw_wrapped) + DMEMIT(" wrappedkey_v0"); } break; } @@ -372,7 +384,7 @@ static void default_key_io_hints(struct dm_target *ti, static struct target_type default_key_target = { .name = "default-key", - .version = {2, 0, 0}, + .version = {2, 1, 0}, .module = THIS_MODULE, .ctr = default_key_ctr, .dtr = default_key_dtr, -- GitLab From 9ec56ec3132b38ba351b3f9db7b70b269022834e Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Thu, 27 Feb 2020 12:52:46 -0800 Subject: [PATCH 0042/1278] ANDROID: gki_defconfig: Enable CONFIG_RD_LZ4 Support future decompression 
of LZ4-compressed ramdisk images. This support is in addition to the default support for gzip. Bug: 150391496 Test: TreeHugger Change-Id: I8c07e9363333c53cd6ff9c7e16fc42cbe318fe83 Signed-off-by: Alistair Delva --- arch/arm64/configs/cuttlefish_defconfig | 1 - arch/x86/configs/x86_64_cuttlefish_defconfig | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index c1b2fc17bdbb..0b9fbd2d3d14 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -29,7 +29,6 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_LZMA is not set # CONFIG_RD_XZ is not set # CONFIG_RD_LZO is not set -# CONFIG_RD_LZ4 is not set CONFIG_SGETMASK_SYSCALL=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_KALLSYMS_ALL=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index b18d574c8143..fbe017c1f5cf 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -27,7 +27,10 @@ CONFIG_NAMESPACES=y # CONFIG_PID_NS is not set CONFIG_SCHED_TUNE=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_RD_LZ4 is not set +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set CONFIG_KALLSYMS_ALL=y # CONFIG_PCSPKR_PLATFORM is not set CONFIG_BPF_SYSCALL=y -- GitLab From 9a29b43c06d3b114d351e115ab00f76f628fefe9 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 13 Aug 2018 09:56:43 +0800 Subject: [PATCH 0043/1278] ANDROID: sdcardfs: fix -ENOENT lookup race issue The negative lower dentry created by vfs_path_lookup could be reclaimed between vfs_path_lookup and d_hash_and_lookup. Therefore, it is unsafe to just lookup dcache again for the negative dentry cases. Without this patch, users could occasionally get trapped into `failed to create' under memory pressure. 
So here is a workaround to hack it and in my opinion sdcardfs should be refactored to close all races in the long term as pointed out in the code comment of this commit. Test: (Thread 1) while true; do echo 3 > /proc/sys/vm/drop_caches done (Thread 2) i=0 while true; do echo 123 > /sdcard/$i i=$((i+1)) done Bug: 63872684 Cc: Daniel Rosenberg Cc: Miao Xie Cc: Chao Yu Change-Id: Ic033e1f84a8b271c1f48010f4e1f189982bbbea2 Signed-off-by: Gao Xiang Signed-off-by: Daniel Rosenberg (cherry picked from commit bd77267426ed5ffe6a25aa77c149cde28f479f95) --- fs/sdcardfs/inode.c | 3 +++ fs/sdcardfs/lookup.c | 34 ++++++++++++++++------------------ 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 4dd681e0d59d..edeca118cce5 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -87,6 +87,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, lower_dentry_mnt = lower_path.mnt; lower_parent_dentry = lock_parent(lower_dentry); + if (d_is_positive(lower_dentry)) + return -EEXIST; + /* set last 16bytes of mode field to 0664 */ mode = (mode & S_IFMT) | 00664; diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index 73179ce2591f..886aee279920 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -257,7 +257,6 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, struct dentry *lower_dentry; const struct qstr *name; struct path lower_path; - struct qstr dname; struct dentry *ret_dentry = NULL; struct sdcardfs_sb_info *sbi; @@ -316,6 +315,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, /* no error: handle positive dentries */ if (!err) { +found: /* check if the dentry is an obb dentry * if true, the lower_inode must be replaced with * the inode of the graft path @@ -362,28 +362,26 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, if (err && err != -ENOENT) goto out; - /* instatiate a new negative dentry */ - dname.name = name->name; - 
dname.len = name->len; - - /* See if the low-level filesystem might want - * to use its own hash - */ - lower_dentry = d_hash_and_lookup(lower_dir_dentry, &dname); - if (IS_ERR(lower_dentry)) - return lower_dentry; - - if (!lower_dentry) { - /* We called vfs_path_lookup earlier, and did not get a negative - * dentry then. Don't confuse the lower filesystem by forcing - * one on it now... - */ - err = -ENOENT; + /* get a (very likely) new negative dentry */ + lower_dentry = lookup_one_len_unlocked(name->name, + lower_dir_dentry, name->len); + if (IS_ERR(lower_dentry)) { + err = PTR_ERR(lower_dentry); goto out; } lower_path.dentry = lower_dentry; lower_path.mnt = mntget(lower_dir_mnt); + + /* + * Check if someone sneakily filled in the dentry when + * we weren't looking. We'll check again in create. + */ + if (unlikely(d_inode_rcu(lower_dentry))) { + err = 0; + goto found; + } + sdcardfs_set_lower_path(dentry, &lower_path); /* -- GitLab From 699d26b883139a817426581a76b4edd2fb610616 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 27 Feb 2020 16:29:08 -0800 Subject: [PATCH 0044/1278] ANDROID: cuttlefish_defconfig: Add CONFIG_UNICODE This adds support for unicode to cuttlefish_defconfig Signed-off-by: Daniel Rosenberg Test: Boots, /data/media is case insensitive Bug: 138322712 Change-Id: I177d0b301087733234de3d6d283af946213eebca --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 0b9fbd2d3d14..8b5ebe05fe9f 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -449,6 +449,7 @@ CONFIG_SDCARD_FS=y CONFIG_PSTORE=y CONFIG_PSTORE_CONSOLE=y CONFIG_PSTORE_RAM=y +CONFIG_UNICODE=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig 
b/arch/x86/configs/x86_64_cuttlefish_defconfig index fbe017c1f5cf..43c457eb9446 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -474,6 +474,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y +CONFIG_UNICODE=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_WARN_DEPRECATED is not set -- GitLab From 045b7dadf1d6680f9b9b2190b91a513896b9ef8e Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 5 Feb 2020 16:40:52 -0800 Subject: [PATCH 0045/1278] FROMLIST: unicode: Add utf8_casefold_hash This adds a case insensitive hash function to allow taking the hash without needing to allocate a casefolded copy of the string. Signed-off-by: Daniel Rosenberg Test: Boots, /data/media is case insensitive Bug: 138322712 Link: https://lore.kernel.org/linux-f2fs-devel/20200208013552.241832-1-drosen@google.com/T/#t Change-Id: I43c7d38a8e22f4479397f35e6343bd326901cdba --- fs/unicode/utf8-core.c | 23 ++++++++++++++++++++++- include/linux/unicode.h | 3 +++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c index 71ca4d047d65..d18789f27650 100644 --- a/fs/unicode/utf8-core.c +++ b/fs/unicode/utf8-core.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "utf8n.h" @@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str, } return -EINVAL; } - EXPORT_SYMBOL(utf8_casefold); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str) +{ + const struct utf8data *data = utf8nfdicf(um->version); + struct utf8cursor cur; + int c; + unsigned long hash = init_name_hash(salt); + + if (utf8ncursor(&cur, data, str->name, str->len) < 0) + return -EINVAL; + + while ((c = utf8byte(&cur))) { + if (c < 0) + return c; + hash = partial_name_hash((unsigned char)c, hash); + } + str->hash = end_name_hash(hash); + return 0; +} +EXPORT_SYMBOL(utf8_casefold_hash); + 
int utf8_normalize(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen) { diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 990aa97d8049..74484d44c755 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str, int utf8_casefold(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str); + struct unicode_map *utf8_load(const char *version); void utf8_unload(struct unicode_map *um); -- GitLab From 4200d6e3d17f4f3d59b55f78ea3b95d5fe362473 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 22 Jan 2020 20:05:18 -0800 Subject: [PATCH 0046/1278] FROMLIST: Add standard casefolding support This adds general supporting functions for filesystems that use utf8 casefolding. It provides standard dentry_operations and adds the necessary structures in struct super_block to allow this standardization. Ext4 and F2fs are switched to these implementations. 
Signed-off-by: Daniel Rosenberg Note: Fixed issue with non-strictly enforced fallback hash Test: Boots, /data/media is case insensitive Bug: 138322712 Link: https://lore.kernel.org/linux-f2fs-devel/20200208013552.241832-1-drosen@google.com/T/#t Change-Id: I81b5fb5d3ce0259a60712ae2505c1e4b03dbafde --- fs/libfs.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 22 +++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/fs/libfs.c b/fs/libfs.c index 49623301e5f0..f59b163c38ac 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -16,6 +16,8 @@ #include #include #include /* sync_mapping_buffers */ +#include +#include #include @@ -1219,3 +1221,62 @@ bool is_empty_dir_inode(struct inode *inode) return (inode->i_fop == &empty_dir_operations) && (inode->i_op == &empty_dir_inode_operations); } + +#ifdef CONFIG_UNICODE +bool needs_casefold(const struct inode *dir) +{ + return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding && + (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir)); +} +EXPORT_SYMBOL(needs_casefold); + +int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +{ + const struct dentry *parent = READ_ONCE(dentry->d_parent); + const struct inode *inode = READ_ONCE(parent->d_inode); + const struct super_block *sb = dentry->d_sb; + const struct unicode_map *um = sb->s_encoding; + struct qstr entry = QSTR_INIT(str, len); + int ret; + + if (!inode || !needs_casefold(inode)) + goto fallback; + + ret = utf8_strncasecmp(um, name, &entry); + if (ret >= 0) + return ret; + + if (sb_has_enc_strict_mode(sb)) + return -EINVAL; +fallback: + if (len != name->len) + return 1; + return !!memcmp(str, name->name, len); +} +EXPORT_SYMBOL(generic_ci_d_compare); + +int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) +{ + const struct inode *inode = READ_ONCE(dentry->d_inode); + struct super_block *sb = dentry->d_sb; + const struct unicode_map *um = sb->s_encoding; + int ret = 0; + + if 
(!inode || !needs_casefold(inode)) + return 0; + + ret = utf8_casefold_hash(um, dentry, str); + if (ret < 0) + goto err; + + return 0; +err: + if (sb_has_enc_strict_mode(sb)) + ret = -EINVAL; + else + ret = 0; + return ret; +} +EXPORT_SYMBOL(generic_ci_d_hash); +#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 6495cb54f42c..828d4a55bf37 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1309,6 +1309,12 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_ACTIVE (1<<30) #define SB_NOUSER (1<<31) +/* These flags relate to encoding and casefolding */ +#define SB_ENC_STRICT_MODE_FL (1 << 0) + +#define sb_has_enc_strict_mode(sb) \ + (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) + /* * Umount options */ @@ -1377,6 +1383,10 @@ struct super_block { #endif struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ +#ifdef CONFIG_UNICODE + struct unicode_map *s_encoding; + __u16 s_encoding_flags; +#endif struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; struct backing_dev_info *s_bdi; @@ -3190,6 +3200,18 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); +#ifdef CONFIG_UNICODE +extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str); +extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name); +extern bool needs_casefold(const struct inode *dir); +#else +static inline bool needs_casefold(const struct inode *dir) +{ + return 0; +} +#endif + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *, -- GitLab From ed5f8d20bf3c6ae668e6e88e15b383adae0735d0 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 6 Feb 2020 14:41:31 -0800 Subject: [PATCH 0047/1278] FROMLIST: f2fs: Use generic casefolding support This switches f2fs over to the generic support provided in commit 
65832afbeaaf ("fs: Add standard casefolding support") Signed-off-by: Daniel Rosenberg Test: Boots, /data/media is case insensitive Bug: 138322712 Link: https://lore.kernel.org/linux-f2fs-devel/20200208013552.241832-1-drosen@google.com/T/#t Change-Id: Ib2aecfdf029a53d116929bf6c0fc4802585f4383 --- fs/f2fs/dir.c | 63 +++++++---------------------------------- fs/f2fs/f2fs.h | 4 --- fs/f2fs/hash.c | 2 +- fs/f2fs/super.c | 10 +++---- fs/f2fs/sysfs.c | 10 ++++--- include/linux/f2fs_fs.h | 3 -- 6 files changed, 22 insertions(+), 70 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 331c90556a0f..704d68c34130 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -114,8 +114,8 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, const struct qstr *entry, bool quick) { - const struct f2fs_sb_info *sbi = F2FS_SB(parent->i_sb); - const struct unicode_map *um = sbi->s_encoding; + const struct super_block *sb = parent->i_sb; + const struct unicode_map *um = sb->s_encoding; int ret; if (quick) @@ -127,7 +127,7 @@ int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, /* Handle invalid character sequence as either an error * or as an opaque byte sequence. 
*/ - if (f2fs_has_strict_mode(sbi)) + if (sb_has_enc_strict_mode(sb)) return -EINVAL; if (name->len != entry->len) @@ -154,7 +154,7 @@ static void f2fs_fname_setup_ci_filename(struct inode *dir, if (!cf_name->name) return; - cf_name->len = utf8_casefold(sbi->s_encoding, + cf_name->len = utf8_casefold(dir->i_sb->s_encoding, iname, cf_name->name, F2FS_NAME_LEN); if ((int)cf_name->len <= 0) { @@ -173,7 +173,7 @@ static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, { #ifdef CONFIG_UNICODE struct inode *parent = d->inode; - struct f2fs_sb_info *sbi = F2FS_I_SB(parent); + struct super_block *sb = parent->i_sb; struct qstr entry; #endif @@ -184,7 +184,7 @@ static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, entry.name = d->filename[bit_pos]; entry.len = de->name_len; - if (sbi->s_encoding && IS_CASEFOLDED(parent)) { + if (sb->s_encoding && IS_CASEFOLDED(parent)) { if (cf_str->name) { struct qstr cf = {.name = cf_str->name, .len = cf_str->len}; @@ -357,8 +357,8 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, int err; #ifdef CONFIG_UNICODE - if (f2fs_has_strict_mode(F2FS_I_SB(dir)) && IS_CASEFOLDED(dir) && - utf8_validate(F2FS_I_SB(dir)->s_encoding, child)) { + if (sb_has_enc_strict_mode(dir->i_sb) && IS_CASEFOLDED(dir) && + utf8_validate(dir->i_sb->s_encoding, child)) { *res_page = ERR_PTR(-EINVAL); return NULL; } @@ -1079,51 +1079,8 @@ const struct file_operations f2fs_dir_operations = { }; #ifdef CONFIG_UNICODE -static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, - const char *str, const struct qstr *name) -{ - struct qstr qstr = {.name = str, .len = len }; - const struct dentry *parent = READ_ONCE(dentry->d_parent); - const struct inode *inode = READ_ONCE(parent->d_inode); - - if (!inode || !IS_CASEFOLDED(inode)) { - if (len != name->len) - return -1; - return memcmp(str, name->name, len); - } - - return f2fs_ci_compare(inode, name, &qstr, false); -} - -static int f2fs_d_hash(const struct dentry *dentry, struct qstr 
*str) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - const struct unicode_map *um = sbi->s_encoding; - const struct inode *inode = READ_ONCE(dentry->d_inode); - unsigned char *norm; - int len, ret = 0; - - if (!inode || !IS_CASEFOLDED(inode)) - return 0; - - norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC); - if (!norm) - return -ENOMEM; - - len = utf8_casefold(um, str, norm, PATH_MAX); - if (len < 0) { - if (f2fs_has_strict_mode(sbi)) - ret = -EINVAL; - goto out; - } - str->hash = full_name_hash(dentry, norm, len); -out: - kvfree(norm); - return ret; -} - const struct dentry_operations f2fs_dentry_ops = { - .d_hash = f2fs_d_hash, - .d_compare = f2fs_d_compare, + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, }; #endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 57475e687e2f..3beb72a44ff9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1285,10 +1285,6 @@ struct f2fs_sb_info { int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ struct mutex writepages; /* mutex for writepages() */ -#ifdef CONFIG_UNICODE - struct unicode_map *s_encoding; - __u16 s_encoding_flags; -#endif #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 5bc4dcd8fc03..28acb24e7a7a 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -110,7 +110,7 @@ f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, { #ifdef CONFIG_UNICODE struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - const struct unicode_map *um = sbi->s_encoding; + const struct unicode_map *um = dir->i_sb->s_encoding; int r, dlen; unsigned char *buff; struct qstr folded; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 705bd7a5abe1..7e1b9a6d1f90 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1230,7 +1230,7 @@ static void f2fs_put_super(struct super_block *sb) for (i = 0; i < NR_PAGE_TYPE; i++) kvfree(sbi->write_io[i]); #ifdef CONFIG_UNICODE - 
utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif kvfree(sbi); } @@ -3293,7 +3293,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) { #ifdef CONFIG_UNICODE - if (f2fs_sb_has_casefold(sbi) && !sbi->s_encoding) { + if (f2fs_sb_has_casefold(sbi) && !sbi->sb->s_encoding) { const struct f2fs_sb_encodings *encoding_info; struct unicode_map *encoding; __u16 encoding_flags; @@ -3324,8 +3324,8 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) "%s-%s with flags 0x%hx", encoding_info->name, encoding_info->version?:"\b", encoding_flags); - sbi->s_encoding = encoding; - sbi->s_encoding_flags = encoding_flags; + sbi->sb->s_encoding = encoding; + sbi->sb->s_encoding_flags = encoding_flags; sbi->sb->s_d_op = &f2fs_dentry_ops; } #else @@ -3800,7 +3800,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) kvfree(sbi->write_io[i]); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif free_options: #ifdef CONFIG_QUOTA diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 227d3db5c989..a32b3a392fbd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -175,12 +175,14 @@ static ssize_t encoding_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { #ifdef CONFIG_UNICODE + struct super_block *sb = sbi->sb; + if (f2fs_sb_has_casefold(sbi)) return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n", - sbi->s_encoding->charset, - (sbi->s_encoding->version >> 16) & 0xff, - (sbi->s_encoding->version >> 8) & 0xff, - sbi->s_encoding->version & 0xff); + sb->s_encoding->charset, + (sb->s_encoding->version >> 16) & 0xff, + (sb->s_encoding->version >> 8) & 0xff, + sb->s_encoding->version & 0xff); #endif return sprintf(buf, "(none)"); } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ac3f4888b3df..e8763a955f90 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -38,9 +38,6 @@ #define F2FS_MAX_QUOTAS 3 
#define F2FS_ENC_UTF8_12_1 1 -#define F2FS_ENC_STRICT_MODE_FL (1 << 0) -#define f2fs_has_strict_mode(sbi) \ - (sbi->s_encoding_flags & F2FS_ENC_STRICT_MODE_FL) #define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ #define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */ -- GitLab From d2cb77395ecdb80dbc77b1dd58634c2ac26868a9 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 6 Feb 2020 14:41:16 -0800 Subject: [PATCH 0048/1278] FROMLIST: ext4: Use generic casefolding support This switches ext4 over to the generic support provided in commit 65832afbeaaf ("fs: Add standard casefolding support") Signed-off-by: Daniel Rosenberg Test: Boots, /data/media is case insensitive Bug: 138322712 Link: https://lore.kernel.org/linux-f2fs-devel/20200208013552.241832-1-drosen@google.com/T/#t Change-Id: I3a0705278100590df4c7cdd0dcdf945e9f11feb7 --- fs/ext4/dir.c | 45 ++------------------------------------------- fs/ext4/ext4.h | 12 ------------ fs/ext4/hash.c | 2 +- fs/ext4/namei.c | 18 ++++++++---------- fs/ext4/super.c | 12 ++++++------ 5 files changed, 17 insertions(+), 72 deletions(-) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 97a3b29131bb..bb6ff465ae3d 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -664,49 +664,8 @@ const struct file_operations ext4_dir_operations = { }; #ifdef CONFIG_UNICODE -static int ext4_d_compare(const struct dentry *dentry, unsigned int len, - const char *str, const struct qstr *name) -{ - struct qstr qstr = {.name = str, .len = len }; - struct inode *inode = dentry->d_parent->d_inode; - - if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) { - if (len != name->len) - return -1; - return memcmp(str, name->name, len); - } - - return ext4_ci_compare(inode, name, &qstr, false); -} - -static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) -{ - const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb); - const struct unicode_map *um = sbi->s_encoding; - unsigned 
char *norm; - int len, ret = 0; - - if (!IS_CASEFOLDED(dentry->d_inode) || !um) - return 0; - - norm = kmalloc(PATH_MAX, GFP_ATOMIC); - if (!norm) - return -ENOMEM; - - len = utf8_casefold(um, str, norm, PATH_MAX); - if (len < 0) { - if (ext4_has_strict_mode(sbi)) - ret = -EINVAL; - goto out; - } - str->hash = full_name_hash(dentry, norm, len); -out: - kfree(norm); - return ret; -} - const struct dentry_operations ext4_dentry_ops = { - .d_hash = ext4_d_hash, - .d_compare = ext4_d_compare, + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, }; #endif diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0b98ac498971..35dca3a91323 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1376,14 +1376,6 @@ struct ext4_super_block { #define EXT4_ENC_UTF8_12_1 1 -/* - * Flags for ext4_sb_info.s_encoding_flags. - */ -#define EXT4_ENC_STRICT_MODE_FL (1 << 0) - -#define ext4_has_strict_mode(sbi) \ - (sbi->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL) - /* * fourth extended-fs super-block data in memory */ @@ -1435,10 +1427,6 @@ struct ext4_sb_info { struct kobject s_kobj; struct completion s_kobj_unregister; struct super_block *s_sb; -#ifdef CONFIG_UNICODE - struct unicode_map *s_encoding; - __u16 s_encoding_flags; -#endif /* Journaling */ struct journal_s *s_journal; diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index ed76a6d7a2d8..f3bc69b8d4e5 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -277,7 +277,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len, struct dx_hash_info *hinfo) { #ifdef CONFIG_UNICODE - const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding; + const struct unicode_map *um = dir->i_sb->s_encoding; int r, dlen; unsigned char *buff; struct qstr qstr = {.name = name, .len = len }; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 42177a426ca9..57aff1ae37a4 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1280,8 +1280,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) int 
ext4_ci_compare(const struct inode *parent, const struct qstr *name, const struct qstr *entry, bool quick) { - const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb); - const struct unicode_map *um = sbi->s_encoding; + const struct super_block *sb = parent->i_sb; + const struct unicode_map *um = sb->s_encoding; int ret; if (quick) @@ -1293,7 +1293,7 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name, /* Handle invalid character sequence as either an error * or as an opaque byte sequence. */ - if (ext4_has_strict_mode(sbi)) + if (sb_has_enc_strict_mode(sb)) return -EINVAL; if (name->len != entry->len) @@ -1310,7 +1310,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, { int len; - if (!IS_CASEFOLDED(dir) || !EXT4_SB(dir->i_sb)->s_encoding) { + if (!needs_casefold(dir)) { cf_name->name = NULL; return; } @@ -1319,7 +1319,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, if (!cf_name->name) return; - len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding, + len = utf8_casefold(dir->i_sb->s_encoding, iname, cf_name->name, EXT4_NAME_LEN); if (len <= 0) { @@ -1356,7 +1356,7 @@ static inline bool ext4_match(const struct inode *parent, #endif #ifdef CONFIG_UNICODE - if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) { + if (needs_casefold(parent)) { if (fname->cf_name.name) { struct qstr cf = {.name = fname->cf_name.name, .len = fname->cf_name.len}; @@ -2172,7 +2172,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, struct ext4_dir_entry_2 *de; struct ext4_dir_entry_tail *t; struct super_block *sb; - struct ext4_sb_info *sbi; struct ext4_filename fname; int retval; int dx_fallback=0; @@ -2184,14 +2183,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, csum_size = sizeof(struct ext4_dir_entry_tail); sb = dir->i_sb; - sbi = EXT4_SB(sb); blocksize = sb->s_blocksize; if (!dentry->d_name.len) return -EINVAL; #ifdef CONFIG_UNICODE - if 
(ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) && - sbi->s_encoding && utf8_validate(sbi->s_encoding, &dentry->d_name)) + if (sb_has_enc_strict_mode(sb) && IS_CASEFOLDED(dir) && + sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name)) return -EINVAL; #endif diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 25fe536638d0..bbbf4775d2f6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -989,7 +989,7 @@ static void ext4_put_super(struct super_block *sb) kfree(sbi->s_blockgroup_lock); fs_put_dax(sbi->s_daxdev); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif kfree(sbi); } @@ -3815,7 +3815,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; #ifdef CONFIG_UNICODE - if (ext4_has_feature_casefold(sb) && !sbi->s_encoding) { + if (ext4_has_feature_casefold(sb) && !sb->s_encoding) { const struct ext4_sb_encodings *encoding_info; struct unicode_map *encoding; __u16 encoding_flags; @@ -3846,8 +3846,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "%s-%s with flags 0x%hx", encoding_info->name, encoding_info->version?:"\b", encoding_flags); - sbi->s_encoding = encoding; - sbi->s_encoding_flags = encoding_flags; + sb->s_encoding = encoding; + sb->s_encoding_flags = encoding_flags; } #endif @@ -4470,7 +4470,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } #ifdef CONFIG_UNICODE - if (sbi->s_encoding) + if (sb->s_encoding) sb->s_d_op = &ext4_dentry_ops; #endif @@ -4654,7 +4654,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) crypto_free_shash(sbi->s_chksum_driver); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif #ifdef CONFIG_QUOTA -- GitLab From 207be96aba2472e1dd875bd192fe39b5b0d6a257 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 27 Jan 2020 20:04:40 -0800 Subject: [PATCH 0049/1278] FROMLIST: fscrypt: Have filesystems handle 
their d_ops This shifts the responsibility of setting up dentry operations from fscrypt to the individual filesystems, allowing them to have their own operations while still setting fscrypt's d_revalidate as appropriate. Also added helper function to libfs to unify ext4 and f2fs implementations. Signed-off-by: Daniel Rosenberg Test: Boots, /data/media is case insensitive Bug: 138322712 Link: https://lore.kernel.org/linux-f2fs-devel/20200208013552.241832-1-drosen@google.com/T/#t Change-Id: Iaf77f8c5961ecf22e22478701ab0b7fe2025225d --- fs/crypto/fname.c | 7 ++---- fs/crypto/fscrypt_private.h | 1 - fs/crypto/hooks.c | 1 - fs/ext4/dir.c | 7 ------ fs/ext4/namei.c | 1 + fs/ext4/super.c | 5 ---- fs/f2fs/dir.c | 7 ------ fs/f2fs/f2fs.h | 3 --- fs/f2fs/namei.c | 1 + fs/f2fs/super.c | 1 - fs/libfs.c | 50 +++++++++++++++++++++++++++++++++++++ fs/ubifs/dir.c | 18 +++++++++++++ include/linux/fs.h | 2 ++ include/linux/fscrypt.h | 6 +++-- 14 files changed, 78 insertions(+), 32 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index f64d66e10129..63bfe5e8accd 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -544,7 +544,7 @@ EXPORT_SYMBOL_GPL(fscrypt_fname_siphash); * Validate dentries in encrypted directories to make sure we aren't potentially * caching stale dentries after a key has been added. 
*/ -static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) +int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *dir; int err; @@ -583,7 +583,4 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) return valid; } - -const struct dentry_operations fscrypt_d_ops = { - .d_revalidate = fscrypt_d_revalidate, -}; +EXPORT_SYMBOL(fscrypt_d_revalidate); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index bc53caf70497..f78cc4dfb452 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -275,7 +275,6 @@ extern int fscrypt_fname_encrypt(const struct inode *inode, extern bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, u32 *encrypted_len_ret); -extern const struct dentry_operations fscrypt_d_ops; /* hkdf.c */ diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 4ca167017d67..a6396bf721ac 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -117,7 +117,6 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_ENCRYPTED_NAME; spin_unlock(&dentry->d_lock); - d_set_d_op(dentry, &fscrypt_d_ops); } return err; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index bb6ff465ae3d..9c68ffb4c2cf 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -662,10 +662,3 @@ const struct file_operations ext4_dir_operations = { .open = ext4_dir_open, .release = ext4_release_dir, }; - -#ifdef CONFIG_UNICODE -const struct dentry_operations ext4_dentry_ops = { - .d_hash = generic_ci_d_hash, - .d_compare = generic_ci_d_compare, -}; -#endif diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 57aff1ae37a4..87c0fe645337 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1607,6 +1607,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir, struct buffer_head *bh; err = ext4_fname_prepare_lookup(dir, dentry, &fname); + generic_set_encrypted_ci_d_ops(dir, 
dentry); if (err == -ENOENT) return NULL; if (err) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index bbbf4775d2f6..a7ac63e921c8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4469,11 +4469,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount4; } -#ifdef CONFIG_UNICODE - if (sb->s_encoding) - sb->s_d_op = &ext4_dentry_ops; -#endif - sb->s_root = d_make_root(root); if (!sb->s_root) { ext4_msg(sb, KERN_ERR, "get root dentry failed"); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 704d68c34130..2620132f3bad 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1077,10 +1077,3 @@ const struct file_operations f2fs_dir_operations = { .compat_ioctl = f2fs_compat_ioctl, #endif }; - -#ifdef CONFIG_UNICODE -const struct dentry_operations f2fs_dentry_ops = { - .d_hash = generic_ci_d_hash, - .d_compare = generic_ci_d_compare, -}; -#endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3beb72a44ff9..23aa07c86499 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3626,9 +3626,6 @@ static inline void update_sit_info(struct f2fs_sb_info *sbi) {} #endif extern const struct file_operations f2fs_dir_operations; -#ifdef CONFIG_UNICODE -extern const struct dentry_operations f2fs_dentry_ops; -#endif extern const struct file_operations f2fs_file_operations; extern const struct inode_operations f2fs_file_inode_operations; extern const struct address_space_operations f2fs_dblock_aops; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 76ba1e2fee48..198e77c15f59 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -494,6 +494,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } err = fscrypt_prepare_lookup(dir, dentry, &fname); + generic_set_encrypted_ci_d_ops(dir, dentry); if (err == -ENOENT) goto out_splice; if (err) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7e1b9a6d1f90..6aaefdc269a7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3326,7 +3326,6 @@ static int 
f2fs_setup_casefold(struct f2fs_sb_info *sbi) sbi->sb->s_encoding = encoding; sbi->sb->s_encoding_flags = encoding_flags; - sbi->sb->s_d_op = &f2fs_dentry_ops; } #else if (f2fs_sb_has_casefold(sbi)) { diff --git a/fs/libfs.c b/fs/libfs.c index f59b163c38ac..27def8ba162d 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1279,4 +1279,54 @@ int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) return ret; } EXPORT_SYMBOL(generic_ci_d_hash); + +static const struct dentry_operations generic_ci_dentry_ops = { + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, +}; +#endif + +#ifdef CONFIG_FS_ENCRYPTION +static const struct dentry_operations generic_encrypted_dentry_ops = { + .d_revalidate = fscrypt_d_revalidate, +}; +#endif + +#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION) +static const struct dentry_operations generic_encrypted_ci_dentry_ops = { + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, + .d_revalidate = fscrypt_d_revalidate, +}; +#endif + +/** + * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry + * @dir: parent of dentry whose ops to set + * @dentry: detnry to set ops on + * + * This function sets the dentry ops for the given dentry to handle both + * casefolding and encryption of the dentry name. 
+ */ +void generic_set_encrypted_ci_d_ops(struct inode *dir, struct dentry *dentry) +{ +#ifdef CONFIG_FS_ENCRYPTION + if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) { +#ifdef CONFIG_UNICODE + if (dir->i_sb->s_encoding) { + d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops); + return; + } #endif + d_set_d_op(dentry, &generic_encrypted_dentry_ops); + return; + } +#endif +#ifdef CONFIG_UNICODE + if (dir->i_sb->s_encoding) { + d_set_d_op(dentry, &generic_ci_dentry_ops); + return; + } +#endif +} +EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 3c806194942d..7d5c2cf95353 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -208,6 +208,7 @@ static int dbg_check_name(const struct ubifs_info *c, return 0; } +static void ubifs_set_d_ops(struct inode *dir, struct dentry *dentry); static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -221,6 +222,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); err = fscrypt_prepare_lookup(dir, dentry, &nm); + ubifs_set_d_ops(dir, dentry); if (err == -ENOENT) return d_splice_alias(NULL, dentry); if (err) @@ -1684,3 +1686,19 @@ const struct file_operations ubifs_dir_operations = { .compat_ioctl = ubifs_compat_ioctl, #endif }; + +#ifdef CONFIG_FS_ENCRYPTION +static const struct dentry_operations ubifs_encrypted_dentry_ops = { + .d_revalidate = fscrypt_d_revalidate, +}; +#endif + +static void ubifs_set_d_ops(struct inode *dir, struct dentry *dentry) +{ +#ifdef CONFIG_FS_ENCRYPTION + if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) { + d_set_d_op(dentry, &ubifs_encrypted_dentry_ops); + return; + } +#endif +} diff --git a/include/linux/fs.h b/include/linux/fs.h index 828d4a55bf37..90d15f08f4dc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3211,6 +3211,8 @@ static inline bool needs_casefold(const struct inode *dir) return 0; } #endif +extern void 
generic_set_encrypted_ci_d_ops(struct inode *dir, + struct dentry *dentry); #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 12b53d0f6961..db2dbc0b972a 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -138,6 +138,7 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) } extern void fscrypt_free_bounce_page(struct page *bounce_page); +extern int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* policy.c */ extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); @@ -670,8 +671,9 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * filenames are presented in encrypted form. Therefore, we'll try to set up * the directory's encryption key, but even without it the lookup can continue. * - * This also installs a custom ->d_revalidate() method which will invalidate the - * dentry if it was created without the key and the key is later added. + * After calling this function, a filesystem should ensure that it's dentry + * operations contain fscrypt_d_revalidate if DCACHE_ENCRYPTED_NAME was set, + * so that the dentry can be invalidated if the key is later added. * * Return: 0 on success; -ENOENT if key is unavailable but the filename isn't a * correctly formed encoded ciphertext name, so a negative dentry should be -- GitLab From 10d4512505762621bbd20bef4195681fee5a7c42 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 30 Sep 2019 12:53:51 -0700 Subject: [PATCH 0050/1278] FROMLIST: f2fs: Handle casefolding with Encryption This expands f2fs's casefolding support to include encrypted directories. For encrypted directories, we use the siphash of the casefolded name. This ensures there is no direct way to go from an unencrypted name to the stored hash on disk without knowledge of the encryption policy keys. 
Additionally, we switch to using the vfs layer's casefolding support instead of storing this information inside of f2fs's private data. Signed-off-by: Daniel Rosenberg Note: Fixed some missing type conversions, crypto length issue and hash check for ciphertext name Test: Boots, /data/media is case insensitive Bug: 138322712 Link: https://lore.kernel.org/linux-f2fs-devel/20200208013552.241832-1-drosen@google.com/T/#t Change-Id: I8f1e324472668e27d3e059cc80e4c981ce89dd9b --- fs/f2fs/dir.c | 65 +++++++++++++++++++++++++++++++----------------- fs/f2fs/f2fs.h | 11 +++----- fs/f2fs/hash.c | 25 +++++++++++++------ fs/f2fs/inline.c | 9 ++++--- fs/f2fs/super.c | 6 ----- 5 files changed, 67 insertions(+), 49 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 2620132f3bad..8f83bfa8ffc6 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -108,34 +108,52 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, * Test whether a case-insensitive directory entry matches the filename * being searched for. * + * Only called for encrypted names if the key is available. + * * Returns: 0 if the directory entry matches, more than 0 if it * doesn't match or less than zero on error. 
*/ -int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, - const struct qstr *entry, bool quick) +static int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, + u8 *de_name, size_t de_name_len, bool quick) { const struct super_block *sb = parent->i_sb; const struct unicode_map *um = sb->s_encoding; + struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len); + struct qstr entry = QSTR_INIT(de_name, de_name_len); int ret; + if (IS_ENCRYPTED(parent)) { + const struct fscrypt_str encrypted_name = + FSTR_INIT(de_name, de_name_len); + + decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); + if (!decrypted_name.name) + return -ENOMEM; + ret = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name, + &decrypted_name); + if (ret < 0) + goto out; + entry.name = decrypted_name.name; + entry.len = decrypted_name.len; + } + if (quick) - ret = utf8_strncasecmp_folded(um, name, entry); + ret = utf8_strncasecmp_folded(um, name, &entry); else - ret = utf8_strncasecmp(um, name, entry); - + ret = utf8_strncasecmp(um, name, &entry); if (ret < 0) { /* Handle invalid character sequence as either an error * or as an opaque byte sequence. 
*/ if (sb_has_enc_strict_mode(sb)) - return -EINVAL; - - if (name->len != entry->len) - return 1; - - return !!memcmp(name->name, entry->name, name->len); + ret = -EINVAL; + else if (name->len != entry.len) + ret = 1; + else + ret = !!memcmp(name->name, entry.name, entry.len); } - +out: + kfree(decrypted_name.name); return ret; } @@ -173,24 +191,24 @@ static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, { #ifdef CONFIG_UNICODE struct inode *parent = d->inode; - struct super_block *sb = parent->i_sb; - struct qstr entry; + u8 *name; + int len; #endif if (de->hash_code != namehash) return false; #ifdef CONFIG_UNICODE - entry.name = d->filename[bit_pos]; - entry.len = de->name_len; + name = d->filename[bit_pos]; + len = le16_to_cpu(de->name_len); - if (sb->s_encoding && IS_CASEFOLDED(parent)) { + if (needs_casefold(parent)) { if (cf_str->name) { struct qstr cf = {.name = cf_str->name, .len = cf_str->len}; - return !f2fs_ci_compare(parent, &cf, &entry, true); + return !f2fs_ci_compare(parent, &cf, name, len, true); } - return !f2fs_ci_compare(parent, fname->usr_fname, &entry, + return !f2fs_ci_compare(parent, fname->usr_fname, name, len, false); } #endif @@ -616,13 +634,13 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, + f2fs_hash_t dentry_hash, struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; unsigned int level; unsigned int current_depth; unsigned long bidx, block; - f2fs_hash_t dentry_hash; unsigned int nbucket, nblock; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; @@ -632,7 +650,6 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, level = 0; slots = GET_DENTRY_SLOTS(new_name->len); - dentry_hash = f2fs_dentry_hash(dir, new_name, NULL); current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { @@ -718,17 +735,19 @@ 
int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode) { struct qstr new_name; + f2fs_hash_t dentry_hash; int err = -EAGAIN; new_name.name = fname_name(fname); new_name.len = fname_len(fname); if (f2fs_has_inline_dentry(dir)) - err = f2fs_add_inline_entry(dir, &new_name, fname->usr_fname, + err = f2fs_add_inline_entry(dir, &new_name, fname, inode, ino, mode); + dentry_hash = f2fs_dentry_hash(dir, &new_name, fname); if (err == -EAGAIN) err = f2fs_add_regular_entry(dir, &new_name, fname->usr_fname, - inode, ino, mode); + dentry_hash, inode, ino, mode); f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); return err; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 23aa07c86499..ad0ec1b7c141 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3075,11 +3075,6 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); -extern int f2fs_ci_compare(const struct inode *parent, - const struct qstr *name, - const struct qstr *entry, - bool quick); - /* * dir.c */ @@ -3113,7 +3108,7 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos); int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, + const struct qstr *orig_name, f2fs_hash_t dentry_hash, struct inode *inode, nid_t ino, umode_t mode); int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode); @@ -3146,7 +3141,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); * hash.c */ f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, - const struct qstr *name_info, struct fscrypt_name *fname); + const struct qstr *name_info, const struct fscrypt_name *fname); /* * node.c @@ -3656,7 +3651,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, int 
f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage); int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, + const struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode); void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 28acb24e7a7a..8f7ee4362312 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -68,8 +68,9 @@ static void str2hashbuf(const unsigned char *msg, size_t len, *buf++ = pad; } -static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, - struct fscrypt_name *fname) +static f2fs_hash_t __f2fs_dentry_hash(const struct inode *dir, + const struct qstr *name_info, + const struct fscrypt_name *fname) { __u32 hash; f2fs_hash_t f2fs_hash; @@ -79,12 +80,17 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, size_t len = name_info->len; /* encrypted bigname case */ - if (fname && !fname->disk_name.name) + if (fname && fname->is_ciphertext_name) return cpu_to_le32(fname->hash); if (is_dot_dotdot(name_info)) return 0; + if (IS_CASEFOLDED(dir) && IS_ENCRYPTED(dir)) { + f2fs_hash = cpu_to_le32(fscrypt_fname_siphash(dir, name_info)); + return f2fs_hash; + } + /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; buf[1] = 0xefcdab89; @@ -106,7 +112,7 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, } f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, - const struct qstr *name_info, struct fscrypt_name *fname) + const struct qstr *name_info, const struct fscrypt_name *fname) { #ifdef CONFIG_UNICODE struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); @@ -114,27 +120,30 @@ f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, int r, dlen; unsigned char *buff; struct qstr folded; + const struct qstr *name = fname ? 
fname->usr_fname : name_info; if (!name_info->len || !IS_CASEFOLDED(dir)) goto opaque_seq; + if (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir)) + goto opaque_seq; + buff = f2fs_kzalloc(sbi, sizeof(char) * PATH_MAX, GFP_KERNEL); if (!buff) return -ENOMEM; - - dlen = utf8_casefold(um, name_info, buff, PATH_MAX); + dlen = utf8_casefold(um, name, buff, PATH_MAX); if (dlen < 0) { kvfree(buff); goto opaque_seq; } folded.name = buff; folded.len = dlen; - r = __f2fs_dentry_hash(&folded, fname); + r = __f2fs_dentry_hash(dir, &folded, fname); kvfree(buff); return r; opaque_seq: #endif - return __f2fs_dentry_hash(name_info, fname); + return __f2fs_dentry_hash(dir, name_info, fname); } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index cabc7a69cee4..c68a32369f44 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -483,8 +483,8 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) ino = le32_to_cpu(de->ino); fake_mode = f2fs_get_de_type(de) << S_SHIFT; - err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL, - ino, fake_mode); + err = f2fs_add_regular_entry(dir, &new_name, NULL, + de->hash_code, NULL, ino, fake_mode); if (err) goto punch_dentry_pages; @@ -596,7 +596,7 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) } int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, + const struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); @@ -607,6 +607,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(new_name->len); struct page *page = NULL; + const struct qstr *orig_name = fname->usr_fname; int err = 0; ipage = f2fs_get_node_page(sbi, dir->i_ino); @@ -637,7 +638,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_wait_on_page_writeback(ipage, NODE, true, true); - name_hash = 
f2fs_dentry_hash(dir, new_name, NULL); + name_hash = f2fs_dentry_hash(dir, new_name, fname); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); set_page_dirty(ipage); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6aaefdc269a7..e67f2a86105f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3298,12 +3298,6 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) struct unicode_map *encoding; __u16 encoding_flags; - if (f2fs_sb_has_encrypt(sbi)) { - f2fs_err(sbi, - "Can't mount with encoding and encryption"); - return -EINVAL; - } - if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info, &encoding_flags)) { f2fs_err(sbi, -- GitLab From 4195d64396b7e3ddd90942a19e20c08041b21614 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Thu, 12 Sep 2019 18:34:52 +0800 Subject: [PATCH 0051/1278] UPSTREAM: sched/psi: Correct overly pessimistic size calculation When passing a string of 32 bytes or more to psi_write(), psi_write() copies 31 bytes to its buf and overwrites buf[30] with '\0', which makes the input string 1 byte shorter than it should be. Fix it by copying sizeof(buf) bytes when nbytes >= sizeof(buf). This does not cause problems in normal use cases like: "some 500000 10000000" or "full 500000 10000000" because they are less than 32 bytes in length.
/* assuming nbytes == 35 */ char buf[32]; buf_size = min(nbytes, (sizeof(buf) - 1)); /* buf_size = 31 */ if (copy_from_user(buf, user_buf, buf_size)) return -EFAULT; buf[buf_size - 1] = '\0'; /* buf[30] = '\0' */ Before: %cd /proc/pressure/ %echo "123456789|123456789|123456789|1234" > memory [ 22.473497] nbytes=35,buf_size=31 [ 22.473775] 123456789|123456789|123456789| (print 30 chars) %sh: write error: Invalid argument %echo "123456789|123456789|123456789|1" > memory [ 64.916162] nbytes=32,buf_size=31 [ 64.916331] 123456789|123456789|123456789| (print 30 chars) %sh: write error: Invalid argument After: %cd /proc/pressure/ %echo "123456789|123456789|123456789|1234" > memory [ 254.837863] nbytes=35,buf_size=32 [ 254.838541] 123456789|123456789|123456789|1 (print 31 chars) %sh: write error: Invalid argument %echo "123456789|123456789|123456789|1" > memory [ 9965.714935] nbytes=32,buf_size=32 [ 9965.715096] 123456789|123456789|123456789|1 (print 31 chars) %sh: write error: Invalid argument Also remove the superfluous parentheses. 
Signed-off-by: Miles Chen Cc: Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190912103452.13281-1-miles.chen@mediatek.com Signed-off-by: Ingo Molnar (cherry picked from commit 4adcdcea717cb2d8436bef00dd689aa5bc76f11b) Signed-off-by: Suren Baghdasaryan Change-Id: I9371b4d5e465bb8b84ff7adf5f40f30696c6ff70 --- kernel/sched/psi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 3f85da02be4e..457eae0b39c5 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1199,7 +1199,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, if (static_branch_likely(&psi_disabled)) return -EOPNOTSUPP; - buf_size = min(nbytes, (sizeof(buf) - 1)); + buf_size = min(nbytes, sizeof(buf)); if (copy_from_user(buf, user_buf, buf_size)) return -EFAULT; -- GitLab From 8f19a0ef6a3e5f5af96e0435266a4ba5054cde61 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 3 Dec 2019 13:35:23 -0500 Subject: [PATCH 0052/1278] UPSTREAM: sched/psi: Fix sampling error and rare div0 crashes with cgroups and high uptime Jingfeng reports rare div0 crashes in psi on systems with some uptime: [58914.066423] divide error: 0000 [#1] SMP [58914.070416] Modules linked in: ipmi_poweroff ipmi_watchdog toa overlay fuse tcp_diag inet_diag binfmt_misc aisqos(O) aisqos_hotfixes(O) [58914.083158] CPU: 94 PID: 140364 Comm: kworker/94:2 Tainted: G W OE K 4.9.151-015.ali3000.alios7.x86_64 #1 [58914.093722] Hardware name: Alibaba Alibaba Cloud ECS/Alibaba Cloud ECS, BIOS 3.23.34 02/14/2019 [58914.102728] Workqueue: events psi_update_work [58914.107258] task: ffff8879da83c280 task.stack: ffffc90059dcc000 [58914.113336] RIP: 0010:[] [] psi_update_stats+0x1c1/0x330 [58914.122183] RSP: 0018:ffffc90059dcfd60 EFLAGS: 00010246 [58914.127650] RAX: 0000000000000000 RBX: ffff8858fe98be50 RCX: 000000007744d640 [58914.134947] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00003594f700648e 
[58914.142243] RBP: ffffc90059dcfdf8 R08: 0000359500000000 R09: 0000000000000000 [58914.149538] R10: 0000000000000000 R11: 0000000000000000 R12: 0000359500000000 [58914.156837] R13: 0000000000000000 R14: 0000000000000000 R15: ffff8858fe98bd78 [58914.164136] FS: 0000000000000000(0000) GS:ffff887f7f380000(0000) knlGS:0000000000000000 [58914.172529] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [58914.178467] CR2: 00007f2240452090 CR3: 0000005d5d258000 CR4: 00000000007606f0 [58914.185765] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [58914.193061] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [58914.200360] PKRU: 55555554 [58914.203221] Stack: [58914.205383] ffff8858fe98bd48 00000000000002f0 0000002e81036d09 ffffc90059dcfde8 [58914.213168] ffff8858fe98bec8 0000000000000000 0000000000000000 0000000000000000 [58914.220951] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [58914.228734] Call Trace: [58914.231337] [] psi_update_work+0x22/0x60 [58914.237067] [] process_one_work+0x189/0x420 [58914.243063] [] worker_thread+0x4e/0x4b0 [58914.248701] [] ? process_one_work+0x420/0x420 [58914.254869] [] kthread+0xe6/0x100 [58914.259994] [] ? kthread_park+0x60/0x60 [58914.265640] [] ret_from_fork+0x39/0x50 [58914.271193] Code: 41 29 c3 4d 39 dc 4d 0f 42 dc <49> f7 f1 48 8b 13 48 89 c7 48 c1 [58914.279691] RIP [] psi_update_stats+0x1c1/0x330 The crashing instruction is trying to divide the observed stall time by the sampling period. The period, stored in R8, is not 0, but we are dividing by the lower 32 bits only, which are all 0 in this instance. We could switch to a 64-bit division, but the period shouldn't be that big in the first place. It's the time between the last update and the next scheduled one, and so should always be around 2s and comfortably fit into 32 bits. 
The bug is in the initialization of new cgroups: we schedule the first sampling event in a cgroup as an offset of sched_clock(), but fail to initialize the last_update timestamp, and it defaults to 0. That results in a bogusly large sampling period the first time we run the sampling code, and consequently we underreport pressure for the first 2s of a cgroup's life. But worse, if sched_clock() is sufficiently advanced on the system, and the user gets unlucky, the period's lower 32 bits can all be 0 and the sampling division will crash. Fix this by initializing the last update timestamp to the creation time of the cgroup, thus correctly marking the start of the first pressure sampling period in a new cgroup. Reported-by: Jingfeng Xie Signed-off-by: Johannes Weiner Signed-off-by: Peter Zijlstra (Intel) Cc: Suren Baghdasaryan Link: https://lkml.kernel.org/r/20191203183524.41378-2-hannes@cmpxchg.org Signed-off-by: Sasha Levin (cherry picked from commit 3dfbe25c27eab7c90c8a7e97b4c354a9d24dd985) Signed-off-by: Suren Baghdasaryan Change-Id: Iaada5c2f1a03cf38cbb053adde478f762ce40843 --- kernel/sched/psi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 457eae0b39c5..bb66cadba361 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -186,7 +186,8 @@ static void group_init(struct psi_group *group) for_each_possible_cpu(cpu) seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq); - group->avg_next_update = sched_clock() + psi_period; + group->avg_last_update = sched_clock(); + group->avg_next_update = group->avg_last_update + psi_period; INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work); mutex_init(&group->avgs_lock); /* Init trigger-related members */ -- GitLab From f2920d064c055152e8775c786f3010bd5ec3322e Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 3 Dec 2019 13:35:24 -0500 Subject: [PATCH 0053/1278] UPSTREAM: psi: Fix a division error in psi poll() The psi window size is a u64 and can be up
to 10 seconds right now, which exceeds the lower 32 bits of the variable. We currently use div_u64 for it, which is meant only for 32-bit divisors. The result is garbage pressure sampling values and even potential div0 crashes. Use div64_u64. Signed-off-by: Johannes Weiner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Suren Baghdasaryan Cc: Jingfeng Xie Link: https://lkml.kernel.org/r/20191203183524.41378-3-hannes@cmpxchg.org Signed-off-by: Sasha Levin (cherry picked from commit c3466952ca1514158d7c16c9cfc48c27d5c5dc0f) Signed-off-by: Suren Baghdasaryan Change-Id: I49fdfd55751d1a2cde19666624c9c5d76dc78dad --- kernel/sched/psi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index bb66cadba361..1043170f04ed 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -483,7 +483,7 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value) u32 remaining; remaining = win->size - elapsed; - growth += div_u64(win->prev_growth * remaining, win->size); + growth += div64_u64(win->prev_growth * remaining, win->size); } return growth; -- GitLab From ec932f3932e53f4cf5eceef1e88071a325655a2d Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 3 Feb 2020 13:22:16 -0800 Subject: [PATCH 0054/1278] UPSTREAM: sched/psi: Fix OOB write when writing 0 bytes to PSI files Issuing write() with count parameter set to 0 on any file under /proc/pressure/ will cause an OOB write because of the access to buf[buf_size-1] when NUL-termination is performed. Fix this by checking for buf_size to be non-zero. 
Signed-off-by: Suren Baghdasaryan Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Johannes Weiner Link: https://lkml.kernel.org/r/20200203212216.7076-1-surenb@google.com (cherry picked from commit 6fcca0fa48118e6d63733eb4644c6cd880c15b8f) Bug: 148159562 Signed-off-by: Suren Baghdasaryan Change-Id: I9ec7acfc6e1083c677a95b0ea1c559ab50152873 --- kernel/sched/psi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 1043170f04ed..bc4b72527b02 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1200,6 +1200,9 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, if (static_branch_likely(&psi_disabled)) return -EOPNOTSUPP; + if (!nbytes) + return -EINVAL; + buf_size = min(nbytes, sizeof(buf)); if (copy_from_user(buf, user_buf, buf_size)) return -EFAULT; -- GitLab From 104b89080f2fad90fc67ab108d9a826015e28d73 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Jul 2016 11:04:26 +0200 Subject: [PATCH 0055/1278] KVM: x86: emulate RDPID commit fb6d4d340e0532032c808a9933eaaa7b8de435ab upstream. This is encoded as F3 0F C7 /7 with a register argument. The register argument is the second array in the group9 GroupDual, while F3 is the fourth element of a Prefix. 
Reviewed-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 7 ++++++- arch/x86/kvm/emulate.c | 22 +++++++++++++++++++++- arch/x86/kvm/vmx.c | 15 +++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 38959b173a42..1152afad524f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -291,13 +291,18 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, { switch (func) { case 0: - entry->eax = 1; /* only one leaf currently */ + entry->eax = 7; ++*nent; break; case 1: entry->ecx = F(MOVBE); ++*nent; break; + case 7: + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + if (index == 0) + entry->ecx = F(RDPID); + ++*nent; default: break; } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 041b9b05fae1..70f3636aff11 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3539,6 +3539,16 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_rdpid(struct x86_emulate_ctxt *ctxt) +{ + u64 tsc_aux = 0; + + if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux)) + return emulate_gp(ctxt, 0); + ctxt->dst.val = tsc_aux; + return X86EMUL_CONTINUE; +} + static int em_rdtsc(struct x86_emulate_ctxt *ctxt) { u64 tsc = 0; @@ -4431,10 +4441,20 @@ static const struct opcode group8[] = { F(DstMem | SrcImmByte | Lock | PageTable, em_btc), }; +/* + * The "memory" destination is actually always a register, since we come + * from the register case of group9. 
+ */ +static const struct gprefix pfx_0f_c7_7 = { + N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp), +}; + + static const struct group_dual group9 = { { N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, }, { - N, N, N, N, N, N, N, N, + N, N, N, N, N, N, N, + GP(0, &pfx_0f_c7_7), } }; static const struct opcode group11[] = { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 809d1b031fd9..4790994854bb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -12322,6 +12322,21 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + + /* + * RDPID causes #UD if disabled through secondary execution controls. + * Because it is marked as EmulateOnUD, we need to intercept it here. + */ + if (info->intercept == x86_intercept_rdtscp && + !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { + ctxt->exception.vector = UD_VECTOR; + ctxt->exception.error_code_valid = false; + return X86EMUL_PROPAGATE_FAULT; + } + + /* TODO: check more intercepts... */ return X86EMUL_CONTINUE; } -- GitLab From f16be63f67450e6964cd90878d875f1b9eb2be6e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 18 Feb 2020 18:12:41 +0000 Subject: [PATCH 0056/1278] iommu/qcom: Fix bogus detach logic commit faf305c51aeabd1ea2d7131e798ef5f55f4a7750 upstream. Currently, the implementation of qcom_iommu_domain_free() is guaranteed to do one of two things: WARN() and leak everything, or dereference NULL and crash. That alone is terrible, but in fact the whole idea of trying to track the liveness of a domain via the qcom_domain->iommu pointer as a sanity check is full of fundamentally flawed assumptions. Make things robust and actually functional by not trying to be quite so clever. 
Reported-by: Brian Masney Tested-by: Brian Masney Reported-by: Naresh Kamboju Fixes: 0ae349a0f33f ("iommu/qcom: Add qcom_iommu") Signed-off-by: Robin Murphy Tested-by: Stephan Gerhold Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/qcom_iommu.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index c8a587d034b0..b08002851e06 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -327,21 +327,19 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain) { struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); - if (WARN_ON(qcom_domain->iommu)) /* forgot to detach? */ - return; - iommu_put_dma_cookie(domain); - /* NOTE: unmap can be called after client device is powered off, - * for example, with GPUs or anything involving dma-buf. So we - * cannot rely on the device_link. Make sure the IOMMU is on to - * avoid unclocked accesses in the TLB inv path: - */ - pm_runtime_get_sync(qcom_domain->iommu->dev); - - free_io_pgtable_ops(qcom_domain->pgtbl_ops); - - pm_runtime_put_sync(qcom_domain->iommu->dev); + if (qcom_domain->iommu) { + /* + * NOTE: unmap can be called after client device is powered + * off, for example, with GPUs or anything involving dma-buf. + * So we cannot rely on the device_link. 
Make sure the IOMMU + * is on to avoid unclocked accesses in the TLB inv path: + */ + pm_runtime_get_sync(qcom_domain->iommu->dev); + free_io_pgtable_ops(qcom_domain->pgtbl_ops); + pm_runtime_put_sync(qcom_domain->iommu->dev); + } kfree(qcom_domain); } @@ -386,7 +384,7 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); unsigned i; - if (!qcom_domain->iommu) + if (WARN_ON(!qcom_domain->iommu)) return; pm_runtime_get_sync(qcom_iommu->dev); @@ -397,8 +395,6 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); } pm_runtime_put_sync(qcom_iommu->dev); - - qcom_domain->iommu = NULL; } static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, -- GitLab From feb0b94a36b3aa610841d13017bc8c64148a01e0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2020 10:14:09 +0100 Subject: [PATCH 0057/1278] ALSA: hda: Use scnprintf() for printing texts for sysfs/procfs commit 44eeb081b8630bb3ad3cd381d1ae1831463e48bb upstream. Some code in HD-audio driver calls snprintf() in a loop and still expects that the return value were actually written size, while snprintf() returns the expected would-be length instead. When the given buffer limit were small, this leads to a buffer overflow. Use scnprintf() for addressing those issues. It returns the actually written size unlike snprintf(). 
Cc: Link: https://lore.kernel.org/r/20200218091409.27162-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/hda/hdmi_chmap.c | 2 +- sound/pci/hda/hda_codec.c | 2 +- sound/pci/hda/hda_eld.c | 2 +- sound/pci/hda/hda_sysfs.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/hda/hdmi_chmap.c b/sound/hda/hdmi_chmap.c index f21633cd9b38..acbe61b8db7b 100644 --- a/sound/hda/hdmi_chmap.c +++ b/sound/hda/hdmi_chmap.c @@ -249,7 +249,7 @@ void snd_hdac_print_channel_allocation(int spk_alloc, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(cea_speaker_allocation_names); i++) { if (spk_alloc & (1 << i)) - j += snprintf(buf + j, buflen - j, " %s", + j += scnprintf(buf + j, buflen - j, " %s", cea_speaker_allocation_names[i]); } buf[j] = '\0'; /* necessary when j == 0 */ diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index e3f3351da480..a6f7561e7bb9 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -4002,7 +4002,7 @@ void snd_print_pcm_bits(int pcm, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(bits); i++) if (pcm & (AC_SUPPCM_BITS_8 << i)) - j += snprintf(buf + j, buflen - j, " %d", bits[i]); + j += scnprintf(buf + j, buflen - j, " %d", bits[i]); buf[j] = '\0'; /* necessary when j == 0 */ } diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c index ba7fe9b6655c..864cc8c9ada0 100644 --- a/sound/pci/hda/hda_eld.c +++ b/sound/pci/hda/hda_eld.c @@ -373,7 +373,7 @@ static void hdmi_print_pcm_rates(int pcm, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(alsa_rates); i++) if (pcm & (1 << i)) - j += snprintf(buf + j, buflen - j, " %d", + j += scnprintf(buf + j, buflen - j, " %d", alsa_rates[i]); buf[j] = '\0'; /* necessary when j == 0 */ diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c index 9b7efece4484..2a173de7ca02 100644 --- a/sound/pci/hda/hda_sysfs.c +++ b/sound/pci/hda/hda_sysfs.c @@ -221,7 +221,7 @@ static ssize_t 
init_verbs_show(struct device *dev, mutex_lock(&codec->user_mutex); for (i = 0; i < codec->init_verbs.used; i++) { struct hda_verb *v = snd_array_elem(&codec->init_verbs, i); - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "0x%02x 0x%03x 0x%04x\n", v->nid, v->verb, v->param); } @@ -271,7 +271,7 @@ static ssize_t hints_show(struct device *dev, mutex_lock(&codec->user_mutex); for (i = 0; i < codec->hints.used; i++) { struct hda_hint *hint = snd_array_elem(&codec->hints, i); - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "%s = %s\n", hint->key, hint->val); } mutex_unlock(&codec->user_mutex); -- GitLab From 0b3dc89884f3df90b5c85edc6812d0033780e1f6 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 17 Feb 2020 00:42:22 -0600 Subject: [PATCH 0058/1278] ASoC: sun8i-codec: Fix setting DAI data format commit 96781fd941b39e1f78098009344ebcd7af861c67 upstream. Use the correct mask for this two-bit field. This fixes setting the DAI data format to RIGHT_J or DSP_A. 
Fixes: 36c684936fae ("ASoC: Add sun8i digital audio codec") Signed-off-by: Samuel Holland Acked-by: Chen-Yu Tsai Cc: stable@kernel.org Link: https://lore.kernel.org/r/20200217064250.15516-7-samuel@sholland.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/sunxi/sun8i-codec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index 7a312168f864..a031f25031b4 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -71,6 +71,7 @@ #define SUN8I_SYS_SR_CTRL_AIF1_FS_MASK GENMASK(15, 12) #define SUN8I_SYS_SR_CTRL_AIF2_FS_MASK GENMASK(11, 8) +#define SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK GENMASK(3, 2) #define SUN8I_AIF1CLK_CTRL_AIF1_WORD_SIZ_MASK GENMASK(5, 4) #define SUN8I_AIF1CLK_CTRL_AIF1_LRCK_DIV_MASK GENMASK(8, 6) #define SUN8I_AIF1CLK_CTRL_AIF1_BCLK_DIV_MASK GENMASK(12, 9) @@ -221,7 +222,7 @@ static int sun8i_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } regmap_update_bits(scodec->regmap, SUN8I_AIF1CLK_CTRL, - BIT(SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT), + SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK, value << SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT); return 0; -- GitLab From 344ea2525869377c7906ff53079ba1d4c388bc60 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Tue, 20 Aug 2019 00:16:40 -0500 Subject: [PATCH 0059/1278] ecryptfs: fix a memory leak bug in parse_tag_1_packet() commit fe2e082f5da5b4a0a92ae32978f81507ef37ec66 upstream. In parse_tag_1_packet(), if tag 1 packet contains a key larger than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES, no cleanup is executed, leading to a memory leak on the allocated 'auth_tok_list_item'. To fix this issue, go to the label 'out_free' to perform the cleanup work. 
Cc: stable@vger.kernel.org Fixes: dddfa461fc89 ("[PATCH] eCryptfs: Public key; packet management") Signed-off-by: Wenwen Wang Signed-off-by: Tyler Hicks Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/keystore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index fa218cd64f74..3f3ec50bf773 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1285,7 +1285,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, printk(KERN_ERR "Enter w/ first byte != 0x%.2x\n", ECRYPTFS_TAG_1_PACKET_TYPE); rc = -EINVAL; - goto out; + goto out_free; } /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or * at end of function upon failure */ -- GitLab From 0db1fe06b692f5d18b6815d8f0e87b05d167ff21 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Tue, 20 Aug 2019 00:33:54 -0500 Subject: [PATCH 0060/1278] ecryptfs: fix a memory leak bug in ecryptfs_init_messaging() commit b4a81b87a4cfe2bb26a4a943b748d96a43ef20e8 upstream. In ecryptfs_init_messaging(), if the allocation for 'ecryptfs_msg_ctx_arr' fails, the previously allocated 'ecryptfs_daemon_hash' is not deallocated, leading to a memory leak bug. To fix this issue, free 'ecryptfs_daemon_hash' before returning the error. 
Cc: stable@vger.kernel.org Fixes: 88b4a07e6610 ("[PATCH] eCryptfs: Public key transport mechanism") Signed-off-by: Wenwen Wang Signed-off-by: Tyler Hicks Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/messaging.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 4f457d5c4933..26464f9d9b76 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -397,6 +397,7 @@ int __init ecryptfs_init_messaging(void) * ecryptfs_message_buf_len), GFP_KERNEL); if (!ecryptfs_msg_ctx_arr) { + kfree(ecryptfs_daemon_hash); rc = -ENOMEM; printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); goto out; -- GitLab From a16888a6ad3f3f72634f02f07b97b993f5e6338d Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 13 Feb 2020 16:59:15 -0800 Subject: [PATCH 0061/1278] Input: synaptics - switch T470s to RMI4 by default commit bf502391353b928e63096127e5fd8482080203f5 upstream. This supports RMI4 and everything seems to work, including the touchpad buttons. So, let's enable this by default. 
Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200204194322.112638-1-lyude@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 111a71190547..82456816c472 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -172,6 +172,7 @@ static const char * const smbus_pnp_ids[] = { "LEN004a", /* W541 */ "LEN005b", /* P50 */ "LEN005e", /* T560 */ + "LEN006c", /* T470s */ "LEN0071", /* T480 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN0073", /* X1 Carbon G5 (Elantech) */ -- GitLab From 659cda0d2beacb821dc84ebbd224667e02565544 Mon Sep 17 00:00:00 2001 From: Gaurav Agrawal Date: Thu, 13 Feb 2020 17:06:10 -0800 Subject: [PATCH 0062/1278] Input: synaptics - enable SMBus on ThinkPad L470 commit b8a3d819f872e0a3a0a6db0dbbcd48071042fb98 upstream. 
Add touchpad LEN2044 to the list, as it is capable of working with psmouse.synaptics_intertouch=1 Signed-off-by: Gaurav Agrawal Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/CADdtggVzVJq5gGNmFhKSz2MBwjTpdN5YVOdr4D3Hkkv=KZRc9g@mail.gmail.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 82456816c472..512709da9bbe 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -183,6 +183,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0097", /* X280 -> ALPS trackpoint */ "LEN009b", /* T580 */ "LEN200f", /* T450s */ + "LEN2044", /* L470 */ "LEN2054", /* E480 */ "LEN2055", /* E580 */ "SYN3052", /* HP EliteBook 840 G4 */ -- GitLab From c73c7fd542088a1b70f7f23795c4eaf2d5ef9462 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 13 Feb 2020 17:07:47 -0800 Subject: [PATCH 0063/1278] Input: synaptics - remove the LEN0049 dmi id from topbuttonpad list commit 5179a9dfa9440c1781816e2c9a183d1d2512dc61 upstream. The Yoga 11e is using LEN0049, but it doesn't have a trackstick. Thus, there is no need to create a software top buttons row. However, it seems that the device works under SMBus, so keep it as part of the smbus_pnp_ids. 
Signed-off-by: Benjamin Tissoires Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200115013023.9710-1-benjamin.tissoires@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 512709da9bbe..5f764e0993a4 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -149,7 +149,6 @@ static const char * const topbuttonpad_pnp_ids[] = { "LEN0042", /* Yoga */ "LEN0045", "LEN0047", - "LEN0049", "LEN2000", /* S540 */ "LEN2001", /* Edge E431 */ "LEN2002", /* Edge E531 */ @@ -169,6 +168,7 @@ static const char * const smbus_pnp_ids[] = { /* all of the topbuttonpad_pnp_ids are valid, we just add some extras */ "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ + "LEN0049", /* Yoga 11e */ "LEN004a", /* W541 */ "LEN005b", /* P50 */ "LEN005e", /* T560 */ -- GitLab From 333f517d48fa54aed52776bb0b881124a8fb0e58 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 11 Feb 2020 11:22:35 -0500 Subject: [PATCH 0064/1278] ALSA: usb-audio: Apply sample rate quirk for Audioengine D1 commit 93f9d1a4ac5930654c17412e3911b46ece73755a upstream. The Audioengine D1 (0x2912:0x30c8) does support reading the sample rate, but it returns the rate in byte-reversed order. When setting sampling rate, the driver produces these warning messages: [168840.944226] usb 3-2.2: current rate 4500480 is different from the runtime rate 44100 [168854.930414] usb 3-2.2: current rate 8436480 is different from the runtime rate 48000 [168905.185825] usb 3-2.1.2: current rate 30465 is different from the runtime rate 96000 As can be seen from the hexadecimal conversion, the current rate read back is byte-reversed from the rate that was set. 
44100 == 0x00ac44, 4500480 == 0x44ac00 48000 == 0x00bb80, 8436480 == 0x80bb00 96000 == 0x017700, 30465 == 0x007701 Rather than implementing a new quirk to reverse the order, just skip checking the rate to avoid spamming the log. Signed-off-by: Arvind Sankar Cc: Link: https://lore.kernel.org/r/20200211162235.1639889-1-nivedita@alum.mit.edu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 51ee7910e98c..4872c27f6054 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1151,6 +1151,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */ case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ + case USB_ID(0x2912, 0x30c8): /* Audioengine D1 */ return true; } return false; -- GitLab From 1f32a6a26818115a5d544e249b72c2471e575357 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 14 Feb 2020 15:19:35 +0000 Subject: [PATCH 0065/1278] arm64: cpufeature: Set the FP/SIMD compat HWCAP bits properly commit 7559950aef1ab8792c50797c6c5c7c5150a02460 upstream We set the compat_elf_hwcap bits unconditionally on arm64 to include the VFP and NEON support. However, the FP/SIMD unit is optional on Arm v8 and thus could be missing. We already handle this properly in the kernel, but still advertise to the COMPAT applications that the VFP is available. Fix this to make sure we only advertise when we really have them. 
Cc: stable@vger.kernel.org # v4.14 Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Sasha Levin --- arch/arm64/kernel/cpufeature.c | 52 +++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c477fd34a912..6b3bb67596ae 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -41,9 +41,7 @@ EXPORT_SYMBOL_GPL(elf_hwcap); #define COMPAT_ELF_HWCAP_DEFAULT \ (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_IDIV|\ COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; @@ -1134,17 +1132,30 @@ static const struct arm64_cpu_capabilities arm64_features[] = { {}, }; -#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ - { \ - .desc = #cap, \ - .type = ARM64_CPUCAP_SYSTEM_FEATURE, \ + +#define HWCAP_CPUID_MATCH(reg, field, s, min_value) \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ .sign = s, \ .min_field_value = min_value, \ + +#define __HWCAP_CAP(name, cap_type, cap) \ + .desc = name, \ + .type = ARM64_CPUCAP_SYSTEM_FEATURE, \ .hwcap_type = cap_type, \ .hwcap = cap, \ + +#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + HWCAP_CPUID_MATCH(reg, field, s, min_value) \ + } + +#define HWCAP_CAP_MATCH(match, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + .matches = match, \ } static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { @@ -1177,8 +1188,35 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { {}, }; +#ifdef CONFIG_COMPAT +static bool compat_has_neon(const 
struct arm64_cpu_capabilities *cap, int scope) +{ + /* + * Check that all of MVFR1_EL1.{SIMDSP, SIMDInt, SIMDLS} are available, + * in line with that of arm32 as in vfp_init(). We make sure that the + * check is future proof, by making sure value is non-zero. + */ + u32 mvfr1; + + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) + mvfr1 = read_sanitised_ftr_reg(SYS_MVFR1_EL1); + else + mvfr1 = read_sysreg_s(SYS_MVFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDSP_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDINT_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDLS_SHIFT); +} +#endif + static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT + HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */ + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), -- GitLab From 06fb1c6d23bba70a69cf62e3b972b3061a693401 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 14 Feb 2020 15:19:36 +0000 Subject: [PATCH 0066/1278] arm64: ptrace: nofpsimd: Fail FP/SIMD regset operations commit c9d66999f064947e6b577ceacc1eb2fbca6a8d3c upstream When fp/simd is not supported on the system, fail the operations of FP/SIMD regsets. 
Cc: stable@vger.kernel.org # v4.14 Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Sasha Levin --- arch/arm64/kernel/ptrace.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 242527f29c41..e230b4dff960 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -624,6 +624,13 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return 0; } +static int fpr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!system_supports_fpsimd()) + return -ENODEV; + return regset->n; +} + /* * TODO: update fp accessors for lazy context switching (sync/flush hwstate) */ @@ -634,6 +641,9 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, struct user_fpsimd_state *uregs; uregs = &target->thread.fpsimd_state.user_fpsimd; + if (!system_supports_fpsimd()) + return -EINVAL; + if (target == current) fpsimd_preserve_current_state(); @@ -648,6 +658,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, struct user_fpsimd_state newstate = target->thread.fpsimd_state.user_fpsimd; + if (!system_supports_fpsimd()) + return -EINVAL; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); if (ret) return ret; @@ -740,6 +753,7 @@ static const struct user_regset aarch64_regsets[] = { */ .size = sizeof(u32), .align = sizeof(u32), + .active = fpr_active, .get = fpr_get, .set = fpr_set }, @@ -914,6 +928,9 @@ static int compat_vfp_get(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.fpsimd_state.user_fpsimd; if (target == current) @@ -947,6 +964,9 @@ static int compat_vfp_set(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if 
(!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.fpsimd_state.user_fpsimd; vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); @@ -1004,6 +1024,7 @@ static const struct user_regset aarch32_regsets[] = { .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), + .active = fpr_active, .get = compat_vfp_get, .set = compat_vfp_set }, -- GitLab From afe95b7f56a8908c2c1cbf1e9e20058786f14b09 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 14 Feb 2020 15:19:37 +0000 Subject: [PATCH 0067/1278] arm64: nofpsimd: Handle TIF_FOREIGN_FPSTATE flag cleanly commit 52f73c383b2418f2d31b798e765ae7d596c35021 upstream We detect the absence of FP/SIMD after an incapable CPU is brought up, and by then we have kernel threads running already with TIF_FOREIGN_FPSTATE set which could be set for early userspace applications (e.g, modprobe triggered from initramfs) and init. This could cause the applications to loop forever in do_notify_resume() as we never clear the TIF flag, once we now know that we don't support FP. Fix this by making sure that we clear the TIF_FOREIGN_FPSTATE flag for tasks which may have them set, as we would have done in the normal case, but avoiding touching the hardware state (since we don't support any).
Cc: stable@vger.kernel.org # v4.14 Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Catalin Marinas Acked-by: Marc Zyngier Signed-off-by: Suzuki K Poulose Signed-off-by: Sasha Levin --- arch/arm64/kernel/fpsimd.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index f4fdf6420ac5..4cd962f6c430 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -206,8 +206,19 @@ void fpsimd_preserve_current_state(void) */ void fpsimd_restore_current_state(void) { - if (!system_supports_fpsimd()) + /* + * For the tasks that were created before we detected the absence of + * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(), + * e.g, init. This could be then inherited by the children processes. + * If we later detect that the system doesn't support FP/SIMD, + * we must clear the flag for all the tasks to indicate that the + * FPSTATE is clean (as we can't have one) to avoid looping for ever in + * do_notify_resume(). + */ + if (!system_supports_fpsimd()) { + clear_thread_flag(TIF_FOREIGN_FPSTATE); return; + } local_bh_disable(); @@ -229,7 +240,7 @@ void fpsimd_restore_current_state(void) */ void fpsimd_update_current_state(struct fpsimd_state *state) { - if (!system_supports_fpsimd()) + if (WARN_ON(!system_supports_fpsimd())) return; local_bh_disable(); -- GitLab From 93b79ac8be3f12b306b41bfae5977b9be6737fe9 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 29 Nov 2017 07:52:52 +0100 Subject: [PATCH 0068/1278] ARM: 8723/2: always assume the "unified" syntax for assembly code [ Upstream commit 75fea300d73ae5b18957949a53ec770daaeb6fc2 ] The GNU assembler has implemented the "unified syntax" parsing since 2005. This "unified" syntax is required when the kernel is built in Thumb2 mode. However the "unified" syntax is a mixed bag of features, including not requiring a `#' prefix with immediate operands. 
This leads to situations where some code builds just fine in Thumb2 mode and fails to build in ARM mode if that prefix is missing. This behavior discrepancy makes build tests less valuable, forcing both ARM and Thumb2 builds for proper coverage. Let's "fix" this issue by always using the "unified" syntax for both ARM and Thumb2 mode. Given that the documented minimum binutils version that properly builds the kernel is version 2.20 released in 2010, we can assume that any toolchain capable of building the latest kernel is also "unified syntax" capable. With this, a bunch of macros used to mask some differences between both syntaxes can be removed, with the side effect of making LTO easier. Suggested-by: Robin Murphy Signed-off-by: Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/Kconfig | 7 +--- arch/arm/Makefile | 6 ++- arch/arm/include/asm/unified.h | 77 ++-------------------------------- 3 files changed, 8 insertions(+), 82 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index cf69aab648fb..ba9325fc75b8 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1533,12 +1533,10 @@ config THUMB2_KERNEL bool "Compile the kernel in Thumb-2 mode" if !CPU_THUMBONLY depends on (CPU_V7 || CPU_V7M) && !CPU_V6 && !CPU_V6K default y if CPU_THUMBONLY - select ARM_ASM_UNIFIED select ARM_UNWIND help By enabling this option, the kernel will be compiled in - Thumb-2 mode. A compiler/assembler that understand the unified - ARM-Thumb syntax is needed. + Thumb-2 mode. If unsure, say N. @@ -1573,9 +1571,6 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11 Unless you are sure your tools don't have this problem, say Y.
-config ARM_ASM_UNIFIED - bool - config ARM_PATCH_IDIV bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()" depends on CPU_32v7 && !XIP_KERNEL diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 17e80f483281..234ee43b4438 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -115,9 +115,11 @@ ifeq ($(CONFIG_ARM_UNWIND),y) CFLAGS_ABI +=-funwind-tables endif +# Accept old syntax despite ".syntax unified" +AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) + ifeq ($(CONFIG_THUMB2_KERNEL),y) AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it) -AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) CFLAGS_ISA :=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb # Work around buggy relocation from gas if requested: @@ -125,7 +127,7 @@ ifeq ($(CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11),y) CFLAGS_MODULE +=-fno-optimize-sibling-calls endif else -CFLAGS_ISA :=$(call cc-option,-marm,) +CFLAGS_ISA :=$(call cc-option,-marm,) $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) endif diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index a91ae499614c..2c3b952be63e 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h @@ -20,8 +20,10 @@ #ifndef __ASM_UNIFIED_H #define __ASM_UNIFIED_H -#if defined(__ASSEMBLY__) && defined(CONFIG_ARM_ASM_UNIFIED) +#if defined(__ASSEMBLY__) .syntax unified +#else +__asm__(".syntax unified"); #endif #ifdef CONFIG_CPU_V7M @@ -64,77 +66,4 @@ #endif /* CONFIG_THUMB2_KERNEL */ -#ifndef CONFIG_ARM_ASM_UNIFIED - -/* - * If the unified assembly syntax isn't used (in ARM mode), these - * macros expand to an empty string - */ -#ifdef __ASSEMBLY__ - .macro it, cond - .endm - .macro itt, cond - .endm - .macro ite, cond - .endm - .macro ittt, cond - .endm - .macro itte, cond - .endm - .macro itet, cond - .endm - .macro itee, cond - .endm - .macro itttt, cond - .endm - .macro 
ittte, cond - .endm - .macro ittet, cond - .endm - .macro ittee, cond - .endm - .macro itett, cond - .endm - .macro itete, cond - .endm - .macro iteet, cond - .endm - .macro iteee, cond - .endm -#else /* !__ASSEMBLY__ */ -__asm__( -" .macro it, cond\n" -" .endm\n" -" .macro itt, cond\n" -" .endm\n" -" .macro ite, cond\n" -" .endm\n" -" .macro ittt, cond\n" -" .endm\n" -" .macro itte, cond\n" -" .endm\n" -" .macro itet, cond\n" -" .endm\n" -" .macro itee, cond\n" -" .endm\n" -" .macro itttt, cond\n" -" .endm\n" -" .macro ittte, cond\n" -" .endm\n" -" .macro ittet, cond\n" -" .endm\n" -" .macro ittee, cond\n" -" .endm\n" -" .macro itett, cond\n" -" .endm\n" -" .macro itete, cond\n" -" .endm\n" -" .macro iteet, cond\n" -" .endm\n" -" .macro iteee, cond\n" -" .endm\n"); -#endif /* __ASSEMBLY__ */ - -#endif /* CONFIG_ARM_ASM_UNIFIED */ - #endif /* !__ASM_UNIFIED_H */ -- GitLab From 6f67ad9855dd7d3640e757d9d81f3bd88388d86e Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Sun, 26 Jan 2020 15:03:34 -0700 Subject: [PATCH 0069/1278] ext4: don't assume that mmp_nodename/bdevname have NUL commit 14c9ca0583eee8df285d68a0e6ec71053efd2228 upstream. Don't assume that the mmp_nodename and mmp_bdevname strings are NUL terminated, since they are filled in by snprintf(), which is not guaranteed to do so. 
Link: https://lore.kernel.org/r/1580076215-1048-1-git-send-email-adilger@dilger.ca Signed-off-by: Andreas Dilger Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mmp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 38e6a846aac1..0c042bd43246 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -120,10 +120,10 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, { __ext4_warning(sb, function, line, "%s", msg); __ext4_warning(sb, function, line, - "MMP failure info: last update time: %llu, last update " - "node: %s, last update device: %s", - (long long unsigned int) le64_to_cpu(mmp->mmp_time), - mmp->mmp_nodename, mmp->mmp_bdevname); + "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s", + (unsigned long long)le64_to_cpu(mmp->mmp_time), + (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename, + (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname); } /* @@ -154,6 +154,7 @@ static int kmmpd(void *data) mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, EXT4_MMP_MIN_CHECK_INTERVAL); mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); + BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE); bdevname(bh->b_bdev, mmp->mmp_bdevname); memcpy(mmp->mmp_nodename, init_utsname()->nodename, @@ -375,7 +376,8 @@ int ext4_multi_mount_protect(struct super_block *sb, /* * Start a kernel thread to update the MMP block periodically. 
*/ - EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s", + EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%.*s", + (int)sizeof(mmp->mmp_bdevname), bdevname(bh->b_bdev, mmp->mmp_bdevname)); if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { -- GitLab From e073def68e1c683b009809a045f76facec1eb600 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 6 Feb 2020 17:35:01 -0500 Subject: [PATCH 0070/1278] ext4: fix support for inode sizes > 1024 bytes commit 4f97a68192bd33b9963b400759cef0ca5963af00 upstream. A recent commit, 9803387c55f7 ("ext4: validate the debug_want_extra_isize mount option at parse time"), moved mount-time checks around. One of those changes moved the inode size check before the blocksize variable was set to the blocksize of the file system. After 9803387c55f7, blocksize was set to the minimum allowable blocksize, which in practice on most systems would be 1024 bytes. This caused file systems with inode sizes larger than 1024 bytes to be rejected with a message: EXT4-fs (sdXX): unsupported inode size: 4096 Fixes: 9803387c55f7 ("ext4: validate the debug_want_extra_isize mount option at parse time") Link: https://lore.kernel.org/r/20200206225252.GA3673@mit.edu Reported-by: Herbert Poetzl Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 93d8aa6ef661..e142e1f51676 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3668,6 +3668,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; + blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + if (blocksize < EXT4_MIN_BLOCK_SIZE || + blocksize > EXT4_MAX_BLOCK_SIZE) { + ext4_msg(sb, KERN_ERR, + "Unsupported filesystem blocksize %d (%d log_block_size)", + blocksize, le32_to_cpu(es->s_log_block_size)); + goto failed_mount; + } + if
(le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; @@ -3685,6 +3694,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_msg(sb, KERN_ERR, "unsupported inode size: %d", sbi->s_inode_size); + ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize); goto failed_mount; } /* @@ -3848,14 +3858,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) goto failed_mount; - blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - if (blocksize < EXT4_MIN_BLOCK_SIZE || - blocksize > EXT4_MAX_BLOCK_SIZE) { - ext4_msg(sb, KERN_ERR, - "Unsupported filesystem blocksize %d (%d log_block_size)", - blocksize, le32_to_cpu(es->s_log_block_size)); - goto failed_mount; - } if (le32_to_cpu(es->s_log_block_size) > (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { ext4_msg(sb, KERN_ERR, -- GitLab From 418899d96606265650c80fbb3e7bc3afd95589b7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 10 Feb 2020 15:43:16 +0100 Subject: [PATCH 0071/1278] ext4: fix checksum errors with indexed dirs commit 48a34311953d921235f4d7bbd2111690d2e469cf upstream. DIR_INDEX has been introduced as a compat ext4 feature. That means that even kernels / tools that don't understand the feature may modify the filesystem. This works because for kernels not understanding indexed dir format, internal htree nodes appear just as empty directory entries. Index dir aware kernels then check the htree structure is still consistent before using the data. This all worked reasonably well until metadata checksums were introduced. The problem is that these effectively made DIR_INDEX only ro-compatible because internal htree nodes store checksums in a different place than normal directory blocks. 
Thus any modification ignorant to DIR_INDEX (or just clearing EXT4_INDEX_FL from the inode) will effectively cause checksum mismatch and trigger kernel errors. So we have to be more careful when dealing with indexed directories on filesystems with checksumming enabled. 1) We just disallow loading any directory inodes with EXT4_INDEX_FL when DIR_INDEX is not enabled. This is harsh but it should be very rare (it means someone disabled DIR_INDEX on existing filesystem and didn't run e2fsck), e2fsck can fix the problem, and we don't want to answer the difficult question: "Should we rather corrupt the directory more or should we ignore that DIR_INDEX feature is not set?" 2) When we find out htree structure is corrupted (but the filesystem and the directory should in support htrees), we continue just ignoring htree information for reading but we refuse to add new entries to the directory to avoid corrupting it more. Link: https://lore.kernel.org/r/20200210144316.22081-1-jack@suse.cz Fixes: dbe89444042a ("ext4: Calculate and verify checksums for htree nodes") Reviewed-by: Andreas Dilger Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/dir.c | 14 ++++++++------ fs/ext4/ext4.h | 5 ++++- fs/ext4/inode.c | 12 ++++++++++++ fs/ext4/namei.c | 7 +++++++ 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index c17855fead7b..90beca85c416 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -125,12 +125,14 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) if (err != ERR_BAD_DX_DIR) { return err; } - /* - * We don't set the inode dirty flag since it's not - * critical that it get flushed back to the disk. - */ - ext4_clear_inode_flag(file_inode(file), - EXT4_INODE_INDEX); + /* Can we just clear INDEX flag to ignore htree information? 
*/ + if (!ext4_has_metadata_csum(sb)) { + /* + * We don't set the inode dirty flag since it's not + * critical that it gets flushed back to the disk. + */ + ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } if (ext4_has_inline_data(inode)) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 95ef26b39e69..fcee1f9c7fe3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2386,8 +2386,11 @@ void ext4_insert_dentry(struct inode *inode, struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { - if (!ext4_has_feature_dir_index(inode->i_sb)) + if (!ext4_has_feature_dir_index(inode->i_sb)) { + /* ext4_iget() should have caught this... */ + WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb)); ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } static const unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a91b8404d3dc..57118ba82929 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4817,6 +4817,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = -EFSCORRUPTED; goto bad_inode; } + /* + * If dir_index is not enabled but there's dir with INDEX flag set, + * we'd normally treat htree data as empty space. But with metadata + * checksumming that corrupts checksums so forbid that. 
+ */ + if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) && + ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) { + EXT4_ERROR_INODE(inode, + "iget: Dir with htree data on filesystem without dir_index feature."); + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_disksize = inode->i_size; #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 0b5c36bd5418..a7b7e0783eed 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2094,6 +2094,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, retval = ext4_dx_add_entry(handle, &fname, dir, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) goto out; + /* Can we just ignore htree data? */ + if (ext4_has_metadata_csum(sb)) { + EXT4_ERROR_INODE(dir, + "Directory has corrupted htree index."); + retval = -EFSCORRUPTED; + goto out; + } ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); dx_fallback++; ext4_mark_inode_dirty(handle, dir); -- GitLab From ddf391e8ae8ff0ed1cc80fcb6f2221636d40992f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 14 Feb 2020 18:11:19 -0500 Subject: [PATCH 0072/1278] ext4: improve explanation of a mount failure caused by a misconfigured kernel commit d65d87a07476aa17df2dcb3ad18c22c154315bec upstream. If CONFIG_QFMT_V2 is not enabled, but CONFIG_QUOTA is enabled, when a user tries to mount a file system with the quota or project quota enabled, the kernel will emit a very confusing message: EXT4-fs warning (device vdc): ext4_enable_quotas:5914: Failed to enable quota tracking (type=0, err=-3). Please run e2fsck to fix. EXT4-fs (vdc): mount failed We will now report an explanatory message indicating which kernel configuration options have to be enabled, to avoid customer/sysadmin confusion.
Link: https://lore.kernel.org/r/20200215012738.565735-1-tytso@mit.edu Google-Bug-Id: 149093531 Fixes: 7c319d328505b778 ("ext4: make quota as first class supported feature") Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e142e1f51676..5b9e7377f26e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2863,17 +2863,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#ifndef CONFIG_QUOTA - if (ext4_has_feature_quota(sb) && !readonly) { +#if !defined(CONFIG_QUOTA) || !defined(CONFIG_QFMT_V2) + if (!readonly && (ext4_has_feature_quota(sb) || + ext4_has_feature_project(sb))) { ext4_msg(sb, KERN_ERR, - "Filesystem with quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); - return 0; - } - if (ext4_has_feature_project(sb) && !readonly) { - ext4_msg(sb, KERN_ERR, - "Filesystem with project quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); + "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2"); return 0; } #endif /* CONFIG_QUOTA */ -- GitLab From 841793cd072c701c373f1f1f27c91e435b8c864d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 31 Jan 2020 14:06:07 +0000 Subject: [PATCH 0073/1278] Btrfs: fix race between using extent maps and merging them commit ac05ca913e9f3871126d61da275bfe8516ff01ca upstream. We have a few cases where we allow an extent map that is in an extent map tree to be merged with other extents in the tree. Such cases include the unpinning of an extent after the respective ordered extent completed or after logging an extent during a fast fsync. This can lead to subtle and dangerous problems because when doing the merge some other task might be using the same extent map and as consequence see an inconsistent state of the extent map - for example sees the new length but has seen the old start offset. 
With luck this triggers a BUG_ON(), and not some silent bug, such as the following one in __do_readpage(): $ cat -n fs/btrfs/extent_io.c 3061 static int __do_readpage(struct extent_io_tree *tree, 3062 struct page *page, (...) 3127 em = __get_extent_map(inode, page, pg_offset, cur, 3128 end - cur + 1, get_extent, em_cached); 3129 if (IS_ERR_OR_NULL(em)) { 3130 SetPageError(page); 3131 unlock_extent(tree, cur, end); 3132 break; 3133 } 3134 extent_offset = cur - em->start; 3135 BUG_ON(extent_map_end(em) <= cur); (...) Consider the following example scenario, where we end up hitting the BUG_ON() in __do_readpage(). We have an inode with a size of 8KiB and 2 extent maps: extent A: file offset 0, length 4KiB, disk_bytenr = X, persisted on disk by a previous transaction extent B: file offset 4KiB, length 4KiB, disk_bytenr = X + 4KiB, not yet persisted but writeback started for it already. The extent map is pinned since there's writeback and an ordered extent in progress, so it can not be merged with extent map A yet The following sequence of steps leads to the BUG_ON(): 1) The ordered extent for extent B completes, the respective page gets its writeback bit cleared and the extent map is unpinned, at that point it is not yet merged with extent map A because it's in the list of modified extents; 2) Due to memory pressure, or some other reason, the MM subsystem releases the page corresponding to extent B - btrfs_releasepage() is called and returns 1, meaning the page can be released as it's not dirty, not under writeback anymore and the extent range is not locked in the inode's iotree. However the extent map is not released, either because we are not in a context that allows memory allocations to block or because the inode's size is smaller than 16MiB - in this case our inode has a size of 8KiB; 3) Task B needs to read extent B and ends up __do_readpage() through the btrfs_readpage() callback. 
At __do_readpage() it gets a reference to extent map B; 4) Task A, doing a fast fsync, calls clear_em_logging() against extent map B while holding the write lock on the inode's extent map tree - this results in try_merge_map() being called and since it's possible to merge extent map B with extent map A now (the extent map B was removed from the list of modified extents), the merging begins - it sets extent map B's start offset to 0 (was 4KiB), but before it increments the map's length to 8KiB (4KiB + 4KiB), task B is at: BUG_ON(extent_map_end(em) <= cur); The call to extent_map_end() sees the extent map has a start of 0 and a length still at 4KiB, so it returns 4KiB and 'cur' is 4KiB, so the BUG_ON() is triggered. So it's dangerous to modify an extent map that is in the tree, because some other task might have got a reference to it before and still be using it, and needs to see a consistent map while using it. Generally this is very rare since most paths that lookup and use extent maps also have the file range locked in the inode's iotree. The fsync path is pretty much the only exception where we don't do it to avoid serialization with concurrent reads. Fix this by not allowing an extent map to be merged if it's being used by tasks other than the one attempting to merge the extent map (when the reference count of the extent map is greater than 2).
Reported-by: ryusuke1925 Reported-by: Koki Mitani Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206211 CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_map.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2e348fb0b280..c87d673ce334 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -228,6 +228,17 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) struct extent_map *merge = NULL; struct rb_node *rb; + /* + * We can't modify an extent map that is in the tree and that is being + * used by another task, as it can cause that other task to see it in + * inconsistent state during the merging. We always have 1 reference for + * the tree and 1 for this task (which is unpinning the extent map or + * clearing the logging flag), so anything > 2 means it's being used by + * other tasks too. + */ + if (refcount_read(&em->refs) > 2) + return; + if (em->start != 0) { rb = rb_prev(&em->rb_node); if (rb) -- GitLab From 893bb1890fb956ab960aec89ef51f1422b0a4405 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 5 Feb 2020 17:12:16 +0100 Subject: [PATCH 0074/1278] btrfs: print message when tree-log replay starts commit e8294f2f6aa6208ed0923aa6d70cea3be178309a upstream. There's no logged information about tree-log replay although this is something that points to previous unclean unmount. Other filesystems report that as well. 
Suggested-by: Chris Murphy CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Anand Jain Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 44b15617c7b9..d8ab9c5a8b7d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2913,6 +2913,7 @@ int open_ctree(struct super_block *sb, /* do not make disk changes in broken FS or nologreplay is given */ if (btrfs_super_log_root(disk_super) != 0 && !btrfs_test_opt(fs_info, NOLOGREPLAY)) { + btrfs_info(fs_info, "start tree-log replay"); ret = btrfs_replay_log(fs_info, fs_devices); if (ret) { err = ret; -- GitLab From 13b91b8b704340c2ffd1a926a16c63091660ed3d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 5 Feb 2020 17:12:28 +0100 Subject: [PATCH 0075/1278] btrfs: log message when rw remount is attempted with unclean tree-log commit 10a3a3edc5b89a8cd095bc63495fb1e0f42047d9 upstream. A remount to a read-write filesystem is not safe when there's tree-log to be replayed. Files that could be opened until now might be affected by the changes in the tree-log. A regular mount is needed to replay the log so the filesystem presents the consistent view with the pending changes included. 
CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Anand Jain Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3ab79fa00dc7..17a8463ef35c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1801,6 +1801,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) } if (btrfs_super_log_root(fs_info->super_copy) != 0) { + btrfs_warn(fs_info, + "mount required to replay tree-log, cannot remount read-write"); ret = -EINVAL; goto restore; } -- GitLab From cd24510b31c1fb04afcd84847664a76b9033d3c3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 6 Feb 2020 10:42:58 +0000 Subject: [PATCH 0076/1278] arm64: ssbs: Fix context-switch when SSBS is present on all CPUs commit fca3d33d8ad61eb53eca3ee4cac476d1e31b9008 upstream. When all CPUs in the system implement the SSBS extension, the SSBS field in PSTATE is the definitive indication of the mitigation state. Further, when the CPUs implement the SSBS manipulation instructions (advertised to userspace via an HWCAP), EL0 can toggle the SSBS field directly and so we cannot rely on any shadow state such as TIF_SSBD at all. Avoid forcing the SSBS field in context-switch on such a system, and simply rely on the PSTATE register instead. 
Cc: Cc: Catalin Marinas Cc: Srinivas Ramana Fixes: cbdf8a189a66 ("arm64: Force SSBS on context switch") Reviewed-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/process.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 243fd247d04e..ee5ce03c9315 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -354,6 +354,13 @@ static void ssbs_thread_switch(struct task_struct *next) if (unlikely(next->flags & PF_KTHREAD)) return; + /* + * If all CPUs implement the SSBS extension, then we just need to + * context-switch the PSTATE field. + */ + if (cpu_have_feature(cpu_feature(SSBS))) + return; + /* If the mitigation is enabled, then we leave SSBS clear. */ if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || test_tsk_thread_flag(next, TIF_SSBD)) -- GitLab From 8d8d60598a831f5f500fbced9b89475d39202666 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 7 Feb 2020 09:37:41 -0800 Subject: [PATCH 0077/1278] KVM: nVMX: Use correct root level for nested EPT shadow page tables commit 148d735eb55d32848c3379e460ce365f2c1cbe4b upstream. Hardcode the EPT page-walk level for L2 to be 4 levels, as KVM's MMU currently also hardcodes the page walk level for nested EPT to be 4 levels. The L2 guest is all but guaranteed to soft hang on its first instruction when L1 is using EPT, as KVM will construct 4-level page tables and then tell hardware to use 5-level page tables. 
Fixes: 855feb673640 ("KVM: MMU: Add 5 level EPT & Shadow page table support.") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3791ce8d269e..997926a9121c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2968,6 +2968,9 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static int get_ept_level(struct kvm_vcpu *vcpu) { + /* Nested EPT currently only supports 4-level walks. */ + if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) + return 4; if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) return 5; return 4; -- GitLab From 7680efd75c6d8b5c67d68a2aafd5218efe72307e Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 21 Jan 2020 11:12:31 -0600 Subject: [PATCH 0078/1278] perf/x86/amd: Add missing L2 misses event spec to AMD Family 17h's event map commit 25d387287cf0330abf2aad761ce6eee67326a355 upstream. Commit 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h"), claimed L2 misses were unsupported, due to them not being found in its referenced documentation, whose link has now moved [1]. That old documentation listed PMCx064 unit mask bit 3 as: "LsRdBlkC: LS Read Block C S L X Change to X Miss." and bit 0 as: "IcFillMiss: IC Fill Miss" We now have new public documentation [2] with improved descriptions, that clearly indicate what events those unit mask bits represent: Bit 3 now clearly states: "LsRdBlkC: Data Cache Req Miss in L2 (all types)" and bit 0 is: "IcFillMiss: Instruction Cache Req Miss in L2." So we can now add support for L2 misses in perf's genericised events as PMCx064 with both the above unit masks. 
[1] The commit's original documentation reference, "Processor Programming Reference (PPR) for AMD Family 17h Model 01h, Revision B1 Processors", originally available here: https://www.amd.com/system/files/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf is now available here: https://developer.amd.com/wordpress/media/2017/11/54945_PPR_Family_17h_Models_00h-0Fh.pdf [2] "Processor Programming Reference (PPR) for Family 17h Model 31h, Revision B0 Processors", available here: https://developer.amd.com/wp-content/resources/55803_0.54-PUB.pdf Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h") Reported-by: Babu Moger Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Babu Moger Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200121171232.28839-1-kim.phillips@amd.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index defb536aebce..c3ec535fd36b 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -245,6 +245,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, -- GitLab From c9da8ee1491719001a444f4af688b75e72b58418 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 19 Nov 2019 13:17:31 +0800 Subject: [PATCH 0079/1278] padata: Remove broken queue flushing commit 07928d9bfc81640bab36f5190e8725894d93b659 upstream. The function padata_flush_queues is fundamentally broken because it cannot force padata users to complete the request that is underway. IOW padata has to passively wait for the completion of any outstanding work. 
As it stands flushing is used in two places. Its use in padata_stop is simply unnecessary because nothing depends on the queues to be flushed afterwards. The other use in padata_replace is more substantial as we depend on it to free the old pd structure. This patch instead uses the pd->refcnt to dynamically free the pd structure once all requests are complete. Fixes: 2b73b07ab8a4 ("padata: Flush the padata queues actively") Cc: Signed-off-by: Herbert Xu Reviewed-by: Daniel Jordan Signed-off-by: Herbert Xu [dj: leave "pd->pinst = pinst" assignment in padata_alloc_pd()] Signed-off-by: Daniel Jordan Signed-off-by: Greg Kroah-Hartman --- kernel/padata.c | 45 ++++++++++++--------------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index 87540ce72aea..528a251217df 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -34,6 +34,8 @@ #define MAX_OBJ_NUM 1000 +static void padata_free_pd(struct parallel_data *pd); + static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) { int cpu, target_cpu; @@ -292,6 +294,7 @@ static void padata_serial_worker(struct work_struct *serial_work) struct padata_serial_queue *squeue; struct parallel_data *pd; LIST_HEAD(local_list); + int cnt; local_bh_disable(); squeue = container_of(serial_work, struct padata_serial_queue, work); @@ -301,6 +304,8 @@ static void padata_serial_worker(struct work_struct *serial_work) list_replace_init(&squeue->serial.list, &local_list); spin_unlock(&squeue->serial.lock); + cnt = 0; + while (!list_empty(&local_list)) { struct padata_priv *padata; @@ -310,9 +315,12 @@ static void padata_serial_worker(struct work_struct *serial_work) list_del_init(&padata->list); padata->serial(padata); - atomic_dec(&pd->refcnt); + cnt++; } local_bh_enable(); + + if (atomic_sub_and_test(cnt, &pd->refcnt)) + padata_free_pd(pd); } /** @@ -435,7 +443,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, 
setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); atomic_set(&pd->seq_nr, -1); atomic_set(&pd->reorder_objects, 0); - atomic_set(&pd->refcnt, 0); + atomic_set(&pd->refcnt, 1); pd->pinst = pinst; spin_lock_init(&pd->lock); @@ -460,31 +468,6 @@ static void padata_free_pd(struct parallel_data *pd) kfree(pd); } -/* Flush all objects out of the padata queues. */ -static void padata_flush_queues(struct parallel_data *pd) -{ - int cpu; - struct padata_parallel_queue *pqueue; - struct padata_serial_queue *squeue; - - for_each_cpu(cpu, pd->cpumask.pcpu) { - pqueue = per_cpu_ptr(pd->pqueue, cpu); - flush_work(&pqueue->work); - } - - del_timer_sync(&pd->timer); - - if (atomic_read(&pd->reorder_objects)) - padata_reorder(pd); - - for_each_cpu(cpu, pd->cpumask.cbcpu) { - squeue = per_cpu_ptr(pd->squeue, cpu); - flush_work(&squeue->work); - } - - BUG_ON(atomic_read(&pd->refcnt) != 0); -} - static void __padata_start(struct padata_instance *pinst) { pinst->flags |= PADATA_INIT; @@ -498,10 +481,6 @@ static void __padata_stop(struct padata_instance *pinst) pinst->flags &= ~PADATA_INIT; synchronize_rcu(); - - get_online_cpus(); - padata_flush_queues(pinst->pd); - put_online_cpus(); } /* Replace the internal control structure with a new one. 
*/ @@ -522,8 +501,8 @@ static void padata_replace(struct padata_instance *pinst, if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu)) notification_mask |= PADATA_CPU_SERIAL; - padata_flush_queues(pd_old); - padata_free_pd(pd_old); + if (atomic_dec_and_test(&pd_old->refcnt)) + padata_free_pd(pd_old); if (notification_mask) blocking_notifier_call_chain(&pinst->cpumask_change_notifier, -- GitLab From d0a06285fee84381d2c054549f74ff840c957070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 2 Mar 2018 11:07:26 +0100 Subject: [PATCH 0080/1278] serial: imx: ensure that RX irqs are off if RX is off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 76821e222c189b81d553b855ee7054340607eb46 upstream. Make sure that UCR1.RXDMAEN and UCR1.ATDMAEN (for the DMA case) and UCR1.RRDYEN (for the PIO case) are off iff UCR1.RXEN is disabled. This ensures that the fifo isn't read with RX disabled which results in an exception. 
Signed-off-by: Uwe Kleine-König [Backport to v4.14] Signed-off-by: Frieder Schrempf Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 116 ++++++++++++++++++++++++++------------- 1 file changed, 78 insertions(+), 38 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index a81a5be0cf7a..31e1e32c62c9 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -80,7 +80,7 @@ #define UCR1_IDEN (1<<12) /* Idle condition interrupt */ #define UCR1_ICD_REG(x) (((x) & 3) << 10) /* idle condition detect */ #define UCR1_RRDYEN (1<<9) /* Recv ready interrupt enable */ -#define UCR1_RDMAEN (1<<8) /* Recv ready DMA enable */ +#define UCR1_RXDMAEN (1<<8) /* Recv ready DMA enable */ #define UCR1_IREN (1<<7) /* Infrared interface enable */ #define UCR1_TXMPTYEN (1<<6) /* Transimitter empty interrupt enable */ #define UCR1_RTSDEN (1<<5) /* RTS delta interrupt enable */ @@ -352,6 +352,30 @@ static void imx_port_rts_auto(struct imx_port *sport, unsigned long *ucr2) *ucr2 |= UCR2_CTSC; } +/* + * interrupts disabled on entry + */ +static void imx_start_rx(struct uart_port *port) +{ + struct imx_port *sport = (struct imx_port *)port; + unsigned int ucr1, ucr2; + + ucr1 = readl(port->membase + UCR1); + ucr2 = readl(port->membase + UCR2); + + ucr2 |= UCR2_RXEN; + + if (sport->dma_is_enabled) { + ucr1 |= UCR1_RXDMAEN | UCR1_ATDMAEN; + } else { + ucr1 |= UCR1_RRDYEN; + } + + /* Write UCR2 first as it includes RXEN */ + writel(ucr2, port->membase + UCR2); + writel(ucr1, port->membase + UCR1); +} + /* * interrupts disabled on entry */ @@ -378,9 +402,10 @@ static void imx_stop_tx(struct uart_port *port) imx_port_rts_active(sport, &temp); else imx_port_rts_inactive(sport, &temp); - temp |= UCR2_RXEN; writel(temp, port->membase + UCR2); + imx_start_rx(port); + temp = readl(port->membase + UCR4); temp &= ~UCR4_TCEN; writel(temp, port->membase + UCR4); @@ -393,7 +418,7 @@ static void imx_stop_tx(struct uart_port *port) static void 
imx_stop_rx(struct uart_port *port) { struct imx_port *sport = (struct imx_port *)port; - unsigned long temp; + unsigned long ucr1, ucr2; if (sport->dma_is_enabled && sport->dma_is_rxing) { if (sport->port.suspended) { @@ -404,12 +429,18 @@ static void imx_stop_rx(struct uart_port *port) } } - temp = readl(sport->port.membase + UCR2); - writel(temp & ~UCR2_RXEN, sport->port.membase + UCR2); + ucr1 = readl(sport->port.membase + UCR1); + ucr2 = readl(sport->port.membase + UCR2); - /* disable the `Receiver Ready Interrrupt` */ - temp = readl(sport->port.membase + UCR1); - writel(temp & ~UCR1_RRDYEN, sport->port.membase + UCR1); + if (sport->dma_is_enabled) { + ucr1 &= ~(UCR1_RXDMAEN | UCR1_ATDMAEN); + } else { + ucr1 &= ~UCR1_RRDYEN; + } + writel(ucr1, port->membase + UCR1); + + ucr2 &= ~UCR2_RXEN; + writel(ucr2, port->membase + UCR2); } /* @@ -581,10 +612,11 @@ static void imx_start_tx(struct uart_port *port) imx_port_rts_active(sport, &temp); else imx_port_rts_inactive(sport, &temp); - if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - temp &= ~UCR2_RXEN; writel(temp, port->membase + UCR2); + if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) + imx_stop_rx(port); + /* enable transmitter and shifter empty irq */ temp = readl(port->membase + UCR4); temp |= UCR4_TCEN; @@ -1206,7 +1238,7 @@ static void imx_enable_dma(struct imx_port *sport) /* set UCR1 */ temp = readl(sport->port.membase + UCR1); - temp |= UCR1_RDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN; + temp |= UCR1_RXDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN; writel(temp, sport->port.membase + UCR1); temp = readl(sport->port.membase + UCR2); @@ -1224,7 +1256,7 @@ static void imx_disable_dma(struct imx_port *sport) /* clear UCR1 */ temp = readl(sport->port.membase + UCR1); - temp &= ~(UCR1_RDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN); + temp &= ~(UCR1_RXDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN); writel(temp, sport->port.membase + UCR1); /* clear UCR2 */ @@ -1289,11 +1321,9 @@ static int imx_startup(struct uart_port *port) writel(USR1_RTSD | 
USR1_DTRD, sport->port.membase + USR1); writel(USR2_ORE, sport->port.membase + USR2); - if (sport->dma_is_inited && !sport->dma_is_enabled) - imx_enable_dma(sport); - temp = readl(sport->port.membase + UCR1); - temp |= UCR1_RRDYEN | UCR1_UARTEN; + temp &= ~UCR1_RRDYEN; + temp |= UCR1_UARTEN; if (sport->have_rtscts) temp |= UCR1_RTSDEN; @@ -1332,14 +1362,13 @@ static int imx_startup(struct uart_port *port) */ imx_enable_ms(&sport->port); - /* - * Start RX DMA immediately instead of waiting for RX FIFO interrupts. - * In our iMX53 the average delay for the first reception dropped from - * approximately 35000 microseconds to 1000 microseconds. - */ - if (sport->dma_is_enabled) { - imx_disable_rx_int(sport); + if (sport->dma_is_inited) { + imx_enable_dma(sport); start_rx_dma(sport); + } else { + temp = readl(sport->port.membase + UCR1); + temp |= UCR1_RRDYEN; + writel(temp, sport->port.membase + UCR1); } spin_unlock_irqrestore(&sport->port.lock, flags); @@ -1386,7 +1415,8 @@ static void imx_shutdown(struct uart_port *port) spin_lock_irqsave(&sport->port.lock, flags); temp = readl(sport->port.membase + UCR1); - temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN); + temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN | + UCR1_RXDMAEN | UCR1_ATDMAEN); writel(temp, sport->port.membase + UCR1); spin_unlock_irqrestore(&sport->port.lock, flags); @@ -1659,7 +1689,7 @@ static int imx_poll_init(struct uart_port *port) { struct imx_port *sport = (struct imx_port *)port; unsigned long flags; - unsigned long temp; + unsigned long ucr1, ucr2; int retval; retval = clk_prepare_enable(sport->clk_ipg); @@ -1673,16 +1703,29 @@ static int imx_poll_init(struct uart_port *port) spin_lock_irqsave(&sport->port.lock, flags); - temp = readl(sport->port.membase + UCR1); + /* + * Be careful about the order of enabling bits here. First enable the + * receiver (UARTEN + RXEN) and only then the corresponding irqs. 
+ * This prevents that a character that already sits in the RX fifo is + * triggering an irq but the try to fetch it from there results in an + * exception because UARTEN or RXEN is still off. + */ + ucr1 = readl(port->membase + UCR1); + ucr2 = readl(port->membase + UCR2); + if (is_imx1_uart(sport)) - temp |= IMX1_UCR1_UARTCLKEN; - temp |= UCR1_UARTEN | UCR1_RRDYEN; - temp &= ~(UCR1_TXMPTYEN | UCR1_RTSDEN); - writel(temp, sport->port.membase + UCR1); + ucr1 |= IMX1_UCR1_UARTCLKEN; - temp = readl(sport->port.membase + UCR2); - temp |= UCR2_RXEN; - writel(temp, sport->port.membase + UCR2); + ucr1 |= UCR1_UARTEN; + ucr1 &= ~(UCR1_TXMPTYEN | UCR1_RTSDEN | UCR1_RRDYEN); + + ucr2 |= UCR2_RXEN; + + writel(ucr1, sport->port.membase + UCR1); + writel(ucr2, sport->port.membase + UCR2); + + /* now enable irqs */ + writel(ucr1 | UCR1_RRDYEN, sport->port.membase + UCR1); spin_unlock_irqrestore(&sport->port.lock, flags); @@ -1742,11 +1785,8 @@ static int imx_rs485_config(struct uart_port *port, /* Make sure Rx is enabled in case Tx is active with Rx disabled */ if (!(rs485conf->flags & SER_RS485_ENABLED) || - rs485conf->flags & SER_RS485_RX_DURING_TX) { - temp = readl(sport->port.membase + UCR2); - temp |= UCR2_RXEN; - writel(temp, sport->port.membase + UCR2); - } + rs485conf->flags & SER_RS485_RX_DURING_TX) + imx_start_rx(port); port->rs485 = *rs485conf; -- GitLab From 55f5f2c1f39320358df3dcf026480a29a44e98be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 18 Feb 2018 22:02:44 +0100 Subject: [PATCH 0081/1278] serial: imx: Only handle irqs that are actually enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 437768962f754d9501e5ba4d98b1f2a89dc62028 upstream. Handling an irq that isn't enabled can have some undesired side effects. Some of these are mentioned in the newly introduced code comment. Some of the irq sources already had their handling right, some don't. 
Handle them all in the same consistent way. The change for USR1_RRDY and USR1_AGTIM drops the check for dma_is_enabled. This is correct as UCR1_RRDYEN and UCR2_ATEN are always off if dma is enabled. Signed-off-by: Uwe Kleine-König Reviewed-by: Shawn Guo [Backport to v4.14] Signed-off-by: Frieder Schrempf Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 53 +++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 31e1e32c62c9..969497599e88 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -843,14 +843,42 @@ static void imx_mctrl_check(struct imx_port *sport) static irqreturn_t imx_int(int irq, void *dev_id) { struct imx_port *sport = dev_id; - unsigned int sts; - unsigned int sts2; + unsigned int usr1, usr2, ucr1, ucr2, ucr3, ucr4; irqreturn_t ret = IRQ_NONE; - sts = readl(sport->port.membase + USR1); - sts2 = readl(sport->port.membase + USR2); + usr1 = readl(sport->port.membase + USR1); + usr2 = readl(sport->port.membase + USR2); + ucr1 = readl(sport->port.membase + UCR1); + ucr2 = readl(sport->port.membase + UCR2); + ucr3 = readl(sport->port.membase + UCR3); + ucr4 = readl(sport->port.membase + UCR4); - if (sts & (USR1_RRDY | USR1_AGTIM)) { + /* + * Even if a condition is true that can trigger an irq only handle it if + * the respective irq source is enabled. This prevents some undesired + * actions, for example if a character that sits in the RX FIFO and that + * should be fetched via DMA is tried to be fetched using PIO. Or the + * receiver is currently off and so reading from URXD0 results in an + * exception. So just mask the (raw) status bits for disabled irqs. 
+ */ + if ((ucr1 & UCR1_RRDYEN) == 0) + usr1 &= ~USR1_RRDY; + if ((ucr2 & UCR2_ATEN) == 0) + usr1 &= ~USR1_AGTIM; + if ((ucr1 & UCR1_TXMPTYEN) == 0) + usr1 &= ~USR1_TRDY; + if ((ucr4 & UCR4_TCEN) == 0) + usr2 &= ~USR2_TXDC; + if ((ucr3 & UCR3_DTRDEN) == 0) + usr1 &= ~USR1_DTRD; + if ((ucr1 & UCR1_RTSDEN) == 0) + usr1 &= ~USR1_RTSD; + if ((ucr3 & UCR3_AWAKEN) == 0) + usr1 &= ~USR1_AWAKE; + if ((ucr4 & UCR4_OREN) == 0) + usr2 &= ~USR2_ORE; + + if (usr1 & (USR1_RRDY | USR1_AGTIM)) { if (sport->dma_is_enabled) imx_dma_rxint(sport); else @@ -858,18 +886,15 @@ static irqreturn_t imx_int(int irq, void *dev_id) ret = IRQ_HANDLED; } - if ((sts & USR1_TRDY && - readl(sport->port.membase + UCR1) & UCR1_TXMPTYEN) || - (sts2 & USR2_TXDC && - readl(sport->port.membase + UCR4) & UCR4_TCEN)) { + if ((usr1 & USR1_TRDY) || (usr2 & USR2_TXDC)) { imx_txint(irq, dev_id); ret = IRQ_HANDLED; } - if (sts & USR1_DTRD) { + if (usr1 & USR1_DTRD) { unsigned long flags; - if (sts & USR1_DTRD) + if (usr1 & USR1_DTRD) writel(USR1_DTRD, sport->port.membase + USR1); spin_lock_irqsave(&sport->port.lock, flags); @@ -879,17 +904,17 @@ static irqreturn_t imx_int(int irq, void *dev_id) ret = IRQ_HANDLED; } - if (sts & USR1_RTSD) { + if (usr1 & USR1_RTSD) { imx_rtsint(irq, dev_id); ret = IRQ_HANDLED; } - if (sts & USR1_AWAKE) { + if (usr1 & USR1_AWAKE) { writel(USR1_AWAKE, sport->port.membase + USR1); ret = IRQ_HANDLED; } - if (sts2 & USR2_ORE) { + if (usr2 & USR2_ORE) { sport->port.icount.overrun++; writel(USR2_ORE, sport->port.membase + USR2); ret = IRQ_HANDLED; -- GitLab From dda7557605b53d36a59cac5680f14e83bc06c3fa Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 10 Feb 2020 08:10:33 -0500 Subject: [PATCH 0082/1278] IB/hfi1: Close window for pq and request coliding commit be8638344c70bf492963ace206a9896606b6922d upstream. 
Cleaning up a pq can result in the following warning and panic: WARNING: CPU: 52 PID: 77418 at lib/list_debug.c:53 __list_del_entry+0x63/0xd0 list_del corruption, ffff88cb2c6ac068->next is LIST_POISON1 (dead000000000100) Modules linked in: mmfs26(OE) mmfslinux(OE) tracedev(OE) 8021q garp mrp ib_isert iscsi_target_mod target_core_mod crc_t10dif crct10dif_generic opa_vnic rpcrdma ib_iser libiscsi scsi_transport_iscsi ib_ipoib(OE) bridge stp llc iTCO_wdt iTCO_vendor_support intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crct10dif_pclmul crct10dif_common crc32_pclmul ghash_clmulni_intel ast aesni_intel ttm lrw gf128mul glue_helper ablk_helper drm_kms_helper cryptd syscopyarea sysfillrect sysimgblt fb_sys_fops drm pcspkr joydev lpc_ich mei_me drm_panel_orientation_quirks i2c_i801 mei wmi ipmi_si ipmi_devintf ipmi_msghandler nfit libnvdimm acpi_power_meter acpi_pad hfi1(OE) rdmavt(OE) rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core binfmt_misc numatools(OE) xpmem(OE) ip_tables nfsv3 nfs_acl nfs lockd grace sunrpc fscache igb ahci i2c_algo_bit libahci dca ptp libata pps_core crc32c_intel [last unloaded: i2c_algo_bit] CPU: 52 PID: 77418 Comm: pvbatch Kdump: loaded Tainted: G OE ------------ 3.10.0-957.38.3.el7.x86_64 #1 Hardware name: HPE.COM HPE SGI 8600-XA730i Gen10/X11DPT-SB-SG007, BIOS SBED1229 01/22/2019 Call Trace: [] dump_stack+0x19/0x1b [] __warn+0xd8/0x100 [] warn_slowpath_fmt+0x5f/0x80 [] __list_del_entry+0x63/0xd0 [] list_del+0xd/0x30 [] kmem_cache_destroy+0x50/0x110 [] hfi1_user_sdma_free_queues+0xf0/0x200 [hfi1] [] hfi1_file_close+0x70/0x1e0 [hfi1] [] __fput+0xec/0x260 [] ____fput+0xe/0x10 [] task_work_run+0xbb/0xe0 [] do_notify_resume+0xa5/0xc0 [] int_signal+0x12/0x17 BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: [] kmem_cache_close+0x7e/0x300 PGD 2cdab19067 PUD 2f7bfdb067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: mmfs26(OE) mmfslinux(OE) tracedev(OE) 8021q garp mrp ib_isert 
iscsi_target_mod target_core_mod crc_t10dif crct10dif_generic opa_vnic rpcrdma ib_iser libiscsi scsi_transport_iscsi ib_ipoib(OE) bridge stp llc iTCO_wdt iTCO_vendor_support intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crct10dif_pclmul crct10dif_common crc32_pclmul ghash_clmulni_intel ast aesni_intel ttm lrw gf128mul glue_helper ablk_helper drm_kms_helper cryptd syscopyarea sysfillrect sysimgblt fb_sys_fops drm pcspkr joydev lpc_ich mei_me drm_panel_orientation_quirks i2c_i801 mei wmi ipmi_si ipmi_devintf ipmi_msghandler nfit libnvdimm acpi_power_meter acpi_pad hfi1(OE) rdmavt(OE) rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core binfmt_misc numatools(OE) xpmem(OE) ip_tables nfsv3 nfs_acl nfs lockd grace sunrpc fscache igb ahci i2c_algo_bit libahci dca ptp libata pps_core crc32c_intel [last unloaded: i2c_algo_bit] CPU: 52 PID: 77418 Comm: pvbatch Kdump: loaded Tainted: G W OE ------------ 3.10.0-957.38.3.el7.x86_64 #1 Hardware name: HPE.COM HPE SGI 8600-XA730i Gen10/X11DPT-SB-SG007, BIOS SBED1229 01/22/2019 task: ffff88cc26db9040 ti: ffff88b5393a8000 task.ti: ffff88b5393a8000 RIP: 0010:[] [] kmem_cache_close+0x7e/0x300 RSP: 0018:ffff88b5393abd60 EFLAGS: 00010287 RAX: 0000000000000000 RBX: ffff88cb2c6ac000 RCX: 0000000000000003 RDX: 0000000000000400 RSI: 0000000000000400 RDI: ffffffff9095b800 RBP: ffff88b5393abdb0 R08: ffffffff9095b808 R09: ffffffff8ff77c19 R10: ffff88b73ce1f160 R11: ffffddecddde9800 R12: ffff88cb2c6ac000 R13: 000000000000000c R14: ffff88cf3fdca780 R15: 0000000000000000 FS: 00002aaaaab52500(0000) GS:ffff88b73ce00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 0000002d27664000 CR4: 00000000007607e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: [] __kmem_cache_shutdown+0x14/0x80 [] kmem_cache_destroy+0x58/0x110 [] 
hfi1_user_sdma_free_queues+0xf0/0x200 [hfi1] [] hfi1_file_close+0x70/0x1e0 [hfi1] [] __fput+0xec/0x260 [] ____fput+0xe/0x10 [] task_work_run+0xbb/0xe0 [] do_notify_resume+0xa5/0xc0 [] int_signal+0x12/0x17 Code: 00 00 ba 00 04 00 00 0f 4f c2 3d 00 04 00 00 89 45 bc 0f 84 e7 01 00 00 48 63 45 bc 49 8d 04 c4 48 89 45 b0 48 8b 80 c8 00 00 00 <48> 8b 78 10 48 89 45 c0 48 83 c0 10 48 89 45 d0 48 8b 17 48 39 RIP [] kmem_cache_close+0x7e/0x300 RSP CR2: 0000000000000010 The panic is the result of slab entries being freed during the destruction of the pq slab. The code attempts to quiesce the pq, but looking for n_req == 0 doesn't account for new requests. Fix the issue by using SRCU to get a pq pointer and adjust the pq free logic to NULL the fd pq pointer prior to the quiesce. Fixes: e87473bc1b6c ("IB/hfi1: Only set fd pointer when base context is completely initialized") Link: https://lore.kernel.org/r/20200210131033.87408.81174.stgit@awfm-01.aw.intel.com Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/file_ops.c | 52 ++++++++++++++--------- drivers/infiniband/hw/hfi1/hfi.h | 5 ++- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 3 -- drivers/infiniband/hw/hfi1/user_sdma.c | 17 +++++--- 4 files changed, 48 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 76861a8b5c1e..b3ab803bf8b1 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -195,23 +195,24 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd = kzalloc(sizeof(*fd), GFP_KERNEL); - if (fd) { - fd->rec_cpu_num = -1; /* no cpu affinity by default */ - fd->mm = current->mm; - mmgrab(fd->mm); - fd->dd = dd; - kobject_get(&fd->dd->kobj); - fp->private_data = fd; - } else { - fp->private_data = NULL; - - if (atomic_dec_and_test(&dd->user_refcount)) - 
complete(&dd->user_comp); - - return -ENOMEM; - } - + if (!fd || init_srcu_struct(&fd->pq_srcu)) + goto nomem; + spin_lock_init(&fd->pq_rcu_lock); + spin_lock_init(&fd->tid_lock); + spin_lock_init(&fd->invalid_lock); + fd->rec_cpu_num = -1; /* no cpu affinity by default */ + fd->mm = current->mm; + mmgrab(fd->mm); + fd->dd = dd; + kobject_get(&fd->dd->kobj); + fp->private_data = fd; return 0; +nomem: + kfree(fd); + fp->private_data = NULL; + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + return -ENOMEM; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -417,21 +418,30 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) { struct hfi1_filedata *fd = kiocb->ki_filp->private_data; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; int done = 0, reqs = 0; unsigned long dim = from->nr_segs; + int idx; - if (!cq || !pq) + idx = srcu_read_lock(&fd->pq_srcu); + pq = srcu_dereference(fd->pq, &fd->pq_srcu); + if (!cq || !pq) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EIO; + } - if (!iter_is_iovec(from) || !dim) + if (!iter_is_iovec(from) || !dim) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EINVAL; + } trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim); - if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) + if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) { + srcu_read_unlock(&fd->pq_srcu, idx); return -ENOSPC; + } while (dim) { int ret; @@ -449,6 +459,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) reqs++; } + srcu_read_unlock(&fd->pq_srcu, idx); return reqs; } @@ -824,6 +835,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) if (atomic_dec_and_test(&dd->user_refcount)) complete(&dd->user_comp); + cleanup_srcu_struct(&fdata->pq_srcu); kfree(fdata); return 0; } diff --git 
a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index af550c1767e3..810ef5114772 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1353,10 +1353,13 @@ struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { + struct srcu_struct pq_srcu; struct hfi1_devdata *dd; struct hfi1_ctxtdata *uctxt; struct hfi1_user_sdma_comp_q *cq; - struct hfi1_user_sdma_pkt_q *pq; + /* update side lock for SRCU */ + spinlock_t pq_rcu_lock; + struct hfi1_user_sdma_pkt_q __rcu *pq; u16 subctxt; /* for cpu affinity; -1 if none */ int rec_cpu_num; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index b38e3808836c..c6d085e1c10d 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -90,9 +90,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, struct hfi1_devdata *dd = uctxt->dd; int ret = 0; - spin_lock_init(&fd->tid_lock); - spin_lock_init(&fd->invalid_lock); - fd->entry_to_rb = kcalloc(uctxt->expected_count, sizeof(struct rb_node *), GFP_KERNEL); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 4854a4a453b5..f23d47194c12 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -179,7 +179,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, pq = kzalloc(sizeof(*pq), GFP_KERNEL); if (!pq) return -ENOMEM; - pq->dd = dd; pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; @@ -236,7 +235,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, goto pq_mmu_fail; } - fd->pq = pq; + rcu_assign_pointer(fd->pq, pq); fd->cq = cq; return 0; @@ -264,8 +263,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt); - pq = fd->pq; + spin_lock(&fd->pq_rcu_lock); + pq = srcu_dereference_check(fd->pq, &fd->pq_srcu, + 
lockdep_is_held(&fd->pq_rcu_lock)); if (pq) { + rcu_assign_pointer(fd->pq, NULL); + spin_unlock(&fd->pq_rcu_lock); + synchronize_srcu(&fd->pq_srcu); + /* at this point there can be no more new requests */ if (pq->handler) hfi1_mmu_rb_unregister(pq->handler); iowait_sdma_drain(&pq->busy); @@ -277,7 +282,8 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); kfree(pq); - fd->pq = NULL; + } else { + spin_unlock(&fd->pq_rcu_lock); } if (fd->cq) { vfree(fd->cq->comps); @@ -321,7 +327,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, { int ret = 0, i; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq = + srcu_dereference(fd->pq, &fd->pq_srcu); struct hfi1_user_sdma_comp_q *cq = fd->cq; struct hfi1_devdata *dd = pq->dd; unsigned long idx = 0; -- GitLab From 57456970b8c3d0f3591e92ce197be4593f5c493b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 12 Feb 2020 10:06:51 +0200 Subject: [PATCH 0083/1278] RDMA/core: Fix protection fault in get_pkey_idx_qp_list commit 1dd017882e01d2fcd9c5dbbf1eb376211111c393 upstream. We don't need to set pkey as valid in case that user set only one of pkey index or port number, otherwise it will be resulted in NULL pointer dereference while accessing to uninitialized pkey list. The following crash from Syzkaller revealed it. 
kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 14753 Comm: syz-executor.2 Not tainted 5.5.0-rc5 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 RIP: 0010:get_pkey_idx_qp_list+0x161/0x2d0 Code: 01 00 00 49 8b 5e 20 4c 39 e3 0f 84 b9 00 00 00 e8 e4 42 6e fe 48 8d 7b 10 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 01 0f 8e d0 00 00 00 48 8d 7d 04 48 b8 RSP: 0018:ffffc9000bc6f950 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff82c8bdec RDX: 0000000000000002 RSI: ffffc900030a8000 RDI: 0000000000000010 RBP: ffff888112c8ce80 R08: 0000000000000004 R09: fffff5200178df1f R10: 0000000000000001 R11: fffff5200178df1f R12: ffff888115dc4430 R13: ffff888115da8498 R14: ffff888115dc4410 R15: ffff888115da8000 FS: 00007f20777de700(0000) GS:ffff88811b100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2f721000 CR3: 00000001173ca002 CR4: 0000000000360ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: port_pkey_list_insert+0xd7/0x7c0 ib_security_modify_qp+0x6fa/0xfc0 _ib_modify_qp+0x8c4/0xbf0 modify_qp+0x10da/0x16d0 ib_uverbs_modify_qp+0x9a/0x100 ib_uverbs_write+0xaa5/0xdf0 __vfs_write+0x7c/0x100 vfs_write+0x168/0x4a0 ksys_write+0xc8/0x200 do_syscall_64+0x9c/0x390 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: d291f1a65232 ("IB/core: Enforce PKey security on QPs") Link: https://lore.kernel.org/r/20200212080651.GB679970@unreal Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Message-Id: <20200212080651.GB679970@unreal> Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/security.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git 
a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index a3dd88c57be7..9b8276691329 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -338,22 +338,16 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, if (!new_pps) return NULL; - if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) { - if (!qp_pps) { - new_pps->main.port_num = qp_attr->port_num; - new_pps->main.pkey_index = qp_attr->pkey_index; - } else { - new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ? - qp_attr->port_num : - qp_pps->main.port_num; - - new_pps->main.pkey_index = - (qp_attr_mask & IB_QP_PKEY_INDEX) ? - qp_attr->pkey_index : - qp_pps->main.pkey_index; - } + if (qp_attr_mask & IB_QP_PORT) + new_pps->main.port_num = + (qp_pps) ? qp_pps->main.port_num : qp_attr->port_num; + if (qp_attr_mask & IB_QP_PKEY_INDEX) + new_pps->main.pkey_index = (qp_pps) ? qp_pps->main.pkey_index : + qp_attr->pkey_index; + if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) new_pps->main.state = IB_PORT_PKEY_VALID; - } else if (qp_pps) { + + if (!(qp_attr_mask & (IB_QP_PKEY_INDEX || IB_QP_PORT)) && qp_pps) { new_pps->main.port_num = qp_pps->main.port_num; new_pps->main.pkey_index = qp_pps->main.pkey_index; if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) -- GitLab From 793a70864ba18df05c4e3be52b0d542e79ea562e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 8 Feb 2020 07:08:59 -0700 Subject: [PATCH 0084/1278] s390/time: Fix clk type in get_tod_clock commit 0f8a206df7c920150d2aa45574fba0ab7ff6be4f upstream. 
Clang warns: In file included from ../arch/s390/boot/startup.c:3: In file included from ../include/linux/elf.h:5: In file included from ../arch/s390/include/asm/elf.h:132: In file included from ../include/linux/compat.h:10: In file included from ../include/linux/time.h:74: In file included from ../include/linux/time32.h:13: In file included from ../include/linux/timex.h:65: ../arch/s390/include/asm/timex.h:160:20: warning: passing 'unsigned char [16]' to parameter of type 'char *' converts between pointers to integer types with different sign [-Wpointer-sign] get_tod_clock_ext(clk); ^~~ ../arch/s390/include/asm/timex.h:149:44: note: passing argument to parameter 'clk' here static inline void get_tod_clock_ext(char *clk) ^ Change clk's type to just be char so that it matches what happens in get_tod_clock_ext. Fixes: 57b28f66316d ("[S390] s390_hypfs: Add new attributes") Link: https://github.com/ClangBuiltLinux/linux/issues/861 Link: http://lkml.kernel.org/r/20200208140858.47970-1-natechancellor@gmail.com Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/timex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 2dc9eb4e1acc..b6a4ce9dafaf 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -155,7 +155,7 @@ static inline void get_tod_clock_ext(char *clk) static inline unsigned long long get_tod_clock(void) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; + char clk[STORE_CLOCK_EXT_SIZE]; get_tod_clock_ext(clk); return *((unsigned long long *)&clk[1]); -- GitLab From ab9444f69c53374b04906f9a49976048531456a5 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 21 Jan 2020 11:01:25 -0800 Subject: [PATCH 0085/1278] perf/x86/intel: Fix inaccurate period in context switch for auto-reload commit f861854e1b435b27197417f6f90d87188003cb24 upstream. 
Perf doesn't take the left period into account when auto-reload is enabled with fixed period sampling mode in context switch. The MSR trace of the perf command is shown below. (The MSR trace is simplified from a ftrace log.) #perf record -e cycles:p -c 2000000 -- ./triad_loop //The MSR trace of task schedule out //perf disable all counters, disable PEBS, disable GP counter 0, //read GP counter 0, and re-enable all counters. //The counter 0 stops at 0xfffffff82840 write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value 0 write_msr: MSR_IA32_PEBS_ENABLE(3f1), value 0 write_msr: MSR_P6_EVNTSEL0(186), value 40003003c rdpmc: 0, value fffffff82840 write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value f000000ff //The MSR trace of the same task schedule in again //perf disable all counters, enable and set GP counter 0, //enable PEBS, and re-enable all counters. //0xffffffe17b80 (-2000000) is written to GP counter 0. write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value 0 write_msr: MSR_IA32_PMC0(4c1), value ffffffe17b80 write_msr: MSR_P6_EVNTSEL0(186), value 40043003c write_msr: MSR_IA32_PEBS_ENABLE(3f1), value 1 write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value f000000ff When the same task is scheduled in again, the counter should start from the previous left period. However, it starts from the fixed period -2000000 again. A special variant of intel_pmu_save_and_restart() is used for auto-reload, which doesn't update the hwc->period_left. When the monitored task schedules in again, perf doesn't know the left period. The fixed period is used, which is inaccurate. With auto-reload, the counter always has a negative counter value. So the left period is -value. Update the period_left in intel_pmu_save_and_restart_reload().
With the patch: //The MSR trace of task schedule out write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value 0 write_msr: MSR_IA32_PEBS_ENABLE(3f1), value 0 write_msr: MSR_P6_EVNTSEL0(186), value 40003003c rdpmc: 0, value ffffffe25cbc write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value f000000ff //The MSR trace of the same task schedule in again write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value 0 write_msr: MSR_IA32_PMC0(4c1), value ffffffe25cbc write_msr: MSR_P6_EVNTSEL0(186), value 40043003c write_msr: MSR_IA32_PEBS_ENABLE(3f1), value 1 write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value f000000ff Fixes: d31fc13fdcb2 ("perf/x86/intel: Fix event update for auto-reload") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200121190125.3389-1-kan.liang@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/ds.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 3310f9f6c3e1..550b7814ef92 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1368,6 +1368,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) old = ((s64)(prev_raw_count << shift) >> shift); local64_add(new - old + count * period, &event->count); + local64_set(&hwc->period_left, -new); + perf_event_update_userpage(event); return 0; -- GitLab From 1614d08d34152295c1bb7690d3b16b66bdbb86b4 Mon Sep 17 00:00:00 2001 From: Mike Jones Date: Tue, 28 Jan 2020 10:59:59 -0700 Subject: [PATCH 0086/1278] hwmon: (pmbus/ltc2978) Fix PMBus polling of MFR_COMMON definitions. commit cf2b012c90e74e85d8aea7d67e48868069cfee0c upstream. Change 21537dc driver PMBus polling of MFR_COMMON from bits 5/4 to bits 6/5. This fixes an LTC297X family bug where polling always returns not busy even when the part is busy.
This fixes a LTC388X and LTM467X bug where polling used PEND and NOT_IN_TRANS, and BUSY was not polled, which can lead to NACKing of commands. LTC388X and LTM467X modules now poll BUSY and PEND, increasing reliability by eliminating NACKing of commands. Signed-off-by: Mike Jones Link: https://lore.kernel.org/r/1580234400-2829-2-git-send-email-michael-a1.jones@analog.com Fixes: e04d1ce9bbb49 ("hwmon: (ltc2978) Add polling for chips requiring it") Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/pmbus/ltc2978.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/pmbus/ltc2978.c b/drivers/hwmon/pmbus/ltc2978.c index 58b789c28b48..94eea2ac6251 100644 --- a/drivers/hwmon/pmbus/ltc2978.c +++ b/drivers/hwmon/pmbus/ltc2978.c @@ -89,8 +89,8 @@ enum chips { ltc2974, ltc2975, ltc2977, ltc2978, ltc2980, ltc3880, ltc3882, #define LTC_POLL_TIMEOUT 100 /* in milli-seconds */ -#define LTC_NOT_BUSY BIT(5) -#define LTC_NOT_PENDING BIT(4) +#define LTC_NOT_BUSY BIT(6) +#define LTC_NOT_PENDING BIT(5) /* * LTC2978 clears peak data whenever the CLEAR_FAULTS command is executed, which -- GitLab From bb43eea5d29ee1880e920fd171b7998efd849ebc Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Tue, 18 Feb 2020 18:58:55 +0800 Subject: [PATCH 0087/1278] jbd2: move the clearing of b_modified flag to the journal_unmap_buffer() [ Upstream commit 6a66a7ded12baa6ebbb2e3e82f8cb91382814839 ] There is no need to delay the clearing of b_modified flag to the transaction committing time when unmapping the journalled buffer, so just move it to the journal_unmap_buffer(). 
Link: https://lore.kernel.org/r/20200213063821.30455-2-yi.zhang@huawei.com Reviewed-by: Jan Kara Signed-off-by: zhangyi (F) Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Sasha Levin --- fs/jbd2/commit.c | 43 +++++++++++++++---------------------------- fs/jbd2/transaction.c | 10 ++++++---- 2 files changed, 21 insertions(+), 32 deletions(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7dd613392592..89cbf45a1dcd 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -973,34 +973,21 @@ void jbd2_journal_commit_transaction(journal_t *journal) * it. */ /* - * A buffer which has been freed while still being journaled by - * a previous transaction. - */ - if (buffer_freed(bh)) { - /* - * If the running transaction is the one containing - * "add to orphan" operation (b_next_transaction != - * NULL), we have to wait for that transaction to - * commit before we can really get rid of the buffer. - * So just clear b_modified to not confuse transaction - * credit accounting and refile the buffer to - * BJ_Forget of the running transaction. If the just - * committed transaction contains "add to orphan" - * operation, we can completely invalidate the buffer - * now. We are rather through in that since the - * buffer may be still accessible when blocksize < - * pagesize and it is attached to the last partial - * page. - */ - jh->b_modified = 0; - if (!jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); - clear_buffer_mapped(bh); - clear_buffer_new(bh); - clear_buffer_req(bh); - bh->b_bdev = NULL; - } + * A buffer which has been freed while still being journaled + * by a previous transaction, refile the buffer to BJ_Forget of + * the running transaction. If the just committed transaction + * contains "add to orphan" operation, we can completely + * invalidate the buffer now. We are rather through in that + * since the buffer may be still accessible when blocksize < + * pagesize and it is attached to the last partial page. 
+ */ + if (buffer_freed(bh) && !jh->b_next_transaction) { + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; } if (buffer_jbddirty(bh)) { diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 7fe422eced89..f2ff141a4479 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2231,14 +2231,16 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, return -EBUSY; } /* - * OK, buffer won't be reachable after truncate. We just set - * j_next_transaction to the running transaction (if there is - * one) and mark buffer as freed so that commit code knows it - * should clear dirty bits when it is done with the buffer. + * OK, buffer won't be reachable after truncate. We just clear + * b_modified to not confuse transaction credit accounting, and + * set j_next_transaction to the running transaction (if there + * is one) and mark buffer as freed so that commit code knows + * it should clear dirty bits when it is done with the buffer. */ set_buffer_freed(bh); if (journal->j_running_transaction && buffer_jbddirty(bh)) jh->b_next_transaction = journal->j_running_transaction; + jh->b_modified = 0; jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); -- GitLab From 800f71280e07d344af90ad9ce9d84e28841e0e9b Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Tue, 18 Feb 2020 18:58:56 +0800 Subject: [PATCH 0088/1278] jbd2: do not clear the BH_Mapped flag when forgetting a metadata buffer [ Upstream commit c96dceeabf765d0b1b1f29c3bf50a5c01315b820 ] Commit 904cdbd41d74 ("jbd2: clear dirty flag when revoking a buffer from an older transaction") set the BH_Freed flag when forgetting a metadata buffer which belongs to the committing transaction, it indicate the committing process clear dirty bits when it is done with the buffer. 
But it also clears the BH_Mapped flag at the same time, which may trigger the NULL pointer oops below when block_size < PAGE_SIZE. rmdir 1 kjournald2 mkdir 2 jbd2_journal_commit_transaction commit transaction N jbd2_journal_forget set_buffer_freed(bh1) jbd2_journal_commit_transaction commit transaction N+1 ... clear_buffer_mapped(bh1) ext4_getblk(bh2 ummapped) ... grow_dev_page init_page_buffers bh1->b_private=NULL bh2->b_private=NULL jbd2_journal_put_journal_head(jh1) __journal_remove_journal_head(hb1) jh1 is NULL and trigger oops *) Dir entry blocks bh1 and bh2 belong to one page, and bh2 has already been unmapped. For the metadata buffer we are forgetting, we should always keep the mapped flag; clearing the dirty flags is enough, so this patch picks out these buffers and keeps their BH_Mapped flag. Link: https://lore.kernel.org/r/20200213063821.30455-3-yi.zhang@huawei.com Fixes: 904cdbd41d74 ("jbd2: clear dirty flag when revoking a buffer from an older transaction") Reviewed-by: Jan Kara Signed-off-by: zhangyi (F) Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Sasha Levin --- fs/jbd2/commit.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 89cbf45a1dcd..cb0da3d4adc0 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -982,12 +982,29 @@ void jbd2_journal_commit_transaction(journal_t *journal) * pagesize and it is attached to the last partial page. */ if (buffer_freed(bh) && !jh->b_next_transaction) { + struct address_space *mapping; + clear_buffer_freed(bh); clear_buffer_jbddirty(bh); - clear_buffer_mapped(bh); - clear_buffer_new(bh); - clear_buffer_req(bh); - bh->b_bdev = NULL; + + /* + * Block device buffers need to stay mapped all the + * time, so it is enough to clear buffer_jbddirty and + * buffer_freed bits. For the file mapping buffers (i.e. + * journalled data) we need to unmap buffer and clear + * more bits.
We also need to be careful about the check + * because the data page mapping can get cleared under + * out hands, which alse need not to clear more bits + * because the page and buffers will be freed and can + * never be reused once we are done with them. + */ + mapping = READ_ONCE(bh->b_page->mapping); + if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) { + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; + } } if (buffer_jbddirty(bh)) { -- GitLab From 2011a54b6161cdeb42ec8d7843170977701f97b6 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Wed, 18 Sep 2019 22:06:58 +0530 Subject: [PATCH 0089/1278] scsi: qla2xxx: fix a potential NULL pointer dereference commit 35a79a63517981a8aea395497c548776347deda8 upstream. alloc_workqueue is not checked for errors and as a result a potential NULL dereference could occur. Link: https://lore.kernel.org/r/1568824618-4366-1-git-send-email-allen.pais@oracle.com Signed-off-by: Allen Pais Reviewed-by: Martin Wilck Acked-by: Himanshu Madhani Signed-off-by: Martin K. 
Petersen [Ajay: Modified to apply on v4.14.y] Signed-off-by: Ajay Kaher Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_os.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5f9d4dbc4a98..d4024015f859 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3178,6 +3178,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) base_vha->mgmt_svr_loop_id, host->sg_tablesize); ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 0); + if (unlikely(!ha->wq)) { + ret = -ENOMEM; + goto probe_failed; + } if (ha->mqenable) { bool mq = false; -- GitLab From aedede2e024cdbb1be3055eaf8683270b5eaccc2 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 20 Feb 2020 06:54:06 -0500 Subject: [PATCH 0090/1278] Revert "KVM: nVMX: Use correct root level for nested EPT shadow page tables" This reverts commit 740d876bd9565857a695ce7c05efda4eba5bc585. Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx/vmx.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 997926a9121c..3791ce8d269e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2968,9 +2968,6 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static int get_ept_level(struct kvm_vcpu *vcpu) { - /* Nested EPT currently only supports 4-level walks. */ - if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) - return 4; if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) return 5; return 4; -- GitLab From 70eb01d7e5c58d755c70dad29cba7465988965e0 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 20 Feb 2020 06:54:12 -0500 Subject: [PATCH 0091/1278] Revert "KVM: VMX: Add non-canonical check on writes to RTIT address MSRs" This reverts commit 57211b7366cc2abf784c35e537b256e7fcddc91e. This patch isn't needed on 4.19 and older. 
Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx/vmx.c | 8033 ---------------------------------------- 1 file changed, 8033 deletions(-) delete mode 100644 arch/x86/kvm/vmx/vmx.c diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c deleted file mode 100644 index 3791ce8d269e..000000000000 --- a/arch/x86/kvm/vmx/vmx.c +++ /dev/null @@ -1,8033 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Kernel-based Virtual Machine driver for Linux - * - * This module enables machines with Intel VT-x extensions to run virtual - * machines without emulation or binary translation. - * - * Copyright (C) 2006 Qumranet, Inc. - * Copyright 2010 Red Hat, Inc. and/or its affiliates. - * - * Authors: - * Avi Kivity - * Yaniv Kamay - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "capabilities.h" -#include "cpuid.h" -#include "evmcs.h" -#include "irq.h" -#include "kvm_cache_regs.h" -#include "lapic.h" -#include "mmu.h" -#include "nested.h" -#include "ops.h" -#include "pmu.h" -#include "trace.h" -#include "vmcs.h" -#include "vmcs12.h" -#include "vmx.h" -#include "x86.h" - -MODULE_AUTHOR("Qumranet"); -MODULE_LICENSE("GPL"); - -static const struct x86_cpu_id vmx_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_VMX), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); - -bool __read_mostly enable_vpid = 1; -module_param_named(vpid, enable_vpid, bool, 0444); - -static bool __read_mostly enable_vnmi = 1; -module_param_named(vnmi, enable_vnmi, bool, S_IRUGO); - -bool __read_mostly flexpriority_enabled = 1; -module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); - -bool __read_mostly enable_ept = 1; -module_param_named(ept, enable_ept, bool, S_IRUGO); - -bool __read_mostly enable_unrestricted_guest = 1; 
-module_param_named(unrestricted_guest, - enable_unrestricted_guest, bool, S_IRUGO); - -bool __read_mostly enable_ept_ad_bits = 1; -module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO); - -static bool __read_mostly emulate_invalid_guest_state = true; -module_param(emulate_invalid_guest_state, bool, S_IRUGO); - -static bool __read_mostly fasteoi = 1; -module_param(fasteoi, bool, S_IRUGO); - -static bool __read_mostly enable_apicv = 1; -module_param(enable_apicv, bool, S_IRUGO); - -/* - * If nested=1, nested virtualization is supported, i.e., guests may use - * VMX and be a hypervisor for its own guests. If nested=0, guests may not - * use VMX instructions. - */ -static bool __read_mostly nested = 1; -module_param(nested, bool, S_IRUGO); - -bool __read_mostly enable_pml = 1; -module_param_named(pml, enable_pml, bool, S_IRUGO); - -static bool __read_mostly dump_invalid_vmcs = 0; -module_param(dump_invalid_vmcs, bool, 0644); - -#define MSR_BITMAP_MODE_X2APIC 1 -#define MSR_BITMAP_MODE_X2APIC_APICV 2 - -#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL - -/* Guest_tsc -> host_tsc conversion requires 64-bit division. 
*/ -static int __read_mostly cpu_preemption_timer_multi; -static bool __read_mostly enable_preemption_timer = 1; -#ifdef CONFIG_X86_64 -module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); -#endif - -#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD) -#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE -#define KVM_VM_CR0_ALWAYS_ON \ - (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ - X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) -#define KVM_CR4_GUEST_OWNED_BITS \ - (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) - -#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE -#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) -#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) - -#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) - -#define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \ - RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \ - RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \ - RTIT_STATUS_BYTECNT)) - -#define MSR_IA32_RTIT_OUTPUT_BASE_MASK \ - (~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f) - -/* - * These 2 parameters are used to config the controls for Pause-Loop Exiting: - * ple_gap: upper bound on the amount of time between two successive - * executions of PAUSE in a loop. Also indicate if ple enabled. - * According to test, this time is usually smaller than 128 cycles. - * ple_window: upper bound on the amount of time a guest is allowed to execute - * in a PAUSE loop. Tests indicate that most spinlocks are held for - * less than 2^12 cycles - * Time is measured based on a counter that runs at the same rate as the TSC, - * refer SDM volume 3b section 21.6.13 & 22.1.3. 
- */ -static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP; -module_param(ple_gap, uint, 0444); - -static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; -module_param(ple_window, uint, 0444); - -/* Default doubles per-vcpu window every exit. */ -static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW; -module_param(ple_window_grow, uint, 0444); - -/* Default resets per-vcpu window every exit to ple_window. */ -static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; -module_param(ple_window_shrink, uint, 0444); - -/* Default is to compute the maximum so we can never overflow. */ -static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; -module_param(ple_window_max, uint, 0444); - -/* Default is SYSTEM mode, 1 for host-guest mode */ -int __read_mostly pt_mode = PT_MODE_SYSTEM; -module_param(pt_mode, int, S_IRUGO); - -static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); -static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); -static DEFINE_MUTEX(vmx_l1d_flush_mutex); - -/* Storage for pre module init parameter parsing */ -static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO; - -static const struct { - const char *option; - bool for_parse; -} vmentry_l1d_param[] = { - [VMENTER_L1D_FLUSH_AUTO] = {"auto", true}, - [VMENTER_L1D_FLUSH_NEVER] = {"never", true}, - [VMENTER_L1D_FLUSH_COND] = {"cond", true}, - [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true}, - [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false}, - [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false}, -}; - -#define L1D_CACHE_ORDER 4 -static void *vmx_l1d_flush_pages; - -static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) -{ - struct page *page; - unsigned int i; - - if (!boot_cpu_has_bug(X86_BUG_L1TF)) { - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; - return 0; - } - - if (!enable_ept) { - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; - return 0; - } - - if 
(boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { - u64 msr; - - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); - if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; - return 0; - } - } - - /* If set to auto use the default l1tf mitigation method */ - if (l1tf == VMENTER_L1D_FLUSH_AUTO) { - switch (l1tf_mitigation) { - case L1TF_MITIGATION_OFF: - l1tf = VMENTER_L1D_FLUSH_NEVER; - break; - case L1TF_MITIGATION_FLUSH_NOWARN: - case L1TF_MITIGATION_FLUSH: - case L1TF_MITIGATION_FLUSH_NOSMT: - l1tf = VMENTER_L1D_FLUSH_COND; - break; - case L1TF_MITIGATION_FULL: - case L1TF_MITIGATION_FULL_FORCE: - l1tf = VMENTER_L1D_FLUSH_ALWAYS; - break; - } - } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) { - l1tf = VMENTER_L1D_FLUSH_ALWAYS; - } - - if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && - !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { - /* - * This allocation for vmx_l1d_flush_pages is not tied to a VM - * lifetime and so should not be charged to a memcg. - */ - page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); - if (!page) - return -ENOMEM; - vmx_l1d_flush_pages = page_address(page); - - /* - * Initialize each page with a different pattern in - * order to protect against KSM in the nested - * virtualization case. 
- */ - for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) { - memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1, - PAGE_SIZE); - } - } - - l1tf_vmx_mitigation = l1tf; - - if (l1tf != VMENTER_L1D_FLUSH_NEVER) - static_branch_enable(&vmx_l1d_should_flush); - else - static_branch_disable(&vmx_l1d_should_flush); - - if (l1tf == VMENTER_L1D_FLUSH_COND) - static_branch_enable(&vmx_l1d_flush_cond); - else - static_branch_disable(&vmx_l1d_flush_cond); - return 0; -} - -static int vmentry_l1d_flush_parse(const char *s) -{ - unsigned int i; - - if (s) { - for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { - if (vmentry_l1d_param[i].for_parse && - sysfs_streq(s, vmentry_l1d_param[i].option)) - return i; - } - } - return -EINVAL; -} - -static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) -{ - int l1tf, ret; - - l1tf = vmentry_l1d_flush_parse(s); - if (l1tf < 0) - return l1tf; - - if (!boot_cpu_has(X86_BUG_L1TF)) - return 0; - - /* - * Has vmx_init() run already? If not then this is the pre init - * parameter parsing. In that case just store the value and let - * vmx_init() do the proper setup after enable_ept has been - * established. 
- */ - if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) { - vmentry_l1d_flush_param = l1tf; - return 0; - } - - mutex_lock(&vmx_l1d_flush_mutex); - ret = vmx_setup_l1d_flush(l1tf); - mutex_unlock(&vmx_l1d_flush_mutex); - return ret; -} - -static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) -{ - if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param))) - return sprintf(s, "???\n"); - - return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); -} - -static const struct kernel_param_ops vmentry_l1d_flush_ops = { - .set = vmentry_l1d_flush_set, - .get = vmentry_l1d_flush_get, -}; -module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644); - -static bool guest_state_valid(struct kvm_vcpu *vcpu); -static u32 vmx_segment_access_rights(struct kvm_segment *var); -static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type); - -void vmx_vmexit(void); - -#define vmx_insn_failed(fmt...) \ -do { \ - WARN_ONCE(1, fmt); \ - pr_warn_ratelimited(fmt); \ -} while (0) - -asmlinkage void vmread_error(unsigned long field, bool fault) -{ - if (fault) - kvm_spurious_fault(); - else - vmx_insn_failed("kvm: vmread failed: field=%lx\n", field); -} - -noinline void vmwrite_error(unsigned long field, unsigned long value) -{ - vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n", - field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); -} - -noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr) -{ - vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr); -} - -noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr) -{ - vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr); -} - -noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva) -{ - vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n", - ext, vpid, gva); -} - -noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa) -{ - vmx_insn_failed("kvm: 
invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n", - ext, eptp, gpa); -} - -static DEFINE_PER_CPU(struct vmcs *, vmxarea); -DEFINE_PER_CPU(struct vmcs *, current_vmcs); -/* - * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed - * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it. - */ -static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); - -/* - * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we - * can find which vCPU should be waken up. - */ -static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); -static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); - -static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); -static DEFINE_SPINLOCK(vmx_vpid_lock); - -struct vmcs_config vmcs_config; -struct vmx_capability vmx_capability; - -#define VMX_SEGMENT_FIELD(seg) \ - [VCPU_SREG_##seg] = { \ - .selector = GUEST_##seg##_SELECTOR, \ - .base = GUEST_##seg##_BASE, \ - .limit = GUEST_##seg##_LIMIT, \ - .ar_bytes = GUEST_##seg##_AR_BYTES, \ - } - -static const struct kvm_vmx_segment_field { - unsigned selector; - unsigned base; - unsigned limit; - unsigned ar_bytes; -} kvm_vmx_segment_fields[] = { - VMX_SEGMENT_FIELD(CS), - VMX_SEGMENT_FIELD(DS), - VMX_SEGMENT_FIELD(ES), - VMX_SEGMENT_FIELD(FS), - VMX_SEGMENT_FIELD(GS), - VMX_SEGMENT_FIELD(SS), - VMX_SEGMENT_FIELD(TR), - VMX_SEGMENT_FIELD(LDTR), -}; - -u64 host_efer; -static unsigned long host_idt_base; - -/* - * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm - * will emulate SYSCALL in legacy mode if the vendor string in guest - * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To - * support this emulation, IA32_STAR must always be included in - * vmx_msr_index[], even in i386 builds. 
- */ -const u32 vmx_msr_index[] = { -#ifdef CONFIG_X86_64 - MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, -#endif - MSR_EFER, MSR_TSC_AUX, MSR_STAR, - MSR_IA32_TSX_CTRL, -}; - -#if IS_ENABLED(CONFIG_HYPERV) -static bool __read_mostly enlightened_vmcs = true; -module_param(enlightened_vmcs, bool, 0444); - -/* check_ept_pointer() should be under protection of ept_pointer_lock. */ -static void check_ept_pointer_match(struct kvm *kvm) -{ - struct kvm_vcpu *vcpu; - u64 tmp_eptp = INVALID_PAGE; - int i; - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (!VALID_PAGE(tmp_eptp)) { - tmp_eptp = to_vmx(vcpu)->ept_pointer; - } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) { - to_kvm_vmx(kvm)->ept_pointers_match - = EPT_POINTERS_MISMATCH; - return; - } - } - - to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH; -} - -static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush, - void *data) -{ - struct kvm_tlb_range *range = data; - - return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn, - range->pages); -} - -static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm, - struct kvm_vcpu *vcpu, struct kvm_tlb_range *range) -{ - u64 ept_pointer = to_vmx(vcpu)->ept_pointer; - - /* - * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs address - * of the base of EPT PML4 table, strip off EPT configuration - * information. 
- */ - if (range) - return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK, - kvm_fill_hv_flush_list_func, (void *)range); - else - return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK); -} - -static int hv_remote_flush_tlb_with_range(struct kvm *kvm, - struct kvm_tlb_range *range) -{ - struct kvm_vcpu *vcpu; - int ret = 0, i; - - spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); - - if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK) - check_ept_pointer_match(kvm); - - if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) { - kvm_for_each_vcpu(i, vcpu, kvm) { - /* If ept_pointer is invalid pointer, bypass flush request. */ - if (VALID_PAGE(to_vmx(vcpu)->ept_pointer)) - ret |= __hv_remote_flush_tlb_with_range( - kvm, vcpu, range); - } - } else { - ret = __hv_remote_flush_tlb_with_range(kvm, - kvm_get_vcpu(kvm, 0), range); - } - - spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); - return ret; -} -static int hv_remote_flush_tlb(struct kvm *kvm) -{ - return hv_remote_flush_tlb_with_range(kvm, NULL); -} - -static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) -{ - struct hv_enlightened_vmcs *evmcs; - struct hv_partition_assist_pg **p_hv_pa_pg = - &vcpu->kvm->arch.hyperv.hv_pa_pg; - /* - * Synthetic VM-Exit is not enabled in current code and so All - * evmcs in singe VM shares same assist page. - */ - if (!*p_hv_pa_pg) - *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL); - - if (!*p_hv_pa_pg) - return -ENOMEM; - - evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs; - - evmcs->partition_assist_page = - __pa(*p_hv_pa_pg); - evmcs->hv_vm_id = (unsigned long)vcpu->kvm; - evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; - - return 0; -} - -#endif /* IS_ENABLED(CONFIG_HYPERV) */ - -/* - * Comment's format: document - errata name - stepping - processor name. 
- * Refer from - * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp - */ -static u32 vmx_preemption_cpu_tfms[] = { -/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */ -0x000206E6, -/* 323056.pdf - AAX65 - C2 - Xeon L3406 */ -/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */ -/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */ -0x00020652, -/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */ -0x00020655, -/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */ -/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */ -/* - * 320767.pdf - AAP86 - B1 - - * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile - */ -0x000106E5, -/* 321333.pdf - AAM126 - C0 - Xeon 3500 */ -0x000106A0, -/* 321333.pdf - AAM126 - C1 - Xeon 3500 */ -0x000106A1, -/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */ -0x000106A4, - /* 321333.pdf - AAM126 - D0 - Xeon 3500 */ - /* 321324.pdf - AAK139 - D0 - Xeon 5500 */ - /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */ -0x000106A5, - /* Xeon E3-1220 V2 */ -0x000306A8, -}; - -static inline bool cpu_has_broken_vmx_preemption_timer(void) -{ - u32 eax = cpuid_eax(0x00000001), i; - - /* Clear the reserved bits */ - eax &= ~(0x3U << 14 | 0xfU << 28); - for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++) - if (eax == vmx_preemption_cpu_tfms[i]) - return true; - - return false; -} - -static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu) -{ - return flexpriority_enabled && lapic_in_kernel(vcpu); -} - -static inline bool report_flexpriority(void) -{ - return flexpriority_enabled; -} - -static inline int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) -{ - int i; - - for (i = 0; i < vmx->nmsrs; ++i) - if (vmx_msr_index[vmx->guest_msrs[i].index] == msr) - return i; - return -1; -} - -struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) -{ - int i; - - i = __find_msr_index(vmx, msr); - if (i >= 0) - 
return &vmx->guest_msrs[i]; - return NULL; -} - -static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data) -{ - int ret = 0; - - u64 old_msr_data = msr->data; - msr->data = data; - if (msr - vmx->guest_msrs < vmx->save_nmsrs) { - preempt_disable(); - ret = kvm_set_shared_msr(msr->index, msr->data, - msr->mask); - preempt_enable(); - if (ret) - msr->data = old_msr_data; - } - return ret; -} - -void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) -{ - vmcs_clear(loaded_vmcs->vmcs); - if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) - vmcs_clear(loaded_vmcs->shadow_vmcs); - loaded_vmcs->cpu = -1; - loaded_vmcs->launched = 0; -} - -#ifdef CONFIG_KEXEC_CORE -/* - * This bitmap is used to indicate whether the vmclear - * operation is enabled on all cpus. All disabled by - * default. - */ -static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE; - -static inline void crash_enable_local_vmclear(int cpu) -{ - cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static inline void crash_disable_local_vmclear(int cpu) -{ - cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static inline int crash_local_vmclear_enabled(int cpu) -{ - return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static void crash_vmclear_local_loaded_vmcss(void) -{ - int cpu = raw_smp_processor_id(); - struct loaded_vmcs *v; - - if (!crash_local_vmclear_enabled(cpu)) - return; - - list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), - loaded_vmcss_on_cpu_link) - vmcs_clear(v->vmcs); -} -#else -static inline void crash_enable_local_vmclear(int cpu) { } -static inline void crash_disable_local_vmclear(int cpu) { } -#endif /* CONFIG_KEXEC_CORE */ - -static void __loaded_vmcs_clear(void *arg) -{ - struct loaded_vmcs *loaded_vmcs = arg; - int cpu = raw_smp_processor_id(); - - if (loaded_vmcs->cpu != cpu) - return; /* vcpu migration can race with cpu offline */ - if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) - 
per_cpu(current_vmcs, cpu) = NULL; - crash_disable_local_vmclear(cpu); - list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); - - /* - * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link - * is before setting loaded_vmcs->vcpu to -1 which is done in - * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist - * then adds the vmcs into percpu list before it is deleted. - */ - smp_wmb(); - - loaded_vmcs_init(loaded_vmcs); - crash_enable_local_vmclear(cpu); -} - -void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) -{ - int cpu = loaded_vmcs->cpu; - - if (cpu != -1) - smp_call_function_single(cpu, - __loaded_vmcs_clear, loaded_vmcs, 1); -} - -static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, - unsigned field) -{ - bool ret; - u32 mask = 1 << (seg * SEG_FIELD_NR + field); - - if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) { - kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS); - vmx->segment_cache.bitmask = 0; - } - ret = vmx->segment_cache.bitmask & mask; - vmx->segment_cache.bitmask |= mask; - return ret; -} - -static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) -{ - u16 *p = &vmx->segment_cache.seg[seg].selector; - - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) - *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); - return *p; -} - -static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) -{ - ulong *p = &vmx->segment_cache.seg[seg].base; - - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) - *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); - return *p; -} - -static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) -{ - u32 *p = &vmx->segment_cache.seg[seg].limit; - - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) - *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); - return *p; -} - -static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) -{ - u32 *p = 
&vmx->segment_cache.seg[seg].ar; - - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) - *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); - return *p; -} - -void update_exception_bitmap(struct kvm_vcpu *vcpu) -{ - u32 eb; - - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | - (1u << DB_VECTOR) | (1u << AC_VECTOR); - /* - * Guest access to VMware backdoor ports could legitimately - * trigger #GP because of TSS I/O permission bitmap. - * We intercept those #GP and allow access to them anyway - * as VMware does. - */ - if (enable_vmware_backdoor) - eb |= (1u << GP_VECTOR); - if ((vcpu->guest_debug & - (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == - (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) - eb |= 1u << BP_VECTOR; - if (to_vmx(vcpu)->rmode.vm86_active) - eb = ~0; - if (enable_ept) - eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ - - /* When we are running a nested L2 guest and L1 specified for it a - * certain exception bitmap, we must trap the same exceptions and pass - * them to L1. When running L2, we will only handle the exceptions - * specified above if L1 did not want them. - */ - if (is_guest_mode(vcpu)) - eb |= get_vmcs12(vcpu)->exception_bitmap; - - vmcs_write32(EXCEPTION_BITMAP, eb); -} - -/* - * Check if MSR is intercepted for currently loaded MSR bitmap. 
- */ -static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) -{ - unsigned long *msr_bitmap; - int f = sizeof(unsigned long); - - if (!cpu_has_vmx_msr_bitmap()) - return true; - - msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; - - if (msr <= 0x1fff) { - return !!test_bit(msr, msr_bitmap + 0x800 / f); - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { - msr &= 0x1fff; - return !!test_bit(msr, msr_bitmap + 0xc00 / f); - } - - return true; -} - -static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, - unsigned long entry, unsigned long exit) -{ - vm_entry_controls_clearbit(vmx, entry); - vm_exit_controls_clearbit(vmx, exit); -} - -int vmx_find_msr_index(struct vmx_msrs *m, u32 msr) -{ - unsigned int i; - - for (i = 0; i < m->nr; ++i) { - if (m->val[i].index == msr) - return i; - } - return -ENOENT; -} - -static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) -{ - int i; - struct msr_autoload *m = &vmx->msr_autoload; - - switch (msr) { - case MSR_EFER: - if (cpu_has_load_ia32_efer()) { - clear_atomic_switch_msr_special(vmx, - VM_ENTRY_LOAD_IA32_EFER, - VM_EXIT_LOAD_IA32_EFER); - return; - } - break; - case MSR_CORE_PERF_GLOBAL_CTRL: - if (cpu_has_load_perf_global_ctrl()) { - clear_atomic_switch_msr_special(vmx, - VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); - return; - } - break; - } - i = vmx_find_msr_index(&m->guest, msr); - if (i < 0) - goto skip_guest; - --m->guest.nr; - m->guest.val[i] = m->guest.val[m->guest.nr]; - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); - -skip_guest: - i = vmx_find_msr_index(&m->host, msr); - if (i < 0) - return; - - --m->host.nr; - m->host.val[i] = m->host.val[m->host.nr]; - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); -} - -static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, - unsigned long entry, unsigned long exit, - unsigned long guest_val_vmcs, unsigned long host_val_vmcs, - u64 guest_val, u64 host_val) -{ - 
vmcs_write64(guest_val_vmcs, guest_val); - if (host_val_vmcs != HOST_IA32_EFER) - vmcs_write64(host_val_vmcs, host_val); - vm_entry_controls_setbit(vmx, entry); - vm_exit_controls_setbit(vmx, exit); -} - -static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, - u64 guest_val, u64 host_val, bool entry_only) -{ - int i, j = 0; - struct msr_autoload *m = &vmx->msr_autoload; - - switch (msr) { - case MSR_EFER: - if (cpu_has_load_ia32_efer()) { - add_atomic_switch_msr_special(vmx, - VM_ENTRY_LOAD_IA32_EFER, - VM_EXIT_LOAD_IA32_EFER, - GUEST_IA32_EFER, - HOST_IA32_EFER, - guest_val, host_val); - return; - } - break; - case MSR_CORE_PERF_GLOBAL_CTRL: - if (cpu_has_load_perf_global_ctrl()) { - add_atomic_switch_msr_special(vmx, - VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, - GUEST_IA32_PERF_GLOBAL_CTRL, - HOST_IA32_PERF_GLOBAL_CTRL, - guest_val, host_val); - return; - } - break; - case MSR_IA32_PEBS_ENABLE: - /* PEBS needs a quiescent period after being disabled (to write - * a record). Disabling PEBS through VMX MSR swapping doesn't - * provide that period, so a CPU could write host's record into - * guest's memory. - */ - wrmsrl(MSR_IA32_PEBS_ENABLE, 0); - } - - i = vmx_find_msr_index(&m->guest, msr); - if (!entry_only) - j = vmx_find_msr_index(&m->host, msr); - - if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) || - (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) { - printk_once(KERN_WARNING "Not enough msr switch entries. 
" - "Can't add msr %x\n", msr); - return; - } - if (i < 0) { - i = m->guest.nr++; - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); - } - m->guest.val[i].index = msr; - m->guest.val[i].value = guest_val; - - if (entry_only) - return; - - if (j < 0) { - j = m->host.nr++; - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); - } - m->host.val[j].index = msr; - m->host.val[j].value = host_val; -} - -static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) -{ - u64 guest_efer = vmx->vcpu.arch.efer; - u64 ignore_bits = 0; - - /* Shadow paging assumes NX to be available. */ - if (!enable_ept) - guest_efer |= EFER_NX; - - /* - * LMA and LME handled by hardware; SCE meaningless outside long mode. - */ - ignore_bits |= EFER_SCE; -#ifdef CONFIG_X86_64 - ignore_bits |= EFER_LMA | EFER_LME; - /* SCE is meaningful only in long mode on Intel */ - if (guest_efer & EFER_LMA) - ignore_bits &= ~(u64)EFER_SCE; -#endif - - /* - * On EPT, we can't emulate NX, so we must switch EFER atomically. - * On CPUs that support "load IA32_EFER", always switch EFER - * atomically, since it's faster than switching it manually. - */ - if (cpu_has_load_ia32_efer() || - (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { - if (!(guest_efer & EFER_LMA)) - guest_efer &= ~EFER_LME; - if (guest_efer != host_efer) - add_atomic_switch_msr(vmx, MSR_EFER, - guest_efer, host_efer, false); - else - clear_atomic_switch_msr(vmx, MSR_EFER); - return false; - } else { - clear_atomic_switch_msr(vmx, MSR_EFER); - - guest_efer &= ~ignore_bits; - guest_efer |= host_efer & ignore_bits; - - vmx->guest_msrs[efer_offset].data = guest_efer; - vmx->guest_msrs[efer_offset].mask = ~ignore_bits; - - return true; - } -} - -#ifdef CONFIG_X86_32 -/* - * On 32-bit kernels, VM exits still load the FS and GS bases from the - * VMCS rather than the segment table. KVM uses this helper to figure - * out the current bases to poke them into the VMCS before entry. 
- */ -static unsigned long segment_base(u16 selector) -{ - struct desc_struct *table; - unsigned long v; - - if (!(selector & ~SEGMENT_RPL_MASK)) - return 0; - - table = get_current_gdt_ro(); - - if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) { - u16 ldt_selector = kvm_read_ldt(); - - if (!(ldt_selector & ~SEGMENT_RPL_MASK)) - return 0; - - table = (struct desc_struct *)segment_base(ldt_selector); - } - v = get_desc_base(&table[selector >> 3]); - return v; -} -#endif - -static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range) -{ - u32 i; - - wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status); - wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base); - wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask); - wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match); - for (i = 0; i < addr_range; i++) { - wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]); - wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]); - } -} - -static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range) -{ - u32 i; - - rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status); - rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base); - rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask); - rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match); - for (i = 0; i < addr_range; i++) { - rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]); - rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]); - } -} - -static void pt_guest_enter(struct vcpu_vmx *vmx) -{ - if (pt_mode == PT_MODE_SYSTEM) - return; - - /* - * GUEST_IA32_RTIT_CTL is already set in the VMCS. - * Save host state before VM entry. 
- */ - rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); - if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) { - wrmsrl(MSR_IA32_RTIT_CTL, 0); - pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range); - pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range); - } -} - -static void pt_guest_exit(struct vcpu_vmx *vmx) -{ - if (pt_mode == PT_MODE_SYSTEM) - return; - - if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) { - pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range); - pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range); - } - - /* Reload host state (IA32_RTIT_CTL will be cleared on VM exit). */ - wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); -} - -void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, - unsigned long fs_base, unsigned long gs_base) -{ - if (unlikely(fs_sel != host->fs_sel)) { - if (!(fs_sel & 7)) - vmcs_write16(HOST_FS_SELECTOR, fs_sel); - else - vmcs_write16(HOST_FS_SELECTOR, 0); - host->fs_sel = fs_sel; - } - if (unlikely(gs_sel != host->gs_sel)) { - if (!(gs_sel & 7)) - vmcs_write16(HOST_GS_SELECTOR, gs_sel); - else - vmcs_write16(HOST_GS_SELECTOR, 0); - host->gs_sel = gs_sel; - } - if (unlikely(fs_base != host->fs_base)) { - vmcs_writel(HOST_FS_BASE, fs_base); - host->fs_base = fs_base; - } - if (unlikely(gs_base != host->gs_base)) { - vmcs_writel(HOST_GS_BASE, gs_base); - host->gs_base = gs_base; - } -} - -void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct vmcs_host_state *host_state; -#ifdef CONFIG_X86_64 - int cpu = raw_smp_processor_id(); -#endif - unsigned long fs_base, gs_base; - u16 fs_sel, gs_sel; - int i; - - vmx->req_immediate_exit = false; - - /* - * Note that guest MSRs to be saved/restored can also be changed - * when guest state is loaded. This happens when guest transitions - * to/from long-mode by setting MSR_EFER.LMA. 
- */ - if (!vmx->guest_msrs_ready) { - vmx->guest_msrs_ready = true; - for (i = 0; i < vmx->save_nmsrs; ++i) - kvm_set_shared_msr(vmx->guest_msrs[i].index, - vmx->guest_msrs[i].data, - vmx->guest_msrs[i].mask); - - } - if (vmx->guest_state_loaded) - return; - - host_state = &vmx->loaded_vmcs->host_state; - - /* - * Set host fs and gs selectors. Unfortunately, 22.2.3 does not - * allow segment selectors with cpl > 0 or ti == 1. - */ - host_state->ldt_sel = kvm_read_ldt(); - -#ifdef CONFIG_X86_64 - savesegment(ds, host_state->ds_sel); - savesegment(es, host_state->es_sel); - - gs_base = cpu_kernelmode_gs_base(cpu); - if (likely(is_64bit_mm(current->mm))) { - save_fsgs_for_kvm(); - fs_sel = current->thread.fsindex; - gs_sel = current->thread.gsindex; - fs_base = current->thread.fsbase; - vmx->msr_host_kernel_gs_base = current->thread.gsbase; - } else { - savesegment(fs, fs_sel); - savesegment(gs, gs_sel); - fs_base = read_msr(MSR_FS_BASE); - vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); - } - - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); -#else - savesegment(fs, fs_sel); - savesegment(gs, gs_sel); - fs_base = segment_base(fs_sel); - gs_base = segment_base(gs_sel); -#endif - - vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); - vmx->guest_state_loaded = true; -} - -static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) -{ - struct vmcs_host_state *host_state; - - if (!vmx->guest_state_loaded) - return; - - host_state = &vmx->loaded_vmcs->host_state; - - ++vmx->vcpu.stat.host_state_reload; - -#ifdef CONFIG_X86_64 - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); -#endif - if (host_state->ldt_sel || (host_state->gs_sel & 7)) { - kvm_load_ldt(host_state->ldt_sel); -#ifdef CONFIG_X86_64 - load_gs_index(host_state->gs_sel); -#else - loadsegment(gs, host_state->gs_sel); -#endif - } - if (host_state->fs_sel & 7) - loadsegment(fs, host_state->fs_sel); -#ifdef CONFIG_X86_64 - if (unlikely(host_state->ds_sel | 
host_state->es_sel)) { - loadsegment(ds, host_state->ds_sel); - loadsegment(es, host_state->es_sel); - } -#endif - invalidate_tss_limit(); -#ifdef CONFIG_X86_64 - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); -#endif - load_fixmap_gdt(raw_smp_processor_id()); - vmx->guest_state_loaded = false; - vmx->guest_msrs_ready = false; -} - -#ifdef CONFIG_X86_64 -static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) -{ - preempt_disable(); - if (vmx->guest_state_loaded) - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); - preempt_enable(); - return vmx->msr_guest_kernel_gs_base; -} - -static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) -{ - preempt_disable(); - if (vmx->guest_state_loaded) - wrmsrl(MSR_KERNEL_GS_BASE, data); - preempt_enable(); - vmx->msr_guest_kernel_gs_base = data; -} -#endif - -static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - struct pi_desc old, new; - unsigned int dest; - - /* - * In case of hot-plug or hot-unplug, we may have to undo - * vmx_vcpu_pi_put even if there is no assigned device. And we - * always keep PI.NDST up to date for simplicity: it makes the - * code easier, and CPU migration is not a fast path. - */ - if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) - return; - - /* - * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change - * PI.NDST: pi_post_block is the one expected to change PID.NDST and the - * wakeup handler expects the vCPU to be on the blocked_vcpu_list that - * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up - * correctly. - */ - if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) { - pi_clear_sn(pi_desc); - goto after_clear_sn; - } - - /* The full case. 
*/ - do { - old.control = new.control = pi_desc->control; - - dest = cpu_physical_id(cpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - - new.sn = 0; - } while (cmpxchg64(&pi_desc->control, old.control, - new.control) != old.control); - -after_clear_sn: - - /* - * Clear SN before reading the bitmap. The VT-d firmware - * writes the bitmap and reads SN atomically (5.2.3 in the - * spec), so it doesn't really have a memory barrier that - * pairs with this, but we cannot do that and we need one. - */ - smp_mb__after_atomic(); - - if (!pi_is_pir_empty(pi_desc)) - pi_set_on(pi_desc); -} - -void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - bool already_loaded = vmx->loaded_vmcs->cpu == cpu; - - if (!already_loaded) { - loaded_vmcs_clear(vmx->loaded_vmcs); - local_irq_disable(); - crash_disable_local_vmclear(cpu); - - /* - * Read loaded_vmcs->cpu should be before fetching - * loaded_vmcs->loaded_vmcss_on_cpu_link. - * See the comments in __loaded_vmcs_clear(). - */ - smp_rmb(); - - list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, - &per_cpu(loaded_vmcss_on_cpu, cpu)); - crash_enable_local_vmclear(cpu); - local_irq_enable(); - } - - if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { - per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; - vmcs_load(vmx->loaded_vmcs->vmcs); - indirect_branch_prediction_barrier(); - } - - if (!already_loaded) { - void *gdt = get_current_gdt_ro(); - unsigned long sysenter_esp; - - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - - /* - * Linux uses per-cpu TSS and GDT, so set these when switching - * processors. See 22.2.4. 
- */ - vmcs_writel(HOST_TR_BASE, - (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); - vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ - - rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); - vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ - - vmx->loaded_vmcs->cpu = cpu; - } - - /* Setup TSC multiplier */ - if (kvm_has_tsc_control && - vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) - decache_tsc_multiplier(vmx); -} - -/* - * Switches to specified vcpu, until a matching vcpu_put(), but assumes - * vcpu mutex is already taken. - */ -void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - vmx_vcpu_load_vmcs(vcpu, cpu); - - vmx_vcpu_pi_load(vcpu, cpu); - - vmx->host_pkru = read_pkru(); - vmx->host_debugctlmsr = get_debugctlmsr(); -} - -static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) - return; - - /* Set SN when the vCPU is preempted */ - if (vcpu->preempted) - pi_set_sn(pi_desc); -} - -static void vmx_vcpu_put(struct kvm_vcpu *vcpu) -{ - vmx_vcpu_pi_put(vcpu); - - vmx_prepare_switch_to_host(to_vmx(vcpu)); -} - -static bool emulation_required(struct kvm_vcpu *vcpu) -{ - return emulate_invalid_guest_state && !guest_state_valid(vcpu); -} - -static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); - -unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long rflags, save_rflags; - - if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) { - kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); - rflags = vmcs_readl(GUEST_RFLAGS); - if (vmx->rmode.vm86_active) { - rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; - save_rflags = vmx->rmode.save_rflags; - rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; - } - vmx->rflags = rflags; - } - return vmx->rflags; -} - 
-void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long old_rflags; - - if (enable_unrestricted_guest) { - kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); - vmx->rflags = rflags; - vmcs_writel(GUEST_RFLAGS, rflags); - return; - } - - old_rflags = vmx_get_rflags(vcpu); - vmx->rflags = rflags; - if (vmx->rmode.vm86_active) { - vmx->rmode.save_rflags = rflags; - rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; - } - vmcs_writel(GUEST_RFLAGS, rflags); - - if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM) - vmx->emulation_required = emulation_required(vcpu); -} - -u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) -{ - u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - int ret = 0; - - if (interruptibility & GUEST_INTR_STATE_STI) - ret |= KVM_X86_SHADOW_INT_STI; - if (interruptibility & GUEST_INTR_STATE_MOV_SS) - ret |= KVM_X86_SHADOW_INT_MOV_SS; - - return ret; -} - -void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) -{ - u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - u32 interruptibility = interruptibility_old; - - interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); - - if (mask & KVM_X86_SHADOW_INT_MOV_SS) - interruptibility |= GUEST_INTR_STATE_MOV_SS; - else if (mask & KVM_X86_SHADOW_INT_STI) - interruptibility |= GUEST_INTR_STATE_STI; - - if ((interruptibility != interruptibility_old)) - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility); -} - -static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long value; - - /* - * Any MSR write that attempts to change bits marked reserved will - * case a #GP fault. - */ - if (data & vmx->pt_desc.ctl_bitmask) - return 1; - - /* - * Any attempt to modify IA32_RTIT_CTL while TraceEn is set will - * result in a #GP unless the same write also clears TraceEn. 
- */ - if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) && - ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN)) - return 1; - - /* - * WRMSR to IA32_RTIT_CTL that sets TraceEn but clears this bit - * and FabricEn would cause #GP, if - * CPUID.(EAX=14H, ECX=0):ECX.SNGLRGNOUT[bit 2] = 0 - */ - if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) && - !(data & RTIT_CTL_FABRIC_EN) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output)) - return 1; - - /* - * MTCFreq, CycThresh and PSBFreq encodings check, any MSR write that - * utilize encodings marked reserved will casue a #GP fault. - */ - value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods); - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) && - !test_bit((data & RTIT_CTL_MTC_RANGE) >> - RTIT_CTL_MTC_RANGE_OFFSET, &value)) - return 1; - value = intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_cycle_thresholds); - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) && - !test_bit((data & RTIT_CTL_CYC_THRESH) >> - RTIT_CTL_CYC_THRESH_OFFSET, &value)) - return 1; - value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods); - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) && - !test_bit((data & RTIT_CTL_PSB_FREQ) >> - RTIT_CTL_PSB_FREQ_OFFSET, &value)) - return 1; - - /* - * If ADDRx_CFG is reserved or the encodings is >2 will - * cause a #GP fault. 
- */ - value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET; - if ((value && (vmx->pt_desc.addr_range < 1)) || (value > 2)) - return 1; - value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET; - if ((value && (vmx->pt_desc.addr_range < 2)) || (value > 2)) - return 1; - value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET; - if ((value && (vmx->pt_desc.addr_range < 3)) || (value > 2)) - return 1; - value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET; - if ((value && (vmx->pt_desc.addr_range < 4)) || (value > 2)) - return 1; - - return 0; -} - -static int skip_emulated_instruction(struct kvm_vcpu *vcpu) -{ - unsigned long rip; - - /* - * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on - * undefined behavior: Intel's SDM doesn't mandate the VMCS field be - * set when EPT misconfig occurs. In practice, real hardware updates - * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors - * (namely Hyper-V) don't set it due to it being undefined behavior, - * i.e. we end up advancing IP with some random value. - */ - if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || - to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) { - rip = kvm_rip_read(vcpu); - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - kvm_rip_write(vcpu, rip); - } else { - if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) - return 0; - } - - /* skipping an emulated instruction also counts */ - vmx_set_interrupt_shadow(vcpu, 0); - - return 1; -} - -static void vmx_clear_hlt(struct kvm_vcpu *vcpu) -{ - /* - * Ensure that we clear the HLT state in the VMCS. We don't need to - * explicitly skip the instruction because if the HLT state is set, - * then the instruction is already executing and RIP has already been - * advanced. 
- */ - if (kvm_hlt_in_guest(vcpu->kvm) && - vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) - vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); -} - -static void vmx_queue_exception(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned nr = vcpu->arch.exception.nr; - bool has_error_code = vcpu->arch.exception.has_error_code; - u32 error_code = vcpu->arch.exception.error_code; - u32 intr_info = nr | INTR_INFO_VALID_MASK; - - kvm_deliver_exception_payload(vcpu); - - if (has_error_code) { - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); - intr_info |= INTR_INFO_DELIVER_CODE_MASK; - } - - if (vmx->rmode.vm86_active) { - int inc_eip = 0; - if (kvm_exception_is_soft(nr)) - inc_eip = vcpu->arch.event_exit_inst_len; - kvm_inject_realmode_interrupt(vcpu, nr, inc_eip); - return; - } - - WARN_ON_ONCE(vmx->emulation_required); - - if (kvm_exception_is_soft(nr)) { - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, - vmx->vcpu.arch.event_exit_inst_len); - intr_info |= INTR_TYPE_SOFT_EXCEPTION; - } else - intr_info |= INTR_TYPE_HARD_EXCEPTION; - - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); - - vmx_clear_hlt(vcpu); -} - -static bool vmx_rdtscp_supported(void) -{ - return cpu_has_vmx_rdtscp(); -} - -static bool vmx_invpcid_supported(void) -{ - return cpu_has_vmx_invpcid(); -} - -/* - * Swap MSR entry in host/guest MSR entry array. - */ -static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) -{ - struct shared_msr_entry tmp; - - tmp = vmx->guest_msrs[to]; - vmx->guest_msrs[to] = vmx->guest_msrs[from]; - vmx->guest_msrs[from] = tmp; -} - -/* - * Set up the vmcs to automatically save and restore system - * msrs. Don't touch the 64-bit msrs if the guest is in legacy - * mode, as fiddling with msrs is very expensive. 
- */ -static void setup_msrs(struct vcpu_vmx *vmx) -{ - int save_nmsrs, index; - - save_nmsrs = 0; -#ifdef CONFIG_X86_64 - /* - * The SYSCALL MSRs are only needed on long mode guests, and only - * when EFER.SCE is set. - */ - if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) { - index = __find_msr_index(vmx, MSR_STAR); - if (index >= 0) - move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_LSTAR); - if (index >= 0) - move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_SYSCALL_MASK); - if (index >= 0) - move_msr_up(vmx, index, save_nmsrs++); - } -#endif - index = __find_msr_index(vmx, MSR_EFER); - if (index >= 0 && update_transition_efer(vmx, index)) - move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_TSC_AUX); - if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) - move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL); - if (index >= 0) - move_msr_up(vmx, index, save_nmsrs++); - - vmx->save_nmsrs = save_nmsrs; - vmx->guest_msrs_ready = false; - - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(&vmx->vcpu); -} - -static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - - if (is_guest_mode(vcpu) && - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) - return vcpu->arch.tsc_offset - vmcs12->tsc_offset; - - return vcpu->arch.tsc_offset; -} - -static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - u64 g_tsc_offset = 0; - - /* - * We're here if L1 chose not to trap WRMSR to TSC. According - * to the spec, this should set L1's TSC; The offset that L1 - * set for L2 remains unchanged, and still needs to be added - * to the newly set TSC to get L2's TSC. 
- */ - if (is_guest_mode(vcpu) && - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) - g_tsc_offset = vmcs12->tsc_offset; - - trace_kvm_write_tsc_offset(vcpu->vcpu_id, - vcpu->arch.tsc_offset - g_tsc_offset, - offset); - vmcs_write64(TSC_OFFSET, offset + g_tsc_offset); - return offset + g_tsc_offset; -} - -/* - * nested_vmx_allowed() checks whether a guest should be allowed to use VMX - * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for - * all guests if the "nested" module option is off, and can also be disabled - * for a single guest by disabling its VMX cpuid bit. - */ -bool nested_vmx_allowed(struct kvm_vcpu *vcpu) -{ - return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); -} - -static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, - uint64_t val) -{ - uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits; - - return !(val & ~valid_bits); -} - -static int vmx_get_msr_feature(struct kvm_msr_entry *msr) -{ - switch (msr->index) { - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: - if (!nested) - return 1; - return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); - default: - return 1; - } -} - -/* - * Reads an msr value (of 'msr_index') into 'pdata'. - * Returns 0 on success, non-0 otherwise. - * Assumes vcpu_load() was already called. 
- */ -static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct shared_msr_entry *msr; - u32 index; - - switch (msr_info->index) { -#ifdef CONFIG_X86_64 - case MSR_FS_BASE: - msr_info->data = vmcs_readl(GUEST_FS_BASE); - break; - case MSR_GS_BASE: - msr_info->data = vmcs_readl(GUEST_GS_BASE); - break; - case MSR_KERNEL_GS_BASE: - msr_info->data = vmx_read_guest_kernel_gs_base(vmx); - break; -#endif - case MSR_EFER: - return kvm_get_msr_common(vcpu, msr_info); - case MSR_IA32_TSX_CTRL: - if (!msr_info->host_initiated && - !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) - return 1; - goto find_shared_msr; - case MSR_IA32_UMWAIT_CONTROL: - if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) - return 1; - - msr_info->data = vmx->msr_ia32_umwait_control; - break; - case MSR_IA32_SPEC_CTRL: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) - return 1; - - msr_info->data = to_vmx(vcpu)->spec_ctrl; - break; - case MSR_IA32_SYSENTER_CS: - msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); - break; - case MSR_IA32_SYSENTER_EIP: - msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP); - break; - case MSR_IA32_SYSENTER_ESP: - msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); - break; - case MSR_IA32_BNDCFGS: - if (!kvm_mpx_supported() || - (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) - return 1; - msr_info->data = vmcs_read64(GUEST_BNDCFGS); - break; - case MSR_IA32_MCG_EXT_CTL: - if (!msr_info->host_initiated && - !(vmx->msr_ia32_feature_control & - FEATURE_CONTROL_LMCE)) - return 1; - msr_info->data = vcpu->arch.mcg_ext_ctl; - break; - case MSR_IA32_FEATURE_CONTROL: - msr_info->data = vmx->msr_ia32_feature_control; - break; - case MSR_IA32_VMX_BASIC ... 
MSR_IA32_VMX_VMFUNC: - if (!nested_vmx_allowed(vcpu)) - return 1; - return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, - &msr_info->data); - case MSR_IA32_RTIT_CTL: - if (pt_mode != PT_MODE_HOST_GUEST) - return 1; - msr_info->data = vmx->pt_desc.guest.ctl; - break; - case MSR_IA32_RTIT_STATUS: - if (pt_mode != PT_MODE_HOST_GUEST) - return 1; - msr_info->data = vmx->pt_desc.guest.status; - break; - case MSR_IA32_RTIT_CR3_MATCH: - if ((pt_mode != PT_MODE_HOST_GUEST) || - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_cr3_filtering)) - return 1; - msr_info->data = vmx->pt_desc.guest.cr3_match; - break; - case MSR_IA32_RTIT_OUTPUT_BASE: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output))) - return 1; - msr_info->data = vmx->pt_desc.guest.output_base; - break; - case MSR_IA32_RTIT_OUTPUT_MASK: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output))) - return 1; - msr_info->data = vmx->pt_desc.guest.output_mask; - break; - case MSR_IA32_RTIT_ADDR0_A ... 
MSR_IA32_RTIT_ADDR3_B: - index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; - if ((pt_mode != PT_MODE_HOST_GUEST) || - (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_num_address_ranges))) - return 1; - if (is_noncanonical_address(data, vcpu)) - return 1; - if (index % 2) - msr_info->data = vmx->pt_desc.guest.addr_b[index / 2]; - else - msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; - break; - case MSR_TSC_AUX: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) - return 1; - goto find_shared_msr; - default: - find_shared_msr: - msr = find_msr_entry(vmx, msr_info->index); - if (msr) { - msr_info->data = msr->data; - break; - } - return kvm_get_msr_common(vcpu, msr_info); - } - - return 0; -} - -/* - * Writes msr value into the appropriate "register". - * Returns 0 on success, non-0 otherwise. - * Assumes vcpu_load() was already called. - */ -static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct shared_msr_entry *msr; - int ret = 0; - u32 msr_index = msr_info->index; - u64 data = msr_info->data; - u32 index; - - switch (msr_index) { - case MSR_EFER: - ret = kvm_set_msr_common(vcpu, msr_info); - break; -#ifdef CONFIG_X86_64 - case MSR_FS_BASE: - vmx_segment_cache_clear(vmx); - vmcs_writel(GUEST_FS_BASE, data); - break; - case MSR_GS_BASE: - vmx_segment_cache_clear(vmx); - vmcs_writel(GUEST_GS_BASE, data); - break; - case MSR_KERNEL_GS_BASE: - vmx_write_guest_kernel_gs_base(vmx, data); - break; -#endif - case MSR_IA32_SYSENTER_CS: - if (is_guest_mode(vcpu)) - get_vmcs12(vcpu)->guest_sysenter_cs = data; - vmcs_write32(GUEST_SYSENTER_CS, data); - break; - case MSR_IA32_SYSENTER_EIP: - if (is_guest_mode(vcpu)) - get_vmcs12(vcpu)->guest_sysenter_eip = data; - vmcs_writel(GUEST_SYSENTER_EIP, data); - break; - case MSR_IA32_SYSENTER_ESP: - if (is_guest_mode(vcpu)) - get_vmcs12(vcpu)->guest_sysenter_esp = data; - vmcs_writel(GUEST_SYSENTER_ESP, data); - 
break; - case MSR_IA32_DEBUGCTLMSR: - if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls & - VM_EXIT_SAVE_DEBUG_CONTROLS) - get_vmcs12(vcpu)->guest_ia32_debugctl = data; - - ret = kvm_set_msr_common(vcpu, msr_info); - break; - - case MSR_IA32_BNDCFGS: - if (!kvm_mpx_supported() || - (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) - return 1; - if (is_noncanonical_address(data & PAGE_MASK, vcpu) || - (data & MSR_IA32_BNDCFGS_RSVD)) - return 1; - vmcs_write64(GUEST_BNDCFGS, data); - break; - case MSR_IA32_UMWAIT_CONTROL: - if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) - return 1; - - /* The reserved bit 1 and non-32 bit [63:32] should be zero */ - if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) - return 1; - - vmx->msr_ia32_umwait_control = data; - break; - case MSR_IA32_SPEC_CTRL: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) - return 1; - - /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) - return 1; - - vmx->spec_ctrl = data; - - if (!data) - break; - - /* - * For non-nested: - * When it's written (to non-zero) for the first time, pass - * it through. - * - * For nested: - * The handling of the MSR bitmap for L2 guests is done in - * nested_vmx_prepare_msr_bitmap. We should not touch the - * vmcs02.msr_bitmap here since it gets completely overwritten - * in the merging. We update the vmcs01 here for L1 as well - * since it will end up touching the MSR anyway now. 
- */ - vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, - MSR_IA32_SPEC_CTRL, - MSR_TYPE_RW); - break; - case MSR_IA32_TSX_CTRL: - if (!msr_info->host_initiated && - !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) - return 1; - if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR)) - return 1; - goto find_shared_msr; - case MSR_IA32_PRED_CMD: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) - return 1; - - if (data & ~PRED_CMD_IBPB) - return 1; - - if (!data) - break; - - wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); - - /* - * For non-nested: - * When it's written (to non-zero) for the first time, pass - * it through. - * - * For nested: - * The handling of the MSR bitmap for L2 guests is done in - * nested_vmx_prepare_msr_bitmap. We should not touch the - * vmcs02.msr_bitmap here since it gets completely overwritten - * in the merging. - */ - vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, - MSR_TYPE_W); - break; - case MSR_IA32_CR_PAT: - if (!kvm_pat_valid(data)) - return 1; - - if (is_guest_mode(vcpu) && - get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) - get_vmcs12(vcpu)->guest_ia32_pat = data; - - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { - vmcs_write64(GUEST_IA32_PAT, data); - vcpu->arch.pat = data; - break; - } - ret = kvm_set_msr_common(vcpu, msr_info); - break; - case MSR_IA32_TSC_ADJUST: - ret = kvm_set_msr_common(vcpu, msr_info); - break; - case MSR_IA32_MCG_EXT_CTL: - if ((!msr_info->host_initiated && - !(to_vmx(vcpu)->msr_ia32_feature_control & - FEATURE_CONTROL_LMCE)) || - (data & ~MCG_EXT_CTL_LMCE_EN)) - return 1; - vcpu->arch.mcg_ext_ctl = data; - break; - case MSR_IA32_FEATURE_CONTROL: - if (!vmx_feature_control_msr_valid(vcpu, data) || - (to_vmx(vcpu)->msr_ia32_feature_control & - FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) - return 1; - vmx->msr_ia32_feature_control = data; - if (msr_info->host_initiated && data == 0) - 
vmx_leave_nested(vcpu); - break; - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: - if (!msr_info->host_initiated) - return 1; /* they are read-only */ - if (!nested_vmx_allowed(vcpu)) - return 1; - return vmx_set_vmx_msr(vcpu, msr_index, data); - case MSR_IA32_RTIT_CTL: - if ((pt_mode != PT_MODE_HOST_GUEST) || - vmx_rtit_ctl_check(vcpu, data) || - vmx->nested.vmxon) - return 1; - vmcs_write64(GUEST_IA32_RTIT_CTL, data); - vmx->pt_desc.guest.ctl = data; - pt_update_intercept_for_msr(vmx); - break; - case MSR_IA32_RTIT_STATUS: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (data & MSR_IA32_RTIT_STATUS_MASK)) - return 1; - vmx->pt_desc.guest.status = data; - break; - case MSR_IA32_RTIT_CR3_MATCH: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_cr3_filtering)) - return 1; - vmx->pt_desc.guest.cr3_match = data; - break; - case MSR_IA32_RTIT_OUTPUT_BASE: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output)) || - (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK)) - return 1; - vmx->pt_desc.guest.output_base = data; - break; - case MSR_IA32_RTIT_OUTPUT_MASK: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output))) - return 1; - vmx->pt_desc.guest.output_mask = data; - break; - case MSR_IA32_RTIT_ADDR0_A ... 
MSR_IA32_RTIT_ADDR3_B: - index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_num_address_ranges))) - return 1; - if (is_noncanonical_address(data, vcpu)) - return 1; - if (index % 2) - vmx->pt_desc.guest.addr_b[index / 2] = data; - else - vmx->pt_desc.guest.addr_a[index / 2] = data; - break; - case MSR_TSC_AUX: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) - return 1; - /* Check reserved bit, higher 32 bits should be zero */ - if ((data >> 32) != 0) - return 1; - goto find_shared_msr; - - default: - find_shared_msr: - msr = find_msr_entry(vmx, msr_index); - if (msr) - ret = vmx_set_guest_msr(vmx, msr, data); - else - ret = kvm_set_msr_common(vcpu, msr_info); - } - - return ret; -} - -static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) -{ - kvm_register_mark_available(vcpu, reg); - - switch (reg) { - case VCPU_REGS_RSP: - vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); - break; - case VCPU_REGS_RIP: - vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); - break; - case VCPU_EXREG_PDPTR: - if (enable_ept) - ept_save_pdptrs(vcpu); - break; - case VCPU_EXREG_CR3: - if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) - vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - break; - default: - WARN_ON_ONCE(1); - break; - } -} - -static __init int cpu_has_kvm_support(void) -{ - return cpu_has_vmx(); -} - -static __init int vmx_disabled_by_bios(void) -{ - u64 msr; - - rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); - if (msr & FEATURE_CONTROL_LOCKED) { - /* launched w/ TXT and VMX disabled */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) - && tboot_enabled()) - return 1; - /* launched w/o TXT and VMX only enabled w/ TXT */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) - && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) - && !tboot_enabled()) { - 
printk(KERN_WARNING "kvm: disable TXT in the BIOS or " - "activate TXT before enabling KVM\n"); - return 1; - } - /* launched w/o TXT and VMX disabled */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) - && !tboot_enabled()) - return 1; - } - - return 0; -} - -static void kvm_cpu_vmxon(u64 addr) -{ - cr4_set_bits(X86_CR4_VMXE); - intel_pt_handle_vmx(1); - - asm volatile ("vmxon %0" : : "m"(addr)); -} - -static int hardware_enable(void) -{ - int cpu = raw_smp_processor_id(); - u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); - u64 old, test_bits; - - if (cr4_read_shadow() & X86_CR4_VMXE) - return -EBUSY; - - /* - * This can happen if we hot-added a CPU but failed to allocate - * VP assist page for it. - */ - if (static_branch_unlikely(&enable_evmcs) && - !hv_get_vp_assist_page(cpu)) - return -EFAULT; - - INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); - INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); - spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); - - /* - * Now we can enable the vmclear operation in kdump - * since the loaded_vmcss_on_cpu list on this cpu - * has been initialized. - * - * Though the cpu is not in VMX operation now, there - * is no problem to enable the vmclear operation - * for the loaded_vmcss_on_cpu list is empty! 
- */ - crash_enable_local_vmclear(cpu); - - rdmsrl(MSR_IA32_FEATURE_CONTROL, old); - - test_bits = FEATURE_CONTROL_LOCKED; - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - if (tboot_enabled()) - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; - - if ((old & test_bits) != test_bits) { - /* enable and lock */ - wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); - } - kvm_cpu_vmxon(phys_addr); - if (enable_ept) - ept_sync_global(); - - return 0; -} - -static void vmclear_local_loaded_vmcss(void) -{ - int cpu = raw_smp_processor_id(); - struct loaded_vmcs *v, *n; - - list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu), - loaded_vmcss_on_cpu_link) - __loaded_vmcs_clear(v); -} - - -/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() - * tricks. - */ -static void kvm_cpu_vmxoff(void) -{ - asm volatile (__ex("vmxoff")); - - intel_pt_handle_vmx(0); - cr4_clear_bits(X86_CR4_VMXE); -} - -static void hardware_disable(void) -{ - vmclear_local_loaded_vmcss(); - kvm_cpu_vmxoff(); -} - -static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, - u32 msr, u32 *result) -{ - u32 vmx_msr_low, vmx_msr_high; - u32 ctl = ctl_min | ctl_opt; - - rdmsr(msr, vmx_msr_low, vmx_msr_high); - - ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */ - ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */ - - /* Ensure minimum (required) set of control bits are supported. 
*/ - if (ctl_min & ~ctl) - return -EIO; - - *result = ctl; - return 0; -} - -static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, - struct vmx_capability *vmx_cap) -{ - u32 vmx_msr_low, vmx_msr_high; - u32 min, opt, min2, opt2; - u32 _pin_based_exec_control = 0; - u32 _cpu_based_exec_control = 0; - u32 _cpu_based_2nd_exec_control = 0; - u32 _vmexit_control = 0; - u32 _vmentry_control = 0; - - memset(vmcs_conf, 0, sizeof(*vmcs_conf)); - min = CPU_BASED_HLT_EXITING | -#ifdef CONFIG_X86_64 - CPU_BASED_CR8_LOAD_EXITING | - CPU_BASED_CR8_STORE_EXITING | -#endif - CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING | - CPU_BASED_UNCOND_IO_EXITING | - CPU_BASED_MOV_DR_EXITING | - CPU_BASED_USE_TSC_OFFSETTING | - CPU_BASED_MWAIT_EXITING | - CPU_BASED_MONITOR_EXITING | - CPU_BASED_INVLPG_EXITING | - CPU_BASED_RDPMC_EXITING; - - opt = CPU_BASED_TPR_SHADOW | - CPU_BASED_USE_MSR_BITMAPS | - CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, - &_cpu_based_exec_control) < 0) - return -EIO; -#ifdef CONFIG_X86_64 - if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) - _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING & - ~CPU_BASED_CR8_STORE_EXITING; -#endif - if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { - min2 = 0; - opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | - SECONDARY_EXEC_WBINVD_EXITING | - SECONDARY_EXEC_ENABLE_VPID | - SECONDARY_EXEC_ENABLE_EPT | - SECONDARY_EXEC_UNRESTRICTED_GUEST | - SECONDARY_EXEC_PAUSE_LOOP_EXITING | - SECONDARY_EXEC_DESC | - SECONDARY_EXEC_RDTSCP | - SECONDARY_EXEC_ENABLE_INVPCID | - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_SHADOW_VMCS | - SECONDARY_EXEC_XSAVES | - SECONDARY_EXEC_RDSEED_EXITING | - SECONDARY_EXEC_RDRAND_EXITING | - SECONDARY_EXEC_ENABLE_PML | - SECONDARY_EXEC_TSC_SCALING | - SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | - 
SECONDARY_EXEC_PT_USE_GPA | - SECONDARY_EXEC_PT_CONCEAL_VMX | - SECONDARY_EXEC_ENABLE_VMFUNC | - SECONDARY_EXEC_ENCLS_EXITING; - if (adjust_vmx_controls(min2, opt2, - MSR_IA32_VMX_PROCBASED_CTLS2, - &_cpu_based_2nd_exec_control) < 0) - return -EIO; - } -#ifndef CONFIG_X86_64 - if (!(_cpu_based_2nd_exec_control & - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) - _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; -#endif - - if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) - _cpu_based_2nd_exec_control &= ~( - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); - - rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, - &vmx_cap->ept, &vmx_cap->vpid); - - if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { - /* CR3 accesses and invlpg don't need to cause VM Exits when EPT - enabled */ - _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING | - CPU_BASED_INVLPG_EXITING); - } else if (vmx_cap->ept) { - vmx_cap->ept = 0; - pr_warn_once("EPT CAP should not exist if not support " - "1-setting enable EPT VM-execution control\n"); - } - if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) && - vmx_cap->vpid) { - vmx_cap->vpid = 0; - pr_warn_once("VPID CAP should not exist if not support " - "1-setting enable VPID VM-execution control\n"); - } - - min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT; -#ifdef CONFIG_X86_64 - min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; -#endif - opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | - VM_EXIT_LOAD_IA32_PAT | - VM_EXIT_LOAD_IA32_EFER | - VM_EXIT_CLEAR_BNDCFGS | - VM_EXIT_PT_CONCEAL_PIP | - VM_EXIT_CLEAR_IA32_RTIT_CTL; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, - &_vmexit_control) < 0) - return -EIO; - - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; - opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | - PIN_BASED_VMX_PREEMPTION_TIMER; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, - 
&_pin_based_exec_control) < 0) - return -EIO; - - if (cpu_has_broken_vmx_preemption_timer()) - _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; - if (!(_cpu_based_2nd_exec_control & - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)) - _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; - - min = VM_ENTRY_LOAD_DEBUG_CONTROLS; - opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | - VM_ENTRY_LOAD_IA32_PAT | - VM_ENTRY_LOAD_IA32_EFER | - VM_ENTRY_LOAD_BNDCFGS | - VM_ENTRY_PT_CONCEAL_PIP | - VM_ENTRY_LOAD_IA32_RTIT_CTL; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, - &_vmentry_control) < 0) - return -EIO; - - /* - * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they - * can't be used due to an errata where VM Exit may incorrectly clear - * IA32_PERF_GLOBAL_CTRL[34:32]. Workaround the errata by using the - * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL. - */ - if (boot_cpu_data.x86 == 0x6) { - switch (boot_cpu_data.x86_model) { - case 26: /* AAK155 */ - case 30: /* AAP115 */ - case 37: /* AAT100 */ - case 44: /* BC86,AAY89,BD102 */ - case 46: /* BA97 */ - _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; - _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; - pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL " - "does not work properly. Using workaround\n"); - break; - default: - break; - } - } - - - rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); - - /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ - if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) - return -EIO; - -#ifdef CONFIG_X86_64 - /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */ - if (vmx_msr_high & (1u<<16)) - return -EIO; -#endif - - /* Require Write-Back (WB) memory type for VMCS accesses. 
*/ - if (((vmx_msr_high >> 18) & 15) != 6) - return -EIO; - - vmcs_conf->size = vmx_msr_high & 0x1fff; - vmcs_conf->order = get_order(vmcs_conf->size); - vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; - - vmcs_conf->revision_id = vmx_msr_low; - - vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; - vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; - vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control; - vmcs_conf->vmexit_ctrl = _vmexit_control; - vmcs_conf->vmentry_ctrl = _vmentry_control; - - if (static_branch_unlikely(&enable_evmcs)) - evmcs_sanitize_exec_ctrls(vmcs_conf); - - return 0; -} - -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) -{ - int node = cpu_to_node(cpu); - struct page *pages; - struct vmcs *vmcs; - - pages = __alloc_pages_node(node, flags, vmcs_config.order); - if (!pages) - return NULL; - vmcs = page_address(pages); - memset(vmcs, 0, vmcs_config.size); - - /* KVM supports Enlightened VMCS v1 only */ - if (static_branch_unlikely(&enable_evmcs)) - vmcs->hdr.revision_id = KVM_EVMCS_VERSION; - else - vmcs->hdr.revision_id = vmcs_config.revision_id; - - if (shadow) - vmcs->hdr.shadow_vmcs = 1; - return vmcs; -} - -void free_vmcs(struct vmcs *vmcs) -{ - free_pages((unsigned long)vmcs, vmcs_config.order); -} - -/* - * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded - */ -void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) -{ - if (!loaded_vmcs->vmcs) - return; - loaded_vmcs_clear(loaded_vmcs); - free_vmcs(loaded_vmcs->vmcs); - loaded_vmcs->vmcs = NULL; - if (loaded_vmcs->msr_bitmap) - free_page((unsigned long)loaded_vmcs->msr_bitmap); - WARN_ON(loaded_vmcs->shadow_vmcs != NULL); -} - -int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) -{ - loaded_vmcs->vmcs = alloc_vmcs(false); - if (!loaded_vmcs->vmcs) - return -ENOMEM; - - loaded_vmcs->shadow_vmcs = NULL; - loaded_vmcs->hv_timer_soft_disabled = false; - loaded_vmcs_init(loaded_vmcs); - - if (cpu_has_vmx_msr_bitmap()) { - 
loaded_vmcs->msr_bitmap = (unsigned long *) - __get_free_page(GFP_KERNEL_ACCOUNT); - if (!loaded_vmcs->msr_bitmap) - goto out_vmcs; - memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); - - if (IS_ENABLED(CONFIG_HYPERV) && - static_branch_unlikely(&enable_evmcs) && - (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) { - struct hv_enlightened_vmcs *evmcs = - (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs; - - evmcs->hv_enlightenments_control.msr_bitmap = 1; - } - } - - memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); - memset(&loaded_vmcs->controls_shadow, 0, - sizeof(struct vmcs_controls_shadow)); - - return 0; - -out_vmcs: - free_loaded_vmcs(loaded_vmcs); - return -ENOMEM; -} - -static void free_kvm_area(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - free_vmcs(per_cpu(vmxarea, cpu)); - per_cpu(vmxarea, cpu) = NULL; - } -} - -static __init int alloc_kvm_area(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct vmcs *vmcs; - - vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL); - if (!vmcs) { - free_kvm_area(); - return -ENOMEM; - } - - /* - * When eVMCS is enabled, alloc_vmcs_cpu() sets - * vmcs->revision_id to KVM_EVMCS_VERSION instead of - * revision_id reported by MSR_IA32_VMX_BASIC. - * - * However, even though not explicitly documented by - * TLFS, VMXArea passed as VMXON argument should - * still be marked with revision_id reported by - * physical CPU. - */ - if (static_branch_unlikely(&enable_evmcs)) - vmcs->hdr.revision_id = vmcs_config.revision_id; - - per_cpu(vmxarea, cpu) = vmcs; - } - return 0; -} - -static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, - struct kvm_segment *save) -{ - if (!emulate_invalid_guest_state) { - /* - * CS and SS RPL should be equal during guest entry according - * to VMX spec, but in reality it is not always so. Since vcpu - * is in the middle of the transition from real mode to - * protected mode it is safe to assume that RPL 0 is a good - * default value. 
- */ - if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) - save->selector &= ~SEGMENT_RPL_MASK; - save->dpl = save->selector & SEGMENT_RPL_MASK; - save->s = 1; - } - vmx_set_segment(vcpu, save, seg); -} - -static void enter_pmode(struct kvm_vcpu *vcpu) -{ - unsigned long flags; - struct vcpu_vmx *vmx = to_vmx(vcpu); - - /* - * Update real mode segment cache. It may be not up-to-date if sement - * register was written while vcpu was in a guest mode. - */ - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); - - vmx->rmode.vm86_active = 0; - - vmx_segment_cache_clear(vmx); - - vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); - - flags = vmcs_readl(GUEST_RFLAGS); - flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; - flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; - vmcs_writel(GUEST_RFLAGS, flags); - - vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | - (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME)); - - update_exception_bitmap(vcpu); - - fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); - fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); - fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); - fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); - fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); - fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); -} - -static void fix_rmode_seg(int seg, struct kvm_segment *save) -{ - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - struct kvm_segment var = *save; - - var.dpl = 0x3; - if (seg == VCPU_SREG_CS) - 
var.type = 0x3; - - if (!emulate_invalid_guest_state) { - var.selector = var.base >> 4; - var.base = var.base & 0xffff0; - var.limit = 0xffff; - var.g = 0; - var.db = 0; - var.present = 1; - var.s = 1; - var.l = 0; - var.unusable = 0; - var.type = 0x3; - var.avl = 0; - if (save->base & 0xf) - printk_once(KERN_WARNING "kvm: segment base is not " - "paragraph aligned when entering " - "protected mode (seg=%d)", seg); - } - - vmcs_write16(sf->selector, var.selector); - vmcs_writel(sf->base, var.base); - vmcs_write32(sf->limit, var.limit); - vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); -} - -static void enter_rmode(struct kvm_vcpu *vcpu) -{ - unsigned long flags; - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm); - - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); - - vmx->rmode.vm86_active = 1; - - /* - * Very old userspace does not call KVM_SET_TSS_ADDR before entering - * vcpu. Warn the user that an update is overdue. 
- */ - if (!kvm_vmx->tss_addr) - printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " - "called before entering vcpu\n"); - - vmx_segment_cache_clear(vmx); - - vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr); - vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); - vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); - - flags = vmcs_readl(GUEST_RFLAGS); - vmx->rmode.save_rflags = flags; - - flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; - - vmcs_writel(GUEST_RFLAGS, flags); - vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); - update_exception_bitmap(vcpu); - - fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); - fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); - fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); - fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); - fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); - fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); - - kvm_mmu_reset_context(vcpu); -} - -void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); - - if (!msr) - return; - - vcpu->arch.efer = efer; - if (efer & EFER_LMA) { - vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); - msr->data = efer; - } else { - vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); - - msr->data = efer & ~EFER_LME; - } - setup_msrs(vmx); -} - -#ifdef CONFIG_X86_64 - -static void enter_lmode(struct kvm_vcpu *vcpu) -{ - u32 guest_tr_ar; - - vmx_segment_cache_clear(to_vmx(vcpu)); - - guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); - if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) { - pr_debug_ratelimited("%s: tss fixup for long mode. 
\n", - __func__); - vmcs_write32(GUEST_TR_AR_BYTES, - (guest_tr_ar & ~VMX_AR_TYPE_MASK) - | VMX_AR_TYPE_BUSY_64_TSS); - } - vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA); -} - -static void exit_lmode(struct kvm_vcpu *vcpu) -{ - vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); - vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); -} - -#endif - -static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) -{ - int vpid = to_vmx(vcpu)->vpid; - - if (!vpid_sync_vcpu_addr(vpid, addr)) - vpid_sync_context(vpid); - - /* - * If VPIDs are not supported or enabled, then the above is a no-op. - * But we don't really need a TLB flush in that case anyway, because - * each VM entry/exit includes an implicit flush when VPID is 0. - */ -} - -static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) -{ - ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; - - vcpu->arch.cr0 &= ~cr0_guest_owned_bits; - vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; -} - -static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) -{ - ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; - - vcpu->arch.cr4 &= ~cr4_guest_owned_bits; - vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits; -} - -static void ept_load_pdptrs(struct kvm_vcpu *vcpu) -{ - struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - - if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR)) - return; - - if (is_pae_paging(vcpu)) { - vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); - vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); - vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); - vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]); - } -} - -void ept_save_pdptrs(struct kvm_vcpu *vcpu) -{ - struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - - if (is_pae_paging(vcpu)) { - mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); - mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); - mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); - mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); - } - - kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); 
-} - -static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, - unsigned long cr0, - struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) - vmx_cache_reg(vcpu, VCPU_EXREG_CR3); - if (!(cr0 & X86_CR0_PG)) { - /* From paging/starting to nonpaging */ - exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); - vcpu->arch.cr0 = cr0; - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } else if (!is_paging(vcpu)) { - /* From nonpaging to paging */ - exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); - vcpu->arch.cr0 = cr0; - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } - - if (!(cr0 & X86_CR0_WP)) - *hw_cr0 &= ~X86_CR0_WP; -} - -void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long hw_cr0; - - hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF); - if (enable_unrestricted_guest) - hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; - else { - hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; - - if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) - enter_pmode(vcpu); - - if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) - enter_rmode(vcpu); - } - -#ifdef CONFIG_X86_64 - if (vcpu->arch.efer & EFER_LME) { - if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) - enter_lmode(vcpu); - if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) - exit_lmode(vcpu); - } -#endif - - if (enable_ept && !enable_unrestricted_guest) - ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); - - vmcs_writel(CR0_READ_SHADOW, cr0); - vmcs_writel(GUEST_CR0, hw_cr0); - vcpu->arch.cr0 = cr0; - - /* depends on vcpu->arch.cr0 to be set to a new value */ - vmx->emulation_required = emulation_required(vcpu); -} - -static int get_ept_level(struct kvm_vcpu *vcpu) -{ - if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) - return 5; - return 4; -} - -u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) -{ - u64 eptp = VMX_EPTP_MT_WB; - - eptp |= 
(get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4; - - if (enable_ept_ad_bits && - (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) - eptp |= VMX_EPTP_AD_ENABLE_BIT; - eptp |= (root_hpa & PAGE_MASK); - - return eptp; -} - -void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) -{ - struct kvm *kvm = vcpu->kvm; - bool update_guest_cr3 = true; - unsigned long guest_cr3; - u64 eptp; - - guest_cr3 = cr3; - if (enable_ept) { - eptp = construct_eptp(vcpu, cr3); - vmcs_write64(EPT_POINTER, eptp); - - if (kvm_x86_ops->tlb_remote_flush) { - spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); - to_vmx(vcpu)->ept_pointer = eptp; - to_kvm_vmx(kvm)->ept_pointers_match - = EPT_POINTERS_CHECK; - spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); - } - - /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */ - if (is_guest_mode(vcpu)) - update_guest_cr3 = false; - else if (!enable_unrestricted_guest && !is_paging(vcpu)) - guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; - else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) - guest_cr3 = vcpu->arch.cr3; - else /* vmcs01.GUEST_CR3 is already up-to-date. */ - update_guest_cr3 = false; - ept_load_pdptrs(vcpu); - } - - if (update_guest_cr3) - vmcs_writel(GUEST_CR3, guest_cr3); -} - -int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - /* - * Pass through host's Machine Check Enable value to hw_cr4, which - * is in force while we are in guest mode. Do not let guests control - * this bit, even if host CR4.MCE == 0. 
- */ - unsigned long hw_cr4; - - hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); - if (enable_unrestricted_guest) - hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; - else if (vmx->rmode.vm86_active) - hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; - else - hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; - - if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) { - if (cr4 & X86_CR4_UMIP) { - secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC); - hw_cr4 &= ~X86_CR4_UMIP; - } else if (!is_guest_mode(vcpu) || - !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) { - secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC); - } - } - - if (cr4 & X86_CR4_VMXE) { - /* - * To use VMXON (and later other VMX instructions), a guest - * must first be able to turn on cr4.VMXE (see handle_vmon()). - * So basically the check on whether to allow nested VMX - * is here. We operate under the default treatment of SMM, - * so VMX cannot be enabled under SMM. - */ - if (!nested_vmx_allowed(vcpu) || is_smm(vcpu)) - return 1; - } - - if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) - return 1; - - vcpu->arch.cr4 = cr4; - - if (!enable_unrestricted_guest) { - if (enable_ept) { - if (!is_paging(vcpu)) { - hw_cr4 &= ~X86_CR4_PAE; - hw_cr4 |= X86_CR4_PSE; - } else if (!(cr4 & X86_CR4_PAE)) { - hw_cr4 &= ~X86_CR4_PAE; - } - } - - /* - * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in - * hardware. To emulate this behavior, SMEP/SMAP/PKU needs - * to be manually disabled when guest switches to non-paging - * mode. - * - * If !enable_unrestricted_guest, the CPU is always running - * with CR0.PG=1 and CR4 needs to be modified. - * If enable_unrestricted_guest, the CPU automatically - * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0. 
- */ - if (!is_paging(vcpu)) - hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); - } - - vmcs_writel(CR4_READ_SHADOW, cr4); - vmcs_writel(GUEST_CR4, hw_cr4); - return 0; -} - -void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 ar; - - if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { - *var = vmx->rmode.segs[seg]; - if (seg == VCPU_SREG_TR - || var->selector == vmx_read_guest_seg_selector(vmx, seg)) - return; - var->base = vmx_read_guest_seg_base(vmx, seg); - var->selector = vmx_read_guest_seg_selector(vmx, seg); - return; - } - var->base = vmx_read_guest_seg_base(vmx, seg); - var->limit = vmx_read_guest_seg_limit(vmx, seg); - var->selector = vmx_read_guest_seg_selector(vmx, seg); - ar = vmx_read_guest_seg_ar(vmx, seg); - var->unusable = (ar >> 16) & 1; - var->type = ar & 15; - var->s = (ar >> 4) & 1; - var->dpl = (ar >> 5) & 3; - /* - * Some userspaces do not preserve unusable property. Since usable - * segment has to be present according to VMX spec we can use present - * property to amend userspace bug by making unusable segment always - * nonpresent. vmx_segment_access_rights() already marks nonpresent - * segment as unusable. 
- */ - var->present = !var->unusable; - var->avl = (ar >> 12) & 1; - var->l = (ar >> 13) & 1; - var->db = (ar >> 14) & 1; - var->g = (ar >> 15) & 1; -} - -static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) -{ - struct kvm_segment s; - - if (to_vmx(vcpu)->rmode.vm86_active) { - vmx_get_segment(vcpu, &s, seg); - return s.base; - } - return vmx_read_guest_seg_base(to_vmx(vcpu), seg); -} - -int vmx_get_cpl(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (unlikely(vmx->rmode.vm86_active)) - return 0; - else { - int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS); - return VMX_AR_DPL(ar); - } -} - -static u32 vmx_segment_access_rights(struct kvm_segment *var) -{ - u32 ar; - - if (var->unusable || !var->present) - ar = 1 << 16; - else { - ar = var->type & 15; - ar |= (var->s & 1) << 4; - ar |= (var->dpl & 3) << 5; - ar |= (var->present & 1) << 7; - ar |= (var->avl & 1) << 12; - ar |= (var->l & 1) << 13; - ar |= (var->db & 1) << 14; - ar |= (var->g & 1) << 15; - } - - return ar; -} - -void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - - vmx_segment_cache_clear(vmx); - - if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { - vmx->rmode.segs[seg] = *var; - if (seg == VCPU_SREG_TR) - vmcs_write16(sf->selector, var->selector); - else if (var->s) - fix_rmode_seg(seg, &vmx->rmode.segs[seg]); - goto out; - } - - vmcs_writel(sf->base, var->base); - vmcs_write32(sf->limit, var->limit); - vmcs_write16(sf->selector, var->selector); - - /* - * Fix the "Accessed" bit in AR field of segment registers for older - * qemu binaries. - * IA32 arch specifies that at the time of processor reset the - * "Accessed" bit in the AR field of segment registers is 1. And qemu - * is setting it to 0 in the userland code. This causes invalid guest - * state vmexit when "unrestricted guest" mode is turned on. 
- * Fix for this setup issue in cpu_reset is being pushed in the qemu - * tree. Newer qemu binaries with that qemu fix would not need this - * kvm hack. - */ - if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) - var->type |= 0x1; /* Accessed */ - - vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); - -out: - vmx->emulation_required = emulation_required(vcpu); -} - -static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -{ - u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); - - *db = (ar >> 14) & 1; - *l = (ar >> 13) & 1; -} - -static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) -{ - dt->size = vmcs_read32(GUEST_IDTR_LIMIT); - dt->address = vmcs_readl(GUEST_IDTR_BASE); -} - -static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) -{ - vmcs_write32(GUEST_IDTR_LIMIT, dt->size); - vmcs_writel(GUEST_IDTR_BASE, dt->address); -} - -static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) -{ - dt->size = vmcs_read32(GUEST_GDTR_LIMIT); - dt->address = vmcs_readl(GUEST_GDTR_BASE); -} - -static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) -{ - vmcs_write32(GUEST_GDTR_LIMIT, dt->size); - vmcs_writel(GUEST_GDTR_BASE, dt->address); -} - -static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) -{ - struct kvm_segment var; - u32 ar; - - vmx_get_segment(vcpu, &var, seg); - var.dpl = 0x3; - if (seg == VCPU_SREG_CS) - var.type = 0x3; - ar = vmx_segment_access_rights(&var); - - if (var.base != (var.selector << 4)) - return false; - if (var.limit != 0xffff) - return false; - if (ar != 0xf3) - return false; - - return true; -} - -static bool code_segment_valid(struct kvm_vcpu *vcpu) -{ - struct kvm_segment cs; - unsigned int cs_rpl; - - vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); - cs_rpl = cs.selector & SEGMENT_RPL_MASK; - - if (cs.unusable) - return false; - if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK)) - return false; - if (!cs.s) - return false; - if (cs.type 
& VMX_AR_TYPE_WRITEABLE_MASK) { - if (cs.dpl > cs_rpl) - return false; - } else { - if (cs.dpl != cs_rpl) - return false; - } - if (!cs.present) - return false; - - /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */ - return true; -} - -static bool stack_segment_valid(struct kvm_vcpu *vcpu) -{ - struct kvm_segment ss; - unsigned int ss_rpl; - - vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); - ss_rpl = ss.selector & SEGMENT_RPL_MASK; - - if (ss.unusable) - return true; - if (ss.type != 3 && ss.type != 7) - return false; - if (!ss.s) - return false; - if (ss.dpl != ss_rpl) /* DPL != RPL */ - return false; - if (!ss.present) - return false; - - return true; -} - -static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) -{ - struct kvm_segment var; - unsigned int rpl; - - vmx_get_segment(vcpu, &var, seg); - rpl = var.selector & SEGMENT_RPL_MASK; - - if (var.unusable) - return true; - if (!var.s) - return false; - if (!var.present) - return false; - if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) { - if (var.dpl < rpl) /* DPL < RPL */ - return false; - } - - /* TODO: Add other members to kvm_segment_field to allow checking for other access - * rights flags - */ - return true; -} - -static bool tr_valid(struct kvm_vcpu *vcpu) -{ - struct kvm_segment tr; - - vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); - - if (tr.unusable) - return false; - if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */ - return false; - if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */ - return false; - if (!tr.present) - return false; - - return true; -} - -static bool ldtr_valid(struct kvm_vcpu *vcpu) -{ - struct kvm_segment ldtr; - - vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); - - if (ldtr.unusable) - return true; - if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */ - return false; - if (ldtr.type != 2) - return false; - if (!ldtr.present) - return false; - - return true; -} - -static bool 
cs_ss_rpl_check(struct kvm_vcpu *vcpu) -{ - struct kvm_segment cs, ss; - - vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); - vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); - - return ((cs.selector & SEGMENT_RPL_MASK) == - (ss.selector & SEGMENT_RPL_MASK)); -} - -/* - * Check if guest state is valid. Returns true if valid, false if - * not. - * We assume that registers are always usable - */ -static bool guest_state_valid(struct kvm_vcpu *vcpu) -{ - if (enable_unrestricted_guest) - return true; - - /* real mode guest state checks */ - if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { - if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) - return false; - if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) - return false; - if (!rmode_segment_valid(vcpu, VCPU_SREG_DS)) - return false; - if (!rmode_segment_valid(vcpu, VCPU_SREG_ES)) - return false; - if (!rmode_segment_valid(vcpu, VCPU_SREG_FS)) - return false; - if (!rmode_segment_valid(vcpu, VCPU_SREG_GS)) - return false; - } else { - /* protected mode guest state checks */ - if (!cs_ss_rpl_check(vcpu)) - return false; - if (!code_segment_valid(vcpu)) - return false; - if (!stack_segment_valid(vcpu)) - return false; - if (!data_segment_valid(vcpu, VCPU_SREG_DS)) - return false; - if (!data_segment_valid(vcpu, VCPU_SREG_ES)) - return false; - if (!data_segment_valid(vcpu, VCPU_SREG_FS)) - return false; - if (!data_segment_valid(vcpu, VCPU_SREG_GS)) - return false; - if (!tr_valid(vcpu)) - return false; - if (!ldtr_valid(vcpu)) - return false; - } - /* TODO: - * - Add checks on RIP - * - Add checks on RFLAGS - */ - - return true; -} - -static int init_rmode_tss(struct kvm *kvm) -{ - gfn_t fn; - u16 data = 0; - int idx, r; - - idx = srcu_read_lock(&kvm->srcu); - fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT; - r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); - if (r < 0) - goto out; - data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; - r = kvm_write_guest_page(kvm, fn++, &data, - TSS_IOPB_BASE_OFFSET, sizeof(u16)); - if (r < 0) - 
goto out; - r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); - if (r < 0) - goto out; - r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); - if (r < 0) - goto out; - data = ~0; - r = kvm_write_guest_page(kvm, fn, &data, - RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, - sizeof(u8)); -out: - srcu_read_unlock(&kvm->srcu, idx); - return r; -} - -static int init_rmode_identity_map(struct kvm *kvm) -{ - struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); - int i, idx, r = 0; - kvm_pfn_t identity_map_pfn; - u32 tmp; - - /* Protect kvm_vmx->ept_identity_pagetable_done. */ - mutex_lock(&kvm->slots_lock); - - if (likely(kvm_vmx->ept_identity_pagetable_done)) - goto out2; - - if (!kvm_vmx->ept_identity_map_addr) - kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; - identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT; - - r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, - kvm_vmx->ept_identity_map_addr, PAGE_SIZE); - if (r < 0) - goto out2; - - idx = srcu_read_lock(&kvm->srcu); - r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); - if (r < 0) - goto out; - /* Set up identity-mapping pagetable for EPT in real mode */ - for (i = 0; i < PT32_ENT_PER_PAGE; i++) { - tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | - _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); - r = kvm_write_guest_page(kvm, identity_map_pfn, - &tmp, i * sizeof(tmp), sizeof(tmp)); - if (r < 0) - goto out; - } - kvm_vmx->ept_identity_pagetable_done = true; - -out: - srcu_read_unlock(&kvm->srcu, idx); - -out2: - mutex_unlock(&kvm->slots_lock); - return r; -} - -static void seg_setup(int seg) -{ - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - unsigned int ar; - - vmcs_write16(sf->selector, 0); - vmcs_writel(sf->base, 0); - vmcs_write32(sf->limit, 0xffff); - ar = 0x93; - if (seg == VCPU_SREG_CS) - ar |= 0x08; /* code segment */ - - vmcs_write32(sf->ar_bytes, ar); -} - -static int alloc_apic_access_page(struct kvm *kvm) -{ - struct page *page; - 
int r = 0; - - mutex_lock(&kvm->slots_lock); - if (kvm->arch.apic_access_page_done) - goto out; - r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, - APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); - if (r) - goto out; - - page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); - if (is_error_page(page)) { - r = -EFAULT; - goto out; - } - - /* - * Do not pin the page in memory, so that memory hot-unplug - * is able to migrate it. - */ - put_page(page); - kvm->arch.apic_access_page_done = true; -out: - mutex_unlock(&kvm->slots_lock); - return r; -} - -int allocate_vpid(void) -{ - int vpid; - - if (!enable_vpid) - return 0; - spin_lock(&vmx_vpid_lock); - vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); - if (vpid < VMX_NR_VPIDS) - __set_bit(vpid, vmx_vpid_bitmap); - else - vpid = 0; - spin_unlock(&vmx_vpid_lock); - return vpid; -} - -void free_vpid(int vpid) -{ - if (!enable_vpid || vpid == 0) - return; - spin_lock(&vmx_vpid_lock); - __clear_bit(vpid, vmx_vpid_bitmap); - spin_unlock(&vmx_vpid_lock); -} - -static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) -{ - int f = sizeof(unsigned long); - - if (!cpu_has_vmx_msr_bitmap()) - return; - - if (static_branch_unlikely(&enable_evmcs)) - evmcs_touch_msr_bitmap(); - - /* - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals - * have the write-low and read-high bitmap offsets the wrong way round. - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. 
- */ - if (msr <= 0x1fff) { - if (type & MSR_TYPE_R) - /* read-low */ - __clear_bit(msr, msr_bitmap + 0x000 / f); - - if (type & MSR_TYPE_W) - /* write-low */ - __clear_bit(msr, msr_bitmap + 0x800 / f); - - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { - msr &= 0x1fff; - if (type & MSR_TYPE_R) - /* read-high */ - __clear_bit(msr, msr_bitmap + 0x400 / f); - - if (type & MSR_TYPE_W) - /* write-high */ - __clear_bit(msr, msr_bitmap + 0xc00 / f); - - } -} - -static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) -{ - int f = sizeof(unsigned long); - - if (!cpu_has_vmx_msr_bitmap()) - return; - - if (static_branch_unlikely(&enable_evmcs)) - evmcs_touch_msr_bitmap(); - - /* - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals - * have the write-low and read-high bitmap offsets the wrong way round. - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. - */ - if (msr <= 0x1fff) { - if (type & MSR_TYPE_R) - /* read-low */ - __set_bit(msr, msr_bitmap + 0x000 / f); - - if (type & MSR_TYPE_W) - /* write-low */ - __set_bit(msr, msr_bitmap + 0x800 / f); - - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { - msr &= 0x1fff; - if (type & MSR_TYPE_R) - /* read-high */ - __set_bit(msr, msr_bitmap + 0x400 / f); - - if (type & MSR_TYPE_W) - /* write-high */ - __set_bit(msr, msr_bitmap + 0xc00 / f); - - } -} - -static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type, bool value) -{ - if (value) - vmx_enable_intercept_for_msr(msr_bitmap, msr, type); - else - vmx_disable_intercept_for_msr(msr_bitmap, msr, type); -} - -static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) -{ - u8 mode = 0; - - if (cpu_has_secondary_exec_ctrls() && - (secondary_exec_controls_get(to_vmx(vcpu)) & - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { - mode |= MSR_BITMAP_MODE_X2APIC; - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) - mode |= MSR_BITMAP_MODE_X2APIC_APICV; - 
} - - return mode; -} - -static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, - u8 mode) -{ - int msr; - - for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { - unsigned word = msr / BITS_PER_LONG; - msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; - msr_bitmap[word + (0x800 / sizeof(long))] = ~0; - } - - if (mode & MSR_BITMAP_MODE_X2APIC) { - /* - * TPR reads and writes can be virtualized even if virtual interrupt - * delivery is not in use. - */ - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); - if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { - vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); - } - } -} - -void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; - u8 mode = vmx_msr_bitmap_mode(vcpu); - u8 changed = mode ^ vmx->msr_bitmap_mode; - - if (!changed) - return; - - if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) - vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); - - vmx->msr_bitmap_mode = mode; -} - -void pt_update_intercept_for_msr(struct vcpu_vmx *vmx) -{ - unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; - bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN); - u32 i; - - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_STATUS, - MSR_TYPE_RW, flag); - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_BASE, - MSR_TYPE_RW, flag); - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_MASK, - MSR_TYPE_RW, flag); - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_CR3_MATCH, - MSR_TYPE_RW, flag); - for (i = 0; i < vmx->pt_desc.addr_range; i++) { - vmx_set_intercept_for_msr(msr_bitmap, - MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag); - 
vmx_set_intercept_for_msr(msr_bitmap, - MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag); - } -} - -static bool vmx_get_enable_apicv(struct kvm *kvm) -{ - return enable_apicv; -} - -static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - void *vapic_page; - u32 vppr; - int rvi; - - if (WARN_ON_ONCE(!is_guest_mode(vcpu)) || - !nested_cpu_has_vid(get_vmcs12(vcpu)) || - WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn)) - return false; - - rvi = vmx_get_rvi(); - - vapic_page = vmx->nested.virtual_apic_map.hva; - vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); - - return ((rvi & 0xf0) > (vppr & 0xf0)); -} - -static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, - bool nested) -{ -#ifdef CONFIG_SMP - int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR; - - if (vcpu->mode == IN_GUEST_MODE) { - /* - * The vector of interrupt to be delivered to vcpu had - * been set in PIR before this function. - * - * Following cases will be reached in this block, and - * we always send a notification event in all cases as - * explained below. - * - * Case 1: vcpu keeps in non-root mode. Sending a - * notification event posts the interrupt to vcpu. - * - * Case 2: vcpu exits to root mode and is still - * runnable. PIR will be synced to vIRR before the - * next vcpu entry. Sending a notification event in - * this case has no effect, as vcpu is not in root - * mode. - * - * Case 3: vcpu exits to root mode and is blocked. - * vcpu_block() has already synced PIR to vIRR and - * never blocks vcpu if vIRR is not cleared. Therefore, - * a blocked vcpu here does not wait for any requested - * interrupts in PIR, and sending a notification event - * which has no effect is safe here. 
- */ - - apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec); - return true; - } -#endif - return false; -} - -static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, - int vector) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (is_guest_mode(vcpu) && - vector == vmx->nested.posted_intr_nv) { - /* - * If a posted intr is not recognized by hardware, - * we will accomplish it in the next vmentry. - */ - vmx->nested.pi_pending = true; - kvm_make_request(KVM_REQ_EVENT, vcpu); - /* the PIR and ON have been set by L1. */ - if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true)) - kvm_vcpu_kick(vcpu); - return 0; - } - return -1; -} -/* - * Send interrupt to vcpu via posted interrupt way. - * 1. If target vcpu is running(non-root mode), send posted interrupt - * notification to vcpu and hardware will sync PIR to vIRR atomically. - * 2. If target vcpu isn't running(root mode), kick it to pick up the - * interrupt from PIR in next vmentry. - */ -static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - int r; - - r = vmx_deliver_nested_posted_interrupt(vcpu, vector); - if (!r) - return; - - if (pi_test_and_set_pir(vector, &vmx->pi_desc)) - return; - - /* If a previous notification has sent the IPI, nothing to do. */ - if (pi_test_and_set_on(&vmx->pi_desc)) - return; - - if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) - kvm_vcpu_kick(vcpu); -} - -/* - * Set up the vmcs's constant host-state fields, i.e., host-state fields that - * will not change in the lifetime of the guest. - * Note that host-state that does change is set elsewhere. E.g., host-state - * that is set differently for each CPU is set in vmx_vcpu_load(), not here. 
- */ -void vmx_set_constant_host_state(struct vcpu_vmx *vmx) -{ - u32 low32, high32; - unsigned long tmpl; - unsigned long cr0, cr3, cr4; - - cr0 = read_cr0(); - WARN_ON(cr0 & X86_CR0_TS); - vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */ - - /* - * Save the most likely value for this task's CR3 in the VMCS. - * We can't use __get_current_cr3_fast() because we're not atomic. - */ - cr3 = __read_cr3(); - vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ - vmx->loaded_vmcs->host_state.cr3 = cr3; - - /* Save the most likely value for this task's CR4 in the VMCS. */ - cr4 = cr4_read_shadow(); - vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ - vmx->loaded_vmcs->host_state.cr4 = cr4; - - vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ -#ifdef CONFIG_X86_64 - /* - * Load null selectors, so we can avoid reloading them in - * vmx_prepare_switch_to_host(), in case userspace uses - * the null selectors too (the expected case). - */ - vmcs_write16(HOST_DS_SELECTOR, 0); - vmcs_write16(HOST_ES_SELECTOR, 0); -#else - vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ - vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ -#endif - vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ - vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ - - vmcs_writel(HOST_IDTR_BASE, host_idt_base); /* 22.2.4 */ - - vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */ - - rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); - vmcs_write32(HOST_IA32_SYSENTER_CS, low32); - rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl); - vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */ - - if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { - rdmsr(MSR_IA32_CR_PAT, low32, high32); - vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32)); - } - - if (cpu_has_load_ia32_efer()) - vmcs_write64(HOST_IA32_EFER, host_efer); -} - -void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) -{ - vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; - if (enable_ept) - 
vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; - if (is_guest_mode(&vmx->vcpu)) - vmx->vcpu.arch.cr4_guest_owned_bits &= - ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask; - vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); -} - -u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) -{ - u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; - - if (!kvm_vcpu_apicv_active(&vmx->vcpu)) - pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; - - if (!enable_vnmi) - pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS; - - if (!enable_preemption_timer) - pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; - - return pin_based_exec_ctrl; -} - -static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); - if (cpu_has_secondary_exec_ctrls()) { - if (kvm_vcpu_apicv_active(vcpu)) - secondary_exec_controls_setbit(vmx, - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); - else - secondary_exec_controls_clearbit(vmx, - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); - } - - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(vcpu); -} - -u32 vmx_exec_control(struct vcpu_vmx *vmx) -{ - u32 exec_control = vmcs_config.cpu_based_exec_ctrl; - - if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT) - exec_control &= ~CPU_BASED_MOV_DR_EXITING; - - if (!cpu_need_tpr_shadow(&vmx->vcpu)) { - exec_control &= ~CPU_BASED_TPR_SHADOW; -#ifdef CONFIG_X86_64 - exec_control |= CPU_BASED_CR8_STORE_EXITING | - CPU_BASED_CR8_LOAD_EXITING; -#endif - } - if (!enable_ept) - exec_control |= CPU_BASED_CR3_STORE_EXITING | - CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_INVLPG_EXITING; - if (kvm_mwait_in_guest(vmx->vcpu.kvm)) - exec_control &= ~(CPU_BASED_MWAIT_EXITING | - CPU_BASED_MONITOR_EXITING); - if (kvm_hlt_in_guest(vmx->vcpu.kvm)) - exec_control &= ~CPU_BASED_HLT_EXITING; - return exec_control; -} - - -static void 
vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) -{ - struct kvm_vcpu *vcpu = &vmx->vcpu; - - u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; - - if (pt_mode == PT_MODE_SYSTEM) - exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX); - if (!cpu_need_virtualize_apic_accesses(vcpu)) - exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - if (vmx->vpid == 0) - exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; - if (!enable_ept) { - exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; - enable_unrestricted_guest = 0; - } - if (!enable_unrestricted_guest) - exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; - if (kvm_pause_in_guest(vmx->vcpu.kvm)) - exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; - if (!kvm_vcpu_apicv_active(vcpu)) - exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); - exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; - - /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP, - * in vmx_set_cr4. */ - exec_control &= ~SECONDARY_EXEC_DESC; - - /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD - (handle_vmptrld). 
- We can NOT enable shadow_vmcs here because we don't have yet - a current VMCS12 - */ - exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; - - if (!enable_pml) - exec_control &= ~SECONDARY_EXEC_ENABLE_PML; - - if (vmx_xsaves_supported()) { - /* Exposing XSAVES only when XSAVE is exposed */ - bool xsaves_enabled = - guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); - - vcpu->arch.xsaves_enabled = xsaves_enabled; - - if (!xsaves_enabled) - exec_control &= ~SECONDARY_EXEC_XSAVES; - - if (nested) { - if (xsaves_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_XSAVES; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_XSAVES; - } - } - - if (vmx_rdtscp_supported()) { - bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP); - if (!rdtscp_enabled) - exec_control &= ~SECONDARY_EXEC_RDTSCP; - - if (nested) { - if (rdtscp_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_RDTSCP; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_RDTSCP; - } - } - - if (vmx_invpcid_supported()) { - /* Exposing INVPCID only when PCID is exposed */ - bool invpcid_enabled = - guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) && - guest_cpuid_has(vcpu, X86_FEATURE_PCID); - - if (!invpcid_enabled) { - exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; - guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID); - } - - if (nested) { - if (invpcid_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_ENABLE_INVPCID; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_ENABLE_INVPCID; - } - } - - if (vmx_rdrand_supported()) { - bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND); - if (rdrand_enabled) - exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING; - - if (nested) { - if (rdrand_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_RDRAND_EXITING; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_RDRAND_EXITING; - } - } - - if 
(vmx_rdseed_supported()) { - bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED); - if (rdseed_enabled) - exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING; - - if (nested) { - if (rdseed_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_RDSEED_EXITING; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_RDSEED_EXITING; - } - } - - if (vmx_waitpkg_supported()) { - bool waitpkg_enabled = - guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG); - - if (!waitpkg_enabled) - exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; - - if (nested) { - if (waitpkg_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; - } - } - - vmx->secondary_exec_control = exec_control; -} - -static void ept_set_mmio_spte_mask(void) -{ - /* - * EPT Misconfigurations can be generated if the value of bits 2:0 - * of an EPT paging-structure entry is 110b (write/execute). - */ - kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, - VMX_EPT_MISCONFIG_WX_VALUE, 0); -} - -#define VMX_XSS_EXIT_BITMAP 0 - -/* - * Noting that the initialization of Guest-state Area of VMCS is in - * vmx_vcpu_reset(). 
- */ -static void init_vmcs(struct vcpu_vmx *vmx) -{ - if (nested) - nested_vmx_set_vmcs_shadowing_bitmap(); - - if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); - - vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ - - /* Control */ - pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); - - exec_controls_set(vmx, vmx_exec_control(vmx)); - - if (cpu_has_secondary_exec_ctrls()) { - vmx_compute_secondary_exec_control(vmx); - secondary_exec_controls_set(vmx, vmx->secondary_exec_control); - } - - if (kvm_vcpu_apicv_active(&vmx->vcpu)) { - vmcs_write64(EOI_EXIT_BITMAP0, 0); - vmcs_write64(EOI_EXIT_BITMAP1, 0); - vmcs_write64(EOI_EXIT_BITMAP2, 0); - vmcs_write64(EOI_EXIT_BITMAP3, 0); - - vmcs_write16(GUEST_INTR_STATUS, 0); - - vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); - vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); - } - - if (!kvm_pause_in_guest(vmx->vcpu.kvm)) { - vmcs_write32(PLE_GAP, ple_gap); - vmx->ple_window = ple_window; - vmx->ple_window_dirty = true; - } - - vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); - vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); - vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ - - vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ - vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ - vmx_set_constant_host_state(vmx); - vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */ - vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ - - if (cpu_has_vmx_vmfunc()) - vmcs_write64(VM_FUNCTION_CONTROL, 0); - - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); - vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); - vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); - - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) - vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); - - vm_exit_controls_set(vmx, vmx_vmexit_ctrl()); - - /* 22.2.1, 20.8.1 */ - vm_entry_controls_set(vmx, 
vmx_vmentry_ctrl()); - - vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; - vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); - - set_cr4_guest_host_mask(vmx); - - if (vmx->vpid != 0) - vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); - - if (vmx_xsaves_supported()) - vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); - - if (enable_pml) { - vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); - } - - if (cpu_has_vmx_encls_vmexit()) - vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); - - if (pt_mode == PT_MODE_HOST_GUEST) { - memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc)); - /* Bit[6~0] are forced to 1, writes are ignored. */ - vmx->pt_desc.guest.output_mask = 0x7F; - vmcs_write64(GUEST_IA32_RTIT_CTL, 0); - } -} - -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct msr_data apic_base_msr; - u64 cr0; - - vmx->rmode.vm86_active = 0; - vmx->spec_ctrl = 0; - - vmx->msr_ia32_umwait_control = 0; - - vcpu->arch.microcode_version = 0x100000000ULL; - vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); - vmx->hv_deadline_tsc = -1; - kvm_set_cr8(vcpu, 0); - - if (!init_event) { - apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_reset_bsp(vcpu)) - apic_base_msr.data |= MSR_IA32_APICBASE_BSP; - apic_base_msr.host_initiated = true; - kvm_set_apic_base(vcpu, &apic_base_msr); - } - - vmx_segment_cache_clear(vmx); - - seg_setup(VCPU_SREG_CS); - vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - vmcs_writel(GUEST_CS_BASE, 0xffff0000ul); - - seg_setup(VCPU_SREG_DS); - seg_setup(VCPU_SREG_ES); - seg_setup(VCPU_SREG_FS); - seg_setup(VCPU_SREG_GS); - seg_setup(VCPU_SREG_SS); - - vmcs_write16(GUEST_TR_SELECTOR, 0); - vmcs_writel(GUEST_TR_BASE, 0); - vmcs_write32(GUEST_TR_LIMIT, 0xffff); - vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); - - vmcs_write16(GUEST_LDTR_SELECTOR, 0); - vmcs_writel(GUEST_LDTR_BASE, 0); - vmcs_write32(GUEST_LDTR_LIMIT, 
0xffff); - vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); - - if (!init_event) { - vmcs_write32(GUEST_SYSENTER_CS, 0); - vmcs_writel(GUEST_SYSENTER_ESP, 0); - vmcs_writel(GUEST_SYSENTER_EIP, 0); - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); - } - - kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); - kvm_rip_write(vcpu, 0xfff0); - - vmcs_writel(GUEST_GDTR_BASE, 0); - vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); - - vmcs_writel(GUEST_IDTR_BASE, 0); - vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); - - vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); - vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); - if (kvm_mpx_supported()) - vmcs_write64(GUEST_BNDCFGS, 0); - - setup_msrs(vmx); - - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ - - if (cpu_has_vmx_tpr_shadow() && !init_event) { - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); - if (cpu_need_tpr_shadow(vcpu)) - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, - __pa(vcpu->arch.apic->regs)); - vmcs_write32(TPR_THRESHOLD, 0); - } - - kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - - cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; - vmx->vcpu.arch.cr0 = cr0; - vmx_set_cr0(vcpu, cr0); /* enter rmode */ - vmx_set_cr4(vcpu, 0); - vmx_set_efer(vcpu, 0); - - update_exception_bitmap(vcpu); - - vpid_sync_context(vmx->vpid); - if (init_event) - vmx_clear_hlt(vcpu); -} - -static void enable_irq_window(struct kvm_vcpu *vcpu) -{ - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); -} - -static void enable_nmi_window(struct kvm_vcpu *vcpu) -{ - if (!enable_vnmi || - vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { - enable_irq_window(vcpu); - return; - } - - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); -} - -static void vmx_inject_irq(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - uint32_t intr; - int irq = vcpu->arch.interrupt.nr; - - trace_kvm_inj_virq(irq); - - ++vcpu->stat.irq_injections; - if (vmx->rmode.vm86_active) { - int inc_eip = 0; 
- if (vcpu->arch.interrupt.soft) - inc_eip = vcpu->arch.event_exit_inst_len; - kvm_inject_realmode_interrupt(vcpu, irq, inc_eip); - return; - } - intr = irq | INTR_INFO_VALID_MASK; - if (vcpu->arch.interrupt.soft) { - intr |= INTR_TYPE_SOFT_INTR; - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, - vmx->vcpu.arch.event_exit_inst_len); - } else - intr |= INTR_TYPE_EXT_INTR; - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); - - vmx_clear_hlt(vcpu); -} - -static void vmx_inject_nmi(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!enable_vnmi) { - /* - * Tracking the NMI-blocked state in software is built upon - * finding the next open IRQ window. This, in turn, depends on - * well-behaving guests: They have to keep IRQs disabled at - * least as long as the NMI handler runs. Otherwise we may - * cause NMI nesting, maybe breaking the guest. But as this is - * highly unlikely, we can live with the residual risk. - */ - vmx->loaded_vmcs->soft_vnmi_blocked = 1; - vmx->loaded_vmcs->vnmi_blocked_time = 0; - } - - ++vcpu->stat.nmi_injections; - vmx->loaded_vmcs->nmi_known_unmasked = false; - - if (vmx->rmode.vm86_active) { - kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0); - return; - } - - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); - - vmx_clear_hlt(vcpu); -} - -bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - bool masked; - - if (!enable_vnmi) - return vmx->loaded_vmcs->soft_vnmi_blocked; - if (vmx->loaded_vmcs->nmi_known_unmasked) - return false; - masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; - vmx->loaded_vmcs->nmi_known_unmasked = !masked; - return masked; -} - -void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!enable_vnmi) { - if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) { - vmx->loaded_vmcs->soft_vnmi_blocked = masked; - vmx->loaded_vmcs->vnmi_blocked_time = 0; - } - 
} else { - vmx->loaded_vmcs->nmi_known_unmasked = !masked; - if (masked) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - else - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - } -} - -static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) -{ - if (to_vmx(vcpu)->nested.nested_run_pending) - return 0; - - if (!enable_vnmi && - to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) - return 0; - - return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI - | GUEST_INTR_STATE_NMI)); -} - -static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) -{ - return (!to_vmx(vcpu)->nested.nested_run_pending && - vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); -} - -static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) -{ - int ret; - - if (enable_unrestricted_guest) - return 0; - - ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, - PAGE_SIZE * 3); - if (ret) - return ret; - to_kvm_vmx(kvm)->tss_addr = addr; - return init_rmode_tss(kvm); -} - -static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) -{ - to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr; - return 0; -} - -static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) -{ - switch (vec) { - case BP_VECTOR: - /* - * Update instruction length as we may reinject the exception - * from user space while in guest debugging mode. 
- */ - to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = - vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) - return false; - /* fall through */ - case DB_VECTOR: - if (vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - return false; - /* fall through */ - case DE_VECTOR: - case OF_VECTOR: - case BR_VECTOR: - case UD_VECTOR: - case DF_VECTOR: - case SS_VECTOR: - case GP_VECTOR: - case MF_VECTOR: - return true; - break; - } - return false; -} - -static int handle_rmode_exception(struct kvm_vcpu *vcpu, - int vec, u32 err_code) -{ - /* - * Instruction with address size override prefix opcode 0x67 - * Cause the #SS fault with 0 error code in VM86 mode. - */ - if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { - if (kvm_emulate_instruction(vcpu, 0)) { - if (vcpu->arch.halt_request) { - vcpu->arch.halt_request = 0; - return kvm_vcpu_halt(vcpu); - } - return 1; - } - return 0; - } - - /* - * Forward all other exceptions that are valid in real mode. - * FIXME: Breaks guest debugging in real mode, needs to be fixed with - * the required debugging infrastructure rework. - */ - kvm_queue_exception(vcpu, vec); - return 1; -} - -/* - * Trigger machine check on the host. We assume all the MSRs are already set up - * by the CPU and that we still run on the same CPU as the MCE occurred on. - * We pass a fake environment to the machine check handler because we want - * the guest to be always treated like user space, no matter what context - * it used internally. 
- */ -static void kvm_machine_check(void) -{ -#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) - struct pt_regs regs = { - .cs = 3, /* Fake ring 3 no matter what the guest ran on */ - .flags = X86_EFLAGS_IF, - }; - - do_machine_check(&regs, 0); -#endif -} - -static int handle_machine_check(struct kvm_vcpu *vcpu) -{ - /* handled by vmx_vcpu_run() */ - return 1; -} - -static int handle_exception_nmi(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_run *kvm_run = vcpu->run; - u32 intr_info, ex_no, error_code; - unsigned long cr2, rip, dr6; - u32 vect_info; - - vect_info = vmx->idt_vectoring_info; - intr_info = vmx->exit_intr_info; - - if (is_machine_check(intr_info) || is_nmi(intr_info)) - return 1; /* handled by handle_exception_nmi_irqoff() */ - - if (is_invalid_opcode(intr_info)) - return handle_ud(vcpu); - - error_code = 0; - if (intr_info & INTR_INFO_DELIVER_CODE_MASK) - error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); - - if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { - WARN_ON_ONCE(!enable_vmware_backdoor); - - /* - * VMware backdoor emulation on #GP interception only handles - * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero - * error code on #GP. - */ - if (error_code) { - kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); - return 1; - } - return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); - } - - /* - * The #PF with PFEC.RSVD = 1 indicates the guest is accessing - * MMIO, it is better to report an internal error. - * See the comments in vmx_handle_exit. 
- */ - if ((vect_info & VECTORING_INFO_VALID_MASK) && - !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; - vcpu->run->internal.ndata = 3; - vcpu->run->internal.data[0] = vect_info; - vcpu->run->internal.data[1] = intr_info; - vcpu->run->internal.data[2] = error_code; - return 0; - } - - if (is_page_fault(intr_info)) { - cr2 = vmcs_readl(EXIT_QUALIFICATION); - /* EPT won't cause page fault directly */ - WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept); - return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0); - } - - ex_no = intr_info & INTR_INFO_VECTOR_MASK; - - if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no)) - return handle_rmode_exception(vcpu, ex_no, error_code); - - switch (ex_no) { - case AC_VECTOR: - kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); - return 1; - case DB_VECTOR: - dr6 = vmcs_readl(EXIT_QUALIFICATION); - if (!(vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { - vcpu->arch.dr6 &= ~DR_TRAP_BITS; - vcpu->arch.dr6 |= dr6 | DR6_RTM; - if (is_icebp(intr_info)) - WARN_ON(!skip_emulated_instruction(vcpu)); - - kvm_queue_exception(vcpu, DB_VECTOR); - return 1; - } - kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; - kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); - /* fall through */ - case BP_VECTOR: - /* - * Update instruction length as we may reinject #BP from - * user space while in guest debugging mode. Reading it for - * #DB as well causes no harm, it is not used in that case. 
- */ - vmx->vcpu.arch.event_exit_inst_len = - vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - kvm_run->exit_reason = KVM_EXIT_DEBUG; - rip = kvm_rip_read(vcpu); - kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; - kvm_run->debug.arch.exception = ex_no; - break; - default: - kvm_run->exit_reason = KVM_EXIT_EXCEPTION; - kvm_run->ex.exception = ex_no; - kvm_run->ex.error_code = error_code; - break; - } - return 0; -} - -static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu) -{ - ++vcpu->stat.irq_exits; - return 1; -} - -static int handle_triple_fault(struct kvm_vcpu *vcpu) -{ - vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; - vcpu->mmio_needed = 0; - return 0; -} - -static int handle_io(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification; - int size, in, string; - unsigned port; - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - string = (exit_qualification & 16) != 0; - - ++vcpu->stat.io_exits; - - if (string) - return kvm_emulate_instruction(vcpu, 0); - - port = exit_qualification >> 16; - size = (exit_qualification & 7) + 1; - in = (exit_qualification & 8) != 0; - - return kvm_fast_pio(vcpu, size, port, in); -} - -static void -vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) -{ - /* - * Patch in the VMCALL instruction: - */ - hypercall[0] = 0x0f; - hypercall[1] = 0x01; - hypercall[2] = 0xc1; -} - -/* called to set cr0 as appropriate for a mov-to-cr0 exit. */ -static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) -{ - if (is_guest_mode(vcpu)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - unsigned long orig_val = val; - - /* - * We get here when L2 changed cr0 in a way that did not change - * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), - * but did change L0 shadowed bits. So we first calculate the - * effective cr0 value that L1 would like to write into the - * hardware. It consists of the L2-owned bits from the new - * value combined with the L1-owned bits from L1's guest_cr0. 
- */ - val = (val & ~vmcs12->cr0_guest_host_mask) | - (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); - - if (!nested_guest_cr0_valid(vcpu, val)) - return 1; - - if (kvm_set_cr0(vcpu, val)) - return 1; - vmcs_writel(CR0_READ_SHADOW, orig_val); - return 0; - } else { - if (to_vmx(vcpu)->nested.vmxon && - !nested_host_cr0_valid(vcpu, val)) - return 1; - - return kvm_set_cr0(vcpu, val); - } -} - -static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) -{ - if (is_guest_mode(vcpu)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - unsigned long orig_val = val; - - /* analogously to handle_set_cr0 */ - val = (val & ~vmcs12->cr4_guest_host_mask) | - (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask); - if (kvm_set_cr4(vcpu, val)) - return 1; - vmcs_writel(CR4_READ_SHADOW, orig_val); - return 0; - } else - return kvm_set_cr4(vcpu, val); -} - -static int handle_desc(struct kvm_vcpu *vcpu) -{ - WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); - return kvm_emulate_instruction(vcpu, 0); -} - -static int handle_cr(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification, val; - int cr; - int reg; - int err; - int ret; - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - cr = exit_qualification & 15; - reg = (exit_qualification >> 8) & 15; - switch ((exit_qualification >> 4) & 3) { - case 0: /* mov to cr */ - val = kvm_register_readl(vcpu, reg); - trace_kvm_cr_write(cr, val); - switch (cr) { - case 0: - err = handle_set_cr0(vcpu, val); - return kvm_complete_insn_gp(vcpu, err); - case 3: - WARN_ON_ONCE(enable_unrestricted_guest); - err = kvm_set_cr3(vcpu, val); - return kvm_complete_insn_gp(vcpu, err); - case 4: - err = handle_set_cr4(vcpu, val); - return kvm_complete_insn_gp(vcpu, err); - case 8: { - u8 cr8_prev = kvm_get_cr8(vcpu); - u8 cr8 = (u8)val; - err = kvm_set_cr8(vcpu, cr8); - ret = kvm_complete_insn_gp(vcpu, err); - if (lapic_in_kernel(vcpu)) - return ret; - if (cr8_prev <= cr8) - return ret; - /* - * TODO: we might be squashing a - * 
KVM_GUESTDBG_SINGLESTEP-triggered - * KVM_EXIT_DEBUG here. - */ - vcpu->run->exit_reason = KVM_EXIT_SET_TPR; - return 0; - } - } - break; - case 2: /* clts */ - WARN_ONCE(1, "Guest should always own CR0.TS"); - vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); - trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); - return kvm_skip_emulated_instruction(vcpu); - case 1: /*mov from cr*/ - switch (cr) { - case 3: - WARN_ON_ONCE(enable_unrestricted_guest); - val = kvm_read_cr3(vcpu); - kvm_register_write(vcpu, reg, val); - trace_kvm_cr_read(cr, val); - return kvm_skip_emulated_instruction(vcpu); - case 8: - val = kvm_get_cr8(vcpu); - kvm_register_write(vcpu, reg, val); - trace_kvm_cr_read(cr, val); - return kvm_skip_emulated_instruction(vcpu); - } - break; - case 3: /* lmsw */ - val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; - trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); - kvm_lmsw(vcpu, val); - - return kvm_skip_emulated_instruction(vcpu); - default: - break; - } - vcpu->run->exit_reason = 0; - vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n", - (int)(exit_qualification >> 4) & 3, cr); - return 0; -} - -static int handle_dr(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification; - int dr, dr7, reg; - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - dr = exit_qualification & DEBUG_REG_ACCESS_NUM; - - /* First, if DR does not exist, trigger UD */ - if (!kvm_require_dr(vcpu, dr)) - return 1; - - /* Do not handle if the CPL > 0, will trigger GP on re-entry */ - if (!kvm_require_cpl(vcpu, 0)) - return 1; - dr7 = vmcs_readl(GUEST_DR7); - if (dr7 & DR7_GD) { - /* - * As the vm-exit takes precedence over the debug trap, we - * need to emulate the latter, either for the host or the - * guest debugging itself. 
- */ - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { - vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; - vcpu->run->debug.arch.dr7 = dr7; - vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); - vcpu->run->debug.arch.exception = DB_VECTOR; - vcpu->run->exit_reason = KVM_EXIT_DEBUG; - return 0; - } else { - vcpu->arch.dr6 &= ~DR_TRAP_BITS; - vcpu->arch.dr6 |= DR6_BD | DR6_RTM; - kvm_queue_exception(vcpu, DB_VECTOR); - return 1; - } - } - - if (vcpu->guest_debug == 0) { - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); - - /* - * No more DR vmexits; force a reload of the debug registers - * and reenter on this instruction. The next vmexit will - * retrieve the full state of the debug registers. - */ - vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; - return 1; - } - - reg = DEBUG_REG_ACCESS_REG(exit_qualification); - if (exit_qualification & TYPE_MOV_FROM_DR) { - unsigned long val; - - if (kvm_get_dr(vcpu, dr, &val)) - return 1; - kvm_register_write(vcpu, reg, val); - } else - if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg))) - return 1; - - return kvm_skip_emulated_instruction(vcpu); -} - -static u64 vmx_get_dr6(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.dr6; -} - -static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) -{ -} - -static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) -{ - get_debugreg(vcpu->arch.db[0], 0); - get_debugreg(vcpu->arch.db[1], 1); - get_debugreg(vcpu->arch.db[2], 2); - get_debugreg(vcpu->arch.db[3], 3); - get_debugreg(vcpu->arch.dr6, 6); - vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); - - vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); -} - -static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) -{ - vmcs_writel(GUEST_DR7, val); -} - -static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) -{ - kvm_apic_update_ppr(vcpu); - return 1; -} - -static int handle_interrupt_window(struct kvm_vcpu *vcpu) -{ - 
exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); - - kvm_make_request(KVM_REQ_EVENT, vcpu); - - ++vcpu->stat.irq_window_exits; - return 1; -} - -static int handle_vmcall(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_hypercall(vcpu); -} - -static int handle_invd(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_instruction(vcpu, 0); -} - -static int handle_invlpg(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - kvm_mmu_invlpg(vcpu, exit_qualification); - return kvm_skip_emulated_instruction(vcpu); -} - -static int handle_rdpmc(struct kvm_vcpu *vcpu) -{ - int err; - - err = kvm_rdpmc(vcpu); - return kvm_complete_insn_gp(vcpu, err); -} - -static int handle_wbinvd(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_wbinvd(vcpu); -} - -static int handle_xsetbv(struct kvm_vcpu *vcpu) -{ - u64 new_bv = kvm_read_edx_eax(vcpu); - u32 index = kvm_rcx_read(vcpu); - - if (kvm_set_xcr(vcpu, index, new_bv) == 0) - return kvm_skip_emulated_instruction(vcpu); - return 1; -} - -static int handle_apic_access(struct kvm_vcpu *vcpu) -{ - if (likely(fasteoi)) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - int access_type, offset; - - access_type = exit_qualification & APIC_ACCESS_TYPE; - offset = exit_qualification & APIC_ACCESS_OFFSET; - /* - * Sane guest uses MOV to write EOI, with written value - * not cared. So make a short-circuit here by avoiding - * heavy instruction emulation. 
- */ - if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) && - (offset == APIC_EOI)) { - kvm_lapic_set_eoi(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - } - return kvm_emulate_instruction(vcpu, 0); -} - -static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - int vector = exit_qualification & 0xff; - - /* EOI-induced VM exit is trap-like and thus no need to adjust IP */ - kvm_apic_set_eoi_accelerated(vcpu, vector); - return 1; -} - -static int handle_apic_write(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - u32 offset = exit_qualification & 0xfff; - - /* APIC-write VM exit is trap-like and thus no need to adjust IP */ - kvm_apic_write_nodecode(vcpu, offset); - return 1; -} - -static int handle_task_switch(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long exit_qualification; - bool has_error_code = false; - u32 error_code = 0; - u16 tss_selector; - int reason, type, idt_v, idt_index; - - idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); - idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK); - type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - reason = (u32)exit_qualification >> 30; - if (reason == TASK_SWITCH_GATE && idt_v) { - switch (type) { - case INTR_TYPE_NMI_INTR: - vcpu->arch.nmi_injected = false; - vmx_set_nmi_mask(vcpu, true); - break; - case INTR_TYPE_EXT_INTR: - case INTR_TYPE_SOFT_INTR: - kvm_clear_interrupt_queue(vcpu); - break; - case INTR_TYPE_HARD_EXCEPTION: - if (vmx->idt_vectoring_info & - VECTORING_INFO_DELIVER_CODE_MASK) { - has_error_code = true; - error_code = - vmcs_read32(IDT_VECTORING_ERROR_CODE); - } - /* fall through */ - case INTR_TYPE_SOFT_EXCEPTION: - kvm_clear_exception_queue(vcpu); - break; - default: - break; - } - } - tss_selector = exit_qualification; - - if (!idt_v || 
(type != INTR_TYPE_HARD_EXCEPTION && - type != INTR_TYPE_EXT_INTR && - type != INTR_TYPE_NMI_INTR)) - WARN_ON(!skip_emulated_instruction(vcpu)); - - /* - * TODO: What about debug traps on tss switch? - * Are we supposed to inject them and update dr6? - */ - return kvm_task_switch(vcpu, tss_selector, - type == INTR_TYPE_SOFT_INTR ? idt_index : -1, - reason, has_error_code, error_code); -} - -static int handle_ept_violation(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification; - gpa_t gpa; - u64 error_code; - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - /* - * EPT violation happened while executing iret from NMI, - * "blocked by NMI" bit has to be set before next VM entry. - * There are errata that may cause this bit to not be set: - * AAK134, BY25. - */ - if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && - enable_vnmi && - (exit_qualification & INTR_INFO_UNBLOCK_NMI)) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); - - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - trace_kvm_page_fault(gpa, exit_qualification); - - /* Is it a read fault? */ - error_code = (exit_qualification & EPT_VIOLATION_ACC_READ) - ? PFERR_USER_MASK : 0; - /* Is it a write fault? */ - error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE) - ? PFERR_WRITE_MASK : 0; - /* Is it a fetch fault? */ - error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR) - ? PFERR_FETCH_MASK : 0; - /* ept page table entry is present? */ - error_code |= (exit_qualification & - (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE | - EPT_VIOLATION_EXECUTABLE)) - ? PFERR_PRESENT_MASK : 0; - - error_code |= (exit_qualification & 0x100) != 0 ? 
- PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; - - vcpu->arch.exit_qualification = exit_qualification; - return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); -} - -static int handle_ept_misconfig(struct kvm_vcpu *vcpu) -{ - gpa_t gpa; - - /* - * A nested guest cannot optimize MMIO vmexits, because we have an - * nGPA here instead of the required GPA. - */ - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - if (!is_guest_mode(vcpu) && - !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { - trace_kvm_fast_mmio(gpa); - return kvm_skip_emulated_instruction(vcpu); - } - - return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); -} - -static int handle_nmi_window(struct kvm_vcpu *vcpu) -{ - WARN_ON_ONCE(!enable_vnmi); - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); - ++vcpu->stat.nmi_window_exits; - kvm_make_request(KVM_REQ_EVENT, vcpu); - - return 1; -} - -static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - bool intr_window_requested; - unsigned count = 130; - - /* - * We should never reach the point where we are emulating L2 - * due to invalid guest state as that means we incorrectly - * allowed a nested VMEntry with an invalid vmcs12. 
- */ - WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); - - intr_window_requested = exec_controls_get(vmx) & - CPU_BASED_INTR_WINDOW_EXITING; - - while (vmx->emulation_required && count-- != 0) { - if (intr_window_requested && vmx_interrupt_allowed(vcpu)) - return handle_interrupt_window(&vmx->vcpu); - - if (kvm_test_request(KVM_REQ_EVENT, vcpu)) - return 1; - - if (!kvm_emulate_instruction(vcpu, 0)) - return 0; - - if (vmx->emulation_required && !vmx->rmode.vm86_active && - vcpu->arch.exception.pending) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } - - if (vcpu->arch.halt_request) { - vcpu->arch.halt_request = 0; - return kvm_vcpu_halt(vcpu); - } - - /* - * Note, return 1 and not 0, vcpu_run() is responsible for - * morphing the pending signal into the proper return code. - */ - if (signal_pending(current)) - return 1; - - if (need_resched()) - schedule(); - } - - return 1; -} - -static void grow_ple_window(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned int old = vmx->ple_window; - - vmx->ple_window = __grow_ple_window(old, ple_window, - ple_window_grow, - ple_window_max); - - if (vmx->ple_window != old) { - vmx->ple_window_dirty = true; - trace_kvm_ple_window_update(vcpu->vcpu_id, - vmx->ple_window, old); - } -} - -static void shrink_ple_window(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned int old = vmx->ple_window; - - vmx->ple_window = __shrink_ple_window(old, ple_window, - ple_window_shrink, - ple_window); - - if (vmx->ple_window != old) { - vmx->ple_window_dirty = true; - trace_kvm_ple_window_update(vcpu->vcpu_id, - vmx->ple_window, old); - } -} - -/* - * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. 
- */ -static void wakeup_handler(void) -{ - struct kvm_vcpu *vcpu; - int cpu = smp_processor_id(); - - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); - list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), - blocked_vcpu_list) { - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - - if (pi_test_on(pi_desc) == 1) - kvm_vcpu_kick(vcpu); - } - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); -} - -static void vmx_enable_tdp(void) -{ - kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, - enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull, - enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull, - 0ull, VMX_EPT_EXECUTABLE_MASK, - cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, - VMX_EPT_RWX_MASK, 0ull); - - ept_set_mmio_spte_mask(); - kvm_enable_tdp(); -} - -/* - * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE - * exiting, so only get here on cpu with PAUSE-Loop-Exiting. - */ -static int handle_pause(struct kvm_vcpu *vcpu) -{ - if (!kvm_pause_in_guest(vcpu->kvm)) - grow_ple_window(vcpu); - - /* - * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting" - * VM-execution control is ignored if CPL > 0. OTOH, KVM - * never set PAUSE_EXITING and just set PLE if supported, - * so the vcpu must be CPL=0 if it gets a PAUSE exit. 
- */ - kvm_vcpu_on_spin(vcpu, true); - return kvm_skip_emulated_instruction(vcpu); -} - -static int handle_nop(struct kvm_vcpu *vcpu) -{ - return kvm_skip_emulated_instruction(vcpu); -} - -static int handle_mwait(struct kvm_vcpu *vcpu) -{ - printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); - return handle_nop(vcpu); -} - -static int handle_invalid_op(struct kvm_vcpu *vcpu) -{ - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; -} - -static int handle_monitor_trap(struct kvm_vcpu *vcpu) -{ - return 1; -} - -static int handle_monitor(struct kvm_vcpu *vcpu) -{ - printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); - return handle_nop(vcpu); -} - -static int handle_invpcid(struct kvm_vcpu *vcpu) -{ - u32 vmx_instruction_info; - unsigned long type; - bool pcid_enabled; - gva_t gva; - struct x86_exception e; - unsigned i; - unsigned long roots_to_free = 0; - struct { - u64 pcid; - u64 gla; - } operand; - - if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } - - vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); - - if (type > 3) { - kvm_inject_gp(vcpu, 0); - return 1; - } - - /* According to the Intel instruction reference, the memory operand - * is read even if it isn't needed (e.g., for type==all) - */ - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), - vmx_instruction_info, false, - sizeof(operand), &gva)) - return 1; - - if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { - kvm_inject_page_fault(vcpu, &e); - return 1; - } - - if (operand.pcid >> 12 != 0) { - kvm_inject_gp(vcpu, 0); - return 1; - } - - pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); - - switch (type) { - case INVPCID_TYPE_INDIV_ADDR: - if ((!pcid_enabled && (operand.pcid != 0)) || - is_noncanonical_address(operand.gla, vcpu)) { - kvm_inject_gp(vcpu, 0); - return 1; - } - 
kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid); - return kvm_skip_emulated_instruction(vcpu); - - case INVPCID_TYPE_SINGLE_CTXT: - if (!pcid_enabled && (operand.pcid != 0)) { - kvm_inject_gp(vcpu, 0); - return 1; - } - - if (kvm_get_active_pcid(vcpu) == operand.pcid) { - kvm_mmu_sync_roots(vcpu); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - } - - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3) - == operand.pcid) - roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); - - kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free); - /* - * If neither the current cr3 nor any of the prev_roots use the - * given PCID, then nothing needs to be done here because a - * resync will happen anyway before switching to any other CR3. - */ - - return kvm_skip_emulated_instruction(vcpu); - - case INVPCID_TYPE_ALL_NON_GLOBAL: - /* - * Currently, KVM doesn't mark global entries in the shadow - * page tables, so a non-global flush just degenerates to a - * global flush. If needed, we could optimize this later by - * keeping track of global entries in shadow page tables. - */ - - /* fall-through */ - case INVPCID_TYPE_ALL_INCL_GLOBAL: - kvm_mmu_unload(vcpu); - return kvm_skip_emulated_instruction(vcpu); - - default: - BUG(); /* We have already checked above that type <= 3 */ - } -} - -static int handle_pml_full(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification; - - trace_kvm_pml_full(vcpu->vcpu_id); - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - /* - * PML buffer FULL happened while executing iret from NMI, - * "blocked by NMI" bit has to be set before next VM entry. - */ - if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && - enable_vnmi && - (exit_qualification & INTR_INFO_UNBLOCK_NMI)) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - - /* - * PML buffer already flushed at beginning of VMEXIT. 
Nothing to do - * here.., and there's no userspace involvement needed for PML. - */ - return 1; -} - -static int handle_preemption_timer(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!vmx->req_immediate_exit && - !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) - kvm_lapic_expired_hv_timer(vcpu); - - return 1; -} - -/* - * When nested=0, all VMX instruction VM Exits filter here. The handlers - * are overwritten by nested_vmx_setup() when nested=1. - */ -static int handle_vmx_instruction(struct kvm_vcpu *vcpu) -{ - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; -} - -static int handle_encls(struct kvm_vcpu *vcpu) -{ - /* - * SGX virtualization is not yet supported. There is no software - * enable bit for SGX, so we have to trap ENCLS and inject a #UD - * to prevent the guest from executing ENCLS. - */ - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; -} - -/* - * The exit handlers return 1 if the exit was handled fully and guest execution - * may resume. Otherwise they set the kvm_run parameter to indicate what needs - * to be done to userspace and return 0. 
- */ -static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { - [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi, - [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, - [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, - [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, - [EXIT_REASON_IO_INSTRUCTION] = handle_io, - [EXIT_REASON_CR_ACCESS] = handle_cr, - [EXIT_REASON_DR_ACCESS] = handle_dr, - [EXIT_REASON_CPUID] = kvm_emulate_cpuid, - [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr, - [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr, - [EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window, - [EXIT_REASON_HLT] = kvm_emulate_halt, - [EXIT_REASON_INVD] = handle_invd, - [EXIT_REASON_INVLPG] = handle_invlpg, - [EXIT_REASON_RDPMC] = handle_rdpmc, - [EXIT_REASON_VMCALL] = handle_vmcall, - [EXIT_REASON_VMCLEAR] = handle_vmx_instruction, - [EXIT_REASON_VMLAUNCH] = handle_vmx_instruction, - [EXIT_REASON_VMPTRLD] = handle_vmx_instruction, - [EXIT_REASON_VMPTRST] = handle_vmx_instruction, - [EXIT_REASON_VMREAD] = handle_vmx_instruction, - [EXIT_REASON_VMRESUME] = handle_vmx_instruction, - [EXIT_REASON_VMWRITE] = handle_vmx_instruction, - [EXIT_REASON_VMOFF] = handle_vmx_instruction, - [EXIT_REASON_VMON] = handle_vmx_instruction, - [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, - [EXIT_REASON_APIC_ACCESS] = handle_apic_access, - [EXIT_REASON_APIC_WRITE] = handle_apic_write, - [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced, - [EXIT_REASON_WBINVD] = handle_wbinvd, - [EXIT_REASON_XSETBV] = handle_xsetbv, - [EXIT_REASON_TASK_SWITCH] = handle_task_switch, - [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, - [EXIT_REASON_GDTR_IDTR] = handle_desc, - [EXIT_REASON_LDTR_TR] = handle_desc, - [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, - [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, - [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, - [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, - [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap, 
- [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, - [EXIT_REASON_INVEPT] = handle_vmx_instruction, - [EXIT_REASON_INVVPID] = handle_vmx_instruction, - [EXIT_REASON_RDRAND] = handle_invalid_op, - [EXIT_REASON_RDSEED] = handle_invalid_op, - [EXIT_REASON_PML_FULL] = handle_pml_full, - [EXIT_REASON_INVPCID] = handle_invpcid, - [EXIT_REASON_VMFUNC] = handle_vmx_instruction, - [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, - [EXIT_REASON_ENCLS] = handle_encls, -}; - -static const int kvm_vmx_max_exit_handlers = - ARRAY_SIZE(kvm_vmx_exit_handlers); - -static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) -{ - *info1 = vmcs_readl(EXIT_QUALIFICATION); - *info2 = vmcs_read32(VM_EXIT_INTR_INFO); -} - -static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) -{ - if (vmx->pml_pg) { - __free_page(vmx->pml_pg); - vmx->pml_pg = NULL; - } -} - -static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u64 *pml_buf; - u16 pml_idx; - - pml_idx = vmcs_read16(GUEST_PML_INDEX); - - /* Do nothing if PML buffer is empty */ - if (pml_idx == (PML_ENTITY_NUM - 1)) - return; - - /* PML index always points to next available PML buffer entity */ - if (pml_idx >= PML_ENTITY_NUM) - pml_idx = 0; - else - pml_idx++; - - pml_buf = page_address(vmx->pml_pg); - for (; pml_idx < PML_ENTITY_NUM; pml_idx++) { - u64 gpa; - - gpa = pml_buf[pml_idx]; - WARN_ON(gpa & (PAGE_SIZE - 1)); - kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); - } - - /* reset PML index */ - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); -} - -/* - * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap. - * Called before reporting dirty_bitmap to userspace. 
- */ -static void kvm_flush_pml_buffers(struct kvm *kvm) -{ - int i; - struct kvm_vcpu *vcpu; - /* - * We only need to kick vcpu out of guest mode here, as PML buffer - * is flushed at beginning of all VMEXITs, and it's obvious that only - * vcpus running in guest are possible to have unflushed GPAs in PML - * buffer. - */ - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vcpu_kick(vcpu); -} - -static void vmx_dump_sel(char *name, uint32_t sel) -{ - pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", - name, vmcs_read16(sel), - vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), - vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), - vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); -} - -static void vmx_dump_dtsel(char *name, uint32_t limit) -{ - pr_err("%s limit=0x%08x, base=0x%016lx\n", - name, vmcs_read32(limit), - vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); -} - -void dump_vmcs(void) -{ - u32 vmentry_ctl, vmexit_ctl; - u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control; - unsigned long cr4; - u64 efer; - int i, n; - - if (!dump_invalid_vmcs) { - pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n"); - return; - } - - vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); - vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); - cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); - cr4 = vmcs_readl(GUEST_CR4); - efer = vmcs_read64(GUEST_IA32_EFER); - secondary_exec_control = 0; - if (cpu_has_secondary_exec_ctrls()) - secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); - - pr_err("*** Guest State ***\n"); - pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", - vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW), - vmcs_readl(CR0_GUEST_HOST_MASK)); - pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", - cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK)); - pr_err("CR3 = 
0x%016lx\n", vmcs_readl(GUEST_CR3)); - if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && - (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) - { - pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n", - vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1)); - pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n", - vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3)); - } - pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", - vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); - pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n", - vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7)); - pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", - vmcs_readl(GUEST_SYSENTER_ESP), - vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP)); - vmx_dump_sel("CS: ", GUEST_CS_SELECTOR); - vmx_dump_sel("DS: ", GUEST_DS_SELECTOR); - vmx_dump_sel("SS: ", GUEST_SS_SELECTOR); - vmx_dump_sel("ES: ", GUEST_ES_SELECTOR); - vmx_dump_sel("FS: ", GUEST_FS_SELECTOR); - vmx_dump_sel("GS: ", GUEST_GS_SELECTOR); - vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT); - vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR); - vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT); - vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); - if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || - (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) - pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", - efer, vmcs_read64(GUEST_IA32_PAT)); - pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", - vmcs_read64(GUEST_IA32_DEBUGCTL), - vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); - if (cpu_has_load_perf_global_ctrl() && - vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016llx\n", - vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); - if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) - pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS)); - pr_err("Interruptibility = %08x ActivityState = %08x\n", - vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), - vmcs_read32(GUEST_ACTIVITY_STATE)); - if (secondary_exec_control & 
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) - pr_err("InterruptStatus = %04x\n", - vmcs_read16(GUEST_INTR_STATUS)); - - pr_err("*** Host State ***\n"); - pr_err("RIP = 0x%016lx RSP = 0x%016lx\n", - vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP)); - pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n", - vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR), - vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR), - vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR), - vmcs_read16(HOST_TR_SELECTOR)); - pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n", - vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE), - vmcs_readl(HOST_TR_BASE)); - pr_err("GDTBase=%016lx IDTBase=%016lx\n", - vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE)); - pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n", - vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3), - vmcs_readl(HOST_CR4)); - pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", - vmcs_readl(HOST_IA32_SYSENTER_ESP), - vmcs_read32(HOST_IA32_SYSENTER_CS), - vmcs_readl(HOST_IA32_SYSENTER_EIP)); - if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) - pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", - vmcs_read64(HOST_IA32_EFER), - vmcs_read64(HOST_IA32_PAT)); - if (cpu_has_load_perf_global_ctrl() && - vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016llx\n", - vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); - - pr_err("*** Control State ***\n"); - pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", - pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control); - pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); - pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", - vmcs_read32(EXCEPTION_BITMAP), - vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK), - vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH)); - pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n", - vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), - vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE), - 
vmcs_read32(VM_ENTRY_INSTRUCTION_LEN)); - pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n", - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_read32(VM_EXIT_INTR_ERROR_CODE), - vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); - pr_err(" reason=%08x qualification=%016lx\n", - vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION)); - pr_err("IDTVectoring: info=%08x errcode=%08x\n", - vmcs_read32(IDT_VECTORING_INFO_FIELD), - vmcs_read32(IDT_VECTORING_ERROR_CODE)); - pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET)); - if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) - pr_err("TSC Multiplier = 0x%016llx\n", - vmcs_read64(TSC_MULTIPLIER)); - if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) { - if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { - u16 status = vmcs_read16(GUEST_INTR_STATUS); - pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff); - } - pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); - if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) - pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR)); - pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR)); - } - if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) - pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); - if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) - pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); - n = vmcs_read32(CR3_TARGET_COUNT); - for (i = 0; i + 1 < n; i += 4) - pr_err("CR3 target%u=%016lx target%u=%016lx\n", - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2), - i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2)); - if (i < n) - pr_err("CR3 target%u=%016lx\n", - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2)); - if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) - pr_err("PLE Gap=%08x Window=%08x\n", - vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW)); - if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID) - pr_err("Virtual processor ID = 0x%04x\n", 
- vmcs_read16(VIRTUAL_PROCESSOR_ID)); -} - -/* - * The guest has exited. See if we can fix it or if we need userspace - * assistance. - */ -static int vmx_handle_exit(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion exit_fastpath) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 exit_reason = vmx->exit_reason; - u32 vectoring_info = vmx->idt_vectoring_info; - - trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); - - /* - * Flush logged GPAs PML buffer, this will make dirty_bitmap more - * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before - * querying dirty_bitmap, we only need to kick all vcpus out of guest - * mode as if vcpus is in root mode, the PML buffer must has been - * flushed already. - */ - if (enable_pml) - vmx_flush_pml_buffer(vcpu); - - /* If guest state is invalid, start emulating */ - if (vmx->emulation_required) - return handle_invalid_guest_state(vcpu); - - if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason)) - return nested_vmx_reflect_vmexit(vcpu, exit_reason); - - if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { - dump_vmcs(); - vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; - vcpu->run->fail_entry.hardware_entry_failure_reason - = exit_reason; - return 0; - } - - if (unlikely(vmx->fail)) { - dump_vmcs(); - vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; - vcpu->run->fail_entry.hardware_entry_failure_reason - = vmcs_read32(VM_INSTRUCTION_ERROR); - return 0; - } - - /* - * Note: - * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by - * delivery event since it indicates guest is accessing MMIO. - * The vm-exit can be triggered again after return to guest that - * will cause infinite loop. 
- */ - if ((vectoring_info & VECTORING_INFO_VALID_MASK) && - (exit_reason != EXIT_REASON_EXCEPTION_NMI && - exit_reason != EXIT_REASON_EPT_VIOLATION && - exit_reason != EXIT_REASON_PML_FULL && - exit_reason != EXIT_REASON_TASK_SWITCH)) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; - vcpu->run->internal.ndata = 3; - vcpu->run->internal.data[0] = vectoring_info; - vcpu->run->internal.data[1] = exit_reason; - vcpu->run->internal.data[2] = vcpu->arch.exit_qualification; - if (exit_reason == EXIT_REASON_EPT_MISCONFIG) { - vcpu->run->internal.ndata++; - vcpu->run->internal.data[3] = - vmcs_read64(GUEST_PHYSICAL_ADDRESS); - } - return 0; - } - - if (unlikely(!enable_vnmi && - vmx->loaded_vmcs->soft_vnmi_blocked)) { - if (vmx_interrupt_allowed(vcpu)) { - vmx->loaded_vmcs->soft_vnmi_blocked = 0; - } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL && - vcpu->arch.nmi_pending) { - /* - * This CPU don't support us in finding the end of an - * NMI-blocked window if the guest runs with IRQs - * disabled. So we pull the trigger after 1 s of - * futile waiting, but inform the user about this. 
- */ - printk(KERN_WARNING "%s: Breaking out of NMI-blocked " - "state on VCPU %d after 1 s timeout\n", - __func__, vcpu->vcpu_id); - vmx->loaded_vmcs->soft_vnmi_blocked = 0; - } - } - - if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) { - kvm_skip_emulated_instruction(vcpu); - return 1; - } else if (exit_reason < kvm_vmx_max_exit_handlers - && kvm_vmx_exit_handlers[exit_reason]) { -#ifdef CONFIG_RETPOLINE - if (exit_reason == EXIT_REASON_MSR_WRITE) - return kvm_emulate_wrmsr(vcpu); - else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) - return handle_preemption_timer(vcpu); - else if (exit_reason == EXIT_REASON_INTERRUPT_WINDOW) - return handle_interrupt_window(vcpu); - else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) - return handle_external_interrupt(vcpu); - else if (exit_reason == EXIT_REASON_HLT) - return kvm_emulate_halt(vcpu); - else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) - return handle_ept_misconfig(vcpu); -#endif - return kvm_vmx_exit_handlers[exit_reason](vcpu); - } else { - vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", - exit_reason); - dump_vmcs(); - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 1; - vcpu->run->internal.data[0] = exit_reason; - return 0; - } -} - -/* - * Software based L1D cache flush which is used when microcode providing - * the cache control MSR is not loaded. - * - * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to - * flush it is required to read in 64 KiB because the replacement algorithm - * is not exactly LRU. This could be sized at runtime via topology - * information but as all relevant affected CPUs have 32KiB L1D cache size - * there is no point in doing so. 
- */ -static void vmx_l1d_flush(struct kvm_vcpu *vcpu) -{ - int size = PAGE_SIZE << L1D_CACHE_ORDER; - - /* - * This code is only executed when the the flush mode is 'cond' or - * 'always' - */ - if (static_branch_likely(&vmx_l1d_flush_cond)) { - bool flush_l1d; - - /* - * Clear the per-vcpu flush bit, it gets set again - * either from vcpu_run() or from one of the unsafe - * VMEXIT handlers. - */ - flush_l1d = vcpu->arch.l1tf_flush_l1d; - vcpu->arch.l1tf_flush_l1d = false; - - /* - * Clear the per-cpu flush bit, it gets set again from - * the interrupt handlers. - */ - flush_l1d |= kvm_get_cpu_l1tf_flush_l1d(); - kvm_clear_cpu_l1tf_flush_l1d(); - - if (!flush_l1d) - return; - } - - vcpu->stat.l1d_flush++; - - if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) { - wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); - return; - } - - asm volatile( - /* First ensure the pages are in the TLB */ - "xorl %%eax, %%eax\n" - ".Lpopulate_tlb:\n\t" - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" - "addl $4096, %%eax\n\t" - "cmpl %%eax, %[size]\n\t" - "jne .Lpopulate_tlb\n\t" - "xorl %%eax, %%eax\n\t" - "cpuid\n\t" - /* Now fill the cache */ - "xorl %%eax, %%eax\n" - ".Lfill_cache:\n" - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" - "addl $64, %%eax\n\t" - "cmpl %%eax, %[size]\n\t" - "jne .Lfill_cache\n\t" - "lfence\n" - :: [flush_pages] "r" (vmx_l1d_flush_pages), - [size] "r" (size) - : "eax", "ebx", "ecx", "edx"); -} - -static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - int tpr_threshold; - - if (is_guest_mode(vcpu) && - nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) - return; - - tpr_threshold = (irr == -1 || tpr < irr) ? 
0 : irr; - if (is_guest_mode(vcpu)) - to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold; - else - vmcs_write32(TPR_THRESHOLD, tpr_threshold); -} - -void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 sec_exec_control; - - if (!lapic_in_kernel(vcpu)) - return; - - if (!flexpriority_enabled && - !cpu_has_vmx_virtualize_x2apic_mode()) - return; - - /* Postpone execution until vmcs01 is the current VMCS. */ - if (is_guest_mode(vcpu)) { - vmx->nested.change_vmcs01_virtual_apic_mode = true; - return; - } - - sec_exec_control = secondary_exec_controls_get(vmx); - sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); - - switch (kvm_get_apic_mode(vcpu)) { - case LAPIC_MODE_INVALID: - WARN_ONCE(true, "Invalid local APIC state"); - case LAPIC_MODE_DISABLED: - break; - case LAPIC_MODE_XAPIC: - if (flexpriority_enabled) { - sec_exec_control |= - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmx_flush_tlb(vcpu, true); - } - break; - case LAPIC_MODE_X2APIC: - if (cpu_has_vmx_virtualize_x2apic_mode()) - sec_exec_control |= - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; - break; - } - secondary_exec_controls_set(vmx, sec_exec_control); - - vmx_update_msr_bitmap(vcpu); -} - -static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) -{ - if (!is_guest_mode(vcpu)) { - vmcs_write64(APIC_ACCESS_ADDR, hpa); - vmx_flush_tlb(vcpu, true); - } -} - -static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) -{ - u16 status; - u8 old; - - if (max_isr == -1) - max_isr = 0; - - status = vmcs_read16(GUEST_INTR_STATUS); - old = status >> 8; - if (max_isr != old) { - status &= 0xff; - status |= max_isr << 8; - vmcs_write16(GUEST_INTR_STATUS, status); - } -} - -static void vmx_set_rvi(int vector) -{ - u16 status; - u8 old; - - if (vector == -1) - vector = 0; - - status = vmcs_read16(GUEST_INTR_STATUS); - old = (u8)status & 0xff; - if ((u8)vector != old) { - status &= ~0xff; 
- status |= (u8)vector; - vmcs_write16(GUEST_INTR_STATUS, status); - } -} - -static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) -{ - /* - * When running L2, updating RVI is only relevant when - * vmcs12 virtual-interrupt-delivery enabled. - * However, it can be enabled only when L1 also - * intercepts external-interrupts and in that case - * we should not update vmcs02 RVI but instead intercept - * interrupt. Therefore, do nothing when running L2. - */ - if (!is_guest_mode(vcpu)) - vmx_set_rvi(max_irr); -} - -static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - int max_irr; - bool max_irr_updated; - - WARN_ON(!vcpu->arch.apicv_active); - if (pi_test_on(&vmx->pi_desc)) { - pi_clear_on(&vmx->pi_desc); - /* - * IOMMU can write to PID.ON, so the barrier matters even on UP. - * But on x86 this is just a compiler barrier anyway. - */ - smp_mb__after_atomic(); - max_irr_updated = - kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr); - - /* - * If we are running L2 and L1 has a new pending interrupt - * which can be injected, we should re-evaluate - * what should be done with this new L1 interrupt. - * If L1 intercepts external-interrupts, we should - * exit from L2 to L1. Otherwise, interrupt should be - * delivered directly to L2. 
- */ - if (is_guest_mode(vcpu) && max_irr_updated) { - if (nested_exit_on_intr(vcpu)) - kvm_vcpu_exiting_guest_mode(vcpu); - else - kvm_make_request(KVM_REQ_EVENT, vcpu); - } - } else { - max_irr = kvm_lapic_find_highest_irr(vcpu); - } - vmx_hwapic_irr_update(vcpu, max_irr); - return max_irr; -} - -static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - - return pi_test_on(pi_desc) || - (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc)); -} - -static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) -{ - if (!kvm_vcpu_apicv_active(vcpu)) - return; - - vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); - vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); - vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); - vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); -} - -static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - pi_clear_on(&vmx->pi_desc); - memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); -} - -static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx) -{ - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - - /* if exit due to PF check for async PF */ - if (is_page_fault(vmx->exit_intr_info)) - vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); - - /* Handle machine checks before interrupts are enabled */ - if (is_machine_check(vmx->exit_intr_info)) - kvm_machine_check(); - - /* We need to handle NMIs before interrupts are enabled */ - if (is_nmi(vmx->exit_intr_info)) { - kvm_before_interrupt(&vmx->vcpu); - asm("int $2"); - kvm_after_interrupt(&vmx->vcpu); - } -} - -static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) -{ - unsigned int vector; - unsigned long entry; -#ifdef CONFIG_X86_64 - unsigned long tmp; -#endif - gate_desc *desc; - u32 intr_info; - - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - if (WARN_ONCE(!is_external_intr(intr_info), - "KVM: unexpected 
VM-Exit interrupt info: 0x%x", intr_info)) - return; - - vector = intr_info & INTR_INFO_VECTOR_MASK; - desc = (gate_desc *)host_idt_base + vector; - entry = gate_offset(desc); - - kvm_before_interrupt(vcpu); - - asm volatile( -#ifdef CONFIG_X86_64 - "mov %%" _ASM_SP ", %[sp]\n\t" - "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" - "push $%c[ss]\n\t" - "push %[sp]\n\t" -#endif - "pushf\n\t" - __ASM_SIZE(push) " $%c[cs]\n\t" - CALL_NOSPEC - : -#ifdef CONFIG_X86_64 - [sp]"=&r"(tmp), -#endif - ASM_CALL_CONSTRAINT - : - THUNK_TARGET(entry), - [ss]"i"(__KERNEL_DS), - [cs]"i"(__KERNEL_CS) - ); - - kvm_after_interrupt(vcpu); -} -STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff); - -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion *exit_fastpath) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) - handle_external_interrupt_irqoff(vcpu); - else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI) - handle_exception_nmi_irqoff(vmx); - else if (!is_guest_mode(vcpu) && - vmx->exit_reason == EXIT_REASON_MSR_WRITE) - *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); -} - -static bool vmx_has_emulated_msr(int index) -{ - switch (index) { - case MSR_IA32_SMBASE: - /* - * We cannot do SMM unless we can run the guest in big - * real mode. - */ - return enable_unrestricted_guest || emulate_invalid_guest_state; - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: - return nested; - case MSR_AMD64_VIRT_SPEC_CTRL: - /* This is AMD only. 
*/ - return false; - default: - return true; - } -} - -static bool vmx_pt_supported(void) -{ - return pt_mode == PT_MODE_HOST_GUEST; -} - -static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) -{ - u32 exit_intr_info; - bool unblock_nmi; - u8 vector; - bool idtv_info_valid; - - idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; - - if (enable_vnmi) { - if (vmx->loaded_vmcs->nmi_known_unmasked) - return; - /* - * Can't use vmx->exit_intr_info since we're not sure what - * the exit reason is. - */ - exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; - vector = exit_intr_info & INTR_INFO_VECTOR_MASK; - /* - * SDM 3: 27.7.1.2 (September 2008) - * Re-set bit "block by NMI" before VM entry if vmexit caused by - * a guest IRET fault. - * SDM 3: 23.2.2 (September 2008) - * Bit 12 is undefined in any of the following cases: - * If the VM exit sets the valid bit in the IDT-vectoring - * information field. - * If the VM exit is due to a double fault. 
- */ - if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && - vector != DF_VECTOR && !idtv_info_valid) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - else - vmx->loaded_vmcs->nmi_known_unmasked = - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) - & GUEST_INTR_STATE_NMI); - } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked)) - vmx->loaded_vmcs->vnmi_blocked_time += - ktime_to_ns(ktime_sub(ktime_get(), - vmx->loaded_vmcs->entry_time)); -} - -static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, - u32 idt_vectoring_info, - int instr_len_field, - int error_code_field) -{ - u8 vector; - int type; - bool idtv_info_valid; - - idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; - - vcpu->arch.nmi_injected = false; - kvm_clear_exception_queue(vcpu); - kvm_clear_interrupt_queue(vcpu); - - if (!idtv_info_valid) - return; - - kvm_make_request(KVM_REQ_EVENT, vcpu); - - vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; - type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; - - switch (type) { - case INTR_TYPE_NMI_INTR: - vcpu->arch.nmi_injected = true; - /* - * SDM 3: 27.7.1.2 (September 2008) - * Clear bit "block by NMI" before VM entry if a NMI - * delivery faulted. 
- */ - vmx_set_nmi_mask(vcpu, false); - break; - case INTR_TYPE_SOFT_EXCEPTION: - vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); - /* fall through */ - case INTR_TYPE_HARD_EXCEPTION: - if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { - u32 err = vmcs_read32(error_code_field); - kvm_requeue_exception_e(vcpu, vector, err); - } else - kvm_requeue_exception(vcpu, vector); - break; - case INTR_TYPE_SOFT_INTR: - vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); - /* fall through */ - case INTR_TYPE_EXT_INTR: - kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); - break; - default: - break; - } -} - -static void vmx_complete_interrupts(struct vcpu_vmx *vmx) -{ - __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, - VM_EXIT_INSTRUCTION_LEN, - IDT_VECTORING_ERROR_CODE); -} - -static void vmx_cancel_injection(struct kvm_vcpu *vcpu) -{ - __vmx_complete_interrupts(vcpu, - vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), - VM_ENTRY_INSTRUCTION_LEN, - VM_ENTRY_EXCEPTION_ERROR_CODE); - - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); -} - -static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) -{ - int i, nr_msrs; - struct perf_guest_switch_msr *msrs; - - msrs = perf_guest_get_msrs(&nr_msrs); - - if (!msrs) - return; - - for (i = 0; i < nr_msrs; i++) - if (msrs[i].host == msrs[i].guest) - clear_atomic_switch_msr(vmx, msrs[i].msr); - else - add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest, - msrs[i].host, false); -} - -static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) -{ - u32 host_umwait_control; - - if (!vmx_has_waitpkg(vmx)) - return; - - host_umwait_control = get_umwait_control_msr(); - - if (vmx->msr_ia32_umwait_control != host_umwait_control) - add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, - vmx->msr_ia32_umwait_control, - host_umwait_control, false); - else - clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); -} - -static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx 
*vmx = to_vmx(vcpu); - u64 tscl; - u32 delta_tsc; - - if (vmx->req_immediate_exit) { - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0); - vmx->loaded_vmcs->hv_timer_soft_disabled = false; - } else if (vmx->hv_deadline_tsc != -1) { - tscl = rdtsc(); - if (vmx->hv_deadline_tsc > tscl) - /* set_hv_timer ensures the delta fits in 32-bits */ - delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >> - cpu_preemption_timer_multi); - else - delta_tsc = 0; - - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc); - vmx->loaded_vmcs->hv_timer_soft_disabled = false; - } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) { - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1); - vmx->loaded_vmcs->hv_timer_soft_disabled = true; - } -} - -void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) -{ - if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) { - vmx->loaded_vmcs->host_state.rsp = host_rsp; - vmcs_writel(HOST_RSP, host_rsp); - } -} - -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); - -static void vmx_vcpu_run(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr3, cr4; - - /* Record the guest's net vcpu time for enforced NMI injections. 
*/ - if (unlikely(!enable_vnmi && - vmx->loaded_vmcs->soft_vnmi_blocked)) - vmx->loaded_vmcs->entry_time = ktime_get(); - - /* Don't enter VMX if guest state is invalid, let the exit handler - start emulation until we arrive back to a valid state */ - if (vmx->emulation_required) - return; - - if (vmx->ple_window_dirty) { - vmx->ple_window_dirty = false; - vmcs_write32(PLE_WINDOW, vmx->ple_window); - } - - if (vmx->nested.need_vmcs12_to_shadow_sync) - nested_sync_vmcs12_to_shadow(vcpu); - - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP)) - vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP)) - vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); - - cr3 = __get_current_cr3_fast(); - if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { - vmcs_writel(HOST_CR3, cr3); - vmx->loaded_vmcs->host_state.cr3 = cr3; - } - - cr4 = cr4_read_shadow(); - if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { - vmcs_writel(HOST_CR4, cr4); - vmx->loaded_vmcs->host_state.cr4 = cr4; - } - - /* When single-stepping over STI and MOV SS, we must clear the - * corresponding interruptibility bits in the guest state. Otherwise - * vmentry fails as it then expects bit 14 (BS) in pending debug - * exceptions being set, but that's not correct for the guest debugging - * case. */ - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vmx_set_interrupt_shadow(vcpu, 0); - - kvm_load_guest_xsave_state(vcpu); - - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && - vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vcpu->arch.pkru); - - pt_guest_enter(vmx); - - atomic_switch_perf_msrs(vmx); - atomic_switch_umwait_control_msr(vmx); - - if (enable_preemption_timer) - vmx_update_hv_timer(vcpu); - - if (lapic_in_kernel(vcpu) && - vcpu->arch.apic->lapic_timer.timer_advance_ns) - kvm_wait_lapic_expire(vcpu); - - /* - * If this vCPU has touched SPEC_CTRL, restore the guest's value if - * it's non-zero. 
Since vmentry is serialising on affected CPUs, there - * is no need to worry about the conditional branch over the wrmsr - * being speculatively taken. - */ - x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); - - /* L1D Flush includes CPU buffer clear to mitigate MDS */ - if (static_branch_unlikely(&vmx_l1d_should_flush)) - vmx_l1d_flush(vcpu); - else if (static_branch_unlikely(&mds_user_clear)) - mds_clear_cpu_buffers(); - - if (vcpu->arch.cr2 != read_cr2()) - write_cr2(vcpu->arch.cr2); - - vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); - - vcpu->arch.cr2 = read_cr2(); - - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and - * turn it off. This is much more efficient than blindly adding - * it to the atomic save/restore list. Especially as the former - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. - * - * For non-nested case: - * If the L01 MSR bitmap does not intercept the MSR, then we need to - * save it. - * - * For nested case: - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ - if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - - x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); - - /* All fields are clean at this point */ - if (static_branch_unlikely(&enable_evmcs)) - current_evmcs->hv_clean_fields |= - HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - - if (static_branch_unlikely(&enable_evmcs)) - current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index; - - /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ - if (vmx->host_debugctlmsr) - update_debugctlmsr(vmx->host_debugctlmsr); - -#ifndef CONFIG_X86_64 - /* - * The sysexit path does not restore ds/es, so we must set them to - * a reasonable value ourselves. 
- * - * We can't defer this to vmx_prepare_switch_to_host() since that - * function may be executed in interrupt context, which saves and - * restore segments around it, nullifying its effect. - */ - loadsegment(ds, __USER_DS); - loadsegment(es, __USER_DS); -#endif - - vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) - | (1 << VCPU_EXREG_RFLAGS) - | (1 << VCPU_EXREG_PDPTR) - | (1 << VCPU_EXREG_SEGMENTS) - | (1 << VCPU_EXREG_CR3)); - vcpu->arch.regs_dirty = 0; - - pt_guest_exit(vmx); - - /* - * eager fpu is enabled if PKEY is supported and CR4 is switched - * back on host, so it is safe to read guest PKRU from current - * XSAVE. - */ - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { - vcpu->arch.pkru = rdpkru(); - if (vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vmx->host_pkru); - } - - kvm_load_host_xsave_state(vcpu); - - vmx->nested.nested_run_pending = 0; - vmx->idt_vectoring_info = 0; - - vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON); - if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) - kvm_machine_check(); - - if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) - return; - - vmx->loaded_vmcs->launched = 1; - vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - - vmx_recover_nmi_blocking(vmx); - vmx_complete_interrupts(vmx); -} - -static struct kvm *vmx_vm_alloc(void) -{ - struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx), - GFP_KERNEL_ACCOUNT | __GFP_ZERO, - PAGE_KERNEL); - return &kvm_vmx->kvm; -} - -static void vmx_vm_free(struct kvm *kvm) -{ - kfree(kvm->arch.hyperv.hv_pa_pg); - vfree(to_kvm_vmx(kvm)); -} - -static void vmx_free_vcpu(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (enable_pml) - vmx_destroy_pml_buffer(vmx); - free_vpid(vmx->vpid); - nested_vmx_free_vcpu(vcpu); - free_loaded_vmcs(vmx->loaded_vmcs); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); - 
kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); - kmem_cache_free(kvm_vcpu_cache, vmx); -} - -static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) -{ - int err; - struct vcpu_vmx *vmx; - unsigned long *msr_bitmap; - int i, cpu; - - BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0, - "struct kvm_vcpu must be at offset 0 for arch usercopy region"); - - vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); - if (!vmx) - return ERR_PTR(-ENOMEM); - - vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!vmx->vcpu.arch.user_fpu) { - printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); - err = -ENOMEM; - goto free_partial_vcpu; - } - - vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!vmx->vcpu.arch.guest_fpu) { - printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); - err = -ENOMEM; - goto free_user_fpu; - } - - vmx->vpid = allocate_vpid(); - - err = kvm_vcpu_init(&vmx->vcpu, kvm, id); - if (err) - goto free_vcpu; - - err = -ENOMEM; - - /* - * If PML is turned on, failure on enabling PML just results in failure - * of creating the vcpu, therefore we can simplify PML logic (by - * avoiding dealing with cases, such as enabling PML partially on vcpus - * for the guest), etc. 
- */ - if (enable_pml) { - vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!vmx->pml_pg) - goto uninit_vcpu; - } - - BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS); - - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { - u32 index = vmx_msr_index[i]; - u32 data_low, data_high; - int j = vmx->nmsrs; - - if (rdmsr_safe(index, &data_low, &data_high) < 0) - continue; - if (wrmsr_safe(index, data_low, data_high) < 0) - continue; - - vmx->guest_msrs[j].index = i; - vmx->guest_msrs[j].data = 0; - switch (index) { - case MSR_IA32_TSX_CTRL: - /* - * No need to pass TSX_CTRL_CPUID_CLEAR through, so - * let's avoid changing CPUID bits under the host - * kernel's feet. - */ - vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; - break; - default: - vmx->guest_msrs[j].mask = -1ull; - break; - } - ++vmx->nmsrs; - } - - err = alloc_loaded_vmcs(&vmx->vmcs01); - if (err < 0) - goto free_pml; - - msr_bitmap = vmx->vmcs01.msr_bitmap; - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); - if (kvm_cstate_in_guest(kvm)) { - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R); - } - vmx->msr_bitmap_mode = 0; - - vmx->loaded_vmcs = &vmx->vmcs01; - cpu = get_cpu(); - vmx_vcpu_load(&vmx->vcpu, cpu); - vmx->vcpu.cpu = cpu; - 
init_vmcs(vmx); - vmx_vcpu_put(&vmx->vcpu); - put_cpu(); - if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { - err = alloc_apic_access_page(kvm); - if (err) - goto free_vmcs; - } - - if (enable_ept && !enable_unrestricted_guest) { - err = init_rmode_identity_map(kvm); - if (err) - goto free_vmcs; - } - - if (nested) - nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, - vmx_capability.ept, - kvm_vcpu_apicv_active(&vmx->vcpu)); - else - memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs)); - - vmx->nested.posted_intr_nv = -1; - vmx->nested.current_vmptr = -1ull; - - vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; - - /* - * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR - * or POSTED_INTR_WAKEUP_VECTOR. - */ - vmx->pi_desc.nv = POSTED_INTR_VECTOR; - vmx->pi_desc.sn = 1; - - vmx->ept_pointer = INVALID_PAGE; - - return &vmx->vcpu; - -free_vmcs: - free_loaded_vmcs(vmx->loaded_vmcs); -free_pml: - vmx_destroy_pml_buffer(vmx); -uninit_vcpu: - kvm_vcpu_uninit(&vmx->vcpu); -free_vcpu: - free_vpid(vmx->vpid); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); -free_user_fpu: - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); -free_partial_vcpu: - kmem_cache_free(kvm_vcpu_cache, vmx); - return ERR_PTR(err); -} - -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. 
See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" - -static int vmx_vm_init(struct kvm *kvm) -{ - spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock); - - if (!ple_gap) - kvm->arch.pause_in_guest = true; - - if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) { - switch (l1tf_mitigation) { - case L1TF_MITIGATION_OFF: - case L1TF_MITIGATION_FLUSH_NOWARN: - /* 'I explicitly don't care' is set */ - break; - case L1TF_MITIGATION_FLUSH: - case L1TF_MITIGATION_FLUSH_NOSMT: - case L1TF_MITIGATION_FULL: - /* - * Warn upon starting the first VM in a potentially - * insecure environment. - */ - if (sched_smt_active()) - pr_warn_once(L1TF_MSG_SMT); - if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER) - pr_warn_once(L1TF_MSG_L1D); - break; - case L1TF_MITIGATION_FULL_FORCE: - /* Flush is enforced */ - break; - } - } - return 0; -} - -static int __init vmx_check_processor_compat(void) -{ - struct vmcs_config vmcs_conf; - struct vmx_capability vmx_cap; - - if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) - return -EIO; - if (nested) - nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept, - enable_apicv); - if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { - printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", - smp_processor_id()); - return -EIO; - } - return 0; -} - -static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) -{ - u8 cache; - u64 ipat = 0; - - /* For VT-d and EPT combination - * 1. MMIO: always map as UC - * 2. EPT with VT-d: - * a. VT-d without snooping control feature: can't guarantee the - * result, try to trust guest. - * b. VT-d with snooping control feature: snooping control feature of - * VT-d engine can guarantee the cache correctness. Just set it - * to WB to keep consistent with host. So the same as item 3. - * 3. 
EPT without VT-d: always map as WB and set IPAT=1 to keep - * consistent with host MTRR - */ - if (is_mmio) { - cache = MTRR_TYPE_UNCACHABLE; - goto exit; - } - - if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { - ipat = VMX_EPT_IPAT_BIT; - cache = MTRR_TYPE_WRBACK; - goto exit; - } - - if (kvm_read_cr0(vcpu) & X86_CR0_CD) { - ipat = VMX_EPT_IPAT_BIT; - if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) - cache = MTRR_TYPE_WRBACK; - else - cache = MTRR_TYPE_UNCACHABLE; - goto exit; - } - - cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn); - -exit: - return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat; -} - -static int vmx_get_lpage_level(void) -{ - if (enable_ept && !cpu_has_vmx_ept_1g_page()) - return PT_DIRECTORY_LEVEL; - else - /* For shadow and EPT supported 1GB page */ - return PT_PDPE_LEVEL; -} - -static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx) -{ - /* - * These bits in the secondary execution controls field - * are dynamic, the others are mostly based on the hypervisor - * architecture and the guest's CPUID. Do not touch the - * dynamic bits. - */ - u32 mask = - SECONDARY_EXEC_SHADOW_VMCS | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_DESC; - - u32 new_ctl = vmx->secondary_exec_control; - u32 cur_ctl = secondary_exec_controls_get(vmx); - - secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask)); -} - -/* - * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits - * (indicating "allowed-1") if they are supported in the guest's CPUID. 
- */ -static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_cpuid_entry2 *entry; - - vmx->nested.msrs.cr0_fixed1 = 0xffffffff; - vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE; - -#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \ - if (entry && (entry->_reg & (_cpuid_mask))) \ - vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \ -} while (0) - - entry = kvm_find_cpuid_entry(vcpu, 0x1, 0); - cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME)); - cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME)); - cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC)); - cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE)); - cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE)); - cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE)); - cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE)); - cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE)); - cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR)); - cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM)); - cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX)); - cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX)); - cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID)); - cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE)); - - entry = kvm_find_cpuid_entry(vcpu, 0x7, 0); - cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE)); - cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP)); - cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); - cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); - cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); - cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57)); - -#undef cr4_fixed1_update -} - -static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (kvm_mpx_supported()) { - bool mpx_enabled = guest_cpuid_has(vcpu, 
X86_FEATURE_MPX); - - if (mpx_enabled) { - vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; - vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; - } else { - vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS; - vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS; - } - } -} - -static void update_intel_pt_cfg(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_cpuid_entry2 *best = NULL; - int i; - - for (i = 0; i < PT_CPUID_LEAVES; i++) { - best = kvm_find_cpuid_entry(vcpu, 0x14, i); - if (!best) - return; - vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax; - vmx->pt_desc.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM] = best->ebx; - vmx->pt_desc.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM] = best->ecx; - vmx->pt_desc.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM] = best->edx; - } - - /* Get the number of configurable Address Ranges for filtering */ - vmx->pt_desc.addr_range = intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_num_address_ranges); - - /* Initialize and clear the no dependency bits */ - vmx->pt_desc.ctl_bitmask = ~(RTIT_CTL_TRACEEN | RTIT_CTL_OS | - RTIT_CTL_USR | RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC); - - /* - * If CPUID.(EAX=14H,ECX=0):EBX[0]=1 CR3Filter can be set otherwise - * will inject an #GP - */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_cr3_filtering)) - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_CR3EN; - - /* - * If CPUID.(EAX=14H,ECX=0):EBX[1]=1 CYCEn, CycThresh and - * PSBFreq can be set - */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc)) - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_CYCLEACC | - RTIT_CTL_CYC_THRESH | RTIT_CTL_PSB_FREQ); - - /* - * If CPUID.(EAX=14H,ECX=0):EBX[3]=1 MTCEn BranchEn and - * MTCFreq can be set - */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc)) - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_MTC_EN | - RTIT_CTL_BRANCH_EN | RTIT_CTL_MTC_RANGE); - - /* If CPUID.(EAX=14H,ECX=0):EBX[4]=1 FUPonPTW and PTWEn can be set */ - if 
(intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_ptwrite)) - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_FUP_ON_PTW | - RTIT_CTL_PTW_EN); - - /* If CPUID.(EAX=14H,ECX=0):EBX[5]=1 PwrEvEn can be set */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_power_event_trace)) - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_PWR_EVT_EN; - - /* If CPUID.(EAX=14H,ECX=0):ECX[0]=1 ToPA can be set */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output)) - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA; - - /* If CPUID.(EAX=14H,ECX=0):ECX[3]=1 FabircEn can be set */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys)) - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN; - - /* unmask address range configure area */ - for (i = 0; i < vmx->pt_desc.addr_range; i++) - vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4)); -} - -static void vmx_cpuid_update(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */ - vcpu->arch.xsaves_enabled = false; - - if (cpu_has_secondary_exec_ctrls()) { - vmx_compute_secondary_exec_control(vmx); - vmcs_set_secondary_exec_control(vmx); - } - - if (nested_vmx_allowed(vcpu)) - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= - FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - else - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX); - - if (nested_vmx_allowed(vcpu)) { - nested_vmx_cr_fixed1_bits_update(vcpu); - nested_vmx_entry_exit_ctls_update(vcpu); - } - - if (boot_cpu_has(X86_FEATURE_INTEL_PT) && - guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) - update_intel_pt_cfg(vcpu); - - if (boot_cpu_has(X86_FEATURE_RTM)) { - struct shared_msr_entry *msr; - msr = find_msr_entry(vmx, MSR_IA32_TSX_CTRL); - if (msr) { - bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM); - vmx_set_guest_msr(vmx, msr, enabled ? 
0 : TSX_CTRL_RTM_DISABLE); - } - } -} - -static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) -{ - if (func == 1 && nested) - entry->ecx |= bit(X86_FEATURE_VMX); -} - -static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) -{ - to_vmx(vcpu)->req_immediate_exit = true; -} - -static int vmx_check_intercept(struct kvm_vcpu *vcpu, - struct x86_instruction_info *info, - enum x86_intercept_stage stage) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; - - /* - * RDPID causes #UD if disabled through secondary execution controls. - * Because it is marked as EmulateOnUD, we need to intercept it here. - */ - if (info->intercept == x86_intercept_rdtscp && - !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { - ctxt->exception.vector = UD_VECTOR; - ctxt->exception.error_code_valid = false; - return X86EMUL_PROPAGATE_FAULT; - } - - /* TODO: check more intercepts... */ - return X86EMUL_CONTINUE; -} - -#ifdef CONFIG_X86_64 -/* (a << shift) / divisor, return 1 if overflow otherwise 0 */ -static inline int u64_shl_div_u64(u64 a, unsigned int shift, - u64 divisor, u64 *result) -{ - u64 low = a << shift, high = a >> (64 - shift); - - /* To avoid the overflow on divq */ - if (high >= divisor) - return 1; - - /* Low hold the result, high hold rem which is discarded */ - asm("divq %2\n\t" : "=a" (low), "=d" (high) : - "rm" (divisor), "0" (low), "1" (high)); - *result = low; - - return 0; -} - -static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, - bool *expired) -{ - struct vcpu_vmx *vmx; - u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; - struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer; - - if (kvm_mwait_in_guest(vcpu->kvm) || - kvm_can_post_timer_interrupt(vcpu)) - return -EOPNOTSUPP; - - vmx = to_vmx(vcpu); - tscl = rdtsc(); - guest_tscl = kvm_read_l1_tsc(vcpu, tscl); - delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; - 
lapic_timer_advance_cycles = nsec_to_cycles(vcpu, - ktimer->timer_advance_ns); - - if (delta_tsc > lapic_timer_advance_cycles) - delta_tsc -= lapic_timer_advance_cycles; - else - delta_tsc = 0; - - /* Convert to host delta tsc if tsc scaling is enabled */ - if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio && - delta_tsc && u64_shl_div_u64(delta_tsc, - kvm_tsc_scaling_ratio_frac_bits, - vcpu->arch.tsc_scaling_ratio, &delta_tsc)) - return -ERANGE; - - /* - * If the delta tsc can't fit in the 32 bit after the multi shift, - * we can't use the preemption timer. - * It's possible that it fits on later vmentries, but checking - * on every vmentry is costly so we just use an hrtimer. - */ - if (delta_tsc >> (cpu_preemption_timer_multi + 32)) - return -ERANGE; - - vmx->hv_deadline_tsc = tscl + delta_tsc; - *expired = !delta_tsc; - return 0; -} - -static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) -{ - to_vmx(vcpu)->hv_deadline_tsc = -1; -} -#endif - -static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ - if (!kvm_pause_in_guest(vcpu->kvm)) - shrink_ple_window(vcpu); -} - -static void vmx_slot_enable_log_dirty(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ - kvm_mmu_slot_leaf_clear_dirty(kvm, slot); - kvm_mmu_slot_largepage_remove_write_access(kvm, slot); -} - -static void vmx_slot_disable_log_dirty(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ - kvm_mmu_slot_set_dirty(kvm, slot); -} - -static void vmx_flush_log_dirty(struct kvm *kvm) -{ - kvm_flush_pml_buffers(kvm); -} - -static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) -{ - struct vmcs12 *vmcs12; - struct vcpu_vmx *vmx = to_vmx(vcpu); - gpa_t gpa, dst; - - if (is_guest_mode(vcpu)) { - WARN_ON_ONCE(vmx->nested.pml_full); - - /* - * Check if PML is enabled for the nested guest. - * Whether eptp bit 6 is set is already checked - * as part of A/D emulation. 
- */ - vmcs12 = get_vmcs12(vcpu); - if (!nested_cpu_has_pml(vmcs12)) - return 0; - - if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) { - vmx->nested.pml_full = true; - return 1; - } - - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; - dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index; - - if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa, - offset_in_page(dst), sizeof(gpa))) - return 0; - - vmcs12->guest_pml_index--; - } - - return 0; -} - -static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, - struct kvm_memory_slot *memslot, - gfn_t offset, unsigned long mask) -{ - kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); -} - -static void __pi_post_block(struct kvm_vcpu *vcpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - struct pi_desc old, new; - unsigned int dest; - - do { - old.control = new.control = pi_desc->control; - WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, - "Wakeup handler not enabled while the VCPU is blocked\n"); - - dest = cpu_physical_id(vcpu->cpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - - /* set 'NV' to 'notification vector' */ - new.nv = POSTED_INTR_VECTOR; - } while (cmpxchg64(&pi_desc->control, old.control, - new.control) != old.control); - - if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - list_del(&vcpu->blocked_vcpu_list); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - vcpu->pre_pcpu = -1; - } -} - -/* - * This routine does the following things for vCPU which is going - * to be blocked if VT-d PI is enabled. - * - Store the vCPU to the wakeup list, so when interrupts happen - * we can find the right vCPU to wake up. 
- * - Change the Posted-interrupt descriptor as below: - * 'NDST' <-- vcpu->pre_pcpu - * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR - * - If 'ON' is set during this process, which means at least one - * interrupt is posted for this vCPU, we cannot block it, in - * this case, return 1, otherwise, return 0. - * - */ -static int pi_pre_block(struct kvm_vcpu *vcpu) -{ - unsigned int dest; - struct pi_desc old, new; - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) - return 0; - - WARN_ON(irqs_disabled()); - local_irq_disable(); - if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { - vcpu->pre_pcpu = vcpu->cpu; - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - list_add_tail(&vcpu->blocked_vcpu_list, - &per_cpu(blocked_vcpu_on_cpu, - vcpu->pre_pcpu)); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - } - - do { - old.control = new.control = pi_desc->control; - - WARN((pi_desc->sn == 1), - "Warning: SN field of posted-interrupts " - "is set before blocking\n"); - - /* - * Since vCPU can be preempted during this process, - * vcpu->cpu could be different with pre_pcpu, we - * need to set pre_pcpu as the destination of wakeup - * notification event, then we can find the right vCPU - * to wakeup in wakeup handler if interrupts happen - * when the vCPU is in blocked state. - */ - dest = cpu_physical_id(vcpu->pre_pcpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - - /* set 'NV' to 'wakeup vector' */ - new.nv = POSTED_INTR_WAKEUP_VECTOR; - } while (cmpxchg64(&pi_desc->control, old.control, - new.control) != old.control); - - /* We should not block the vCPU if an interrupt is posted for it. 
*/ - if (pi_test_on(pi_desc) == 1) - __pi_post_block(vcpu); - - local_irq_enable(); - return (vcpu->pre_pcpu == -1); -} - -static int vmx_pre_block(struct kvm_vcpu *vcpu) -{ - if (pi_pre_block(vcpu)) - return 1; - - if (kvm_lapic_hv_timer_in_use(vcpu)) - kvm_lapic_switch_to_sw_timer(vcpu); - - return 0; -} - -static void pi_post_block(struct kvm_vcpu *vcpu) -{ - if (vcpu->pre_pcpu == -1) - return; - - WARN_ON(irqs_disabled()); - local_irq_disable(); - __pi_post_block(vcpu); - local_irq_enable(); -} - -static void vmx_post_block(struct kvm_vcpu *vcpu) -{ - if (kvm_x86_ops->set_hv_timer) - kvm_lapic_switch_to_hv_timer(vcpu); - - pi_post_block(vcpu); -} - -/* - * vmx_update_pi_irte - set IRTE for Posted-Interrupts - * - * @kvm: kvm - * @host_irq: host irq of the interrupt - * @guest_irq: gsi of the interrupt - * @set: set or unset PI - * returns 0 on success, < 0 on failure - */ -static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set) -{ - struct kvm_kernel_irq_routing_entry *e; - struct kvm_irq_routing_table *irq_rt; - struct kvm_lapic_irq irq; - struct kvm_vcpu *vcpu; - struct vcpu_data vcpu_info; - int idx, ret = 0; - - if (!kvm_arch_has_assigned_device(kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(kvm->vcpus[0])) - return 0; - - idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (guest_irq >= irq_rt->nr_rt_entries || - hlist_empty(&irq_rt->map[guest_irq])) { - pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n", - guest_irq, irq_rt->nr_rt_entries); - goto out; - } - - hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { - if (e->type != KVM_IRQ_ROUTING_MSI) - continue; - /* - * VT-d PI cannot support posting multicast/broadcast - * interrupts to a vCPU, we still use interrupt remapping - * for these kind of interrupts. 
- * - * For lowest-priority interrupts, we only support - * those with single CPU as the destination, e.g. user - * configures the interrupts via /proc/irq or uses - * irqbalance to make the interrupts single-CPU. - * - * We will support full lowest-priority interrupt later. - * - * In addition, we can only inject generic interrupts using - * the PI mechanism, refuse to route others through it. - */ - - kvm_set_msi_irq(kvm, e, &irq); - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || - !kvm_irq_is_postable(&irq)) { - /* - * Make sure the IRTE is in remapped mode if - * we don't handle it in posted mode. - */ - ret = irq_set_vcpu_affinity(host_irq, NULL); - if (ret < 0) { - printk(KERN_INFO - "failed to back to remapped mode, irq: %u\n", - host_irq); - goto out; - } - - continue; - } - - vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); - vcpu_info.vector = irq.vector; - - trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, - vcpu_info.vector, vcpu_info.pi_desc_addr, set); - - if (set) - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); - else - ret = irq_set_vcpu_affinity(host_irq, NULL); - - if (ret < 0) { - printk(KERN_INFO "%s: failed to update PI IRTE\n", - __func__); - goto out; - } - } - - ret = 0; -out: - srcu_read_unlock(&kvm->irq_srcu, idx); - return ret; -} - -static void vmx_setup_mce(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.mcg_cap & MCG_LMCE_P) - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= - FEATURE_CONTROL_LMCE; - else - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~FEATURE_CONTROL_LMCE; -} - -static int vmx_smi_allowed(struct kvm_vcpu *vcpu) -{ - /* we need a nested vmexit to enter SMM, postpone if run is pending */ - if (to_vmx(vcpu)->nested.nested_run_pending) - return 0; - return 1; -} - -static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - vmx->nested.smm.guest_mode = is_guest_mode(vcpu); - if (vmx->nested.smm.guest_mode) - nested_vmx_vmexit(vcpu, -1, 0, 
0); - - vmx->nested.smm.vmxon = vmx->nested.vmxon; - vmx->nested.vmxon = false; - vmx_clear_hlt(vcpu); - return 0; -} - -static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - int ret; - - if (vmx->nested.smm.vmxon) { - vmx->nested.vmxon = true; - vmx->nested.smm.vmxon = false; - } - - if (vmx->nested.smm.guest_mode) { - ret = nested_vmx_enter_non_root_mode(vcpu, false); - if (ret) - return ret; - - vmx->nested.smm.guest_mode = false; - } - return 0; -} - -static int enable_smi_window(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) -{ - return false; -} - -static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) -{ - return to_vmx(vcpu)->nested.vmxon; -} - -static __init int hardware_setup(void) -{ - unsigned long host_bndcfgs; - struct desc_ptr dt; - int r, i; - - rdmsrl_safe(MSR_EFER, &host_efer); - - store_idt(&dt); - host_idt_base = dt.address; - - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) - kvm_define_shared_msr(i, vmx_msr_index[i]); - - if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0) - return -EIO; - - if (boot_cpu_has(X86_FEATURE_NX)) - kvm_enable_efer_bits(EFER_NX); - - if (boot_cpu_has(X86_FEATURE_MPX)) { - rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs); - WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost"); - } - - if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || - !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) - enable_vpid = 0; - - if (!cpu_has_vmx_ept() || - !cpu_has_vmx_ept_4levels() || - !cpu_has_vmx_ept_mt_wb() || - !cpu_has_vmx_invept_global()) - enable_ept = 0; - - if (!cpu_has_vmx_ept_ad_bits() || !enable_ept) - enable_ept_ad_bits = 0; - - if (!cpu_has_vmx_unrestricted_guest() || !enable_ept) - enable_unrestricted_guest = 0; - - if (!cpu_has_vmx_flexpriority()) - flexpriority_enabled = 0; - - if (!cpu_has_virtual_nmis()) - enable_vnmi = 0; - - /* - * set_apic_access_page_addr() 
is used to reload apic access - * page upon invalidation. No need to do anything if not - * using the APIC_ACCESS_ADDR VMCS field. - */ - if (!flexpriority_enabled) - kvm_x86_ops->set_apic_access_page_addr = NULL; - - if (!cpu_has_vmx_tpr_shadow()) - kvm_x86_ops->update_cr8_intercept = NULL; - - if (enable_ept && !cpu_has_vmx_ept_2m_page()) - kvm_disable_largepages(); - -#if IS_ENABLED(CONFIG_HYPERV) - if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH - && enable_ept) { - kvm_x86_ops->tlb_remote_flush = hv_remote_flush_tlb; - kvm_x86_ops->tlb_remote_flush_with_range = - hv_remote_flush_tlb_with_range; - } -#endif - - if (!cpu_has_vmx_ple()) { - ple_gap = 0; - ple_window = 0; - ple_window_grow = 0; - ple_window_max = 0; - ple_window_shrink = 0; - } - - if (!cpu_has_vmx_apicv()) { - enable_apicv = 0; - kvm_x86_ops->sync_pir_to_irr = NULL; - } - - if (cpu_has_vmx_tsc_scaling()) { - kvm_has_tsc_control = true; - kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; - kvm_tsc_scaling_ratio_frac_bits = 48; - } - - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - - if (enable_ept) - vmx_enable_tdp(); - else - kvm_disable_tdp(); - - /* - * Only enable PML when hardware supports PML feature, and both EPT - * and EPT A/D bit features are enabled -- PML depends on them to work. 
- */ - if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml()) - enable_pml = 0; - - if (!enable_pml) { - kvm_x86_ops->slot_enable_log_dirty = NULL; - kvm_x86_ops->slot_disable_log_dirty = NULL; - kvm_x86_ops->flush_log_dirty = NULL; - kvm_x86_ops->enable_log_dirty_pt_masked = NULL; - } - - if (!cpu_has_vmx_preemption_timer()) - enable_preemption_timer = false; - - if (enable_preemption_timer) { - u64 use_timer_freq = 5000ULL * 1000 * 1000; - u64 vmx_msr; - - rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); - cpu_preemption_timer_multi = - vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; - - if (tsc_khz) - use_timer_freq = (u64)tsc_khz * 1000; - use_timer_freq >>= cpu_preemption_timer_multi; - - /* - * KVM "disables" the preemption timer by setting it to its max - * value. Don't use the timer if it might cause spurious exits - * at a rate faster than 0.1 Hz (of uninterrupted guest time). - */ - if (use_timer_freq > 0xffffffffu / 10) - enable_preemption_timer = false; - } - - if (!enable_preemption_timer) { - kvm_x86_ops->set_hv_timer = NULL; - kvm_x86_ops->cancel_hv_timer = NULL; - kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit; - } - - kvm_set_posted_intr_wakeup_handler(wakeup_handler); - - kvm_mce_cap_supported |= MCG_LMCE_P; - - if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST) - return -EINVAL; - if (!enable_ept || !cpu_has_vmx_intel_pt()) - pt_mode = PT_MODE_SYSTEM; - - if (nested) { - nested_vmx_setup_ctls_msrs(&vmcs_config.nested, - vmx_capability.ept, enable_apicv); - - r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers); - if (r) - return r; - } - - r = alloc_kvm_area(); - if (r) - nested_vmx_hardware_unsetup(); - return r; -} - -static __exit void hardware_unsetup(void) -{ - if (nested) - nested_vmx_hardware_unsetup(); - - free_kvm_area(); -} - -static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { - .cpu_has_kvm_support = cpu_has_kvm_support, - .disabled_by_bios = vmx_disabled_by_bios, - .hardware_setup = hardware_setup, - 
.hardware_unsetup = hardware_unsetup, - .check_processor_compatibility = vmx_check_processor_compat, - .hardware_enable = hardware_enable, - .hardware_disable = hardware_disable, - .cpu_has_accelerated_tpr = report_flexpriority, - .has_emulated_msr = vmx_has_emulated_msr, - - .vm_init = vmx_vm_init, - .vm_alloc = vmx_vm_alloc, - .vm_free = vmx_vm_free, - - .vcpu_create = vmx_create_vcpu, - .vcpu_free = vmx_free_vcpu, - .vcpu_reset = vmx_vcpu_reset, - - .prepare_guest_switch = vmx_prepare_switch_to_guest, - .vcpu_load = vmx_vcpu_load, - .vcpu_put = vmx_vcpu_put, - - .update_bp_intercept = update_exception_bitmap, - .get_msr_feature = vmx_get_msr_feature, - .get_msr = vmx_get_msr, - .set_msr = vmx_set_msr, - .get_segment_base = vmx_get_segment_base, - .get_segment = vmx_get_segment, - .set_segment = vmx_set_segment, - .get_cpl = vmx_get_cpl, - .get_cs_db_l_bits = vmx_get_cs_db_l_bits, - .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, - .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, - .set_cr0 = vmx_set_cr0, - .set_cr3 = vmx_set_cr3, - .set_cr4 = vmx_set_cr4, - .set_efer = vmx_set_efer, - .get_idt = vmx_get_idt, - .set_idt = vmx_set_idt, - .get_gdt = vmx_get_gdt, - .set_gdt = vmx_set_gdt, - .get_dr6 = vmx_get_dr6, - .set_dr6 = vmx_set_dr6, - .set_dr7 = vmx_set_dr7, - .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, - .cache_reg = vmx_cache_reg, - .get_rflags = vmx_get_rflags, - .set_rflags = vmx_set_rflags, - - .tlb_flush = vmx_flush_tlb, - .tlb_flush_gva = vmx_flush_tlb_gva, - - .run = vmx_vcpu_run, - .handle_exit = vmx_handle_exit, - .skip_emulated_instruction = skip_emulated_instruction, - .set_interrupt_shadow = vmx_set_interrupt_shadow, - .get_interrupt_shadow = vmx_get_interrupt_shadow, - .patch_hypercall = vmx_patch_hypercall, - .set_irq = vmx_inject_irq, - .set_nmi = vmx_inject_nmi, - .queue_exception = vmx_queue_exception, - .cancel_injection = vmx_cancel_injection, - .interrupt_allowed = vmx_interrupt_allowed, - .nmi_allowed = 
vmx_nmi_allowed, - .get_nmi_mask = vmx_get_nmi_mask, - .set_nmi_mask = vmx_set_nmi_mask, - .enable_nmi_window = enable_nmi_window, - .enable_irq_window = enable_irq_window, - .update_cr8_intercept = update_cr8_intercept, - .set_virtual_apic_mode = vmx_set_virtual_apic_mode, - .set_apic_access_page_addr = vmx_set_apic_access_page_addr, - .get_enable_apicv = vmx_get_enable_apicv, - .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, - .load_eoi_exitmap = vmx_load_eoi_exitmap, - .apicv_post_state_restore = vmx_apicv_post_state_restore, - .hwapic_irr_update = vmx_hwapic_irr_update, - .hwapic_isr_update = vmx_hwapic_isr_update, - .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, - .sync_pir_to_irr = vmx_sync_pir_to_irr, - .deliver_posted_interrupt = vmx_deliver_posted_interrupt, - .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt, - - .set_tss_addr = vmx_set_tss_addr, - .set_identity_map_addr = vmx_set_identity_map_addr, - .get_tdp_level = get_ept_level, - .get_mt_mask = vmx_get_mt_mask, - - .get_exit_info = vmx_get_exit_info, - - .get_lpage_level = vmx_get_lpage_level, - - .cpuid_update = vmx_cpuid_update, - - .rdtscp_supported = vmx_rdtscp_supported, - .invpcid_supported = vmx_invpcid_supported, - - .set_supported_cpuid = vmx_set_supported_cpuid, - - .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, - - .read_l1_tsc_offset = vmx_read_l1_tsc_offset, - .write_l1_tsc_offset = vmx_write_l1_tsc_offset, - - .set_tdp_cr3 = vmx_set_cr3, - - .check_intercept = vmx_check_intercept, - .handle_exit_irqoff = vmx_handle_exit_irqoff, - .mpx_supported = vmx_mpx_supported, - .xsaves_supported = vmx_xsaves_supported, - .umip_emulated = vmx_umip_emulated, - .pt_supported = vmx_pt_supported, - - .request_immediate_exit = vmx_request_immediate_exit, - - .sched_in = vmx_sched_in, - - .slot_enable_log_dirty = vmx_slot_enable_log_dirty, - .slot_disable_log_dirty = vmx_slot_disable_log_dirty, - .flush_log_dirty = vmx_flush_log_dirty, - .enable_log_dirty_pt_masked = 
vmx_enable_log_dirty_pt_masked, - .write_log_dirty = vmx_write_pml_buffer, - - .pre_block = vmx_pre_block, - .post_block = vmx_post_block, - - .pmu_ops = &intel_pmu_ops, - - .update_pi_irte = vmx_update_pi_irte, - -#ifdef CONFIG_X86_64 - .set_hv_timer = vmx_set_hv_timer, - .cancel_hv_timer = vmx_cancel_hv_timer, -#endif - - .setup_mce = vmx_setup_mce, - - .smi_allowed = vmx_smi_allowed, - .pre_enter_smm = vmx_pre_enter_smm, - .pre_leave_smm = vmx_pre_leave_smm, - .enable_smi_window = enable_smi_window, - - .check_nested_events = NULL, - .get_nested_state = NULL, - .set_nested_state = NULL, - .get_vmcs12_pages = NULL, - .nested_enable_evmcs = NULL, - .nested_get_evmcs_version = NULL, - .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, - .apic_init_signal_blocked = vmx_apic_init_signal_blocked, -}; - -static void vmx_cleanup_l1d_flush(void) -{ - if (vmx_l1d_flush_pages) { - free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER); - vmx_l1d_flush_pages = NULL; - } - /* Restore state so sysfs ignores VMX */ - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; -} - -static void vmx_exit(void) -{ -#ifdef CONFIG_KEXEC_CORE - RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); - synchronize_rcu(); -#endif - - kvm_exit(); - -#if IS_ENABLED(CONFIG_HYPERV) - if (static_branch_unlikely(&enable_evmcs)) { - int cpu; - struct hv_vp_assist_page *vp_ap; - /* - * Reset everything to support using non-enlightened VMCS - * access later (e.g. 
when we reload the module with - * enlightened_vmcs=0) - */ - for_each_online_cpu(cpu) { - vp_ap = hv_get_vp_assist_page(cpu); - - if (!vp_ap) - continue; - - vp_ap->nested_control.features.directhypercall = 0; - vp_ap->current_nested_vmcs = 0; - vp_ap->enlighten_vmentry = 0; - } - - static_branch_disable(&enable_evmcs); - } -#endif - vmx_cleanup_l1d_flush(); -} -module_exit(vmx_exit); - -static int __init vmx_init(void) -{ - int r; - -#if IS_ENABLED(CONFIG_HYPERV) - /* - * Enlightened VMCS usage should be recommended and the host needs - * to support eVMCS v1 or above. We can also disable eVMCS support - * with module parameter. - */ - if (enlightened_vmcs && - ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED && - (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >= - KVM_EVMCS_VERSION) { - int cpu; - - /* Check that we have assist pages on all online CPUs */ - for_each_online_cpu(cpu) { - if (!hv_get_vp_assist_page(cpu)) { - enlightened_vmcs = false; - break; - } - } - - if (enlightened_vmcs) { - pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); - static_branch_enable(&enable_evmcs); - } - - if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) - vmx_x86_ops.enable_direct_tlbflush - = hv_enable_direct_tlbflush; - - } else { - enlightened_vmcs = false; - } -#endif - - r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), - __alignof__(struct vcpu_vmx), THIS_MODULE); - if (r) - return r; - - /* - * Must be called after kvm_init() so enable_ept is properly set - * up. Hand the parameter mitigation value in which was stored in - * the pre module init parser. If no parameter was given, it will - * contain 'auto' which will be turned into the default 'cond' - * mitigation mode. 
- */ - r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); - if (r) { - vmx_exit(); - return r; - } - -#ifdef CONFIG_KEXEC_CORE - rcu_assign_pointer(crash_vmclear_loaded_vmcss, - crash_vmclear_local_loaded_vmcss); -#endif - vmx_check_vmcs12_offsets(); - - return 0; -} -module_init(vmx_init); -- GitLab From fce56d970d65a20abc774ac808cce991f104a972 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 7 Feb 2020 09:37:41 -0800 Subject: [PATCH 0092/1278] KVM: nVMX: Use correct root level for nested EPT shadow page tables [ Upstream commit 148d735eb55d32848c3379e460ce365f2c1cbe4b ] Hardcode the EPT page-walk level for L2 to be 4 levels, as KVM's MMU currently also hardcodes the page walk level for nested EPT to be 4 levels. The L2 guest is all but guaranteed to soft hang on its first instruction when L1 is using EPT, as KVM will construct 4-level page tables and then tell hardware to use 5-level page tables. Fixes: 855feb673640 ("KVM: MMU: Add 5 level EPT & Shadow page table support.") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4790994854bb..86037cc11419 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4597,6 +4597,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static int get_ept_level(struct kvm_vcpu *vcpu) { + /* Nested EPT currently only supports 4-level walks. 
*/ + if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) + return 4; if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) return 5; return 4; -- GitLab From 13851e4667e0d750e6ea3d1a110ca32e68b5b862 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 7 Nov 2019 16:30:48 +0100 Subject: [PATCH 0093/1278] drm/gma500: Fixup fbdev stolen size usage evaluation [ Upstream commit fd1a5e521c3c083bb43ea731aae0f8b95f12b9bd ] psbfb_probe performs an evaluation of the required size from the stolen GTT memory, but gets it wrong in two distinct ways: - The resulting size must be page-size-aligned; - The size to allocate is derived from the surface dimensions, not the fb dimensions. When two connectors are connected with different modes, the smallest will be stored in the fb dimensions, but the size that needs to be allocated must match the largest (surface) dimensions. This is what is used in the actual allocation code. Fix this by correcting the evaluation to conform to the two points above. It allows correctly switching to 16bpp when one connector is e.g. 1920x1080 and the other is 1024x768. 
Signed-off-by: Paul Kocialkowski Signed-off-by: Patrik Jakobsson Link: https://patchwork.freedesktop.org/patch/msgid/20191107153048.843881-1-paul.kocialkowski@bootlin.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/gma500/framebuffer.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index 2570c7f647a6..883fc45870dd 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -486,6 +486,7 @@ static int psbfb_probe(struct drm_fb_helper *helper, container_of(helper, struct psb_fbdev, psb_fb_helper); struct drm_device *dev = psb_fbdev->psb_fb_helper.dev; struct drm_psb_private *dev_priv = dev->dev_private; + unsigned int fb_size; int bytespp; bytespp = sizes->surface_bpp / 8; @@ -495,8 +496,11 @@ static int psbfb_probe(struct drm_fb_helper *helper, /* If the mode will not fit in 32bit then switch to 16bit to get a console on full resolution. The X mode setting server will allocate its own 32bit GEM framebuffer */ - if (ALIGN(sizes->fb_width * bytespp, 64) * sizes->fb_height > - dev_priv->vram_stolen_size) { + fb_size = ALIGN(sizes->surface_width * bytespp, 64) * + sizes->surface_height; + fb_size = ALIGN(fb_size, PAGE_SIZE); + + if (fb_size > dev_priv->vram_stolen_size) { sizes->surface_bpp = 16; sizes->surface_depth = 16; } -- GitLab From 877a96a390db71b9e067ac62794401f58414a89e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Dec 2019 09:34:54 +0100 Subject: [PATCH 0094/1278] cpu/hotplug, stop_machine: Fix stop_machine vs hotplug order [ Upstream commit 45178ac0cea853fe0e405bf11e101bdebea57b15 ] Paul reported a very sporadic, rcutorture induced, workqueue failure. When the planets align, the workqueue rescuer's self-migrate fails and then triggers a WARN for running a work on the wrong CPU. Tejun then figured that set_cpus_allowed_ptr()'s stop_one_cpu() call could be ignored! 
When stopper->enabled is false, stop_machine will insta complete the work, without actually doing the work. Worse, it will not WARN about this (we really should fix this). It turns out there is a small window where a freshly online'ed CPU is marked 'online' but doesn't yet have the stopper task running: BP AP bringup_cpu() __cpu_up(cpu, idle) --> start_secondary() ... cpu_startup_entry() bringup_wait_for_ap() wait_for_ap_thread() <-- cpuhp_online_idle() while (1) do_idle() ... available to run kthreads ... stop_machine_unpark() stopper->enable = true; Close this by moving the stop_machine_unpark() into cpuhp_online_idle(), such that the stopper thread is ready before we start the idle loop and schedule. Reported-by: "Paul E. McKenney" Debugged-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Tested-by: "Paul E. McKenney" Signed-off-by: Sasha Levin --- kernel/cpu.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 49273130e4f1..96c0a868232e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -494,8 +494,7 @@ static int bringup_wait_for_ap(unsigned int cpu) if (WARN_ON_ONCE((!cpu_online(cpu)))) return -ECANCELED; - /* Unpark the stopper thread and the hotplug thread of the target cpu */ - stop_machine_unpark(cpu); + /* Unpark the hotplug thread of the target cpu */ kthread_unpark(st->thread); /* @@ -1064,8 +1063,8 @@ void notify_cpu_starting(unsigned int cpu) /* * Called from the idle task. Wake up the controlling task which brings the - * stopper and the hotplug thread of the upcoming CPU up and then delegates - * the rest of the online bringup to the hotplug thread. + * hotplug thread of the upcoming CPU up and then delegates the rest of the + * online bringup to the hotplug thread. 
*/ void cpuhp_online_idle(enum cpuhp_state state) { @@ -1075,6 +1074,12 @@ void cpuhp_online_idle(enum cpuhp_state state) if (state != CPUHP_AP_ONLINE_IDLE) return; + /* + * Unpart the stopper thread before we start the idle loop (and start + * scheduling); this ensures the stopper task is always available. + */ + stop_machine_unpark(smp_processor_id()); + st->state = CPUHP_AP_ONLINE_IDLE; complete_ap_thread(st, true); } -- GitLab From 927c64c4e7395a24590ebb1278f39ad4a3a42331 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Dec 2019 12:58:55 +0300 Subject: [PATCH 0095/1278] brcmfmac: Fix use after free in brcmf_sdio_readframes() [ Upstream commit 216b44000ada87a63891a8214c347e05a4aea8fe ] The brcmu_pkt_buf_free_skb() function frees "pkt" so it leads to a static checker warning: drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c:1974 brcmf_sdio_readframes() error: dereferencing freed memory 'pkt' It looks like there was supposed to be a continue after we free "pkt". Fixes: 4754fceeb9a6 ("brcmfmac: streamline SDIO read frame routine") Signed-off-by: Dan Carpenter Acked-by: Franky Lin Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 4c28b04ea605..d198a8780b96 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -1932,6 +1932,7 @@ static uint brcmf_sdio_readframes(struct brcmf_sdio *bus, uint maxframes) BRCMF_SDIO_FT_NORMAL)) { rd->len = 0; brcmu_pkt_buf_free_skb(pkt); + continue; } bus->sdcnt.rx_readahead_cnt++; if (rd->len != roundup(rd_new.len, 16)) { -- GitLab From bb36a883e32b7d3804441822e6e0d4cb3a47a19b Mon Sep 17 00:00:00 2001 From: Zahari Petkov Date: Mon, 18 Nov 2019 23:02:55 +0200 Subject: [PATCH 0096/1278] leds: pca963x: Fix open-drain 
initialization [ Upstream commit 697529091ac7a0a90ca349b914bb30641c13c753 ] Before commit bb29b9cccd95 ("leds: pca963x: Add bindings to invert polarity") Mode register 2 was initialized directly with either 0x01 or 0x05 for open-drain or totem pole (push-pull) configuration. Afterwards, MODE2 initialization started using bitwise operations on top of the default MODE2 register value (0x05). Using bitwise OR for setting OUTDRV with 0x01 and 0x05 does not produce correct results. When open-drain is used, instead of setting OUTDRV to 0, the driver keeps it as 1: Open-drain: 0x05 | 0x01 -> 0x05 (0b101 - incorrect) Totem pole: 0x05 | 0x05 -> 0x05 (0b101 - correct but still wrong) Now OUTDRV setting uses correct bitwise operations for initialization: Open-drain: 0x05 & ~0x04 -> 0x01 (0b001 - correct) Totem pole: 0x05 | 0x04 -> 0x05 (0b101 - correct) Additional MODE2 register definitions are introduced now as well. Fixes: bb29b9cccd95 ("leds: pca963x: Add bindings to invert polarity") Signed-off-by: Zahari Petkov Signed-off-by: Pavel Machek Signed-off-by: Sasha Levin --- drivers/leds/leds-pca963x.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/leds/leds-pca963x.c b/drivers/leds/leds-pca963x.c index 3bf9a1271819..88c7313cf869 100644 --- a/drivers/leds/leds-pca963x.c +++ b/drivers/leds/leds-pca963x.c @@ -43,6 +43,8 @@ #define PCA963X_LED_PWM 0x2 /* Controlled through PWM */ #define PCA963X_LED_GRP_PWM 0x3 /* Controlled through PWM/GRPPWM */ +#define PCA963X_MODE2_OUTDRV 0x04 /* Open-drain or totem pole */ +#define PCA963X_MODE2_INVRT 0x10 /* Normal or inverted direction */ #define PCA963X_MODE2_DMBLNK 0x20 /* Enable blinking */ #define PCA963X_MODE1 0x00 @@ -462,12 +464,12 @@ static int pca963x_probe(struct i2c_client *client, PCA963X_MODE2); /* Configure output: open-drain or totem pole (push-pull) */ if (pdata->outdrv == PCA963X_OPEN_DRAIN) - mode2 |= 0x01; + mode2 &= ~PCA963X_MODE2_OUTDRV; else - mode2 |= 0x05; + mode2 |= 
PCA963X_MODE2_OUTDRV; /* Configure direction: normal or inverted */ if (pdata->dir == PCA963X_INVERTED) - mode2 |= 0x10; + mode2 |= PCA963X_MODE2_INVRT; i2c_smbus_write_byte_data(pca963x->chip->client, PCA963X_MODE2, mode2); } -- GitLab From 52f192660a4d1529458d72178f50cf431743b961 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Thu, 12 Dec 2019 11:25:55 +0530 Subject: [PATCH 0097/1278] ext4: fix ext4_dax_read/write inode locking sequence for IOCB_NOWAIT [ Upstream commit f629afe3369e9885fd6e9cc7a4f514b6a65cf9e9 ] Apparently our current rwsem code doesn't like doing the trylock, then lock for real scheme. So change our dax read/write methods to just do the trylock for the RWF_NOWAIT case. This seems to fix AIM7 regression in some scalable filesystems up to ~25% in some cases. Claimed in commit 942491c9e6d6 ("xfs: fix AIM7 regression") Reviewed-by: Jan Kara Reviewed-by: Matthew Bobrowski Tested-by: Joseph Qi Signed-off-by: Ritesh Harjani Link: https://lore.kernel.org/r/20191212055557.11151-2-riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/file.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 4ede0af9d6fe..acec134da57d 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -38,9 +38,10 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; - if (!inode_trylock_shared(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock_shared(inode)) return -EAGAIN; + } else { inode_lock_shared(inode); } /* @@ -188,9 +189,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; - if (!inode_trylock(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) return -EAGAIN; + } else { inode_lock(inode); } ret =
ext4_write_checks(iocb, from); -- GitLab From e80cac7b887e1191210978e6e89c40fe4cfdca0b Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 23 Dec 2019 18:33:47 +0900 Subject: [PATCH 0098/1278] ALSA: ctl: allow TLV read operation for callback type of element in locked case [ Upstream commit d61fe22c2ae42d9fd76c34ef4224064cca4b04b0 ] A design of ALSA control core allows applications to execute three operations for TLV feature; read, write and command. Furthermore, it allows driver developers to process the operations by two ways; allocated array or callback function. In the former, read operation is just allowed, thus developers uses the latter when device driver supports variety of models or the target model is expected to dynamically change information stored in TLV container. The core also allows applications to lock any element so that the other applications can't perform write operation to the element for element value and TLV information. When the element is locked, write and command operation for TLV information are prohibited as well as element value. Any read operation should be allowed in the case. At present, when an element has callback function for TLV information, TLV read operation returns EPERM if the element is locked. On the other hand, the read operation is success when an element has allocated array for TLV information. In both cases, read operation is success for element value expectedly. This commit fixes the bug. This change can be backported to v4.14 kernel or later. 
Signed-off-by: Takashi Sakamoto Reviewed-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20191223093347.15279-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/control.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/core/control.c b/sound/core/control.c index 36571cd49be3..a0ce22164957 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1467,8 +1467,9 @@ static int call_tlv_handler(struct snd_ctl_file *file, int op_flag, if (kctl->tlv.c == NULL) return -ENXIO; - /* When locked, this is unavailable. */ - if (vd->owner != NULL && vd->owner != file) + /* Write and command operations are not allowed for locked element. */ + if (op_flag != SNDRV_CTL_TLV_OP_READ && + vd->owner != NULL && vd->owner != file) return -EPERM; return kctl->tlv.c(kctl, op_flag, size, buf); -- GitLab From 9fb666349ef9a39034d14e798047a553b5d47a3a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 28 Dec 2019 15:30:45 +0200 Subject: [PATCH 0099/1278] gianfar: Fix TX timestamping with a stacked DSA driver [ Upstream commit c26a2c2ddc0115eb088873f5c309cf46b982f522 ] The driver wrongly assumes that it is the only entity that can set the SKBTX_IN_PROGRESS bit of the current skb. Therefore, in the gfar_clean_tx_ring function, where the TX timestamp is collected if necessary, the aforementioned bit is used to discriminate whether or not the TX timestamp should be delivered to the socket's error queue. But a stacked driver such as a DSA switch can also set the SKBTX_IN_PROGRESS bit, which is actually exactly what it should do in order to denote that the hardware timestamping process is undergoing. Therefore, gianfar would misinterpret the "in progress" bit as being its own, and deliver a second skb clone in the socket's error queue, completely throwing off a PTP process which is not expecting to receive it, _even though_ TX timestamping is not enabled for gianfar. 
There have been discussions [0] as to whether non-MAC drivers need or not to set SKBTX_IN_PROGRESS at all (whose purpose is to avoid sending 2 timestamps, a sw and a hw one, to applications which only expect one). But as of this patch, there are at least 2 PTP drivers that would break in conjunction with gianfar: the sja1105 DSA switch and the felix switch, by way of its ocelot core driver. So regardless of that conclusion, fix the gianfar driver to not do stuff based on flags set by others and not intended for it. [0]: https://www.spinics.net/lists/netdev/msg619699.html Fixes: f0ee7acfcdd4 ("gianfar: Add hardware TX timestamping support") Signed-off-by: Vladimir Oltean Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/gianfar.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 27d0e3b9833c..e4a2c74a9b47 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2685,13 +2685,17 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) skb_dirtytx = tx_queue->skb_dirtytx; while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) { + bool do_tstamp; + + do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en; frags = skb_shinfo(skb)->nr_frags; /* When time stamping, one additional TxBD must be freed. * Also, we need to dma_unmap_single() the TxPAL. 
*/ - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) + if (unlikely(do_tstamp)) nr_txbds = frags + 2; else nr_txbds = frags + 1; @@ -2705,7 +2709,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) (lstatus & BD_LENGTH_MASK)) break; - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { next = next_txbd(bdp, base, tx_ring_size); buflen = be16_to_cpu(next->length) + GMAC_FCB_LEN + GMAC_TXPAL_LEN; @@ -2715,7 +2719,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr), buflen, DMA_TO_DEVICE); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { struct skb_shared_hwtstamps shhwtstamps; u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) & ~0x7UL); -- GitLab From d3151da31bda9c19c4b0050443aaa4eed3875cb9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 18 Dec 2019 20:48:09 +0100 Subject: [PATCH 0100/1278] pinctrl: sh-pfc: sh7264: Fix CAN function GPIOs [ Upstream commit 55b1cb1f03ad5eea39897d0c74035e02deddcff2 ] pinmux_func_gpios[] contains a hole due to the missing function GPIO definition for the "CTX0&CTX1" signal, which is the logical "AND" of the two CAN outputs. Fix this by: - Renaming CRX0_CRX1_MARK to CTX0_CTX1_MARK, as PJ2MD[2:0]=010 configures the combined "CTX0&CTX1" output signal, - Renaming CRX0X1_MARK to CRX0_CRX1_MARK, as PJ3MD[1:0]=10 configures the shared "CRX0/CRX1" input signal, which is fed to both CAN inputs, - Adding the missing function GPIO definition for "CTX0&CTX1" to pinmux_func_gpios[], - Moving all CAN enums next to each other. See SH7262 Group, SH7264 Group User's Manual: Hardware, Rev. 
4.00: [1] Figure 1.2 (3) (Pin Assignment for the SH7264 Group (1-Mbyte Version), [2] Figure 1.2 (4) Pin Assignment for the SH7264 Group (640-Kbyte Version, [3] Table 1.4 List of Pins, [4] Figure 20.29 Connection Example when Using This Module as 1-Channel Module (64 Mailboxes x 1 Channel), [5] Table 32.10 Multiplexed Pins (Port J), [6] Section 32.2.30 (3) Port J Control Register 0 (PJCR0). Note that the last 2 disagree about PJ2MD[2:0], which is probably the root cause of this bug. But considering [4], "CTx0&CTx1" in [5] must be correct, and "CRx0&CRx1" in [6] must be wrong. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191218194812.12741-4-geert+renesas@glider.be Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-sh7264.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7264.c b/drivers/pinctrl/sh-pfc/pfc-sh7264.c index e1c34e19222e..3ddb9565ed80 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7264.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7264.c @@ -500,17 +500,15 @@ enum { SD_WP_MARK, SD_CLK_MARK, SD_CMD_MARK, CRX0_MARK, CRX1_MARK, CTX0_MARK, CTX1_MARK, + CRX0_CRX1_MARK, CTX0_CTX1_MARK, PWM1A_MARK, PWM1B_MARK, PWM1C_MARK, PWM1D_MARK, PWM1E_MARK, PWM1F_MARK, PWM1G_MARK, PWM1H_MARK, PWM2A_MARK, PWM2B_MARK, PWM2C_MARK, PWM2D_MARK, PWM2E_MARK, PWM2F_MARK, PWM2G_MARK, PWM2H_MARK, IERXD_MARK, IETXD_MARK, - CRX0_CRX1_MARK, WDTOVF_MARK, - CRX0X1_MARK, - /* DMAC */ TEND0_MARK, DACK0_MARK, DREQ0_MARK, TEND1_MARK, DACK1_MARK, DREQ1_MARK, @@ -998,12 +996,12 @@ static const u16 pinmux_data[] = { PINMUX_DATA(PJ3_DATA, PJ3MD_00), PINMUX_DATA(CRX1_MARK, PJ3MD_01), - PINMUX_DATA(CRX0X1_MARK, PJ3MD_10), + PINMUX_DATA(CRX0_CRX1_MARK, PJ3MD_10), PINMUX_DATA(IRQ1_PJ_MARK, PJ3MD_11), PINMUX_DATA(PJ2_DATA, PJ2MD_000), PINMUX_DATA(CTX1_MARK, PJ2MD_001), - PINMUX_DATA(CRX0_CRX1_MARK, PJ2MD_010), + PINMUX_DATA(CTX0_CTX1_MARK, PJ2MD_010), PINMUX_DATA(CS2_MARK, PJ2MD_011), PINMUX_DATA(SCK0_MARK, PJ2MD_100), 
PINMUX_DATA(LCD_M_DISP_MARK, PJ2MD_101), @@ -1248,6 +1246,7 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(CTX1), GPIO_FN(CRX1), GPIO_FN(CTX0), + GPIO_FN(CTX0_CTX1), GPIO_FN(CRX0), GPIO_FN(CRX0_CRX1), -- GitLab From e3b1ef9fb6079e9a8a9f236ad6d35eb7e79a7519 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 31 Aug 2019 12:00:24 +0200 Subject: [PATCH 0101/1278] pxa168fb: Fix the function used to release some memory in an error handling path [ Upstream commit 3c911fe799d1c338d94b78e7182ad452c37af897 ] In the probe function, some resources are allocated using 'dma_alloc_wc()', they should be released with 'dma_free_wc()', not 'dma_free_coherent()'. We already use 'dma_free_wc()' in the remove function, but not in the error handling path of the probe function. Also, remove a useless 'PAGE_ALIGN()'. 'info->fix.smem_len' is already PAGE_ALIGNed. Fixes: 638772c7553f ("fb: add support of LCD display controller on pxa168/910 (base layer)") Signed-off-by: Christophe JAILLET Reviewed-by: Lubomir Rintel CC: YueHaibing Signed-off-by: Bartlomiej Zolnierkiewicz Link: https://patchwork.freedesktop.org/patch/msgid/20190831100024.3248-1-christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/video/fbdev/pxa168fb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c index d059d04c63ac..20195d3dbf08 100644 --- a/drivers/video/fbdev/pxa168fb.c +++ b/drivers/video/fbdev/pxa168fb.c @@ -769,8 +769,8 @@ static int pxa168fb_probe(struct platform_device *pdev) failed_free_clk: clk_disable_unprepare(fbi->clk); failed_free_fbmem: - dma_free_coherent(fbi->dev, info->fix.smem_len, - info->screen_base, fbi->fb_start_dma); + dma_free_wc(fbi->dev, info->fix.smem_len, + info->screen_base, fbi->fb_start_dma); failed_free_info: kfree(info); @@ -804,7 +804,7 @@ static int pxa168fb_remove(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); - dma_free_wc(fbi->dev, 
PAGE_ALIGN(info->fix.smem_len), + dma_free_wc(fbi->dev, info->fix.smem_len, info->screen_base, info->fix.smem_start); clk_disable_unprepare(fbi->clk); -- GitLab From 0ea58ac76c2f7456b13d064f33cf198da64165fc Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 21 Nov 2019 08:55:24 +0100 Subject: [PATCH 0102/1278] media: i2c: mt9v032: fix enum mbus codes and frame sizes [ Upstream commit 1451d5ae351d938a0ab1677498c893f17b9ee21d ] This driver supports both the mt9v032 (color) and the mt9v022 (mono) sensors. Depending on which sensor is used, the format from the sensor is different. The format.code inside the dev struct holds this information. The enum mbus and enum frame sizes need to take into account both type of sensors, not just the color one. To solve this, use the format.code in these functions instead of the hardcoded bayer color format (which is only used for mt9v032). [Sakari Ailus: rewrapped commit message] Suggested-by: Wenyou Yang Signed-off-by: Eugen Hristev Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/mt9v032.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/mt9v032.c b/drivers/media/i2c/mt9v032.c index 8a430640c85d..1a20d0d558d3 100644 --- a/drivers/media/i2c/mt9v032.c +++ b/drivers/media/i2c/mt9v032.c @@ -423,10 +423,12 @@ static int mt9v032_enum_mbus_code(struct v4l2_subdev *subdev, struct v4l2_subdev_pad_config *cfg, struct v4l2_subdev_mbus_code_enum *code) { + struct mt9v032 *mt9v032 = to_mt9v032(subdev); + if (code->index > 0) return -EINVAL; - code->code = MEDIA_BUS_FMT_SGRBG10_1X10; + code->code = mt9v032->format.code; return 0; } @@ -434,7 +436,11 @@ static int mt9v032_enum_frame_size(struct v4l2_subdev *subdev, struct v4l2_subdev_pad_config *cfg, struct v4l2_subdev_frame_size_enum *fse) { - if (fse->index >= 3 || fse->code != MEDIA_BUS_FMT_SGRBG10_1X10) + struct mt9v032 *mt9v032 = 
to_mt9v032(subdev); + + if (fse->index >= 3) + return -EINVAL; + if (mt9v032->format.code != fse->code) return -EINVAL; fse->min_width = MT9V032_WINDOW_WIDTH_DEF / (1 << fse->index); -- GitLab From ad28e1b4ce466b17131149db9e372ba9ea5baccc Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Mon, 28 Oct 2019 19:54:22 +1100 Subject: [PATCH 0103/1278] powerpc/powernv/iov: Ensure the pdn for VFs always contains a valid PE number [ Upstream commit 3b5b9997b331e77ce967eba2c4bc80dc3134a7fe ] On pseries there is a bug with adding hotplugged devices to an IOMMU group. For a number of dumb reasons fixing that bug first requires re-working how VFs are configured on PowerNV. For background, on PowerNV we use the pcibios_sriov_enable() hook to do two things: 1. Create a pci_dn structure for each of the VFs, and 2. Configure the PHB's internal BARs so the MMIO range for each VF maps to a unique PE. Roughly speaking a PE is the hardware counterpart to a Linux IOMMU group since all the devices in a PE share the same IOMMU table. A PE also defines the set of devices that should be isolated in response to a PCI error (i.e. bad DMA, UR/CA, AER events, etc). When isolated all MMIO and DMA traffic to and from devicein the PE is blocked by the root complex until the PE is recovered by the OS. The requirement to block MMIO causes a giant headache because the P8 PHB generally uses a fixed mapping between MMIO addresses and PEs. As a result we need to delay configuring the IOMMU groups for device until after MMIO resources are assigned. For physical devices (i.e. non-VFs) the PE assignment is done in pcibios_setup_bridge() which is called immediately after the MMIO resources for downstream devices (and the bridge's windows) are assigned. For VFs the setup is more complicated because: a) pcibios_setup_bridge() is not called again when VFs are activated, and b) The pci_dev for VFs are created by generic code which runs after pcibios_sriov_enable() is called. 
The work around for this is a two step process: 1. A fixup in pcibios_add_device() is used to initialise the cached pe_number in pci_dn, then 2. A bus notifier then adds the device to the IOMMU group for the PE specified in pci_dn->pe_number. A side effect of fixing the pseries bug mentioned in the first paragraph is moving the fixup out of pcibios_add_device() and into pcibios_bus_add_device(), which is called much later. This results in step 2. failing because pci_dn->pe_number won't be initialised when the bus notifier is run. We can fix this by removing the need for the fixup. The PE for a VF is known before the VF is even scanned so we can initialise pci_dn->pe_number in pcibios_sriov_enable() instead. Unfortunately, moving the initialisation causes two problems: 1. We trip the WARN_ON() in the current fixup code, and 2. The EEH core clears pdn->pe_number when recovering a VF and relies on the fixup to correctly re-set it. The only justification for either of these is a comment in eeh_rmv_device() suggesting that pdn->pe_number *must* be set to IODA_INVALID_PE in order for the VF to be scanned. However, this comment appears to have no basis in reality. Both bugs can be fixed by just deleting the code. 
Tested-by: Alexey Kardashevskiy Reviewed-by: Alexey Kardashevskiy Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191028085424.12006-1-oohall@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/eeh_driver.c | 6 ------ arch/powerpc/platforms/powernv/pci-ioda.c | 19 +++++++++++++++---- arch/powerpc/platforms/powernv/pci.c | 4 ---- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 470284f9e4f6..5a48c93aaa1b 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -520,12 +520,6 @@ static void *eeh_rmv_device(void *data, void *userdata) pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0); edev->pdev = NULL; - - /* - * We have to set the VF PE number to invalid one, which is - * required to plug the VF successfully. - */ - pdn->pe_number = IODA_INVALID_PE; #endif if (rmv_data) list_add(&edev->rmv_list, &rmv_data->edev_list); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d3d5796f7df6..36ef504eeab3 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1523,6 +1523,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) /* Reserve PE for each VF */ for (vf_index = 0; vf_index < num_vfs; vf_index++) { + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); + struct pci_dn *vf_pdn; + if (pdn->m64_single_mode) pe_num = pdn->pe_num_map[vf_index]; else @@ -1535,13 +1539,11 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pe->pbus = NULL; pe->parent_dev = pdev; pe->mve_number = -1; - pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | - pci_iov_virtfn_devfn(pdev, vf_index); + pe->rid = (vf_bus << 8) | vf_devfn; pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", hose->global_number, 
pdev->bus->number, - PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), - PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ @@ -1555,6 +1557,15 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) list_add_tail(&pe->list, &phb->ioda.pe_list); mutex_unlock(&phb->ioda.pe_list_mutex); + /* associate this pe to it's pdn */ + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { + if (vf_pdn->busno == vf_bus && + vf_pdn->devfn == vf_devfn) { + vf_pdn->pe_number = pe_num; + break; + } + } + pnv_pci_ioda2_setup_dma_pe(phb, pe); } } diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 961c131a5b7e..844ca1886063 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -978,16 +978,12 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) struct pnv_phb *phb = hose->private_data; #ifdef CONFIG_PCI_IOV struct pnv_ioda_pe *pe; - struct pci_dn *pdn; /* Fix the VF pdn PE number */ if (pdev->is_virtfn) { - pdn = pci_get_pdn(pdev); - WARN_ON(pdn->pe_number != IODA_INVALID_PE); list_for_each_entry(pe, &phb->ioda.pe_list, list) { if (pe->rid == ((pdev->bus->number << 8) | (pdev->devfn & 0xff))) { - pdn->pe_number = pe->pe_number; pe->pdev = pdev; break; } -- GitLab From 13c15ab8b31731c4006e8c53918b8bba9548c51a Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 18 Dec 2019 21:26:05 +0800 Subject: [PATCH 0104/1278] gpio: gpio-grgpio: fix possible sleep-in-atomic-context bugs in grgpio_irq_map/unmap() [ Upstream commit e36eaf94be8f7bc4e686246eed3cf92d845e2ef8 ] The driver may sleep while holding a spinlock. 
The function call path (from bottom to top) in Linux 4.19 is: drivers/gpio/gpio-grgpio.c, 261: request_irq in grgpio_irq_map drivers/gpio/gpio-grgpio.c, 255: _raw_spin_lock_irqsave in grgpio_irq_map drivers/gpio/gpio-grgpio.c, 318: free_irq in grgpio_irq_unmap drivers/gpio/gpio-grgpio.c, 299: _raw_spin_lock_irqsave in grgpio_irq_unmap request_irq() and free_irq() can sleep at runtime. To fix these bugs, request_irq() and free_irq() are called without holding the spinlock. These bugs are found by a static analysis tool STCheck written by myself. Signed-off-by: Jia-Ju Bai Link: https://lore.kernel.org/r/20191218132605.10594-1-baijiaju1990@gmail.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-grgpio.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c index 6544a16ab02e..7541bd327e6c 100644 --- a/drivers/gpio/gpio-grgpio.c +++ b/drivers/gpio/gpio-grgpio.c @@ -259,17 +259,16 @@ static int grgpio_irq_map(struct irq_domain *d, unsigned int irq, lirq->irq = irq; uirq = &priv->uirqs[lirq->index]; if (uirq->refcnt == 0) { + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); ret = request_irq(uirq->uirq, grgpio_irq_handler, 0, dev_name(priv->dev), priv); if (ret) { dev_err(priv->dev, "Could not request underlying irq %d\n", uirq->uirq); - - spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); - return ret; } + spin_lock_irqsave(&priv->gc.bgpio_lock, flags); } uirq->refcnt++; @@ -315,8 +314,11 @@ static void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq) if (index >= 0) { uirq = &priv->uirqs[lirq->index]; uirq->refcnt--; - if (uirq->refcnt == 0) + if (uirq->refcnt == 0) { + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); free_irq(uirq->uirq, priv); + return; + } } spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); -- GitLab From 28820c5802f9f83c655ab09ccae8289103ce1490 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 13 Nov 2019 
16:16:25 -0500 Subject: [PATCH 0105/1278] char/random: silence a lockdep splat with printk() [ Upstream commit 1b710b1b10eff9d46666064ea25f079f70bc67a8 ] Sergey didn't like the locking order, uart_port->lock -> tty_port->lock uart_write (uart_port->lock) __uart_start pl011_start_tx pl011_tx_chars uart_write_wakeup tty_port_tty_wakeup tty_port_default tty_port_tty_get (tty_port->lock) but that code is so old, and I have no clue how to de-couple it after checking other locks in the splat. There is an ongoing effort to make all printk() as deferred, so until that happens, workaround it for now as a short-term fix. LTP: starting iogen01 (export LTPROOT; rwtest -N iogen01 -i 120s -s read,write -Da -Dv -n 2 500b:$TMPDIR/doio.f1.$$ 1000b:$TMPDIR/doio.f2.$$) WARNING: possible circular locking dependency detected ------------------------------------------------------ doio/49441 is trying to acquire lock: ffff008b7cff7290 (&(&zone->lock)->rlock){..-.}, at: rmqueue+0x138/0x2050 but task is already holding lock: 60ff000822352818 (&pool->lock/1){-.-.}, at: start_flush_work+0xd8/0x3f0 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #4 (&pool->lock/1){-.-.}: lock_acquire+0x320/0x360 _raw_spin_lock+0x64/0x80 __queue_work+0x4b4/0xa10 queue_work_on+0xac/0x11c tty_schedule_flip+0x84/0xbc tty_flip_buffer_push+0x1c/0x28 pty_write+0x98/0xd0 n_tty_write+0x450/0x60c tty_write+0x338/0x474 __vfs_write+0x88/0x214 vfs_write+0x12c/0x1a4 redirected_tty_write+0x90/0xdc do_loop_readv_writev+0x140/0x180 do_iter_write+0xe0/0x10c vfs_writev+0x134/0x1cc do_writev+0xbc/0x130 __arm64_sys_writev+0x58/0x8c el0_svc_handler+0x170/0x240 el0_sync_handler+0x150/0x250 el0_sync+0x164/0x180 -> #3 (&(&port->lock)->rlock){-.-.}: lock_acquire+0x320/0x360 _raw_spin_lock_irqsave+0x7c/0x9c tty_port_tty_get+0x24/0x60 tty_port_default_wakeup+0x1c/0x3c tty_port_tty_wakeup+0x34/0x40 uart_write_wakeup+0x28/0x44 pl011_tx_chars+0x1b8/0x270 pl011_start_tx+0x24/0x70 __uart_start+0x5c/0x68 uart_write+0x164/0x1c8 do_output_char+0x33c/0x348 n_tty_write+0x4bc/0x60c tty_write+0x338/0x474 redirected_tty_write+0xc0/0xdc do_loop_readv_writev+0x140/0x180 do_iter_write+0xe0/0x10c vfs_writev+0x134/0x1cc do_writev+0xbc/0x130 __arm64_sys_writev+0x58/0x8c el0_svc_handler+0x170/0x240 el0_sync_handler+0x150/0x250 el0_sync+0x164/0x180 -> #2 (&port_lock_key){-.-.}: lock_acquire+0x320/0x360 _raw_spin_lock+0x64/0x80 pl011_console_write+0xec/0x2cc console_unlock+0x794/0x96c vprintk_emit+0x260/0x31c vprintk_default+0x54/0x7c vprintk_func+0x218/0x254 printk+0x7c/0xa4 register_console+0x734/0x7b0 uart_add_one_port+0x734/0x834 pl011_register_port+0x6c/0xac sbsa_uart_probe+0x234/0x2ec platform_drv_probe+0xd4/0x124 really_probe+0x250/0x71c driver_probe_device+0xb4/0x200 __device_attach_driver+0xd8/0x188 bus_for_each_drv+0xbc/0x110 __device_attach+0x120/0x220 device_initial_probe+0x20/0x2c bus_probe_device+0x54/0x100 device_add+0xae8/0xc2c platform_device_add+0x278/0x3b8 platform_device_register_full+0x238/0x2ac acpi_create_platform_device+0x2dc/0x3a8 acpi_bus_attach+0x390/0x3cc acpi_bus_attach+0x108/0x3cc 
acpi_bus_attach+0x108/0x3cc acpi_bus_attach+0x108/0x3cc acpi_bus_scan+0x7c/0xb0 acpi_scan_init+0xe4/0x304 acpi_init+0x100/0x114 do_one_initcall+0x348/0x6a0 do_initcall_level+0x190/0x1fc do_basic_setup+0x34/0x4c kernel_init_freeable+0x19c/0x260 kernel_init+0x18/0x338 ret_from_fork+0x10/0x18 -> #1 (console_owner){-...}: lock_acquire+0x320/0x360 console_lock_spinning_enable+0x6c/0x7c console_unlock+0x4f8/0x96c vprintk_emit+0x260/0x31c vprintk_default+0x54/0x7c vprintk_func+0x218/0x254 printk+0x7c/0xa4 get_random_u64+0x1c4/0x1dc shuffle_pick_tail+0x40/0xac __free_one_page+0x424/0x710 free_one_page+0x70/0x120 __free_pages_ok+0x61c/0xa94 __free_pages_core+0x1bc/0x294 memblock_free_pages+0x38/0x48 __free_pages_memory+0xcc/0xfc __free_memory_core+0x70/0x78 free_low_memory_core_early+0x148/0x18c memblock_free_all+0x18/0x54 mem_init+0xb4/0x17c mm_init+0x14/0x38 start_kernel+0x19c/0x530 -> #0 (&(&zone->lock)->rlock){..-.}: validate_chain+0xf6c/0x2e2c __lock_acquire+0x868/0xc2c lock_acquire+0x320/0x360 _raw_spin_lock+0x64/0x80 rmqueue+0x138/0x2050 get_page_from_freelist+0x474/0x688 __alloc_pages_nodemask+0x3b4/0x18dc alloc_pages_current+0xd0/0xe0 alloc_slab_page+0x2b4/0x5e0 new_slab+0xc8/0x6bc ___slab_alloc+0x3b8/0x640 kmem_cache_alloc+0x4b4/0x588 __debug_object_init+0x778/0x8b4 debug_object_init_on_stack+0x40/0x50 start_flush_work+0x16c/0x3f0 __flush_work+0xb8/0x124 flush_work+0x20/0x30 xlog_cil_force_lsn+0x88/0x204 [xfs] xfs_log_force_lsn+0x128/0x1b8 [xfs] xfs_file_fsync+0x3c4/0x488 [xfs] vfs_fsync_range+0xb0/0xd0 generic_write_sync+0x80/0xa0 [xfs] xfs_file_buffered_aio_write+0x66c/0x6e4 [xfs] xfs_file_write_iter+0x1a0/0x218 [xfs] __vfs_write+0x1cc/0x214 vfs_write+0x12c/0x1a4 ksys_write+0xb0/0x120 __arm64_sys_write+0x54/0x88 el0_svc_handler+0x170/0x240 el0_sync_handler+0x150/0x250 el0_sync+0x164/0x180 other info that might help us debug this: Chain exists of: &(&zone->lock)->rlock --> &(&port->lock)->rlock --> &pool->lock/1 Possible unsafe locking scenario: CPU0 CPU1 ---- 
---- lock(&pool->lock/1); lock(&(&port->lock)->rlock); lock(&pool->lock/1); lock(&(&zone->lock)->rlock); *** DEADLOCK *** 4 locks held by doio/49441: #0: a0ff00886fc27408 (sb_writers#8){.+.+}, at: vfs_write+0x118/0x1a4 #1: 8fff00080810dfe0 (&xfs_nondir_ilock_class){++++}, at: xfs_ilock+0x2a8/0x300 [xfs] #2: ffff9000129f2390 (rcu_read_lock){....}, at: rcu_lock_acquire+0x8/0x38 #3: 60ff000822352818 (&pool->lock/1){-.-.}, at: start_flush_work+0xd8/0x3f0 stack backtrace: CPU: 48 PID: 49441 Comm: doio Tainted: G W Hardware name: HPE Apollo 70 /C01_APACHE_MB , BIOS L50_5.13_1.11 06/18/2019 Call trace: dump_backtrace+0x0/0x248 show_stack+0x20/0x2c dump_stack+0xe8/0x150 print_circular_bug+0x368/0x380 check_noncircular+0x28c/0x294 validate_chain+0xf6c/0x2e2c __lock_acquire+0x868/0xc2c lock_acquire+0x320/0x360 _raw_spin_lock+0x64/0x80 rmqueue+0x138/0x2050 get_page_from_freelist+0x474/0x688 __alloc_pages_nodemask+0x3b4/0x18dc alloc_pages_current+0xd0/0xe0 alloc_slab_page+0x2b4/0x5e0 new_slab+0xc8/0x6bc ___slab_alloc+0x3b8/0x640 kmem_cache_alloc+0x4b4/0x588 __debug_object_init+0x778/0x8b4 debug_object_init_on_stack+0x40/0x50 start_flush_work+0x16c/0x3f0 __flush_work+0xb8/0x124 flush_work+0x20/0x30 xlog_cil_force_lsn+0x88/0x204 [xfs] xfs_log_force_lsn+0x128/0x1b8 [xfs] xfs_file_fsync+0x3c4/0x488 [xfs] vfs_fsync_range+0xb0/0xd0 generic_write_sync+0x80/0xa0 [xfs] xfs_file_buffered_aio_write+0x66c/0x6e4 [xfs] xfs_file_write_iter+0x1a0/0x218 [xfs] __vfs_write+0x1cc/0x214 vfs_write+0x12c/0x1a4 ksys_write+0xb0/0x120 __arm64_sys_write+0x54/0x88 el0_svc_handler+0x170/0x240 el0_sync_handler+0x150/0x250 el0_sync+0x164/0x180 Reviewed-by: Sergey Senozhatsky Signed-off-by: Qian Cai Link: https://lore.kernel.org/r/1573679785-21068-1-git-send-email-cai@lca.pw Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- drivers/char/random.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index e6efa07e9f9e..50d5846acf48 
100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1598,8 +1598,9 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, print_once = true; #endif if (__ratelimit(&unseeded_warning)) - pr_notice("random: %s called from %pS with crng_init=%d\n", - func_name, caller, crng_init); + printk_deferred(KERN_NOTICE "random: %s called from %pS " + "with crng_init=%d\n", func_name, caller, + crng_init); } /* -- GitLab From 3cd0d6e3b297a07c26a38600cb47d9c7932656ea Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Thu, 19 Dec 2019 11:34:01 +0100 Subject: [PATCH 0106/1278] media: sti: bdisp: fix a possible sleep-in-atomic-context bug in bdisp_device_run() [ Upstream commit bb6d42061a05d71dd73f620582d9e09c8fbf7f5b ] The driver may sleep while holding a spinlock. The function call path (from bottom to top) in Linux 4.19 is: drivers/media/platform/sti/bdisp/bdisp-hw.c, 385: msleep in bdisp_hw_reset drivers/media/platform/sti/bdisp/bdisp-v4l2.c, 341: bdisp_hw_reset in bdisp_device_run drivers/media/platform/sti/bdisp/bdisp-v4l2.c, 317: _raw_spin_lock_irqsave in bdisp_device_run To fix this bug, msleep() is replaced with udelay(). This bug is found by a static analysis tool STCheck written by myself. 
Signed-off-by: Jia-Ju Bai Reviewed-by: Fabien Dessenne Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/sti/bdisp/bdisp-hw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/sti/bdisp/bdisp-hw.c b/drivers/media/platform/sti/bdisp/bdisp-hw.c index b7892f3efd98..5c4c3f0c57be 100644 --- a/drivers/media/platform/sti/bdisp/bdisp-hw.c +++ b/drivers/media/platform/sti/bdisp/bdisp-hw.c @@ -14,8 +14,8 @@ #define MAX_SRC_WIDTH 2048 /* Reset & boot poll config */ -#define POLL_RST_MAX 50 -#define POLL_RST_DELAY_MS 20 +#define POLL_RST_MAX 500 +#define POLL_RST_DELAY_MS 2 enum bdisp_target_plan { BDISP_RGB, @@ -382,7 +382,7 @@ int bdisp_hw_reset(struct bdisp_dev *bdisp) for (i = 0; i < POLL_RST_MAX; i++) { if (readl(bdisp->regs + BLT_STA1) & BLT_STA1_IDLE) break; - msleep(POLL_RST_DELAY_MS); + udelay(POLL_RST_DELAY_MS * 1000); } if (i == POLL_RST_MAX) dev_err(bdisp->dev, "Reset timeout\n"); -- GitLab From 95c15f8e630ba55eec2d7fd9de88767d1ee398fd Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 28 Dec 2019 00:04:47 +0100 Subject: [PATCH 0107/1278] pinctrl: baytrail: Do not clear IRQ flags on direct-irq enabled pins [ Upstream commit a23680594da7a9e2696dbcf4f023e9273e2fa40b ] Suspending Goodix touchscreens requires changing the interrupt pin to output before sending them a power-down command. Followed by wiggling the interrupt pin to wake the device up, after which it is put back in input mode. On Bay Trail devices with a Goodix touchscreen direct-irq mode is used in combination with listing the pin as a normal GpioIo resource. This works fine, until the goodix driver gets rmmod-ed and then insmod-ed again. In this case byt_gpio_disable_free() calls byt_gpio_clear_triggering() which clears the IRQ flags and after that the (direct) IRQ no longer triggers. This commit fixes this by adding a check for the BYT_DIRECT_IRQ_EN flag to byt_gpio_clear_triggering(). 
Note that byt_gpio_clear_triggering() only gets called from byt_gpio_disable_free() for direct-irq enabled pins, as these are excluded from the irq_valid mask by byt_init_irq_valid_mask(). Signed-off-by: Hans de Goede Acked-by: Mika Westerberg Reviewed-by: Linus Walleij Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/pinctrl/intel/pinctrl-baytrail.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 9df5d29d708d..4fb3e44f9133 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -958,7 +958,13 @@ static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned int offset) raw_spin_lock_irqsave(&byt_lock, flags); value = readl(reg); - value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); + + /* Do not clear direct-irq enabled IRQs (from gpio_disable_free) */ + if (value & BYT_DIRECT_IRQ_EN) + /* nothing to do */ ; + else + value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); + writel(value, reg); raw_spin_unlock_irqrestore(&byt_lock, flags); } -- GitLab From bfd75d7bf197c9336a46042e9819f01075faa88e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:37 +0100 Subject: [PATCH 0108/1278] efi/x86: Map the entire EFI vendor string before copying it [ Upstream commit ffc2760bcf2dba0dbef74013ed73eea8310cc52c ] Fix a couple of issues with the way we map and copy the vendor string: - we map only 2 bytes, which usually works since you get at least a page, but if the vendor string happens to cross a page boundary, a crash will result - only call early_memunmap() if early_memremap() succeeded, or we will call it with a NULL address which it doesn't like, - while at it, switch to early_memremap_ro(), and array indexing rather than pointer dereferencing to read the CHAR16 characters. 
Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Fixes: 5b83683f32b1 ("x86: EFI runtime service support") Link: https://lkml.kernel.org/r/20200103113953.9571-5-ardb@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/platform/efi/efi.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 335a62e74a2e..5b0275310070 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -480,7 +480,6 @@ void __init efi_init(void) efi_char16_t *c16; char vendor[100] = "unknown"; int i = 0; - void *tmp; #ifdef CONFIG_X86_32 if (boot_params.efi_info.efi_systab_hi || @@ -505,14 +504,16 @@ void __init efi_init(void) /* * Show what we know for posterity */ - c16 = tmp = early_memremap(efi.systab->fw_vendor, 2); + c16 = early_memremap_ro(efi.systab->fw_vendor, + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = *c16++; + for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i) + vendor[i] = c16[i]; vendor[i] = '\0'; - } else + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); + } else { pr_err("Could not map the firmware vendor!\n"); - early_memunmap(tmp, 2); + } pr_info("EFI v%u.%.02u by %s\n", efi.systab->hdr.revision >> 16, -- GitLab From f524a25696a3524ccdba397944c568df7d762026 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 10 Jan 2020 09:30:42 +0800 Subject: [PATCH 0109/1278] MIPS: Loongson: Fix potential NULL dereference in loongson3_platform_init() [ Upstream commit 72d052e28d1d2363f9107be63ef3a3afdea6143c ] If kzalloc fails, it should return -ENOMEM, otherwise may trigger a NULL pointer dereference. 
Fixes: 3adeb2566b9b ("MIPS: Loongson: Improve LEFI firmware interface") Signed-off-by: Tiezhu Yang Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/loongson64/loongson-3/platform.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/loongson64/loongson-3/platform.c b/arch/mips/loongson64/loongson-3/platform.c index 25a97cc0ee33..0db4cc3196eb 100644 --- a/arch/mips/loongson64/loongson-3/platform.c +++ b/arch/mips/loongson64/loongson-3/platform.c @@ -31,6 +31,9 @@ static int __init loongson3_platform_init(void) continue; pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + pdev->name = loongson_sysconf.sensors[i].name; pdev->id = loongson_sysconf.sensors[i].id; pdev->dev.platform_data = &loongson_sysconf.sensors[i]; -- GitLab From 5645615adf3f4bf6ef010943691cb0ebd1fbb065 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 12 Jan 2020 00:07:42 -0800 Subject: [PATCH 0110/1278] sparc: Add .exit.data section. [ Upstream commit 548f0b9a5f4cffa0cecf62eb12aa8db682e4eee6 ] This fixes build errors of all sorts. Also, emit .exit.text unconditionally. Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/sparc/kernel/vmlinux.lds.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 5a2344574f39..4323dc4ae4c7 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -167,12 +167,14 @@ SECTIONS } PERCPU_SECTION(SMP_CACHE_BYTES) -#ifdef CONFIG_JUMP_LABEL . = ALIGN(PAGE_SIZE); .exit.text : { EXIT_TEXT } -#endif + + .exit.data : { + EXIT_DATA + } . 
= ALIGN(PAGE_SIZE); __init_end = .; -- GitLab From 0151b03f43f2d295a6949454434074b34a262e06 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 18 Dec 2019 17:44:05 +0800 Subject: [PATCH 0111/1278] uio: fix a sleep-in-atomic-context bug in uio_dmem_genirq_irqcontrol() [ Upstream commit b74351287d4bd90636c3f48bc188c2f53824c2d4 ] The driver may sleep while holding a spinlock. The function call path (from bottom to top) in Linux 4.19 is: kernel/irq/manage.c, 523: synchronize_irq in disable_irq drivers/uio/uio_dmem_genirq.c, 140: disable_irq in uio_dmem_genirq_irqcontrol drivers/uio/uio_dmem_genirq.c, 134: _raw_spin_lock_irqsave in uio_dmem_genirq_irqcontrol synchronize_irq() can sleep at runtime. To fix this bug, disable_irq() is called without holding the spinlock. This bug is found by a static analysis tool STCheck written by myself. Signed-off-by: Jia-Ju Bai Link: https://lore.kernel.org/r/20191218094405.6009-1-baijiaju1990@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/uio/uio_dmem_genirq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c index e1134a4d97f3..a00b4aee6c79 100644 --- a/drivers/uio/uio_dmem_genirq.c +++ b/drivers/uio/uio_dmem_genirq.c @@ -135,11 +135,13 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on) if (irq_on) { if (test_and_clear_bit(0, &priv->flags)) enable_irq(dev_info->irq); + spin_unlock_irqrestore(&priv->lock, flags); } else { - if (!test_and_set_bit(0, &priv->flags)) + if (!test_and_set_bit(0, &priv->flags)) { + spin_unlock_irqrestore(&priv->lock, flags); disable_irq(dev_info->irq); + } } - spin_unlock_irqrestore(&priv->lock, flags); return 0; } -- GitLab From d2de2d9f9807dde09d3e1dc19531dfdd078cd747 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 18 Dec 2019 11:43:49 +0800 Subject: [PATCH 0112/1278] usb: gadget: udc: fix possible sleep-in-atomic-context bugs in gr_probe() [ Upstream 
commit 9c1ed62ae0690dfe5d5e31d8f70e70a95cb48e52 ] The driver may sleep while holding a spinlock. The function call path (from bottom to top) in Linux 4.19 is: drivers/usb/gadget/udc/core.c, 1175: kzalloc(GFP_KERNEL) in usb_add_gadget_udc_release drivers/usb/gadget/udc/core.c, 1272: usb_add_gadget_udc_release in usb_add_gadget_udc drivers/usb/gadget/udc/gr_udc.c, 2186: usb_add_gadget_udc in gr_probe drivers/usb/gadget/udc/gr_udc.c, 2183: spin_lock in gr_probe drivers/usb/gadget/udc/core.c, 1195: mutex_lock in usb_add_gadget_udc_release drivers/usb/gadget/udc/core.c, 1272: usb_add_gadget_udc_release in usb_add_gadget_udc drivers/usb/gadget/udc/gr_udc.c, 2186: usb_add_gadget_udc in gr_probe drivers/usb/gadget/udc/gr_udc.c, 2183: spin_lock in gr_probe drivers/usb/gadget/udc/gr_udc.c, 212: debugfs_create_file in gr_probe drivers/usb/gadget/udc/gr_udc.c, 2197: gr_dfs_create in gr_probe drivers/usb/gadget/udc/gr_udc.c, 2183: spin_lock in gr_probe drivers/usb/gadget/udc/gr_udc.c, 2114: devm_request_threaded_irq in gr_request_irq drivers/usb/gadget/udc/gr_udc.c, 2202: gr_request_irq in gr_probe drivers/usb/gadget/udc/gr_udc.c, 2183: spin_lock in gr_probe kzalloc(GFP_KERNEL), mutex_lock(), debugfs_create_file() and devm_request_threaded_irq() can sleep at runtime. To fix these possible bugs, usb_add_gadget_udc(), gr_dfs_create() and gr_request_irq() are called without holding the spinlock. These bugs are found by a static analysis tool STCheck written by myself. 
Signed-off-by: Jia-Ju Bai Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/gr_udc.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/udc/gr_udc.c b/drivers/usb/gadget/udc/gr_udc.c index 1f9941145746..feb73a1c42ef 100644 --- a/drivers/usb/gadget/udc/gr_udc.c +++ b/drivers/usb/gadget/udc/gr_udc.c @@ -2200,8 +2200,6 @@ static int gr_probe(struct platform_device *pdev) return -ENOMEM; } - spin_lock(&dev->lock); - /* Inside lock so that no gadget can use this udc until probe is done */ retval = usb_add_gadget_udc(dev->dev, &dev->gadget); if (retval) { @@ -2210,15 +2208,21 @@ static int gr_probe(struct platform_device *pdev) } dev->added = 1; + spin_lock(&dev->lock); + retval = gr_udc_init(dev); - if (retval) + if (retval) { + spin_unlock(&dev->lock); goto out; - - gr_dfs_create(dev); + } /* Clear all interrupt enables that might be left on since last boot */ gr_disable_interrupts_and_pullup(dev); + spin_unlock(&dev->lock); + + gr_dfs_create(dev); + retval = gr_request_irq(dev, dev->irq); if (retval) { dev_err(dev->dev, "Failed to request irq %d\n", dev->irq); @@ -2247,8 +2251,6 @@ static int gr_probe(struct platform_device *pdev) dev_info(dev->dev, "regs: %p, irq %d\n", dev->regs, dev->irq); out: - spin_unlock(&dev->lock); - if (retval) gr_remove(pdev); -- GitLab From 197bdc2a01078753fd9f57a17e08eb97ce7d30ba Mon Sep 17 00:00:00 2001 From: John Keeping Date: Thu, 19 Dec 2019 11:34:31 +0000 Subject: [PATCH 0113/1278] usb: dwc2: Fix IN FIFO allocation [ Upstream commit 644139f8b64d818f6345351455f14471510879a5 ] On chips with fewer FIFOs than endpoints (for example RK3288 which has 9 endpoints, but only 6 which are cabable of input), the DPTXFSIZN registers above the FIFO count may return invalid values. 
With logging added on startup, I see: dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=1 sz=256 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=2 sz=128 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=3 sz=128 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=4 sz=64 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=5 sz=64 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=6 sz=32 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=7 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=8 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=9 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=10 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=11 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=12 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=13 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=14 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=15 sz=0 but: # cat /sys/kernel/debug/ff580000.usb/fifo Non-periodic FIFOs: RXFIFO: Size 275 NPTXFIFO: Size 16, Start 0x00000113 Periodic TXFIFOs: DPTXFIFO 1: Size 256, Start 0x00000123 DPTXFIFO 2: Size 128, Start 0x00000223 DPTXFIFO 3: Size 128, Start 0x000002a3 DPTXFIFO 4: Size 64, Start 0x00000323 DPTXFIFO 5: Size 64, Start 0x00000363 DPTXFIFO 6: Size 32, Start 0x000003a3 DPTXFIFO 7: Size 0, Start 0x000003e3 DPTXFIFO 8: Size 0, Start 0x000003a3 DPTXFIFO 9: Size 256, Start 0x00000123 so it seems that FIFO 9 is mirroring FIFO 1. Fix the allocation by using the FIFO count instead of the endpoint count when selecting a FIFO for an endpoint. Acked-by: Minas Harutyunyan Signed-off-by: John Keeping Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc2/gadget.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 4af9a1c652ed..aeb6f7c84ea0 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3933,11 +3933,12 @@ static int dwc2_hsotg_ep_enable(struct usb_ep *ep, * a unique tx-fifo even if it is non-periodic. 
*/ if (dir_in && hsotg->dedicated_fifos) { + unsigned fifo_count = dwc2_hsotg_tx_fifo_count(hsotg); u32 fifo_index = 0; u32 fifo_size = UINT_MAX; size = hs_ep->ep.maxpacket * hs_ep->mc; - for (i = 1; i < hsotg->num_of_eps; ++i) { + for (i = 1; i <= fifo_count; ++i) { if (hsotg->fifo_map & (1 << i)) continue; val = dwc2_readl(hsotg->regs + DPTXFSIZN(i)); -- GitLab From 2a41bcb3bdc9236c3fe243b014e0d070f3bacd49 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 19 Dec 2019 21:32:46 +0000 Subject: [PATCH 0114/1278] clocksource/drivers/bcm2835_timer: Fix memory leak of timer [ Upstream commit 2052d032c06761330bca4944bb7858b00960e868 ] Currently when setup_irq fails the error exit path will leak the recently allocated timer structure. Originally the code would throw a panic but a later commit changed the behaviour to return via the err_iounmap path and hence we now have a memory leak. Fix this by adding a err_timer_free error path that kfree's timer. Addresses-Coverity: ("Resource Leak") Fixes: 524a7f08983d ("clocksource/drivers/bcm2835_timer: Convert init function to return error") Signed-off-by: Colin Ian King Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20191219213246.34437-1-colin.king@canonical.com Signed-off-by: Sasha Levin --- drivers/clocksource/bcm2835_timer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c index 39e489a96ad7..8894cfc32be0 100644 --- a/drivers/clocksource/bcm2835_timer.c +++ b/drivers/clocksource/bcm2835_timer.c @@ -134,7 +134,7 @@ static int __init bcm2835_timer_init(struct device_node *node) ret = setup_irq(irq, &timer->act); if (ret) { pr_err("Can't set up timer IRQ\n"); - goto err_iounmap; + goto err_timer_free; } clockevents_config_and_register(&timer->evt, freq, 0xf, 0xffffffff); @@ -143,6 +143,9 @@ static int __init bcm2835_timer_init(struct device_node *node) return 0; +err_timer_free: + kfree(timer); + err_iounmap: 
iounmap(base); return ret; -- GitLab From 0ffec5708942c021acf93662079be1cb1cebe738 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Mon, 13 Jan 2020 22:11:58 +0530 Subject: [PATCH 0115/1278] kselftest: Minimise dependency of get_size on C library interfaces [ Upstream commit 6b64a650f0b2ae3940698f401732988699eecf7a ] It was observed[1] on arm64 that __builtin_strlen led to an infinite loop in the get_size selftest. This is because __builtin_strlen (and other builtins) may sometimes result in a call to the C library function. The C library implementation of strlen uses an IFUNC resolver to load the most efficient strlen implementation for the underlying machine and hence has a PLT indirection even for static binaries. Because this binary avoids the C library startup routines, the PLT initialization never happens and hence the program gets stuck in an infinite loop. On x86_64 the __builtin_strlen just happens to expand inline and avoid the call but that is not always guaranteed. Further, while testing on x86_64 (Fedora 31), it was observed that the test also failed with a segfault inside write() because the generated code for the write function in glibc seems to access TLS before the syscall (probably due to the cancellation point check) and fails because TLS is not initialised. To mitigate these problems, this patch reduces the interface with the C library to just the syscall function. The syscall function still sets errno on failure, which is undesirable but for now it only affects cases where syscalls fail. 
[1] https://bugs.linaro.org/show_bug.cgi?id=5479 Signed-off-by: Siddhesh Poyarekar Reported-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Reviewed-by: Tim Bird Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/size/get_size.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/size/get_size.c b/tools/testing/selftests/size/get_size.c index d4b59ab979a0..f55943b6d1e2 100644 --- a/tools/testing/selftests/size/get_size.c +++ b/tools/testing/selftests/size/get_size.c @@ -12,23 +12,35 @@ * own execution. It also attempts to have as few dependencies * on kernel features as possible. * - * It should be statically linked, with startup libs avoided. - * It uses no library calls, and only the following 3 syscalls: + * It should be statically linked, with startup libs avoided. It uses + * no library calls except the syscall() function for the following 3 + * syscalls: * sysinfo(), write(), and _exit() * * For output, it avoids printf (which in some C libraries * has large external dependencies) by implementing it's own * number output and print routines, and using __builtin_strlen() + * + * The test may crash if any of the above syscalls fails because in some + * libc implementations (e.g. the GNU C Library) errno is saved in + * thread-local storage, which does not get initialized due to avoiding + * startup libs. 
*/ #include #include +#include #define STDOUT_FILENO 1 static int print(const char *s) { - return write(STDOUT_FILENO, s, __builtin_strlen(s)); + size_t len = 0; + + while (s[len] != '\0') + len++; + + return syscall(SYS_write, STDOUT_FILENO, s, len); } static inline char *num_to_str(unsigned long num, char *buf, int len) @@ -80,12 +92,12 @@ void _start(void) print("TAP version 13\n"); print("# Testing system size.\n"); - ccode = sysinfo(&info); + ccode = syscall(SYS_sysinfo, &info); if (ccode < 0) { print("not ok 1"); print(test_name); print(" ---\n reason: \"could not get sysinfo\"\n ...\n"); - _exit(ccode); + syscall(SYS_exit, ccode); } print("ok 1"); print(test_name); @@ -101,5 +113,5 @@ void _start(void) print(" ...\n"); print("1..1\n"); - _exit(0); + syscall(SYS_exit, 0); } -- GitLab From 5e9b22da0235e6dd6515c8701c76187dbc6852cb Mon Sep 17 00:00:00 2001 From: Kai Li Date: Sat, 11 Jan 2020 10:25:42 +0800 Subject: [PATCH 0116/1278] jbd2: clear JBD2_ABORT flag before journal_reset to update log tail info when load journal [ Upstream commit a09decff5c32060639a685581c380f51b14e1fc2 ] If the journal is dirty when the filesystem is mounted, jbd2 will replay the journal but the journal superblock will not be updated by journal_reset() because JBD2_ABORT flag is still set (it was set in journal_init_common()). This is problematic because when a new transaction is then committed, it will be recorded in block 1 (journal->j_tail was set to 1 in journal_reset()). If unclean shutdown happens again before the journal superblock is updated, the new recorded transaction will not be replayed during the next mount (because of stale sb->s_start and sb->s_sequence values) which can lead to filesystem corruption. 
Fixes: 85e0c4e89c1b ("jbd2: if the journal is aborted then don't allow update of the log tail") Signed-off-by: Kai Li Link: https://lore.kernel.org/r/20200111022542.5008-1-li.kai4@h3c.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/jbd2/journal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index d3cce5c86fd9..b72be822f04f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1687,6 +1687,11 @@ int jbd2_journal_load(journal_t *journal) journal->j_devname); return -EFSCORRUPTED; } + /* + * clear JBD2_ABORT flag initialized in journal_init_common + * here to update log tail information with the newest seq. + */ + journal->j_flags &= ~JBD2_ABORT; /* OK, we've finished with the dynamic journal bits: * reinitialise the dynamic contents of the superblock in memory @@ -1694,7 +1699,6 @@ int jbd2_journal_load(journal_t *journal) if (journal_reset(journal)) goto recovery_error; - journal->j_flags &= ~JBD2_ABORT; journal->j_flags |= JBD2_LOADED; return 0; -- GitLab From c118cd45f46a0d6b985e9959d020b5ec2f5d58dc Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 7 Jan 2020 18:04:10 -0500 Subject: [PATCH 0117/1278] x86/sysfb: Fix check for bad VRAM size [ Upstream commit dacc9092336be20b01642afe1a51720b31f60369 ] When checking whether the reported lfb_size makes sense, the height * stride result is page-aligned before seeing whether it exceeds the reported size. This doesn't work if height * stride is not an exact number of pages. For example, as reported in the kernel bugzilla below, an 800x600x32 EFI framebuffer gets skipped because of this. Move the PAGE_ALIGN to after the check vs size. 
Reported-by: Christopher Head Tested-by: Christopher Head Signed-off-by: Arvind Sankar Signed-off-by: Borislav Petkov Link: https://bugzilla.kernel.org/show_bug.cgi?id=206051 Link: https://lkml.kernel.org/r/20200107230410.2291947-1-nivedita@alum.mit.edu Signed-off-by: Sasha Levin --- arch/x86/kernel/sysfb_simplefb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 85195d447a92..f3215346e47f 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -94,11 +94,11 @@ __init int create_simplefb(const struct screen_info *si, if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) size <<= 16; length = mode->height * mode->stride; - length = PAGE_ALIGN(length); if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); return -EINVAL; } + length = PAGE_ALIGN(length); /* setup IORESOURCE_MEM as framebuffer memory */ memset(&res, 0, sizeof(res)); -- GitLab From 8f16da1dcdac5f90e77f075160c110101004d48d Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Tue, 9 Sep 2014 22:49:41 +0100 Subject: [PATCH 0118/1278] tracing: Fix tracing_stat return values in error handling paths [ Upstream commit afccc00f75bbbee4e4ae833a96c2d29a7259c693 ] tracing_stat_init() was always returning '0', even on the error paths. It now returns -ENODEV if tracing_init_dentry() fails or -ENOMEM if it fails to create the 'trace_stat' debugfs directory. 
Link: http://lkml.kernel.org/r/1410299381-20108-1-git-send-email-luis.henriques@canonical.com Fixes: ed6f1c996bfe4 ("tracing: Check return value of tracing_init_dentry()") Signed-off-by: Luis Henriques [ Pulled from the archeological digging of my INBOX ] Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/trace_stat.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 75bf1bcb4a8a..bf68af63538b 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -278,18 +278,22 @@ static int tracing_stat_init(void) d_tracing = tracing_init_dentry(); if (IS_ERR(d_tracing)) - return 0; + return -ENODEV; stat_dir = tracefs_create_dir("trace_stat", d_tracing); - if (!stat_dir) + if (!stat_dir) { pr_warn("Could not create tracefs 'trace_stat' entry\n"); + return -ENOMEM; + } return 0; } static int init_stat_file(struct stat_session *session) { - if (!stat_dir && tracing_stat_init()) - return -ENODEV; + int ret; + + if (!stat_dir && (ret = tracing_stat_init())) + return ret; session->file = tracefs_create_file(session->ts->name, 0644, stat_dir, -- GitLab From cf6eb046e53cc36534562c04cc6ce130c40dfcfc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 24 Jan 2020 17:47:49 -0500 Subject: [PATCH 0119/1278] tracing: Fix very unlikely race of registering two stat tracers [ Upstream commit dfb6cd1e654315168e36d947471bd2a0ccd834ae ] Looking through old emails in my INBOX, I came across a patch from Luis Henriques that attempted to fix a race of two stat tracers registering the same stat trace (extremely unlikely, as this is done in the kernel, and probably doesn't even exist). The submitted patch wasn't quite right as it needed to deal with clean up a bit better (if two stat tracers were the same, it would have the same files). 
But to make the code cleaner, all we needed to do is to keep the all_stat_sessions_mutex held for most of the registering function. Link: http://lkml.kernel.org/r/1410299375-20068-1-git-send-email-luis.henriques@canonical.com Fixes: 002bb86d8d42f ("tracing/ftrace: separate events tracing and stats tracing engine") Reported-by: Luis Henriques Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/trace_stat.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index bf68af63538b..92b76f9e25ed 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -306,7 +306,7 @@ static int init_stat_file(struct stat_session *session) int register_stat_tracer(struct tracer_stat *trace) { struct stat_session *session, *node; - int ret; + int ret = -EINVAL; if (!trace) return -EINVAL; @@ -317,17 +317,15 @@ int register_stat_tracer(struct tracer_stat *trace) /* Already registered? 
*/ mutex_lock(&all_stat_sessions_mutex); list_for_each_entry(node, &all_stat_sessions, session_list) { - if (node->ts == trace) { - mutex_unlock(&all_stat_sessions_mutex); - return -EINVAL; - } + if (node->ts == trace) + goto out; } - mutex_unlock(&all_stat_sessions_mutex); + ret = -ENOMEM; /* Init the session */ session = kzalloc(sizeof(*session), GFP_KERNEL); if (!session) - return -ENOMEM; + goto out; session->ts = trace; INIT_LIST_HEAD(&session->session_list); @@ -336,15 +334,16 @@ int register_stat_tracer(struct tracer_stat *trace) ret = init_stat_file(session); if (ret) { destroy_session(session); - return ret; + goto out; } + ret = 0; /* Register */ - mutex_lock(&all_stat_sessions_mutex); list_add_tail(&session->session_list, &all_stat_sessions); + out: mutex_unlock(&all_stat_sessions_mutex); - return 0; + return ret; } void unregister_stat_tracer(struct tracer_stat *trace) -- GitLab From 7139b70b1ae5ea076f03b888e3bec19e268faa28 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 4 Dec 2019 20:46:12 +0800 Subject: [PATCH 0120/1278] ext4, jbd2: ensure panic when aborting with zero errno [ Upstream commit 51f57b01e4a3c7d7bdceffd84de35144e8c538e7 ] JBD2_REC_ERR flag used to indicate the errno has been updated when jbd2 aborted, and then __ext4_abort() and ext4_handle_error() can invoke panic if ERRORS_PANIC is specified. But if the journal has been aborted with zero errno, jbd2_journal_abort() didn't set this flag so we can no longer panic. Fix this by always record the proper errno in the journal superblock. 
Fixes: 4327ba52afd03 ("ext4, jbd2: ensure entering into panic after recording an error in superblock") Signed-off-by: zhangyi (F) Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20191204124614.45424-3-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/jbd2/checkpoint.c | 2 +- fs/jbd2/journal.c | 15 ++++----------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index fe4fe155b7fb..15d129b7494b 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -168,7 +168,7 @@ void __jbd2_log_wait_for_space(journal_t *journal) "journal space in %s\n", __func__, journal->j_devname); WARN_ON(1); - jbd2_journal_abort(journal, 0); + jbd2_journal_abort(journal, -EIO); } write_lock(&journal->j_state_lock); } else { diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b72be822f04f..eae9ced846d5 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2128,12 +2128,10 @@ static void __journal_abort_soft (journal_t *journal, int errno) __jbd2_journal_abort_hard(journal); - if (errno) { - jbd2_journal_update_sb_errno(journal); - write_lock(&journal->j_state_lock); - journal->j_flags |= JBD2_REC_ERR; - write_unlock(&journal->j_state_lock); - } + jbd2_journal_update_sb_errno(journal); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_REC_ERR; + write_unlock(&journal->j_state_lock); } /** @@ -2175,11 +2173,6 @@ static void __journal_abort_soft (journal_t *journal, int errno) * failure to disk. ext3_error, for example, now uses this * functionality. * - * Errors which originate from within the journaling layer will NOT - * supply an errno; a null errno implies that absolutely no further - * writes are done to the journal (unless there are any already in - * progress). 
- * */ void jbd2_journal_abort(journal_t *journal, int errno) -- GitLab From 24fd0b0adc5354b31fd1f4b8315e6c7e8a4eb41b Mon Sep 17 00:00:00 2001 From: Sun Ke Date: Wed, 22 Jan 2020 11:18:57 +0800 Subject: [PATCH 0121/1278] nbd: add a flush_workqueue in nbd_start_device [ Upstream commit 5c0dd228b5fc30a3b732c7ae2657e0161ec7ed80 ] When kzalloc fails, it may lead to an attempt to destroy the workqueue from inside the workqueue. Suppose num_connections is m (2 < m), and the NO.1 ~ NO.n (1 < n < m) kzalloc calls are successful, but the NO.(n + 1) call fails. Then, nbd_start_device will return ENOMEM to nbd_start_device_ioctl, and nbd_start_device_ioctl will return immediately without running flush_workqueue. However, we still have n recv threads. If nbd_release runs first, recv threads may have to drop the last config_refs and try to destroy the workqueue from inside the workqueue. To fix it, add a flush_workqueue in nbd_start_device. Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs") Signed-off-by: Sun Ke Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4c661ad91e7d..8f56e6b2f114 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1203,6 +1203,16 @@ static int nbd_start_device(struct nbd_device *nbd) args = kzalloc(sizeof(*args), GFP_KERNEL); if (!args) { sock_shutdown(nbd); + /* + * If num_connections is m (2 < m), + * and NO.1 ~ NO.n(1 < n < m) kzallocs are successful. + * But NO.(n + 1) failed. We still have n recv threads. + * So, add flush_workqueue here to prevent recv threads + * dropping the last config_refs and trying to destroy + * the workqueue from inside the workqueue. 
+ */ + if (i) + flush_workqueue(nbd->recv_workq); return -ENOMEM; } sk_set_memalloc(config->socks[i]->sock->sk); -- GitLab From 394c452af6013255668b8f157a1bdfd4571f839d Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 5 Dec 2019 07:40:43 -0500 Subject: [PATCH 0122/1278] KVM: s390: ENOTSUPP -> EOPNOTSUPP fixups [ Upstream commit c611990844c28c61ca4b35ff69d3a2ae95ccd486 ] There is no ENOTSUPP for userspace. Reported-by: Julian Wiedmann Fixes: 519783935451 ("KVM: s390: introduce ais mode modify function") Fixes: 2c1a48f2e5ed ("KVM: S390: add new group for flic") Reviewed-by: Cornelia Huck Reviewed-by: Thomas Huth Signed-off-by: Christian Borntraeger Signed-off-by: Sasha Levin --- arch/s390/kvm/interrupt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 28f3796d23c8..61d25e2c82ef 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1913,7 +1913,7 @@ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; mutex_lock(&fi->ais_lock); ais.simm = fi->simm; @@ -2214,7 +2214,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr) int ret = 0; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) return -EFAULT; @@ -2294,7 +2294,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_ais_all ais; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais))) return -EFAULT; -- GitLab From 16536ce0dfa08059fdd315f99b8ccfb30af2290c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 1 Feb 2020 14:03:11 +0900 Subject: [PATCH 0123/1278] kconfig: fix broken dependency in randconfig-generated .config [ Upstream commit 
c8fb7d7e48d11520ad24808cfce7afb7b9c9f798 ] Running randconfig on arm64 using KCONFIG_SEED=0x40C5E904 (e.g. on v5.5) produces the .config with CONFIG_EFI=y and CONFIG_CPU_BIG_ENDIAN=y, which does not meet the !CONFIG_CPU_BIG_ENDIAN dependency. This is because the user choice for CONFIG_CPU_LITTLE_ENDIAN vs CONFIG_CPU_BIG_ENDIAN is set by randomize_choice_values() after the value of CONFIG_EFI is calculated. When this happens, the has_changed flag should be set. Currently, it takes the result from the last iteration. It should accumulate all the results of the loop. Fixes: 3b9a19e08960 ("kconfig: loop as long as we changed some symbols in randconfig") Reported-by: Vincenzo Frascino Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/kconfig/confdata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 27aac273205b..fa423fcd1a92 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -1238,7 +1238,7 @@ bool conf_set_all_new_symbols(enum conf_def_mode mode) sym_calc_value(csym); if (mode == def_random) - has_changed = randomize_choice_values(csym); + has_changed |= randomize_choice_values(csym); else { set_all_choice_values(csym); has_changed = true; -- GitLab From ef7339a38eccc07dbbc6a4dde0c0d4d3be157a87 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 10:31:34 -0800 Subject: [PATCH 0124/1278] clk: qcom: rcg2: Don't crash if our parent can't be found; return an error [ Upstream commit 908b050114d8fefdddc57ec9fbc213c3690e7f5f ] When I got my clock parenting slightly wrong I ended up with a crash that looked like this: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... pc : clk_hw_get_rate+0x14/0x44 ... 
Call trace: clk_hw_get_rate+0x14/0x44 _freq_tbl_determine_rate+0x94/0xfc clk_rcg2_determine_rate+0x2c/0x38 clk_core_determine_round_nolock+0x4c/0x88 clk_core_round_rate_nolock+0x6c/0xa8 clk_core_round_rate_nolock+0x9c/0xa8 clk_core_set_rate_nolock+0x70/0x180 clk_set_rate+0x3c/0x6c of_clk_set_defaults+0x254/0x360 platform_drv_probe+0x28/0xb0 really_probe+0x120/0x2dc driver_probe_device+0x64/0xfc device_driver_attach+0x4c/0x6c __driver_attach+0xac/0xc0 bus_for_each_dev+0x84/0xcc driver_attach+0x2c/0x38 bus_add_driver+0xfc/0x1d0 driver_register+0x64/0xf8 __platform_driver_register+0x4c/0x58 msm_drm_register+0x5c/0x60 ... It turned out that clk_hw_get_parent_by_index() was returning NULL and we weren't checking. Let's check it so that we don't crash. Fixes: ac269395cdd8 ("clk: qcom: Convert to clk_hw based provider APIs") Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Link: https://lkml.kernel.org/r/20200203103049.v4.1.I7487325fe8e701a68a07d3be8a6a4b571eca9cfa@changeid Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/qcom/clk-rcg2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index a93439242565..d3953ea69fda 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -210,6 +210,9 @@ static int _freq_tbl_determine_rate(struct clk_hw *hw, const struct freq_tbl *f, clk_flags = clk_hw_get_flags(hw); p = clk_hw_get_parent_by_index(hw, index); + if (!p) + return -EINVAL; + if (clk_flags & CLK_SET_RATE_PARENT) { if (f->pre_div) { if (!rate) -- GitLab From df49e093ed54581241e70ce3b2ebd2faa92abfbf Mon Sep 17 00:00:00 2001 From: yu kuai Date: Mon, 4 Nov 2019 21:27:20 +0800 Subject: [PATCH 0125/1278] drm/amdgpu: remove 4 set but not used variable in amdgpu_atombios_get_connector_info_from_object_table [ Upstream commit bae028e3e521e8cb8caf2cc16a455ce4c55f2332 ] Fixes gcc '-Wunused-but-set-variable' warning: drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c: In 
function 'amdgpu_atombios_get_connector_info_from_object_table': drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c:376:26: warning: variable 'grph_obj_num' set but not used [-Wunused-but-set-variable] drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c:376:13: warning: variable 'grph_obj_id' set but not used [-Wunused-but-set-variable] drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c:341:37: warning: variable 'con_obj_type' set but not used [-Wunused-but-set-variable] drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c:341:24: warning: variable 'con_obj_num' set but not used [-Wunused-but-set-variable] They are never used, so can be removed. Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)") Signed-off-by: yu kuai Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index cc4e18dcd8b6..4779740421a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -336,17 +336,9 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * path_size += le16_to_cpu(path->usSize); if (device_support & le16_to_cpu(path->usDeviceTag)) { - uint8_t con_obj_id, con_obj_num, con_obj_type; - - con_obj_id = + uint8_t con_obj_id = (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - con_obj_num = - (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK) - >> ENUM_ID_SHIFT; - con_obj_type = - (le16_to_cpu(path->usConnObjectId) & - OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; /* Skip TV/CV support */ if ((le16_to_cpu(path->usDeviceTag) == @@ -371,14 +363,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * router.ddc_valid = false; router.cd_valid = false; for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) { - uint8_t grph_obj_id, grph_obj_num, 
grph_obj_type; - - grph_obj_id = - (le16_to_cpu(path->usGraphicObjIds[j]) & - OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - grph_obj_num = - (le16_to_cpu(path->usGraphicObjIds[j]) & - ENUM_ID_MASK) >> ENUM_ID_SHIFT; + uint8_t grph_obj_type= grph_obj_type = (le16_to_cpu(path->usGraphicObjIds[j]) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; -- GitLab From b99bcff0147d069adbc68349a2a8bbe40855c2f2 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 3 Dec 2019 17:47:09 +0100 Subject: [PATCH 0126/1278] regulator: rk808: Lower log level on optional GPIOs being not available [ Upstream commit b8a039d37792067c1a380dc710361905724b9b2f ] RK808 can leverage a couple of GPIOs to tweak the ramp rate during DVS (Dynamic Voltage Scaling). These GPIOs are entirely optional but a dev_warn() appeared when cleaning this driver to use a more up-to-date gpiod API. At least reduce the log level to 'info' as it is totally fine to not populate these GPIO on a hardware design. This change is trivial but it is worth not polluting the logs during bringup phase by having real warnings and errors sorted out correctly. 
Fixes: a13eaf02e2d6 ("regulator: rk808: make better use of the gpiod API") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20191203164709.11127-1-miquel.raynal@bootlin.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/rk808-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c index 213b68743cc8..92498ac50303 100644 --- a/drivers/regulator/rk808-regulator.c +++ b/drivers/regulator/rk808-regulator.c @@ -714,7 +714,7 @@ static int rk808_regulator_dt_parse_pdata(struct device *dev, } if (!pdata->dvs_gpio[i]) { - dev_warn(dev, "there is no dvs%d gpio\n", i); + dev_info(dev, "there is no dvs%d gpio\n", i); continue; } -- GitLab From 7ee8e95a8c58c1040aeb3e96a4e68107d9f25c96 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Nov 2019 15:55:51 +0100 Subject: [PATCH 0127/1278] net/wan/fsl_ucc_hdlc: reject muram offsets above 64K [ Upstream commit 148587a59f6b85831695e0497d9dd1af5f0495af ] Qiang Zhao points out that these offsets get written to 16-bit registers, and there are some QE platforms with more than 64K muram. So it is possible that qe_muram_alloc() gives us an allocation that can't actually be used by the hardware, so detect and reject that. Reported-by: Qiang Zhao Reviewed-by: Timur Tabi Signed-off-by: Rasmus Villemoes Acked-by: David S. 
Miller Signed-off-by: Li Yang Signed-off-by: Sasha Levin --- drivers/net/wan/fsl_ucc_hdlc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 571a1ff8f81f..6a26cef62193 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -240,6 +240,11 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) ret = -ENOMEM; goto free_riptr; } + if (riptr != (u16)riptr || tiptr != (u16)tiptr) { + dev_err(priv->dev, "MURAM allocation out of addressable range\n"); + ret = -ENOMEM; + goto free_tiptr; + } /* Set RIPTR, TIPTR */ iowrite16be(riptr, &priv->ucc_pram->riptr); -- GitLab From 2caa8fad60a0186d0afad87e4281965dcba7c3b9 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Mon, 25 Nov 2019 13:52:52 -0600 Subject: [PATCH 0128/1278] PCI/IOV: Fix memory leak in pci_iov_add_virtfn() [ Upstream commit 8c386cc817878588195dde38e919aa6ba9409d58 ] In the implementation of pci_iov_add_virtfn() the allocated virtfn is leaked if pci_setup_device() fails. The error handling is not calling pci_stop_and_remove_bus_device(). Change the goto label to failed2. 
Fixes: 156c55325d30 ("PCI: Check for pci_setup_device() failure in pci_iov_add_virtfn()") Link: https://lore.kernel.org/r/20191125195255.23740-1-navid.emamdoost@gmail.com Signed-off-by: Navid Emamdoost Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/iov.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 0fd8e164339c..0dc646c1bc3d 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -179,6 +179,7 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) failed2: sysfs_remove_link(&dev->dev.kobj, buf); failed1: + pci_stop_and_remove_bus_device(virtfn); pci_dev_put(dev); pci_stop_and_remove_bus_device(virtfn); failed0: -- GitLab From 4aea4f02dc85136a45f943ea2ba2b111d553017a Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Mon, 9 Dec 2019 21:08:45 +0800 Subject: [PATCH 0129/1278] NFC: port100: Convert cpu_to_le16(le16_to_cpu(E1) + E2) to use le16_add_cpu(). [ Upstream commit 718eae277e62a26e5862eb72a830b5e0fe37b04a ] Convert cpu_to_le16(le16_to_cpu(frame->datalen) + len) to use le16_add_cpu(), which is more concise and does the same thing. Reported-by: Hulk Robot Signed-off-by: Mao Wenan Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/nfc/port100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 60ae382f50da..06bb226c62ef 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -574,7 +574,7 @@ static void port100_tx_update_payload_len(void *_frame, int len) { struct port100_frame *frame = _frame; - frame->datalen = cpu_to_le16(le16_to_cpu(frame->datalen) + len); + le16_add_cpu(&frame->datalen, len); } static bool port100_rx_frame_is_valid(void *_frame) -- GitLab From b4571f0e7235eaebd052b712fe0f586f474904d3 Mon Sep 17 00:00:00 2001 From: Manu Gautam Date: Mon, 9 Dec 2019 16:15:01 +0100 Subject: [PATCH 0130/1278] arm64: dts: qcom: msm8996: Disable USB2 PHY suspend by core [ Upstream commit d026c96b25b7ce5df89526aad2df988d553edb4d ] QUSB2 PHY on msm8996 doesn't work well when autosuspend by dwc3 core using USB2PHYCFG register is enabled. One of the issue seen is that PHY driver reports PLL lock failure and fails phy_init() if dwc3 core has USB2 PHY suspend enabled. Fix this by using quirks to disable USB2 PHY LPM/suspend and dwc3 core already takes care of explicitly suspending PHY during suspend if quirks are specified. 
Signed-off-by: Manu Gautam Signed-off-by: Paolo Pisati Link: https://lore.kernel.org/r/20191209151501.26993-1-p.pisati@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8996.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 6f372ec055dd..da2949586c7a 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -788,6 +788,8 @@ interrupts = <0 138 0>; phys = <&hsusb_phy2>; phy-names = "usb2-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; @@ -817,6 +819,8 @@ interrupts = <0 131 0>; phys = <&hsusb_phy1>, <&ssusb_phy_0>; phy-names = "usb2-phy", "usb3-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; }; -- GitLab From eacee8c37a6eba0965fa82998f60777c9c1e683e Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 9 Dec 2019 08:50:17 -0800 Subject: [PATCH 0131/1278] ARM: dts: imx6: rdu2: Disable WP for USDHC2 and USDHC3 [ Upstream commit cd58a174e58649426fb43d7456e5f7d7eab58af1 ] RDU2 production units come with resistor connecting WP pin to corresponding GPIO DNPed for both SD card slots. Drop any WP related configuration and mark both slots with "disable-wp".
Reported-by: Chris Healy Reviewed-by: Chris Healy Reviewed-by: Lucas Stach Signed-off-by: Andrey Smirnov Cc: Shawn Guo Cc: Fabio Estevam Cc: Lucas Stach Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi index 849eb3443cde..719e63092c2e 100644 --- a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi @@ -587,7 +587,7 @@ pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <&reg_3p3v_sd>; vqmmc-supply = <&reg_3p3v>; status = "okay"; @@ -598,7 +598,7 @@ pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <4>; cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <&reg_3p3v_sd>; vqmmc-supply = <&reg_3p3v>; status = "okay"; @@ -1001,7 +1001,6 @@ MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x17059 MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x17059 MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x17059 - MX6QDL_PAD_NANDF_D3__GPIO2_IO03 0x40010040 MX6QDL_PAD_NANDF_D2__GPIO2_IO02 0x40010040 >; }; @@ -1014,7 +1013,6 @@ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059 MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059 MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059 - MX6QDL_PAD_NANDF_D1__GPIO2_IO01 0x40010040 MX6QDL_PAD_NANDF_D0__GPIO2_IO00 0x40010040 >; -- GitLab From cf471438760565041c052ee5fc3de9465744a735 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sun, 8 Dec 2019 22:11:40 +0100 Subject: [PATCH 0132/1278] media: v4l2-device.h: Explicitly compare grp{id,mask} to zero in v4l2_device macros [ Upstream commit afb34781620274236bd9fc9246e22f6963ef5262 ] When building with Clang + -Wtautological-constant-compare, several of the ivtv and cx18 drivers warn along the lines of:
drivers/media/pci/cx18/cx18-driver.c:1005:21: warning: converting the result of '<<' to a boolean always evaluates to true [-Wtautological-constant-compare] cx18_call_hw(cx, CX18_HW_GPIO_RESET_CTRL, ^ drivers/media/pci/cx18/cx18-cards.h:18:37: note: expanded from macro 'CX18_HW_GPIO_RESET_CTRL' #define CX18_HW_GPIO_RESET_CTRL (1 << 6) ^ 1 warning generated. This warning happens because the shift operation is implicitly converted to a boolean in v4l2_device_mask_call_all before being negated. This can be solved by just comparing the mask result to 0 explicitly so that there is no boolean conversion. The ultimate goal is to enable -Wtautological-compare globally because there are several subwarnings that would be helpful to have. For visual consistency and avoidance of these warnings in the future, all of the implicitly boolean conversions in the v4l2_device macros are converted to explicit ones as well. Link: https://github.com/ClangBuiltLinux/linux/issues/752 Reviewed-by: Ezequiel Garcia Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- include/media/v4l2-device.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h index 8ffa94009d1a..76002416cead 100644 --- a/include/media/v4l2-device.h +++ b/include/media/v4l2-device.h @@ -268,7 +268,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ - !(grpid) || __sd->grp_id == (grpid), o, f , \ + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ } while (0) @@ -280,7 +280,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ - !(grpid) || __sd->grp_id == (grpid), o, f , \ + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ 
}) @@ -294,8 +294,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \ - ##args); \ + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ + f , ##args); \ } while (0) /* @@ -308,8 +308,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \ - ##args); \ + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ + f , ##args); \ }) /* -- GitLab From bf1357325ad163bf716512838728e657a6903d66 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 12 Dec 2019 11:35:58 +0100 Subject: [PATCH 0133/1278] reiserfs: Fix spurious unlock in reiserfs_fill_super() error handling [ Upstream commit 4d5c1adaf893b8aa52525d2b81995e949bcb3239 ] When we fail to allocate string for journal device name we jump to 'error' label which tries to unlock reiserfs write lock which is not held. Jump to 'error_unlocked' instead. 
Fixes: f32485be8397 ("reiserfs: delay reiserfs lock until journal initialization") Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/reiserfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 5208d85dd30c..9caf3948417c 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1954,7 +1954,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (!sbi->s_jdev) { SWARN(silent, s, "", "Cannot allocate memory for " "journal device name"); - goto error; + goto error_unlocked; } } #ifdef CONFIG_QUOTA -- GitLab From 2730ea6d5c5242b0de0ec7599ce2aadb8d9e0a7a Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Sun, 15 Dec 2019 10:14:51 -0600 Subject: [PATCH 0134/1278] fore200e: Fix incorrect checks of NULL pointer dereference [ Upstream commit bbd20c939c8aa3f27fa30e86691af250bf92973a ] In fore200e_send and fore200e_close, the pointers from the arguments are dereferenced in the variable declaration block and then checked for NULL. The patch fixes these issues by avoiding NULL pointer dereferences. Signed-off-by: Aditya Pakki Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/atm/fore200e.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index f8b7e86907cc..0a1ad1a1d34f 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -1496,12 +1496,14 @@ fore200e_open(struct atm_vcc *vcc) static void fore200e_close(struct atm_vcc* vcc) { - struct fore200e* fore200e = FORE200E_DEV(vcc->dev); struct fore200e_vcc* fore200e_vcc; + struct fore200e* fore200e; struct fore200e_vc_map* vc_map; unsigned long flags; ASSERT(vcc); + fore200e = FORE200E_DEV(vcc->dev); + ASSERT((vcc->vpi >= 0) && (vcc->vpi < 1<vci >= 0) && (vcc->vci < 1<dev); - struct fore200e_vcc* fore200e_vcc = FORE200E_VCC(vcc); + struct fore200e* fore200e; + struct fore200e_vcc* fore200e_vcc; struct fore200e_vc_map* vc_map; - struct host_txq* txq = &fore200e->host_txq; + struct host_txq* txq; struct host_txq_entry* entry; struct tpd* tpd; struct tpd_haddr tpd_haddr; @@ -1562,9 +1564,18 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb) unsigned char* data; unsigned long flags; - ASSERT(vcc); - ASSERT(fore200e); - ASSERT(fore200e_vcc); + if (!vcc) + return -EINVAL; + + fore200e = FORE200E_DEV(vcc->dev); + fore200e_vcc = FORE200E_VCC(vcc); + + if (!fore200e) + return -EINVAL; + + txq = &fore200e->host_txq; + if (!fore200e_vcc) + return -EINVAL; if (!test_bit(ATM_VF_READY, &vcc->flags)) { DPRINTK(1, "VC %d.%d.%d not ready for tx\n", vcc->itf, vcc->vpi, vcc->vpi); -- GitLab From 6e46de4f308a3665c110d8237b968364901d4d58 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 20:42:57 -0700 Subject: [PATCH 0135/1278] ALSA: usx2y: Adjust indentation in snd_usX2Y_hwdep_dsp_status [ Upstream commit df4654bd6e42125d9b85ce3a26eaca2935290b98 ] Clang warns: ../sound/usb/usx2y/usX2Yhwdep.c:122:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] info->version = 
USX2Y_DRIVER_VERSION; ^ ../sound/usb/usx2y/usX2Yhwdep.c:120:2: note: previous statement is here if (us428->chip_status & USX2Y_STAT_CHIP_INIT) ^ 1 warning generated. This warning occurs because there is a space before the tab on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. This was introduced before the beginning of git history so no fixes tag. Link: https://github.com/ClangBuiltLinux/linux/issues/831 Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20191218034257.54535-1-natechancellor@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/usx2y/usX2Yhwdep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c index f4b3cda412fc..e75271e731b2 100644 --- a/sound/usb/usx2y/usX2Yhwdep.c +++ b/sound/usb/usx2y/usX2Yhwdep.c @@ -131,7 +131,7 @@ static int snd_usX2Y_hwdep_dsp_status(struct snd_hwdep *hw, info->num_dsps = 2; // 0: Prepad Data, 1: FPGA Code if (us428->chip_status & USX2Y_STAT_CHIP_INIT) info->chip_ready = 1; - info->version = USX2Y_DRIVER_VERSION; + info->version = USX2Y_DRIVER_VERSION; return 0; } -- GitLab From 284fa04ce0f7887f5d1a183e75e10734df2a7a5b Mon Sep 17 00:00:00 2001 From: Phong Tran Date: Wed, 27 Nov 2019 00:55:26 +0700 Subject: [PATCH 0136/1278] b43legacy: Fix -Wcast-function-type [ Upstream commit 475eec112e4267232d10f4afe2f939a241692b6c ] correct usage prototype of callback in tasklet_init(). 
Report by https://github.com/KSPP/linux/issues/20 Tested-by: Larry Finger Signed-off-by: Phong Tran Reviewed-by: Kees Cook Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/b43legacy/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index f1e3dad57629..f435bd0f8b5b 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -1304,8 +1304,9 @@ static void handle_irq_ucode_debug(struct b43legacy_wldev *dev) } /* Interrupt handler bottom-half */ -static void b43legacy_interrupt_tasklet(struct b43legacy_wldev *dev) +static void b43legacy_interrupt_tasklet(unsigned long data) { + struct b43legacy_wldev *dev = (struct b43legacy_wldev *)data; u32 reason; u32 dma_reason[ARRAY_SIZE(dev->dma_reason)]; u32 merged_dma_reason = 0; @@ -3775,7 +3776,7 @@ static int b43legacy_one_core_attach(struct ssb_device *dev, b43legacy_set_status(wldev, B43legacy_STAT_UNINIT); wldev->bad_frames_preempt = modparam_bad_frames_preempt; tasklet_init(&wldev->isr_tasklet, - (void (*)(unsigned long))b43legacy_interrupt_tasklet, + b43legacy_interrupt_tasklet, (unsigned long)wldev); if (modparam_pio) wldev->__using_pio = true; -- GitLab From 79a0c15ca4b9ae01fcac8ddf4535f80c7a4f4176 Mon Sep 17 00:00:00 2001 From: Phong Tran Date: Wed, 27 Nov 2019 00:55:27 +0700 Subject: [PATCH 0137/1278] ipw2x00: Fix -Wcast-function-type [ Upstream commit ebd77feb27e91bb5fe35a7818b7c13ea7435fb98 ] correct usage prototype of callback in tasklet_init(). 
Report by https://github.com/KSPP/linux/issues/20 Signed-off-by: Phong Tran Reviewed-by: Kees Cook Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/ipw2x00/ipw2100.c | 7 ++++--- drivers/net/wireless/intel/ipw2x00/ipw2200.c | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index 19c442cb93e4..8fbdd7d4fd0c 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -3220,8 +3220,9 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv) } } -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv) +static void ipw2100_irq_tasklet(unsigned long data) { + struct ipw2100_priv *priv = (struct ipw2100_priv *)data; struct net_device *dev = priv->net_dev; unsigned long flags; u32 inta, tmp; @@ -6027,7 +6028,7 @@ static void ipw2100_rf_kill(struct work_struct *work) spin_unlock_irqrestore(&priv->low_lock, flags); } -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv); +static void ipw2100_irq_tasklet(unsigned long data); static const struct net_device_ops ipw2100_netdev_ops = { .ndo_open = ipw2100_open, @@ -6157,7 +6158,7 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev, INIT_DELAYED_WORK(&priv->rf_kill, ipw2100_rf_kill); INIT_DELAYED_WORK(&priv->scan_event, ipw2100_scan_event); - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long)) + tasklet_init(&priv->irq_tasklet, ipw2100_irq_tasklet, (unsigned long)priv); /* NOTE: We do not start the deferred work for status checks yet */ diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c index 8da87496cb58..2d0734ab3f74 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c @@ -1966,8 +1966,9 @@ static void notify_wx_assoc_event(struct ipw_priv *priv) wireless_send_event(priv->net_dev, 
SIOCGIWAP, &wrqu, NULL); } -static void ipw_irq_tasklet(struct ipw_priv *priv) +static void ipw_irq_tasklet(unsigned long data) { + struct ipw_priv *priv = (struct ipw_priv *)data; u32 inta, inta_mask, handled = 0; unsigned long flags; int rc = 0; @@ -10702,7 +10703,7 @@ static int ipw_setup_deferred_work(struct ipw_priv *priv) INIT_WORK(&priv->qos_activate, ipw_bg_qos_activate); #endif /* CONFIG_IPW2200_QOS */ - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long)) + tasklet_init(&priv->irq_tasklet, ipw_irq_tasklet, (unsigned long)priv); return ret; -- GitLab From f603cec80d9e851dd27dcc2733f4866f8eb94b4f Mon Sep 17 00:00:00 2001 From: Phong Tran Date: Wed, 27 Nov 2019 00:55:28 +0700 Subject: [PATCH 0138/1278] iwlegacy: Fix -Wcast-function-type [ Upstream commit da5e57e8a6a3e69dac2937ba63fa86355628fbb2 ] correct usage prototype of callback in tasklet_init(). Report by https://github.com/KSPP/linux/issues/20 Signed-off-by: Phong Tran Reviewed-by: Kees Cook Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlegacy/3945-mac.c | 5 +++-- drivers/net/wireless/intel/iwlegacy/4965-mac.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 329f3a63dadd..0fb81151a132 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -1399,8 +1399,9 @@ il3945_dump_nic_error_log(struct il_priv *il) } static void -il3945_irq_tasklet(struct il_priv *il) +il3945_irq_tasklet(unsigned long data) { + struct il_priv *il = (struct il_priv *)data; u32 inta, handled = 0; u32 inta_fh; unsigned long flags; @@ -3432,7 +3433,7 @@ il3945_setup_deferred_work(struct il_priv *il) setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il); tasklet_init(&il->irq_tasklet, - (void (*)(unsigned long))il3945_irq_tasklet, + il3945_irq_tasklet, (unsigned long)il); } diff --git 
a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index de9b6522c43f..665e82effb03 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -4363,8 +4363,9 @@ il4965_synchronize_irq(struct il_priv *il) } static void -il4965_irq_tasklet(struct il_priv *il) +il4965_irq_tasklet(unsigned long data) { + struct il_priv *il = (struct il_priv *)data; u32 inta, handled = 0; u32 inta_fh; unsigned long flags; @@ -6264,7 +6265,7 @@ il4965_setup_deferred_work(struct il_priv *il) setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il); tasklet_init(&il->irq_tasklet, - (void (*)(unsigned long))il4965_irq_tasklet, + il4965_irq_tasklet, (unsigned long)il); } -- GitLab From f5fb8b53423e989da9df0e87981faf90f373ebcc Mon Sep 17 00:00:00 2001 From: Phong Tran Date: Wed, 27 Nov 2019 00:55:29 +0700 Subject: [PATCH 0139/1278] rtlwifi: rtl_pci: Fix -Wcast-function-type [ Upstream commit cb775c88da5d48a85d99d95219f637b6fad2e0e9 ] correct usage prototype of callback in tasklet_init(). 
Report by https://github.com/KSPP/linux/issues/20 Signed-off-by: Phong Tran Reviewed-by: Kees Cook Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtlwifi/pci.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 457a0f725c8a..ab74f3155854 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -1091,13 +1091,15 @@ static irqreturn_t _rtl_pci_interrupt(int irq, void *dev_id) return ret; } -static void _rtl_pci_irq_tasklet(struct ieee80211_hw *hw) +static void _rtl_pci_irq_tasklet(unsigned long data) { + struct ieee80211_hw *hw = (struct ieee80211_hw *)data; _rtl_pci_tx_chk_waitq(hw); } -static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw) +static void _rtl_pci_prepare_bcn_tasklet(unsigned long data) { + struct ieee80211_hw *hw = (struct ieee80211_hw *)data; struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); @@ -1223,10 +1225,10 @@ static void _rtl_pci_init_struct(struct ieee80211_hw *hw, /*task */ tasklet_init(&rtlpriv->works.irq_tasklet, - (void (*)(unsigned long))_rtl_pci_irq_tasklet, + _rtl_pci_irq_tasklet, (unsigned long)hw); tasklet_init(&rtlpriv->works.irq_prepare_bcn_tasklet, - (void (*)(unsigned long))_rtl_pci_prepare_bcn_tasklet, + _rtl_pci_prepare_bcn_tasklet, (unsigned long)hw); INIT_WORK(&rtlpriv->works.lps_change_work, rtl_lps_change_work_callback); -- GitLab From 4a30decabbb4a4d15dd5da43f9e1780062bdc487 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Sun, 15 Dec 2019 13:58:58 -0600 Subject: [PATCH 0140/1278] orinoco: avoid assertion in case of NULL pointer [ Upstream commit c705f9fc6a1736dcf6ec01f8206707c108dca824 ] In ezusb_init, if upriv is NULL, the code crashes. 
However, the caller in ezusb_probe can handle the error and print the failure message. The patch replaces the BUG_ON call to error return. Signed-off-by: Aditya Pakki Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/intersil/orinoco/orinoco_usb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c index 95015d74b1c0..5a64674a5c8d 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -1364,7 +1364,8 @@ static int ezusb_init(struct hermes *hw) int retval; BUG_ON(in_interrupt()); - BUG_ON(!upriv); + if (!upriv) + return -EINVAL; upriv->reply_count = 0; /* Write the MAGIC number on the simulated registers to keep -- GitLab From 4e2fd61c42be3bc47bdd71816f4b82be27501eeb Mon Sep 17 00:00:00 2001 From: Erik Kaneda Date: Tue, 17 Dec 2019 11:35:20 -0800 Subject: [PATCH 0141/1278] ACPICA: Disassembler: create buffer fields in ACPI_PARSE_LOAD_PASS1 [ Upstream commit 5ddbd77181dfca61b16d2e2222382ea65637f1b9 ] ACPICA commit 29cc8dbc5463a93625bed87d7550a8bed8913bf4 create_buffer_field is a deferred op that is typically processed in load pass 2. However, disassembly of control method contents walk the parse tree with ACPI_PARSE_LOAD_PASS1 and AML_CREATE operators are processed in a later walk. This is a problem when there is a control method that has the same name as the AML_CREATE object. In this case, any use of the name segment will be detected as a method call rather than a reference to a buffer field. If this is detected as a method call, it can result in a mal-formed parse tree if the control methods have parameters. This change in processing AML_CREATE ops earlier solves this issue by inserting the named object in the ACPI namespace so that references to this name would be detected as a name string rather than a method call. 
Link: https://github.com/acpica/acpica/commit/29cc8dbc Reported-by: Elia Geretto Tested-by: Elia Geretto Signed-off-by: Bob Moore Signed-off-by: Erik Kaneda Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpica/dsfield.c | 2 +- drivers/acpi/acpica/dswload.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c index 7bcf5f5ea029..8df4a49a99a6 100644 --- a/drivers/acpi/acpica/dsfield.c +++ b/drivers/acpi/acpica/dsfield.c @@ -273,7 +273,7 @@ acpi_ds_create_buffer_field(union acpi_parse_object *op, * FUNCTION: acpi_ds_get_field_names * * PARAMETERS: info - create_field info structure - * ` walk_state - Current method state + * walk_state - Current method state * arg - First parser arg for the field name list * * RETURN: Status diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c index eaa859a89702..1d82e1419397 100644 --- a/drivers/acpi/acpica/dswload.c +++ b/drivers/acpi/acpica/dswload.c @@ -444,6 +444,27 @@ acpi_status acpi_ds_load1_end_op(struct acpi_walk_state *walk_state) ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "Op=%p State=%p\n", op, walk_state)); + /* + * Disassembler: handle create field operators here. + * + * create_buffer_field is a deferred op that is typically processed in load + * pass 2. However, disassembly of control method contents walk the parse + * tree with ACPI_PARSE_LOAD_PASS1 and AML_CREATE operators are processed + * in a later walk. This is a problem when there is a control method that + * has the same name as the AML_CREATE object. In this case, any use of the + * name segment will be detected as a method call rather than a reference + * to a buffer field. + * + * This earlier creation during disassembly solves this issue by inserting + * the named object in the ACPI namespace so that references to this name + * would be a name string rather than a method call. 
+ */ + if ((walk_state->parse_flags & ACPI_PARSE_DISASSEMBLE) && + (walk_state->op_info->flags & AML_CREATE)) { + status = acpi_ds_create_buffer_field(op, walk_state); + return_ACPI_STATUS(status); + } + /* We are only interested in opcodes that have an associated name */ if (!(walk_state->op_info->flags & (AML_NAMED | AML_FIELD))) { -- GitLab From 752c854192b5fded885bdc883fdb400a30cfda04 Mon Sep 17 00:00:00 2001 From: Can Guo Date: Mon, 25 Nov 2019 22:53:33 -0800 Subject: [PATCH 0142/1278] scsi: ufs: Complete pending requests in host reset and restore path [ Upstream commit 2df74b6985b51e77756e2e8faa16c45ca3ba53c5 ] In UFS host reset and restore path, before probe, we stop and start the host controller once. After host controller is stopped, the pending requests, if any, are cleared from the doorbell, but no completion IRQ would be raised due to the hba is stopped. These pending requests shall be completed along with the first NOP_OUT command (as it is the first command which can raise a transfer completion IRQ) sent during probe. Since the OCSs of these pending requests are not SUCCESS (because they are not yet literally finished), their UPIUs shall be dumped. When there are multiple pending requests, the UPIU dump can be overwhelming and may lead to stability issues because it is in atomic context. Therefore, before probe, complete these pending requests right after host controller is stopped and silence the UPIU dump from them. Link: https://lore.kernel.org/r/1574751214-8321-5-git-send-email-cang@qti.qualcomm.com Reviewed-by: Alim Akhtar Reviewed-by: Bean Huo Tested-by: Bean Huo Signed-off-by: Can Guo Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ufs/ufshcd.c | 24 ++++++++++-------------- drivers/scsi/ufs/ufshcd.h | 2 ++ 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index ce40de334f11..c35045324695 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4580,7 +4580,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) break; } /* end of switch */ - if (host_byte(result) != DID_OK) + if ((host_byte(result) != DID_OK) && !hba->silence_err_logs) ufshcd_print_trs(hba, 1 << lrbp->task_tag, true); return result; } @@ -5109,8 +5109,8 @@ static void ufshcd_err_handler(struct work_struct *work) /* * if host reset is required then skip clearing the pending - * transfers forcefully because they will automatically get - * cleared after link startup. + * transfers forcefully because they will get cleared during + * host reset and restore */ if (needs_reset) goto skip_pending_xfer_clear; @@ -5749,9 +5749,15 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) int err; unsigned long flags; - /* Reset the host controller */ + /* + * Stop the host controller and complete the requests + * cleared by h/w + */ spin_lock_irqsave(hba->host->host_lock, flags); ufshcd_hba_stop(hba, false); + hba->silence_err_logs = true; + ufshcd_complete_requests(hba); + hba->silence_err_logs = false; spin_unlock_irqrestore(hba->host->host_lock, flags); /* scale up clocks to max frequency before full reinitialization */ @@ -5785,22 +5791,12 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) static int ufshcd_reset_and_restore(struct ufs_hba *hba) { int err = 0; - unsigned long flags; int retries = MAX_HOST_RESET_RETRIES; do { err = ufshcd_host_reset_and_restore(hba); } while (err && --retries); - /* - * After reset the door-bell might be cleared, complete - * outstanding requests in s/w here. 
- */ - spin_lock_irqsave(hba->host->host_lock, flags); - ufshcd_transfer_req_compl(hba); - ufshcd_tmc_handler(hba); - spin_unlock_irqrestore(hba->host->host_lock, flags); - return err; } diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index cdc8bd05f7df..4aac4d86f57b 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -485,6 +485,7 @@ struct ufs_stats { * @uic_error: UFS interconnect layer error status * @saved_err: sticky error mask * @saved_uic_err: sticky UIC error mask + * @silence_err_logs: flag to silence error logs * @dev_cmd: ufs device management command information * @last_dme_cmd_tstamp: time stamp of the last completed DME command * @auto_bkops_enabled: to track whether bkops is enabled in device @@ -621,6 +622,7 @@ struct ufs_hba { u32 saved_err; u32 saved_uic_err; struct ufs_stats ufs_stats; + bool silence_err_logs; /* Device management request data */ struct ufs_dev_cmd dev_cmd; -- GitLab From 3f292dcec12777b0afcba5532d323b186fbca2ef Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 18:42:20 -0700 Subject: [PATCH 0143/1278] scsi: aic7xxx: Adjust indentation in ahc_find_syncrate [ Upstream commit 4dbc96ad65c45cdd4e895ed7ae4c151b780790c5 ] Clang warns: ../drivers/scsi/aic7xxx/aic7xxx_core.c:2317:5: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if ((syncrate->sxfr_u2 & ST_SXFR) != 0) ^ ../drivers/scsi/aic7xxx/aic7xxx_core.c:2310:4: note: previous statement is here if (syncrate == &ahc_syncrates[maxsync]) ^ 1 warning generated. This warning occurs because there is a space amongst the tabs on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. This has been a problem since the beginning of git history hence no fixes tag. 
Link: https://github.com/ClangBuiltLinux/linux/issues/817 Link: https://lore.kernel.org/r/20191218014220.52746-1-natechancellor@gmail.com Signed-off-by: Nathan Chancellor Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/aic7xxx/aic7xxx_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c index 381846164003..fdbb0a3dc9b4 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_core.c +++ b/drivers/scsi/aic7xxx/aic7xxx_core.c @@ -2321,7 +2321,7 @@ ahc_find_syncrate(struct ahc_softc *ahc, u_int *period, * At some speeds, we only support * ST transfers. */ - if ((syncrate->sxfr_u2 & ST_SXFR) != 0) + if ((syncrate->sxfr_u2 & ST_SXFR) != 0) *ppr_options &= ~MSG_EXT_PPR_DT_REQ; break; } -- GitLab From ff213227e66062249a665acbbeade6c0cab585ef Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Tue, 10 Dec 2019 13:05:21 +0800 Subject: [PATCH 0144/1278] drm/mediatek: handle events when enabling/disabling crtc [ Upstream commit 411f5c1eacfebb1f6e40b653d29447cdfe7282aa ] The driver currently handles vblank events only when updating planes on an already enabled CRTC. The atomic update API however allows requesting an event when enabling or disabling a CRTC. This currently leads to event objects being leaked in the kernel and to events not being sent out. Fix it. 
Signed-off-by: Bibby Hsieh Signed-off-by: CK Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 658b8dd45b83..3ea311d32fa9 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -307,6 +307,7 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) { struct drm_device *drm = mtk_crtc->base.dev; + struct drm_crtc *crtc = &mtk_crtc->base; int i; DRM_DEBUG_DRIVER("%s\n", __func__); @@ -328,6 +329,13 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) mtk_disp_mutex_unprepare(mtk_crtc->mutex); pm_runtime_put(drm->dev); + + if (crtc->state->event && !crtc->state->active) { + spin_lock_irq(&crtc->dev->event_lock); + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + spin_unlock_irq(&crtc->dev->event_lock); + } } static void mtk_crtc_ddp_config(struct drm_crtc *crtc) -- GitLab From 285f156fc859f0a922173a81af7cc4ccb6ac4b5b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 11 Dec 2019 14:52:21 +0100 Subject: [PATCH 0145/1278] ARM: dts: r8a7779: Add device node for ARM global timer [ Upstream commit 8443ffd1bbd5be74e9b12db234746d12e8ea93e2 ] Add a device node for the global timer, which is part of the Cortex-A9 MPCore. The global timer can serve as an accurate (4 ns) clock source for scheduling and delay loops. 
Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191211135222.26770-4-geert+renesas@glider.be Signed-off-by: Sasha Levin --- arch/arm/boot/dts/r8a7779.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi index 8ee0b2ca5d39..2face089d65b 100644 --- a/arch/arm/boot/dts/r8a7779.dtsi +++ b/arch/arm/boot/dts/r8a7779.dtsi @@ -67,6 +67,14 @@ <0xf0000100 0x100>; }; + timer@f0000200 { + compatible = "arm,cortex-a9-global-timer"; + reg = <0xf0000200 0x100>; + interrupts = <GIC_PPI 11 + (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>; + clocks = <&cpg_clocks R8A7779_CLK_ZS>; + }; + timer@f0000600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0xf0000600 0x20>; -- GitLab From 35557d209c94eae2be6613141d2f6f9902dc110c Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 16 Dec 2019 12:01:16 -0700 Subject: [PATCH 0146/1278] dmaengine: Store module owner in dma_device struct [ Upstream commit dae7a589c18a4d979d5f14b09374e871b995ceb1 ] dma_chan_to_owner() dereferences the driver from the struct device to obtain the owner and call module_[get|put](). However, if the backing device is unbound before the dma_device is unregistered, the driver will be cleared and this will cause a NULL pointer dereference. Instead, store a pointer to the owner module in the dma_device struct so the module reference can be properly put when the channel is put, even if the backing device was destroyed first. This change helps to support a safer unbind of DMA engines. If the dma_device is unregistered in the driver's remove function, there's no guarantee that there are no existing clients and a user's action may trigger the WARN_ONCE in dma_async_device_unregister() which is unlikely to leave the system in a consistent state. Instead, a better approach is to allow the backing driver to go away and fail any subsequent requests to it.
Signed-off-by: Logan Gunthorpe Link: https://lore.kernel.org/r/20191216190120.21374-2-logang@deltatee.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/dmaengine.c | 4 +++- include/linux/dmaengine.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index b451354735d3..faaaf10311ec 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -192,7 +192,7 @@ __dma_device_satisfies_mask(struct dma_device *device, static struct module *dma_chan_to_owner(struct dma_chan *chan) { - return chan->device->dev->driver->owner; + return chan->device->owner; } /** @@ -928,6 +928,8 @@ int dma_async_device_register(struct dma_device *device) return -EIO; } + device->owner = device->dev->driver->owner; + if (dma_has_cap(DMA_MEMCPY, device->cap_mask) && !device->device_prep_dma_memcpy) { dev_err(device->dev, "Device claims capability %s, but op is not defined\n", diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 087cbe776868..8089e28539f1 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -677,6 +677,7 @@ struct dma_filter { * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api + * @owner: owner module (automatically set based on the provided dev) * @src_addr_widths: bit mask of src addr widths the device supports * @dst_addr_widths: bit mask of dst addr widths the device supports * @directions: bit mask of slave direction the device supports since @@ -738,6 +739,7 @@ struct dma_device { int dev_id; struct device *dev; + struct module *owner; u32 src_addr_widths; u32 dst_addr_widths; -- GitLab From 37a6fc4398a18616c835df4a63cafc81c3b2e065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= Date: Thu, 5 Dec 2019 21:36:07 -0500 Subject: [PATCH 0147/1278] x86/vdso: Provide missing include file [ Upstream commit 
bff47c2302cc249bcd550b17067f8dddbd4b6f77 ] When building with C=1, sparse issues a warning: CHECK arch/x86/entry/vdso/vdso32-setup.c arch/x86/entry/vdso/vdso32-setup.c:28:28: warning: symbol 'vdso32_enabled' was not declared. Should it be static? Provide the missing header file. Signed-off-by: Valdis Kletnieks Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/36224.1575599767@turing-police Signed-off-by: Sasha Levin --- arch/x86/entry/vdso/vdso32-setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 42d4c89f990e..ddff0ca6f509 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include -- GitLab From b8bedd5bfaa6a1cc9df7c96d9723d9e7aa882f8d Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Thu, 12 Dec 2019 11:20:30 +0900 Subject: [PATCH 0148/1278] PM / devfreq: rk3399_dmc: Add COMPILE_TEST and HAVE_ARM_SMCCC dependency [ Upstream commit eff5d31f7407fa9d31fb840106f1593399457298 ] To allow build testing, add a COMPILE_TEST dependency to both the ARM_RK3399_DMC_DEVFREQ and DEVFREQ_EVENT_ROCKCHIP_DFI configurations. ARM_RK3399_DMC_DEVFREQ uses the SMCCC interface, so also add a HAVE_ARM_SMCCC dependency to prevent a build break.
Reported-by: kbuild test robot Signed-off-by: Chanwoo Choi Signed-off-by: Sasha Levin --- drivers/devfreq/Kconfig | 3 ++- drivers/devfreq/event/Kconfig | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 6a172d338f6d..4c4ec68b0566 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -103,7 +103,8 @@ config ARM_TEGRA_DEVFREQ config ARM_RK3399_DMC_DEVFREQ tristate "ARM RK3399 DMC DEVFREQ Driver" - depends on ARCH_ROCKCHIP + depends on (ARCH_ROCKCHIP && HAVE_ARM_SMCCC) || \ + (COMPILE_TEST && HAVE_ARM_SMCCC) select DEVFREQ_EVENT_ROCKCHIP_DFI select DEVFREQ_GOV_SIMPLE_ONDEMAND select PM_DEVFREQ_EVENT diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig index cd949800eed9..8851bc4e8e3e 100644 --- a/drivers/devfreq/event/Kconfig +++ b/drivers/devfreq/event/Kconfig @@ -33,7 +33,7 @@ config DEVFREQ_EVENT_EXYNOS_PPMU config DEVFREQ_EVENT_ROCKCHIP_DFI tristate "ROCKCHIP DFI DEVFREQ event Driver" - depends on ARCH_ROCKCHIP + depends on ARCH_ROCKCHIP || COMPILE_TEST help This add the devfreq-event driver for Rockchip SoC. It provides DFI (DDR Monitor Module) driver to count ddr load. -- GitLab From be95acb77293527a1d0b649b6a0ac4403edc47ed Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 18 Dec 2019 20:48:10 +0100 Subject: [PATCH 0149/1278] pinctrl: sh-pfc: sh7269: Fix CAN function GPIOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 02aeb2f21530c98fc3ca51028eda742a3fafbd9f ] pinmux_func_gpios[] contains a hole due to the missing function GPIO definition for the "CTX0&CTX1" signal, which is the logical "AND" of the first two CAN outputs. A closer look reveals other issues: - Some functionality is available on alternative pins, but the PINMUX_DATA() entries is using the wrong marks, - Several configurations are missing. 
Fix this by: - Renaming CTX0CTX1CTX2_MARK, CRX0CRX1_PJ22_MARK, and CRX0CRX1CRX2_PJ20_MARK to CTX0_CTX1_CTX2_MARK, CRX0_CRX1_PJ22_MARK, resp. CRX0_CRX1_CRX2_PJ20_MARK for consistency with the corresponding enum IDs, - Adding all missing enum IDs and marks, - Use the right (*_PJ2x) variants for alternative pins, - Adding all missing configurations to pinmux_data[], - Adding all missing function GPIO definitions to pinmux_func_gpios[]. See SH7268 Group, SH7269 Group User’s Manual: Hardware, Rev. 2.00: [1] Table 1.4 List of Pins [2] Figure 23.29 Connection Example when Using Channels 0 and 1 as One Channel (64 Mailboxes × 1 Channel) and Channel 2 as One Channel (32 Mailboxes × 1 Channel), [3] Figure 23.30 Connection Example when Using Channels 0, 1, and 2 as One Channel (96 Mailboxes × 1 Channel), [4] Table 48.3 Multiplexed Pins (Port B), [5] Table 48.4 Multiplexed Pins (Port C), [6] Table 48.10 Multiplexed Pins (Port J), [7] Section 48.2.4 Port B Control Registers 0 to 5 (PBCR0 to PBCR5). Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191218194812.12741-5-geert+renesas@glider.be Signed-off-by: Sasha Levin --- arch/sh/include/cpu-sh2a/cpu/sh7269.h | 11 ++++++-- drivers/pinctrl/sh-pfc/pfc-sh7269.c | 39 ++++++++++++++++++--------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h index d516e5d48818..b887cc402b71 100644 --- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h +++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h @@ -78,8 +78,15 @@ enum { GPIO_FN_WDTOVF, /* CAN */ - GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1, - GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2, GPIO_FN_CRX2, + GPIO_FN_CTX1, GPIO_FN_CRX1, + GPIO_FN_CTX0, GPIO_FN_CRX0, + GPIO_FN_CTX0_CTX1, GPIO_FN_CRX0_CRX1, + GPIO_FN_CTX0_CTX1_CTX2, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2_PJ21, GPIO_FN_CRX2_PJ20, + GPIO_FN_CTX1_PJ23, GPIO_FN_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_PJ23, 
GPIO_FN_CRX0_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_CTX2_PJ21, GPIO_FN_CRX0_CRX1_CRX2_PJ20, /* DMAC */ GPIO_FN_TEND0, GPIO_FN_DACK0, GPIO_FN_DREQ0, diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7269.c b/drivers/pinctrl/sh-pfc/pfc-sh7269.c index cfdb4fc177c3..3df0c0d139d0 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7269.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7269.c @@ -740,13 +740,12 @@ enum { CRX0_MARK, CTX0_MARK, CRX1_MARK, CTX1_MARK, CRX2_MARK, CTX2_MARK, - CRX0_CRX1_MARK, - CRX0_CRX1_CRX2_MARK, - CTX0CTX1CTX2_MARK, + CRX0_CRX1_MARK, CTX0_CTX1_MARK, + CRX0_CRX1_CRX2_MARK, CTX0_CTX1_CTX2_MARK, CRX1_PJ22_MARK, CTX1_PJ23_MARK, CRX2_PJ20_MARK, CTX2_PJ21_MARK, - CRX0CRX1_PJ22_MARK, - CRX0CRX1CRX2_PJ20_MARK, + CRX0_CRX1_PJ22_MARK, CTX0_CTX1_PJ23_MARK, + CRX0_CRX1_CRX2_PJ20_MARK, CTX0_CTX1_CTX2_PJ21_MARK, /* VDC */ DV_CLK_MARK, @@ -824,6 +823,7 @@ static const u16 pinmux_data[] = { PINMUX_DATA(CS3_MARK, PC8MD_001), PINMUX_DATA(TXD7_MARK, PC8MD_010), PINMUX_DATA(CTX1_MARK, PC8MD_011), + PINMUX_DATA(CTX0_CTX1_MARK, PC8MD_100), PINMUX_DATA(PC7_DATA, PC7MD_000), PINMUX_DATA(CKE_MARK, PC7MD_001), @@ -836,11 +836,12 @@ static const u16 pinmux_data[] = { PINMUX_DATA(CAS_MARK, PC6MD_001), PINMUX_DATA(SCK7_MARK, PC6MD_010), PINMUX_DATA(CTX0_MARK, PC6MD_011), + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC6MD_100), PINMUX_DATA(PC5_DATA, PC5MD_000), PINMUX_DATA(RAS_MARK, PC5MD_001), PINMUX_DATA(CRX0_MARK, PC5MD_011), - PINMUX_DATA(CTX0CTX1CTX2_MARK, PC5MD_100), + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC5MD_100), PINMUX_DATA(IRQ0_PC_MARK, PC5MD_101), PINMUX_DATA(PC4_DATA, PC4MD_00), @@ -1292,30 +1293,32 @@ static const u16 pinmux_data[] = { PINMUX_DATA(LCD_DATA23_PJ23_MARK, PJ23MD_010), PINMUX_DATA(LCD_TCON6_MARK, PJ23MD_011), PINMUX_DATA(IRQ3_PJ_MARK, PJ23MD_100), - PINMUX_DATA(CTX1_MARK, PJ23MD_101), + PINMUX_DATA(CTX1_PJ23_MARK, PJ23MD_101), + PINMUX_DATA(CTX0_CTX1_PJ23_MARK, PJ23MD_110), PINMUX_DATA(PJ22_DATA, PJ22MD_000), PINMUX_DATA(DV_DATA22_MARK, PJ22MD_001), PINMUX_DATA(LCD_DATA22_PJ22_MARK, 
PJ22MD_010), PINMUX_DATA(LCD_TCON5_MARK, PJ22MD_011), PINMUX_DATA(IRQ2_PJ_MARK, PJ22MD_100), - PINMUX_DATA(CRX1_MARK, PJ22MD_101), - PINMUX_DATA(CRX0_CRX1_MARK, PJ22MD_110), + PINMUX_DATA(CRX1_PJ22_MARK, PJ22MD_101), + PINMUX_DATA(CRX0_CRX1_PJ22_MARK, PJ22MD_110), PINMUX_DATA(PJ21_DATA, PJ21MD_000), PINMUX_DATA(DV_DATA21_MARK, PJ21MD_001), PINMUX_DATA(LCD_DATA21_PJ21_MARK, PJ21MD_010), PINMUX_DATA(LCD_TCON4_MARK, PJ21MD_011), PINMUX_DATA(IRQ1_PJ_MARK, PJ21MD_100), - PINMUX_DATA(CTX2_MARK, PJ21MD_101), + PINMUX_DATA(CTX2_PJ21_MARK, PJ21MD_101), + PINMUX_DATA(CTX0_CTX1_CTX2_PJ21_MARK, PJ21MD_110), PINMUX_DATA(PJ20_DATA, PJ20MD_000), PINMUX_DATA(DV_DATA20_MARK, PJ20MD_001), PINMUX_DATA(LCD_DATA20_PJ20_MARK, PJ20MD_010), PINMUX_DATA(LCD_TCON3_MARK, PJ20MD_011), PINMUX_DATA(IRQ0_PJ_MARK, PJ20MD_100), - PINMUX_DATA(CRX2_MARK, PJ20MD_101), - PINMUX_DATA(CRX0CRX1CRX2_PJ20_MARK, PJ20MD_110), + PINMUX_DATA(CRX2_PJ20_MARK, PJ20MD_101), + PINMUX_DATA(CRX0_CRX1_CRX2_PJ20_MARK, PJ20MD_110), PINMUX_DATA(PJ19_DATA, PJ19MD_000), PINMUX_DATA(DV_DATA19_MARK, PJ19MD_001), @@ -1666,12 +1669,24 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(WDTOVF), /* CAN */ + GPIO_FN(CTX2), + GPIO_FN(CRX2), GPIO_FN(CTX1), GPIO_FN(CRX1), GPIO_FN(CTX0), GPIO_FN(CRX0), + GPIO_FN(CTX0_CTX1), GPIO_FN(CRX0_CRX1), + GPIO_FN(CTX0_CTX1_CTX2), GPIO_FN(CRX0_CRX1_CRX2), + GPIO_FN(CTX2_PJ21), + GPIO_FN(CRX2_PJ20), + GPIO_FN(CTX1_PJ23), + GPIO_FN(CRX1_PJ22), + GPIO_FN(CTX0_CTX1_PJ23), + GPIO_FN(CRX0_CRX1_PJ22), + GPIO_FN(CTX0_CTX1_CTX2_PJ21), + GPIO_FN(CRX0_CRX1_CRX2_PJ20), /* DMAC */ GPIO_FN(TEND0), -- GitLab From 541078d09772ca69b5276be60657713dcc5ad72f Mon Sep 17 00:00:00 2001 From: Jiewei Ke Date: Fri, 27 Dec 2019 19:36:13 +0800 Subject: [PATCH 0150/1278] RDMA/rxe: Fix error type of mmap_offset [ Upstream commit 6ca18d8927d468c763571f78c9a7387a69ffa020 ] The type of mmap_offset should be u64 instead of int to match the type of mminfo.offset. 
Otherwise, after we create several thousand CQs, it will run into overflow issues. Link: https://lore.kernel.org/r/20191227113613.5020-1-kejiewei.cn@gmail.com Signed-off-by: Jiewei Ke Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index d1cc89f6f2e3..46c8a66731e6 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -408,7 +408,7 @@ struct rxe_dev { struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* guard mmap_offset */ - int mmap_offset; + u64 mmap_offset; atomic64_t stats_counters[RXE_NUM_OF_COUNTERS]; -- GitLab From a801998986776ba437493f53001dd3eeb98db899 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Fri, 3 Jan 2020 22:35:03 -0800 Subject: [PATCH 0151/1278] clk: sunxi-ng: add mux and pll notifiers for A64 CPU clock [ Upstream commit ec97faff743b398e21f74a54c81333f3390093aa ] Like other SoCs with a sun6i-style CCU (e.g. A33, H3), the A64 PLL_CPU clock becomes unstable if some factor is changed without the PLL being gated. Add the mux and PLL notifiers for the A64 CPU clock to work around the problem.
Fixes: c6a0637460c2 ("clk: sunxi-ng: Add A64 clocks") Signed-off-by: Icenowy Zheng Signed-off-by: Vasily Khoruzhick Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu-sun50i-a64.c | 28 ++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c index eaafc038368f..183985c8c9ba 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c @@ -884,11 +884,26 @@ static const struct sunxi_ccu_desc sun50i_a64_ccu_desc = { .num_resets = ARRAY_SIZE(sun50i_a64_ccu_resets), }; +static struct ccu_pll_nb sun50i_a64_pll_cpu_nb = { + .common = &pll_cpux_clk.common, + /* copy from pll_cpux_clk */ + .enable = BIT(31), + .lock = BIT(28), +}; + +static struct ccu_mux_nb sun50i_a64_cpu_nb = { + .common = &cpux_clk.common, + .cm = &cpux_clk.mux, + .delay_us = 1, /* > 8 clock cycles at 24 MHz */ + .bypass_index = 1, /* index of 24 MHz oscillator */ +}; + static int sun50i_a64_ccu_probe(struct platform_device *pdev) { struct resource *res; void __iomem *reg; u32 val; + int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); reg = devm_ioremap_resource(&pdev->dev, res); @@ -902,7 +917,18 @@ static int sun50i_a64_ccu_probe(struct platform_device *pdev) writel(0x515, reg + SUN50I_A64_PLL_MIPI_REG); - return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a64_ccu_desc); + ret = sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a64_ccu_desc); + if (ret) + return ret; + + /* Gate then ungate PLL CPU after any rate changes */ + ccu_pll_notifier_register(&sun50i_a64_pll_cpu_nb); + + /* Reparent CPU during PLL CPU rate changes */ + ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk, + &sun50i_a64_cpu_nb); + + return 0; } static const struct of_device_id sun50i_a64_ccu_ids[] = { -- GitLab From 7b50b1aaade963f9de1a7c7ba15f3ae0c87c4c04 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 4 Jan 2020 12:00:57 +0100 
Subject: [PATCH 0152/1278] ALSA: sh: Fix unused variable warnings [ Upstream commit 5da116f164ce265e397b8f59af5c39e4a61d61a5 ] Remove unused variables that are left over after the conversion of new PCM ops: sound/sh/sh_dac_audio.c:166:26: warning: unused variable 'runtime' sound/sh/sh_dac_audio.c:186:26: warning: unused variable 'runtime' sound/sh/sh_dac_audio.c:205:26: warning: unused variable 'runtime' Fixes: 1cc2f8ba0b3e ("ALSA: sh: Convert to the new PCM ops") Link: https://lore.kernel.org/r/20200104110057.13875-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/sh/sh_dac_audio.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/sound/sh/sh_dac_audio.c b/sound/sh/sh_dac_audio.c index 834b2574786f..6251b5e1b64a 100644 --- a/sound/sh/sh_dac_audio.c +++ b/sound/sh/sh_dac_audio.c @@ -190,7 +190,6 @@ static int snd_sh_dac_pcm_copy(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; if (copy_from_user_toio(chip->data_buffer + pos, src, count)) return -EFAULT; @@ -210,7 +209,6 @@ static int snd_sh_dac_pcm_copy_kernel(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; memcpy_toio(chip->data_buffer + pos, src, count); chip->buffer_end = chip->data_buffer + pos + count; @@ -229,7 +227,6 @@ static int snd_sh_dac_pcm_silence(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; memset_io(chip->data_buffer + pos, 0, count); chip->buffer_end = chip->data_buffer + pos + count; -- GitLab From 5d5207cfadc08358d6bf764e54a9de9a77e015c6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 5 Jan 2020 15:48:23 
+0100 Subject: [PATCH 0153/1278] ALSA: sh: Fix compile warning wrt const [ Upstream commit f1dd4795b1523fbca7ab4344dd5a8bb439cc770d ] A long-standing compile warning was seen during build test: sound/sh/aica.c: In function 'load_aica_firmware': sound/sh/aica.c:521:25: warning: passing argument 2 of 'spu_memload' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers] Fixes: 198de43d758c ("[ALSA] Add ALSA support for the SEGA Dreamcast PCM device") Link: https://lore.kernel.org/r/20200105144823.29547-69-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/sh/aica.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/sh/aica.c b/sound/sh/aica.c index fdc680ae8aa0..d9acf551a898 100644 --- a/sound/sh/aica.c +++ b/sound/sh/aica.c @@ -117,10 +117,10 @@ static void spu_memset(u32 toi, u32 what, int length) } /* spu_memload - write to SPU address space */ -static void spu_memload(u32 toi, void *from, int length) +static void spu_memload(u32 toi, const void *from, int length) { unsigned long flags; - u32 *froml = from; + const u32 *froml = from; u32 __iomem *to = (u32 __iomem *) (SPU_MEMORY_BASE + toi); int i; u32 val; -- GitLab From 29fc3c7b5bbc59468f760708ed7b62937c13094b Mon Sep 17 00:00:00 2001 From: Andrey Zhizhikin Date: Wed, 11 Dec 2019 08:01:09 +0000 Subject: [PATCH 0154/1278] tools lib api fs: Fix gcc9 stringop-truncation compilation error [ Upstream commit 6794200fa3c9c3e6759dae099145f23e4310f4f7 ] GCC9 introduced string hardening mechanisms, which report the following error during fs api compilation: error: '__builtin_strncpy' specified bound 4096 equals destination size [-Werror=stringop-truncation] This happens when the copy length passed to strncpy is equal to the destination size, which could potentially lead to buffer overflow. Mitigate this potential issue by limiting the copy length to the destination size minus 1 and explicitly terminating the destination with a NUL byte.
Signed-off-by: Andrey Zhizhikin Reviewed-by: Petr Mladek Acked-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Kefeng Wang Cc: Martin KaFai Lau Cc: Petr Mladek Cc: Sergey Senozhatsky Cc: Song Liu Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20191211080109.18765-1-andrey.zhizhikin@leica-geosystems.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/lib/api/fs/fs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index b24afc0e6e81..45b50b89009a 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -210,6 +210,7 @@ static bool fs__env_override(struct fs *fs) size_t name_len = strlen(fs->name); /* name + "_PATH" + '\0' */ char upper_name[name_len + 5 + 1]; + memcpy(upper_name, fs->name, name_len); mem_toupper(upper_name, name_len); strcpy(&upper_name[name_len], "_PATH"); @@ -219,7 +220,8 @@ static bool fs__env_override(struct fs *fs) return false; fs->found = true; - strncpy(fs->path, override_path, sizeof(fs->path)); + strncpy(fs->path, override_path, sizeof(fs->path) - 1); + fs->path[sizeof(fs->path) - 1] = '\0'; return true; } -- GitLab From 51d1d3f948ade28573a2dfddc3d39ac54c53e6e3 Mon Sep 17 00:00:00 2001 From: Dingchen Zhang Date: Mon, 10 Jun 2019 09:47:51 -0400 Subject: [PATCH 0155/1278] drm: remove the newline for CRC source name. [ Upstream commit 72a848f5c46bab4c921edc9cbffd1ab273b2be17 ] userspace may transfer a newline, and this terminating newline is replaced by a '\0' to avoid followup issues. 'len-1' is the index to replace the newline of CRC source name. v3: typo fix (Sam) v2: update patch subject, body and format. 
(Sam) Cc: Leo Li Cc: Harry Wentland Cc: Sam Ravnborg Signed-off-by: Dingchen Zhang Reviewed-by: Sam Ravnborg Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20190610134751.14356-1-dingchen.zhang@amd.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_debugfs_crc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_debugfs_crc.c b/drivers/gpu/drm/drm_debugfs_crc.c index 2901b7944068..6858c80d2eb5 100644 --- a/drivers/gpu/drm/drm_debugfs_crc.c +++ b/drivers/gpu/drm/drm_debugfs_crc.c @@ -101,8 +101,8 @@ static ssize_t crc_control_write(struct file *file, const char __user *ubuf, if (IS_ERR(source)) return PTR_ERR(source); - if (source[len] == '\n') - source[len] = '\0'; + if (source[len - 1] == '\n') + source[len - 1] = '\0'; spin_lock_irq(&crc->lock); -- GitLab From cfce607a4f91c1ef1d800b536551e32750e03fd6 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 8 Jan 2020 18:24:16 -0700 Subject: [PATCH 0156/1278] usbip: Fix unsafe unaligned pointer usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 585c91f40d201bc564d4e76b83c05b3b5363fe7e ] Fix unsafe unaligned pointer usage in usbip network interfaces. usbip tool build fails with new gcc -Werror=address-of-packed-member checks. usbip_network.c: In function ‘usbip_net_pack_usb_device’: usbip_network.c:79:32: error: taking address of packed member of ‘struct usbip_usb_device’ may result in an unaligned pointer value [-Werror=address-of-packed-member] 79 | usbip_net_pack_uint32_t(pack, &udev->busnum); Fix with minor changes to pass by value instead of by address. 
Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/20200109012416.2875-1-skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- tools/usb/usbip/src/usbip_network.c | 40 +++++++++++++++++------------ tools/usb/usbip/src/usbip_network.h | 12 +++------ 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/tools/usb/usbip/src/usbip_network.c b/tools/usb/usbip/src/usbip_network.c index b4c37e76a6e0..187dfaa67d0a 100644 --- a/tools/usb/usbip/src/usbip_network.c +++ b/tools/usb/usbip/src/usbip_network.c @@ -62,39 +62,39 @@ void usbip_setup_port_number(char *arg) info("using port %d (\"%s\")", usbip_port, usbip_port_string); } -void usbip_net_pack_uint32_t(int pack, uint32_t *num) +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num) { uint32_t i; if (pack) - i = htonl(*num); + i = htonl(num); else - i = ntohl(*num); + i = ntohl(num); - *num = i; + return i; } -void usbip_net_pack_uint16_t(int pack, uint16_t *num) +uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num) { uint16_t i; if (pack) - i = htons(*num); + i = htons(num); else - i = ntohs(*num); + i = ntohs(num); - *num = i; + return i; } void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev) { - usbip_net_pack_uint32_t(pack, &udev->busnum); - usbip_net_pack_uint32_t(pack, &udev->devnum); - usbip_net_pack_uint32_t(pack, &udev->speed); + udev->busnum = usbip_net_pack_uint32_t(pack, udev->busnum); + udev->devnum = usbip_net_pack_uint32_t(pack, udev->devnum); + udev->speed = usbip_net_pack_uint32_t(pack, udev->speed); - usbip_net_pack_uint16_t(pack, &udev->idVendor); - usbip_net_pack_uint16_t(pack, &udev->idProduct); - usbip_net_pack_uint16_t(pack, &udev->bcdDevice); + udev->idVendor = usbip_net_pack_uint16_t(pack, udev->idVendor); + udev->idProduct = usbip_net_pack_uint16_t(pack, udev->idProduct); + udev->bcdDevice = usbip_net_pack_uint16_t(pack, udev->bcdDevice); } void usbip_net_pack_usb_interface(int pack __attribute__((unused)), @@ -141,6 
+141,14 @@ ssize_t usbip_net_send(int sockfd, void *buff, size_t bufflen) return usbip_net_xmit(sockfd, buff, bufflen, 1); } +static inline void usbip_net_pack_op_common(int pack, + struct op_common *op_common) +{ + op_common->version = usbip_net_pack_uint16_t(pack, op_common->version); + op_common->code = usbip_net_pack_uint16_t(pack, op_common->code); + op_common->status = usbip_net_pack_uint32_t(pack, op_common->status); +} + int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status) { struct op_common op_common; @@ -152,7 +160,7 @@ int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status) op_common.code = code; op_common.status = status; - PACK_OP_COMMON(1, &op_common); + usbip_net_pack_op_common(1, &op_common); rc = usbip_net_send(sockfd, &op_common, sizeof(op_common)); if (rc < 0) { @@ -176,7 +184,7 @@ int usbip_net_recv_op_common(int sockfd, uint16_t *code) goto err; } - PACK_OP_COMMON(0, &op_common); + usbip_net_pack_op_common(0, &op_common); if (op_common.version != USBIP_VERSION) { dbg("version mismatch: %d %d", op_common.version, diff --git a/tools/usb/usbip/src/usbip_network.h b/tools/usb/usbip/src/usbip_network.h index 7032687621d3..8e8330c0f1c9 100644 --- a/tools/usb/usbip/src/usbip_network.h +++ b/tools/usb/usbip/src/usbip_network.h @@ -34,12 +34,6 @@ struct op_common { } __attribute__((packed)); -#define PACK_OP_COMMON(pack, op_common) do {\ - usbip_net_pack_uint16_t(pack, &(op_common)->version);\ - usbip_net_pack_uint16_t(pack, &(op_common)->code);\ - usbip_net_pack_uint32_t(pack, &(op_common)->status);\ -} while (0) - /* ---------------------------------------------------------------------- */ /* Dummy Code */ #define OP_UNSPEC 0x00 @@ -165,11 +159,11 @@ struct op_devlist_reply_extra { } while (0) #define PACK_OP_DEVLIST_REPLY(pack, reply) do {\ - usbip_net_pack_uint32_t(pack, &(reply)->ndev);\ + (reply)->ndev = usbip_net_pack_uint32_t(pack, (reply)->ndev);\ } while (0) -void usbip_net_pack_uint32_t(int pack, uint32_t 
*num); -void usbip_net_pack_uint16_t(int pack, uint16_t *num); +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num); +uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num); void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev); void usbip_net_pack_usb_interface(int pack, struct usbip_usb_interface *uinf); -- GitLab From b48c6936909d230b7818fffa553d91a060f2dfac Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 7 Jan 2020 16:36:49 +0100 Subject: [PATCH 0157/1278] udf: Fix free space reporting for metadata and virtual partitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a4a8b99ec819ca60b49dc582a4287ef03411f117 ] Free space on filesystems with metadata or virtual partition maps currently gets misreported. This is because these partitions are just remapped onto underlying real partitions from which keep track of free blocks. Take this remapping into account when counting free blocks as well. Reviewed-by: Pali Rohár Reported-by: Pali Rohár Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/udf/super.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 242d960df9a1..51de27685e18 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -2467,17 +2467,29 @@ static unsigned int udf_count_free_table(struct super_block *sb, static unsigned int udf_count_free(struct super_block *sb) { unsigned int accum = 0; - struct udf_sb_info *sbi; + struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; + unsigned int part = sbi->s_partition; + int ptype = sbi->s_partmaps[part].s_partition_type; + + if (ptype == UDF_METADATA_MAP25) { + part = sbi->s_partmaps[part].s_type_specific.s_metadata. + s_phys_partition_ref; + } else if (ptype == UDF_VIRTUAL_MAP15 || ptype == UDF_VIRTUAL_MAP20) { + /* + * Filesystems with VAT are append-only and we cannot write to + * them. Let's just report 0 here. 
+ */ + return 0; + } - sbi = UDF_SB(sb); if (sbi->s_lvid_bh) { struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *) sbi->s_lvid_bh->b_data; - if (le32_to_cpu(lvid->numOfPartitions) > sbi->s_partition) { + if (le32_to_cpu(lvid->numOfPartitions) > part) { accum = le32_to_cpu( - lvid->freeSpaceTable[sbi->s_partition]); + lvid->freeSpaceTable[part]); if (accum == 0xFFFFFFFF) accum = 0; } @@ -2486,7 +2498,7 @@ static unsigned int udf_count_free(struct super_block *sb) if (accum) return accum; - map = &sbi->s_partmaps[sbi->s_partition]; + map = &sbi->s_partmaps[part]; if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { accum += udf_count_free_bitmap(sb, map->s_uspace.s_bitmap); -- GitLab From b8005f7f3e07bc83e8052e6ae48fec69c8b6e4cc Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 6 Jan 2020 08:42:28 -0500 Subject: [PATCH 0158/1278] IB/hfi1: Add software counter for ctxt0 seq drop [ Upstream commit 5ffd048698ea5139743acd45e8ab388a683642b8 ] All other code paths increment some form of drop counter. This was missed in the original implementation. 
Fixes: 82c2611daaf0 ("staging/rdma/hfi1: Handle packets with invalid RHF on context 0") Link: https://lore.kernel.org/r/20200106134228.119356.96828.stgit@awfm-01.aw.intel.com Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/chip.c | 10 ++++++++++ drivers/infiniband/hw/hfi1/chip.h | 1 + drivers/infiniband/hw/hfi1/driver.c | 1 + drivers/infiniband/hw/hfi1/hfi.h | 2 ++ 4 files changed, 14 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 4a0b7c003477..cb5785dda524 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1686,6 +1686,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry, return dd->verbs_dev.n_piodrain; } +static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry, + void *context, int vl, int mode, u64 data) +{ + struct hfi1_devdata *dd = context; + + return dd->ctx0_seq_drop; +} + static u64 access_sw_vtx_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { @@ -4246,6 +4254,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { access_sw_cpu_intr), [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL, access_sw_cpu_rcv_limit), +[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL, + access_sw_ctx0_seq_drop), [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL, access_sw_vtx_wait), [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL, diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 50b8645d0b87..a88ef2433cea 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -864,6 +864,7 @@ enum { C_DC_PG_STS_TX_MBE_CNT, C_SW_CPU_INTR, C_SW_CPU_RCV_LIM, + C_SW_CTX0_SEQ_DROP, C_SW_VTX_WAIT, C_SW_PIO_WAIT, C_SW_PIO_DRAIN, diff --git a/drivers/infiniband/hw/hfi1/driver.c 
b/drivers/infiniband/hw/hfi1/driver.c index 72c836b826ca..7aa1aabb7a43 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -710,6 +710,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; + packet->rcd->dd->ctx0_seq_drop++; /* Set up for the next packet */ packet->rhqoff += packet->rsize; if (packet->rhqoff >= packet->maxcnt) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 810ef5114772..cf9bc95d8039 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1043,6 +1043,8 @@ struct hfi1_devdata { char *boardname; /* human readable board info */ + u64 ctx0_seq_drop; + /* reset value */ u64 z_int_counter; u64 z_rcv_limit; -- GitLab From ac6fd7bc483484c408042672f213469ccc335f0d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 18 Dec 2019 21:23:03 +0300 Subject: [PATCH 0159/1278] soc/tegra: fuse: Correct straps' address for older Tegra124 device trees [ Upstream commit 2d9ea1934f8ef0dfb862d103389562cc28b4fc03 ] Trying to read out the Chip ID before the APBMISC registers are mapped won't succeed; as a result, Tegra124 gets a wrong address for the HW straps register if the machine uses an outdated device tree.
Fixes: 297c4f3dcbff ("soc/tegra: fuse: Restrict legacy code to 32-bit ARM") Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/soc/tegra/fuse/tegra-apbmisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/tegra/fuse/tegra-apbmisc.c b/drivers/soc/tegra/fuse/tegra-apbmisc.c index 5b18f6ffa45c..cd61c883c19f 100644 --- a/drivers/soc/tegra/fuse/tegra-apbmisc.c +++ b/drivers/soc/tegra/fuse/tegra-apbmisc.c @@ -134,7 +134,7 @@ void __init tegra_init_apbmisc(void) apbmisc.flags = IORESOURCE_MEM; /* strapping options */ - if (tegra_get_chip_id() == TEGRA124) { + if (of_machine_is_compatible("nvidia,tegra124")) { straps.start = 0x7000e864; straps.end = 0x7000e867; } else { -- GitLab From 012e9492ef1b3046b57ef2aff689aacbff1b0315 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:46 +0100 Subject: [PATCH 0160/1278] efi/x86: Don't panic or BUG() on non-critical error conditions [ Upstream commit e2d68a955e49d61fd0384f23e92058dc9b79be5e ] The logic in __efi_enter_virtual_mode() does a number of steps in sequence, all of which may fail in one way or the other. In most cases, we simply print an error and disable EFI runtime services support, but in some cases, we BUG() or panic() and bring down the system when encountering conditions that we could easily handle in the same way. While at it, replace a pointless page-to-virt-phys conversion with one that goes straight from struct page to physical. 
Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-14-ardb@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/platform/efi/efi.c | 28 ++++++++++++++-------------- arch/x86/platform/efi/efi_64.c | 9 +++++---- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 5b0275310070..e7f19dec16b9 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -930,16 +930,14 @@ static void __init __efi_enter_virtual_mode(void) if (efi_alloc_page_tables()) { pr_err("Failed to allocate EFI page tables\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { pr_err("Error reallocating memory, EFI runtime non-functional!\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } pa = __pa(new_memmap); @@ -953,8 +951,7 @@ static void __init __efi_enter_virtual_mode(void) if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { pr_err("Failed to remap late EFI memory map\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } if (efi_enabled(EFI_DBG)) { @@ -962,12 +959,11 @@ static void __init __efi_enter_virtual_mode(void) efi_print_memmap(); } - BUG_ON(!efi.systab); + if (WARN_ON(!efi.systab)) + goto err; - if (efi_setup_page_tables(pa, 1 << pg_shift)) { - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; - } + if (efi_setup_page_tables(pa, 1 << pg_shift)) + goto err; efi_sync_low_kernel_mappings(); @@ -987,9 +983,9 @@ static void __init __efi_enter_virtual_mode(void) } if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", - status); - panic("EFI call to SetVirtualAddressMap() failed!"); + pr_err("Unable to switch EFI into virtual 
mode (status=%lx)!\n", + status); + goto err; } /* @@ -1016,6 +1012,10 @@ static void __init __efi_enter_virtual_mode(void) /* clean DUMMY object */ efi_delete_dummy_variable(); + return; + +err: + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); } void __init efi_enter_virtual_mode(void) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ae369c2bbc3e..0ebb7f94fd51 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -390,11 +390,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; page = alloc_page(GFP_KERNEL|__GFP_DMA32); - if (!page) - panic("Unable to allocate EFI runtime stack < 4GB\n"); + if (!page) { + pr_err("Unable to allocate EFI runtime stack < 4GB\n"); + return 1; + } - efi_scratch.phys_stack = virt_to_phys(page_address(page)); - efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ + efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); -- GitLab From eb89193c7aacbfbd77e43c952d06bfda30ba019a Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Sat, 9 Nov 2019 09:42:13 -0800 Subject: [PATCH 0161/1278] rcu: Use WRITE_ONCE() for assignments to ->pprev for hlist_nulls [ Upstream commit 860c8802ace14c646864795e057349c9fb2d60ad ] Eric Dumazet supplied a KCSAN report of a bug that forces use of hlist_unhashed_lockless() from sk_unhashed(): ------------------------------------------------------------------------ BUG: KCSAN: data-race in inet_unhash / inet_unhash write to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 1: __hlist_nulls_del include/linux/list_nulls.h:88 [inline] hlist_nulls_del_init_rcu include/linux/rculist_nulls.h:36 [inline] __sk_nulls_del_node_init_rcu include/net/sock.h:676 [inline] inet_unhash+0x38f/0x4a0 net/ipv4/inet_hashtables.c:612 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 start_secondary+0x208/0x260 arch/x86/kernel/smpboot.c:264 secondary_startup_64+0xa4/0xb0 
arch/x86/kernel/head_64.S:241 read to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 0: sk_unhashed include/net/sock.h:607 [inline] inet_unhash+0x3d/0x4a0 net/ipv4/inet_hashtables.c:592 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 rest_init+0xec/0xf6 init/main.c:452 arch_call_rest_init+0x17/0x37 start_kernel+0x838/0x85e init/main.c:786 x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490 x86_64_start_kernel+0x72/0x76 arch/x86/kernel/head64.c:471 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc6+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ------------------------------------------------------------------------ This commit therefore replaces C-language assignments with WRITE_ONCE() in 
include/linux/list_nulls.h and include/linux/rculist_nulls.h. Reported-by: Eric Dumazet # For KCSAN Signed-off-by: Paul E. McKenney Signed-off-by: Sasha Levin --- include/linux/list_nulls.h | 8 ++++---- include/linux/rculist_nulls.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index 3ef96743db8d..1ecd35664e0d 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -72,10 +72,10 @@ static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); h->first = n; if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } static inline void __hlist_nulls_del(struct hlist_nulls_node *n) @@ -85,13 +85,13 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n) WRITE_ONCE(*pprev, next); if (!is_a_nulls(next)) - next->pprev = pprev; + WRITE_ONCE(next->pprev, pprev); } static inline void hlist_nulls_del(struct hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index a10da545b3f6..cf64a9492256 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -34,7 +34,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) { if (!hlist_nulls_unhashed(n)) { __hlist_nulls_del(n); - n->pprev = NULL; + WRITE_ONCE(n->pprev, NULL); } } @@ -66,7 +66,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** @@ -94,10 +94,10 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev 
= &h->first; + WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } /** -- GitLab From e623ca6cc396b115cc785a8feb9f4c9e222aea75 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 9 Jan 2020 17:03:21 -0800 Subject: [PATCH 0162/1278] Input: edt-ft5x06 - work around first register access error [ Upstream commit e112324cc0422c046f1cf54c56f333d34fa20885 ] The EP0700MLP1 returns bogus data on the first register read access (reading the threshold parameter from register 0x00): edt_ft5x06 2-0038: crc error: 0xfc expected, got 0x40 It ignores writes until then. This patch adds a dummy read after which the number of sensors and parameter read/writes work correctly. Signed-off-by: Philipp Zabel Signed-off-by: Marco Felsch Tested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/edt-ft5x06.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 5bf63f76ddda..4eff5b44640c 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -888,6 +888,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, { const struct edt_i2c_chip_data *chip_data; struct edt_ft5x06_ts_data *tsdata; + u8 buf[2] = { 0xfc, 0x00 }; struct input_dev *input; unsigned long irq_flags; int error; @@ -957,6 +958,12 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, return error; } + /* + * Dummy read access. EP0700MLP1 returns bogus data on the first + * register read access and ignores writes. 
+ */ + edt_ft5x06_ts_readwrite(tsdata->client, 2, buf, 2, buf); + edt_ft5x06_ts_set_regs(tsdata); edt_ft5x06_ts_get_defaults(&client->dev, tsdata); edt_ft5x06_ts_get_parameters(tsdata); -- GitLab From 4b08dd2bd613ba002d70f29ca4bece7dad01c60e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 12 Jan 2020 13:04:42 +0100 Subject: [PATCH 0163/1278] wan: ixp4xx_hss: fix compile-testing on 64-bit [ Upstream commit 504c28c853ec5c626900b914b5833daf0581a344 ] Change the driver to use portable integer types to avoid warnings during compile testing: drivers/net/wan/ixp4xx_hss.c:863:21: error: cast to 'u32 *' (aka 'unsigned int *') from smaller integer type 'int' [-Werror,-Wint-to-pointer-cast] memcpy_swab32(mem, (u32 *)((int)skb->data & ~3), bytes / 4); ^ drivers/net/wan/ixp4xx_hss.c:979:12: error: incompatible pointer types passing 'u32 *' (aka 'unsigned int *') to parameter of type 'dma_addr_t *' (aka 'unsigned long long *') [-Werror,-Wincompatible-pointer-types] &port->desc_tab_phys))) ^~~~~~~~~~~~~~~~~~~~ include/linux/dmapool.h:27:20: note: passing argument to parameter 'handle' here dma_addr_t *handle); ^ Signed-off-by: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/wan/ixp4xx_hss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c index 6a505c26a3e7..a269ed63d90f 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -261,7 +261,7 @@ struct port { struct hss_plat_info *plat; buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS]; struct desc *desc_tab; /* coherent */ - u32 desc_tab_phys; + dma_addr_t desc_tab_phys; unsigned int id; unsigned int clock_type, clock_rate, loopback; unsigned int initialized, carrier; @@ -861,7 +861,7 @@ static int hss_hdlc_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_dropped++; return NETDEV_TX_OK; } - memcpy_swab32(mem, (u32 *)((int)skb->data & 
~3), bytes / 4); + memcpy_swab32(mem, (u32 *)((uintptr_t)skb->data & ~3), bytes / 4); dev_kfree_skb(skb); #endif -- GitLab From ae34e82fec4df033bec710a285e8af5bdbeead74 Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Mon, 13 Jan 2020 21:32:42 +0800 Subject: [PATCH 0164/1278] ASoC: atmel: fix build error with CONFIG_SND_ATMEL_SOC_DMA=m [ Upstream commit 8fea78029f5e6ed734ae1957bef23cfda1af4354 ] If CONFIG_SND_ATMEL_SOC_DMA=m, build error: sound/soc/atmel/atmel_ssc_dai.o: In function `atmel_ssc_set_audio': (.text+0x7cd): undefined reference to `atmel_pcm_dma_platform_register' Function atmel_pcm_dma_platform_register is defined under CONFIG SND_ATMEL_SOC_DMA, so select SND_ATMEL_SOC_DMA in CONFIG SND_ATMEL_SOC_SSC, same to CONFIG_SND_ATMEL_SOC_PDC. Reported-by: Hulk Robot Signed-off-by: Chen Zhou Link: https://lore.kernel.org/r/20200113133242.144550-1-chenzhou10@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/atmel/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index 4a56f3dfba51..23887613b5c3 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -25,6 +25,8 @@ config SND_ATMEL_SOC_DMA config SND_ATMEL_SOC_SSC_DMA tristate + select SND_ATMEL_SOC_DMA + select SND_ATMEL_SOC_PDC config SND_ATMEL_SOC_SSC tristate -- GitLab From b53fe3e57c93ee8a044c1296bbb57aed863b2bff Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 19:47:20 -0700 Subject: [PATCH 0165/1278] tty: synclinkmp: Adjust indentation in several functions [ Upstream commit 1feedf61e7265128244f6993f23421f33dd93dbc ] Clang warns: ../drivers/tty/synclinkmp.c:1456:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if (C_CRTSCTS(tty)) { ^ ../drivers/tty/synclinkmp.c:1453:2: note: previous statement is here if (I_IXOFF(tty)) ^ ../drivers/tty/synclinkmp.c:2473:8: warning: misleading indentation; statement is not part of the previous 'if' 
[-Wmisleading-indentation] info->port.tty->hw_stopped = 0; ^ ../drivers/tty/synclinkmp.c:2471:7: note: previous statement is here if ( debug_level >= DEBUG_LEVEL_ISR ) ^ ../drivers/tty/synclinkmp.c:2482:8: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] info->port.tty->hw_stopped = 1; ^ ../drivers/tty/synclinkmp.c:2480:7: note: previous statement is here if ( debug_level >= DEBUG_LEVEL_ISR ) ^ ../drivers/tty/synclinkmp.c:2809:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) ^ ../drivers/tty/synclinkmp.c:2807:2: note: previous statement is here if (I_INPCK(info->port.tty)) ^ ../drivers/tty/synclinkmp.c:3246:3: warning: misleading indentation; statement is not part of the previous 'else' [-Wmisleading-indentation] set_signals(info); ^ ../drivers/tty/synclinkmp.c:3244:2: note: previous statement is here else ^ 5 warnings generated. The indentation on these lines is not at all consistent, tabs and spaces are mixed together. Convert to just using tabs to be consistent with the Linux kernel coding style and eliminate these warnings from clang. 
Link: https://github.com/ClangBuiltLinux/linux/issues/823 Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20191218024720.3528-1-natechancellor@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/synclinkmp.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index 4fed9e7b281f..3c9e314406b4 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -1467,10 +1467,10 @@ static void throttle(struct tty_struct * tty) if (I_IXOFF(tty)) send_xchar(tty, STOP_CHAR(tty)); - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->serial_signals &= ~SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1496,10 +1496,10 @@ static void unthrottle(struct tty_struct * tty) send_xchar(tty, START_CHAR(tty)); } - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->serial_signals |= SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -2484,7 +2484,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status ) if (status & SerialSignal_CTS) { if ( debug_level >= DEBUG_LEVEL_ISR ) printk("CTS tx start..."); - info->port.tty->hw_stopped = 0; + info->port.tty->hw_stopped = 0; tx_start(info); info->pending_bh |= BH_TRANSMIT; return; @@ -2493,7 +2493,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status ) if (!(status & SerialSignal_CTS)) { if ( debug_level >= DEBUG_LEVEL_ISR ) printk("CTS tx stop..."); - info->port.tty->hw_stopped = 1; + info->port.tty->hw_stopped = 1; tx_stop(info); } } @@ -2820,8 +2820,8 @@ static void change_params(SLMP_INFO *info) info->read_status_mask2 = OVRN; if (I_INPCK(info->port.tty)) info->read_status_mask2 |= PE | FRME; - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) - info->read_status_mask1 |= BRKD; + if 
(I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) + info->read_status_mask1 |= BRKD; if (I_IGNPAR(info->port.tty)) info->ignore_status_mask2 |= PE | FRME; if (I_IGNBRK(info->port.tty)) { @@ -3191,7 +3191,7 @@ static int tiocmget(struct tty_struct *tty) unsigned long flags; spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); result = ((info->serial_signals & SerialSignal_RTS) ? TIOCM_RTS : 0) | @@ -3229,7 +3229,7 @@ static int tiocmset(struct tty_struct *tty, info->serial_signals &= ~SerialSignal_DTR; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); return 0; @@ -3241,7 +3241,7 @@ static int carrier_raised(struct tty_port *port) unsigned long flags; spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); return (info->serial_signals & SerialSignal_DCD) ? 1 : 0; @@ -3257,7 +3257,7 @@ static void dtr_rts(struct tty_port *port, int on) info->serial_signals |= SerialSignal_RTS | SerialSignal_DTR; else info->serial_signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } -- GitLab From 86663c7bce98475a43a1c64df48c67f599e89a88 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 19:39:13 -0700 Subject: [PATCH 0166/1278] tty: synclink_gt: Adjust indentation in several functions [ Upstream commit 446e76873b5e4e70bdee5db2f2a894d5b4a7d081 ] Clang warns: ../drivers/tty/synclink_gt.c:1337:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if (C_CRTSCTS(tty)) { ^ ../drivers/tty/synclink_gt.c:1335:2: note: previous statement is here if (I_IXOFF(tty)) ^ ../drivers/tty/synclink_gt.c:2563:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if 
(I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) ^ ../drivers/tty/synclink_gt.c:2561:2: note: previous statement is here if (I_INPCK(info->port.tty)) ^ ../drivers/tty/synclink_gt.c:3221:3: warning: misleading indentation; statement is not part of the previous 'else' [-Wmisleading-indentation] set_signals(info); ^ ../drivers/tty/synclink_gt.c:3219:2: note: previous statement is here else ^ 3 warnings generated. The indentation on these lines is not at all consistent, tabs and spaces are mixed together. Convert to just using tabs to be consistent with the Linux kernel coding style and eliminate these warnings from clang. Link: https://github.com/ClangBuiltLinux/linux/issues/822 Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20191218023912.13827-1-natechancellor@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/synclink_gt.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 344e8c427c7e..9d68f89a2bf8 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1349,10 +1349,10 @@ static void throttle(struct tty_struct * tty) DBGINFO(("%s throttle\n", info->device_name)); if (I_IXOFF(tty)) send_xchar(tty, STOP_CHAR(tty)); - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals &= ~SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1374,10 +1374,10 @@ static void unthrottle(struct tty_struct * tty) else send_xchar(tty, START_CHAR(tty)); } - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals |= SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -2575,8 +2575,8 @@ static void change_params(struct slgt_info *info) info->read_status_mask = IRQ_RXOVER; if (I_INPCK(info->port.tty)) 
info->read_status_mask |= MASK_PARITY | MASK_FRAMING; - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) - info->read_status_mask |= MASK_BREAK; + if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) + info->read_status_mask |= MASK_BREAK; if (I_IGNPAR(info->port.tty)) info->ignore_status_mask |= MASK_PARITY | MASK_FRAMING; if (I_IGNBRK(info->port.tty)) { @@ -3207,7 +3207,7 @@ static int tiocmset(struct tty_struct *tty, info->signals &= ~SerialSignal_DTR; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); return 0; } @@ -3218,7 +3218,7 @@ static int carrier_raised(struct tty_port *port) struct slgt_info *info = container_of(port, struct slgt_info, port); spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); return (info->signals & SerialSignal_DCD) ? 1 : 0; } @@ -3233,7 +3233,7 @@ static void dtr_rts(struct tty_port *port, int on) info->signals |= SerialSignal_RTS | SerialSignal_DTR; else info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } -- GitLab From 062cfcd86078bc85220aa77385f0317110ad3062 Mon Sep 17 00:00:00 2001 From: Simon Schwartz Date: Tue, 10 Dec 2019 17:41:37 -0500 Subject: [PATCH 0167/1278] driver core: platform: Prevent resource overflow from causing infinite loops [ Upstream commit 39cc539f90d035a293240c9443af50be55ee81b8 ] num_resources in the platform_device struct is declared as a u32. The for loops that iterate over num_resources use an int as the counter, which can cause infinite loops on architectures with smaller ints. Change the loop counters to u32.
Signed-off-by: Simon Schwartz Link: https://lore.kernel.org/r/2201ce63a2a171ffd2ed14e867875316efcf71db.camel@theschwartz.xyz Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/platform.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index f1105de0d9fe..e3d40c41c33b 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "base.h" #include "power/power.h" @@ -68,7 +69,7 @@ void __weak arch_setup_pdev_archdata(struct platform_device *pdev) struct resource *platform_get_resource(struct platform_device *dev, unsigned int type, unsigned int num) { - int i; + u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; @@ -163,7 +164,7 @@ struct resource *platform_get_resource_byname(struct platform_device *dev, unsigned int type, const char *name) { - int i; + u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; @@ -360,7 +361,8 @@ EXPORT_SYMBOL_GPL(platform_device_add_properties); */ int platform_device_add(struct platform_device *pdev) { - int i, ret; + u32 i; + int ret; if (!pdev) return -EINVAL; @@ -447,7 +449,7 @@ EXPORT_SYMBOL_GPL(platform_device_add); */ void platform_device_del(struct platform_device *pdev) { - int i; + u32 i; if (pdev) { device_remove_properties(&pdev->dev); -- GitLab From 6439e61a69d12624e82ded29ac2693d768261412 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 6 Dec 2019 14:22:19 +0100 Subject: [PATCH 0168/1278] driver core: Print device when resources present in really_probe() [ Upstream commit 7c35e699c88bd60734277b26962783c60e04b494 ] If a device already has devres items attached before probing, a warning backtrace is printed. However, this backtrace does not reveal the offending device, leaving the user uninformed. 
Furthermore, using WARN_ON() causes systems with panic-on-warn to reboot. Fix this by replacing the WARN_ON() by a dev_crit() message. Abort probing the device, to prevent doing more damage to the device's resources. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191206132219.28908-1-geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/dd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 536c9ac3b848..aa1a2d32360f 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -375,7 +375,10 @@ static int really_probe(struct device *dev, struct device_driver *drv) atomic_inc(&probe_count); pr_debug("bus: '%s': %s: probing driver %s with device %s\n", drv->bus->name, __func__, drv->name, dev_name(dev)); - WARN_ON(!list_empty(&dev->devres_head)); + if (!list_empty(&dev->devres_head)) { + dev_crit(dev, "Resources present before probing\n"); + return -EBUSY; + } re_probe: dev->driver = drv; -- GitLab From 8f8d6aebe2d84c54e143c490b56a60f7e6832fe3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Jan 2020 21:05:43 +0100 Subject: [PATCH 0169/1278] vme: bridges: reduce stack usage [ Upstream commit 7483e7a939c074d887450ef1c4d9ccc5909405f8 ] With CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3, the stack usage in vme_fake grows above the warning limit: drivers/vme/bridges/vme_fake.c: In function 'fake_master_read': drivers/vme/bridges/vme_fake.c:610:1: error: the frame size of 1160 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] drivers/vme/bridges/vme_fake.c: In function 'fake_master_write': drivers/vme/bridges/vme_fake.c:797:1: error: the frame size of 1160 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] The problem is that in some configurations, each call to fake_vmereadX() puts another variable on the stack. 
Reduce the amount of inlining to get back to the previous state, with no function using more than 200 bytes each. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20200107200610.3482901-1-arnd@arndb.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/vme/bridges/vme_fake.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/vme/bridges/vme_fake.c b/drivers/vme/bridges/vme_fake.c index 30b3acc93833..e81ec763b555 100644 --- a/drivers/vme/bridges/vme_fake.c +++ b/drivers/vme/bridges/vme_fake.c @@ -418,8 +418,9 @@ static void fake_lm_check(struct fake_driver *bridge, unsigned long long addr, } } -static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u8 fake_vmeread8(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u8 retval = 0xff; int i; @@ -450,8 +451,9 @@ static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr, return retval; } -static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u16 fake_vmeread16(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u16 retval = 0xffff; int i; @@ -482,8 +484,9 @@ static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr, return retval; } -static u32 fake_vmeread32(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u32 fake_vmeread32(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u32 retval = 0xffffffff; int i; @@ -613,8 +616,9 @@ static ssize_t fake_master_read(struct vme_master_resource *image, void *buf, return retval; } -static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite8(struct fake_driver 
*bridge, + u8 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; @@ -643,8 +647,9 @@ static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf, } -static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite16(struct fake_driver *bridge, + u16 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; @@ -673,8 +678,9 @@ static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf, } -static void fake_vmewrite32(struct fake_driver *bridge, u32 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite32(struct fake_driver *bridge, + u32 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; -- GitLab From f964992fdb8710beb3de984ec8c6a8e7670e3a9e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Jan 2020 08:46:01 +0300 Subject: [PATCH 0170/1278] drm/nouveau/secboot/gm20b: initialize pointer in gm20b_secboot_new() [ Upstream commit 3613a9bea95a1470dd42e4ed1cc7d86ebe0a2dc0 ] We accidentally set "psb" which is a no-op instead of "*psb" so it generates a static checker warning. We should probably set it before the first error return so that it's always initialized. 
Fixes: 923f1bd27bf1 ("drm/nouveau/secboot/gm20b: add secure boot support") Signed-off-by: Dan Carpenter Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c index 30491d132d59..fbd10a67c6c6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c @@ -108,6 +108,7 @@ gm20b_secboot_new(struct nvkm_device *device, int index, struct gm200_secboot *gsb; struct nvkm_acr *acr; + *psb = NULL; acr = acr_r352_new(BIT(NVKM_SECBOOT_FALCON_FECS) | BIT(NVKM_SECBOOT_FALCON_PMU)); if (IS_ERR(acr)) @@ -116,10 +117,8 @@ gm20b_secboot_new(struct nvkm_device *device, int index, acr->optional_falcons = BIT(NVKM_SECBOOT_FALCON_PMU); gsb = kzalloc(sizeof(*gsb), GFP_KERNEL); - if (!gsb) { - psb = NULL; + if (!gsb) return -ENOMEM; - } *psb = &gsb->base; ret = nvkm_secboot_ctor(&gm20b_secboot, acr, device, index, &gsb->base); -- GitLab From fefca6abc571a5dcb7704a70e87896584d06830d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 9 Jan 2020 11:46:15 +1000 Subject: [PATCH 0171/1278] drm/nouveau/gr/gk20a,gm200-: add terminators to method lists read from fw [ Upstream commit 7adc77aa0e11f25b0e762859219c70852cd8d56f ] Method init is typically ordered by class in the FW image as ThreeD, TwoD, Compute. Due to a bug in parsing the FW into our internal format, we've been accidentally sending TwoD + Compute methods to the ThreeD class, as well as Compute methods to the TwoD class - oops.
Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- .../gpu/drm/nouveau/nvkm/engine/gr/gk20a.c | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c index de8b806b88fd..7618b2eb4fdf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c @@ -143,23 +143,24 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name, nent = (fuc.size / sizeof(struct gk20a_fw_av)); - pack = vzalloc((sizeof(*pack) * max_classes) + - (sizeof(*init) * (nent + 1))); + pack = vzalloc((sizeof(*pack) * (max_classes + 1)) + + (sizeof(*init) * (nent + max_classes + 1))); if (!pack) { ret = -ENOMEM; goto end; } - init = (void *)(pack + max_classes); + init = (void *)(pack + max_classes + 1); - for (i = 0; i < nent; i++) { - struct gf100_gr_init *ent = &init[i]; + for (i = 0; i < nent; i++, init++) { struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc.data)[i]; u32 class = av->addr & 0xffff; u32 addr = (av->addr & 0xffff0000) >> 14; if (prevclass != class) { - pack[classidx].init = ent; + if (prevclass) /* Add terminator to the method list. 
*/ + init++; + pack[classidx].init = init; pack[classidx].type = class; prevclass = class; if (++classidx >= max_classes) { @@ -169,10 +170,10 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name, } } - ent->addr = addr; - ent->data = av->data; - ent->count = 1; - ent->pitch = 1; + init->addr = addr; + init->data = av->data; + init->count = 1; + init->pitch = 1; } *ppack = pack; -- GitLab From bf24b7d69aead3fbe5131204a78a06787e7a2f0b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 10 Jan 2020 14:32:01 +0800 Subject: [PATCH 0172/1278] drm/nouveau: Fix copy-paste error in nouveau_fence_wait_uevent_handler [ Upstream commit 1eb013473bff5f95b6fe1ca4dd7deda47257b9c2 ] Like other cases, it should use rcu protected 'chan' rather than 'fence->channel' in nouveau_fence_wait_uevent_handler. Fixes: 0ec5f02f0e2c ("drm/nouveau: prevent stale fence->channel pointers, and protect with rcu") Signed-off-by: YueHaibing Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 99e14e3e0fe4..72532539369f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -158,7 +158,7 @@ nouveau_fence_wait_uevent_handler(struct nvif_notify *notify) fence = list_entry(fctx->pending.next, typeof(*fence), head); chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); - if (nouveau_fence_update(fence->channel, fctx)) + if (nouveau_fence_update(chan, fctx)) ret = NVIF_NOTIFY_DROP; } spin_unlock_irqrestore(&fctx->lock, flags); -- GitLab From e7f9d07dcc424b132f47dbc835c090ff7c4449ff Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 24 Sep 2019 23:37:58 -0500 Subject: [PATCH 0173/1278] drm/vmwgfx: prevent memory leak in vmw_cmdbuf_res_add [ Upstream commit 40efb09a7f53125719e49864da008495e39aaa1e ] In vmw_cmdbuf_res_add 
if drm_ht_insert_item fails the allocated memory for cres should be released. Fixes: 18e4a4669c50 ("drm/vmwgfx: Fix compat shader namespace") Signed-off-by: Navid Emamdoost Reviewed-by: Thomas Hellstrom Signed-off-by: Thomas Hellstrom Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index 36c7b6c839c0..738ad2fc79a2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -210,8 +210,10 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man, cres->hash.key = user_key | (res_type << 24); ret = drm_ht_insert_item(&man->resources, &cres->hash); - if (unlikely(ret != 0)) + if (unlikely(ret != 0)) { + kfree(cres); goto out_invalid_key; + } cres->state = VMW_CMDBUF_RES_ADD; cres->res = vmw_resource_reference(res); -- GitLab From 66c863f64b7eb4d0e8d958e4013ab68e420925c0 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 15 Jan 2020 07:25:26 -0600 Subject: [PATCH 0174/1278] usb: musb: omap2430: Get rid of musb .set_vbus for omap2430 glue [ Upstream commit 91b6dec32e5c25fbdbb564d1e5af23764ec17ef1 ] We currently have musb_set_vbus() called from two different paths. Mostly it gets called from the USB PHY via omap_musb_set_mailbox(), but in some cases it can get also called from musb_stage0_irq() rather via .set_vbus: (musb_set_host [musb_hdrc]) (omap2430_musb_set_vbus [omap2430]) (musb_stage0_irq [musb_hdrc]) (musb_interrupt [musb_hdrc]) (omap2430_musb_interrupt [omap2430]) This is racy and will not work with introducing generic helper functions for musb_set_host() and musb_set_peripheral(). We want to get rid of the busy loops in favor of usleep_range(). Let's just get rid of .set_vbus for omap2430 glue layer and let the PHY code handle VBUS with musb_set_vbus(). 
Note that in the follow-up patch we can completely remove omap2430_musb_set_vbus(), but let's do it in a separate patch as this change may actually turn out to be needed as a fix. Reported-by: Pavel Machek Acked-by: Pavel Machek Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Link: https://lore.kernel.org/r/20200115132547.364-5-b-liu@ti.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/musb/omap2430.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 456f3e6ecf03..26e69c2766f5 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -388,8 +388,6 @@ static const struct musb_platform_ops omap2430_ops = { .init = omap2430_musb_init, .exit = omap2430_musb_exit, - .set_vbus = omap2430_musb_set_vbus, - .enable = omap2430_musb_enable, .disable = omap2430_musb_disable, -- GitLab From af9f7ff24e89689390a79b38df195b3909f42939 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 15 Jan 2020 15:21:47 +0000 Subject: [PATCH 0175/1278] iommu/arm-smmu-v3: Use WRITE_ONCE() when changing validity of an STE [ Upstream commit d71e01716b3606a6648df7e5646ae12c75babde4 ] If, for some bizarre reason, the compiler decided to split up the write of STE DWORD 0, we could end up making a partial structure valid. Although this probably won't happen, follow the example of the context-descriptor code and use WRITE_ONCE() to ensure atomicity of the write. 
Reported-by: Jean-Philippe Brucker Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/arm-smmu-v3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 09eb258a9a7d..29feafa8007f 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1145,7 +1145,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, } arm_smmu_sync_ste_for_sid(smmu, sid); - dst[0] = cpu_to_le64(val); + /* See comment in arm_smmu_write_ctx_desc() */ + WRITE_ONCE(dst[0], cpu_to_le64(val)); arm_smmu_sync_ste_for_sid(smmu, sid); /* It's likely that we'll want to use the new STE soon */ -- GitLab From 7d36bf199518818956c4369e16337ea0aa3c423a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 13 Dec 2019 18:32:16 -0800 Subject: [PATCH 0176/1278] f2fs: free sysfs kobject [ Upstream commit 820d366736c949ffe698d3b3fe1266a91da1766d ] Detected kmemleak. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 93af9d7dfcdc..79e45e760c20 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -557,4 +557,5 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); } -- GitLab From 192f211d6f8460d6a8c15316ace784d61f0339a8 Mon Sep 17 00:00:00 2001 From: Nick Black Date: Thu, 26 Dec 2019 15:31:48 -0500 Subject: [PATCH 0177/1278] scsi: iscsi: Don't destroy session if there are outstanding connections [ Upstream commit 54155ed4199c7aa3fd20866648024ab63c96d579 ] A faulty userspace that calls destroy_session() before destroying the connections can trigger the failure. This patch prevents the issue by refusing to destroy the session if there are outstanding connections. ------------[ cut here ]------------ kernel BUG at mm/slub.c:306! 
invalid opcode: 0000 [#1] SMP PTI CPU: 1 PID: 1224 Comm: iscsid Not tainted 5.4.0-rc2.iscsi+ #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 RIP: 0010:__slab_free+0x181/0x350 [...] [ 1209.686056] RSP: 0018:ffffa93d4074fae0 EFLAGS: 00010246 [ 1209.686694] RAX: ffff934efa5ad800 RBX: 000000008010000a RCX: ffff934efa5ad800 [ 1209.687651] RDX: ffff934efa5ad800 RSI: ffffeb4041e96b00 RDI: ffff934efd402c40 [ 1209.688582] RBP: ffffa93d4074fb80 R08: 0000000000000001 R09: ffffffffbb5dfa26 [ 1209.689425] R10: ffff934efa5ad800 R11: 0000000000000001 R12: ffffeb4041e96b00 [ 1209.690285] R13: ffff934efa5ad800 R14: ffff934efd402c40 R15: 0000000000000000 [ 1209.691213] FS: 00007f7945dfb540(0000) GS:ffff934efda80000(0000) knlGS:0000000000000000 [ 1209.692316] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1209.693013] CR2: 000055877fd3da80 CR3: 0000000077384000 CR4: 00000000000006e0 [ 1209.693897] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1209.694773] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1209.695631] Call Trace: [ 1209.695957] ? __wake_up_common_lock+0x8a/0xc0 [ 1209.696712] iscsi_pool_free+0x26/0x40 [ 1209.697263] iscsi_session_teardown+0x2f/0xf0 [ 1209.698117] iscsi_sw_tcp_session_destroy+0x45/0x60 [ 1209.698831] iscsi_if_rx+0xd88/0x14e0 [ 1209.699370] netlink_unicast+0x16f/0x200 [ 1209.699932] netlink_sendmsg+0x21a/0x3e0 [ 1209.700446] sock_sendmsg+0x4f/0x60 [ 1209.700902] ___sys_sendmsg+0x2ae/0x320 [ 1209.701451] ? cp_new_stat+0x150/0x180 [ 1209.701922] __sys_sendmsg+0x59/0xa0 [ 1209.702357] do_syscall_64+0x52/0x160 [ 1209.702812] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1209.703419] RIP: 0033:0x7f7946433914 [...] 
[ 1209.706084] RSP: 002b:00007fffb99f2378 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 1209.706994] RAX: ffffffffffffffda RBX: 000055bc869eac20 RCX: 00007f7946433914 [ 1209.708082] RDX: 0000000000000000 RSI: 00007fffb99f2390 RDI: 0000000000000005 [ 1209.709120] RBP: 00007fffb99f2390 R08: 000055bc84fe9320 R09: 00007fffb99f1f07 [ 1209.710110] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000038 [ 1209.711085] R13: 000055bc8502306e R14: 0000000000000000 R15: 0000000000000000 Modules linked in: ---[ end trace a2d933ede7f730d8 ]--- Link: https://lore.kernel.org/r/20191226203148.2172200-1-krisman@collabora.com Signed-off-by: Nick Black Co-developed-by: Salman Qazi Signed-off-by: Salman Qazi Co-developed-by: Junho Ryu Signed-off-by: Junho Ryu Co-developed-by: Khazhismel Kumykov Signed-off-by: Khazhismel Kumykov Co-developed-by: Gabriel Krisman Bertazi Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/iscsi_tcp.c | 4 ++++ drivers/scsi/scsi_transport_iscsi.c | 26 +++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 7e3a77d3c6f0..e3ca16043f9a 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -890,6 +890,10 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session) { struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); + struct iscsi_session *session = cls_session->dd_data; + + if (WARN_ON_ONCE(session->leadconn)) + return; iscsi_tcp_r2tpool_free(cls_session->dd_data); iscsi_session_teardown(cls_session); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 95d71e301a53..aecb563a2b4e 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2945,6 +2945,24 @@ 
iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev) return err; } +static int iscsi_session_has_conns(int sid) +{ + struct iscsi_cls_conn *conn; + unsigned long flags; + int found = 0; + + spin_lock_irqsave(&connlock, flags); + list_for_each_entry(conn, &connlist, conn_list) { + if (iscsi_conn_get_sid(conn) == sid) { + found = 1; + break; + } + } + spin_unlock_irqrestore(&connlock, flags); + + return found; +} + static int iscsi_set_iface_params(struct iscsi_transport *transport, struct iscsi_uevent *ev, uint32_t len) @@ -3522,10 +3540,12 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) break; case ISCSI_UEVENT_DESTROY_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); - if (session) - transport->destroy_session(session); - else + if (!session) err = -EINVAL; + else if (iscsi_session_has_conns(ev->u.d_session.sid)) + err = -EBUSY; + else + transport->destroy_session(session); break; case ISCSI_UEVENT_UNBIND_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); -- GitLab From 556b80a2b57acc5289367436251ae72193746021 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 31 Oct 2019 12:46:52 -0700 Subject: [PATCH 0178/1278] arm64: fix alternatives with LLVM's integrated assembler [ Upstream commit c54f90c2627cc316d365e3073614731e17dbc631 ] LLVM's integrated assembler fails with the following error when building KVM: :12:6: error: expected absolute expression .if kvm_update_va_mask == 0 ^ :21:6: error: expected absolute expression .if kvm_update_va_mask == 0 ^ :24:2: error: unrecognized instruction mnemonic NOT_AN_INSTRUCTION ^ LLVM ERROR: Error parsing inline asm These errors come from ALTERNATIVE_CB and __ALTERNATIVE_CFG, which test for the existence of the callback parameter in inline assembly using the following expression: " .if " __stringify(cb) " == 0\n" This works with GNU as, but isn't supported by LLVM. 
This change splits __ALTERNATIVE_CFG and ALTINSTR_ENTRY into separate macros to fix the LLVM build. Link: https://github.com/ClangBuiltLinux/linux/issues/472 Signed-off-by: Sami Tolvanen Tested-by: Nick Desaulniers Reviewed-by: Kees Cook Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/alternative.h | 32 ++++++++++++++++++---------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index a91933b1e2e6..4cd4a793dc32 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -30,13 +30,16 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt, void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); -#define ALTINSTR_ENTRY(feature,cb) \ +#define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ - " .if " __stringify(cb) " == 0\n" \ " .word 663f - .\n" /* new instruction */ \ - " .else\n" \ + " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* source len */ \ + " .byte 664f-663f\n" /* replacement len */ + +#define ALTINSTR_ENTRY_CB(feature, cb) \ + " .word 661b - .\n" /* label */ \ " .word " __stringify(cb) "- .\n" /* callback */ \ - " .endif\n" \ " .hword " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -57,15 +60,14 @@ void apply_alternatives(void *start, size_t length); * * Alternatives with callbacks do not generate replacement instructions. 
*/ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature,cb) \ + ALTINSTR_ENTRY(feature) \ ".popsection\n" \ - " .if " __stringify(cb) " == 0\n" \ ".pushsection .altinstr_replacement, \"a\"\n" \ "663:\n\t" \ newinstr "\n" \ @@ -73,17 +75,25 @@ void apply_alternatives(void *start, size_t length); ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ ".org . - (662b-661b) + (664b-663b)\n" \ - ".else\n\t" \ + ".endif\n" + +#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ + ".if "__stringify(cfg_enabled)" == 1\n" \ + "661:\n\t" \ + oldinstr "\n" \ + "662:\n" \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY_CB(feature, cb) \ + ".popsection\n" \ "663:\n\t" \ "664:\n\t" \ - ".endif\n" \ ".endif\n" #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0) + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) #define ALTERNATIVE_CB(oldinstr, cb) \ - __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb) + __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb) #else #include -- GitLab From 434f42546a6eda220f53b489a377a34f35f542d5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Jan 2020 19:17:02 +0100 Subject: [PATCH 0179/1278] watchdog/softlockup: Enforce that timestamp is valid on boot [ Upstream commit 11e31f608b499f044f24b20be73f1dcab3e43f8a ] Robert reported that during boot the watchdog timestamp is set to 0 for one second which is the indicator for a watchdog reset. The reason for this is that the timestamp is in seconds and the time is taken from sched clock and divided by ~1e9. 
sched clock starts at 0 which means that for the first second during boot the watchdog timestamp is 0, i.e. reset. Use ULONG_MAX as the reset indicator value so the watchdog works correctly right from the start. ULONG_MAX would only conflict with a real timestamp if the system reaches an uptime of 136 years on 32bit and almost eternity on 64bit. Reported-by: Robert Richter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87o8v3uuzl.fsf@nanos.tec.linutronix.de Signed-off-by: Sasha Levin --- kernel/watchdog.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 087994b23f8b..e4db5d54c07c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -164,6 +164,8 @@ static void lockup_detector_update_enable(void) #ifdef CONFIG_SOFTLOCKUP_DETECTOR +#define SOFTLOCKUP_RESET ULONG_MAX + /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; @@ -271,7 +273,7 @@ notrace void touch_softlockup_watchdog_sched(void) * Preemption can be enabled. It doesn't matter which CPU's timestamp * gets zeroed here, so use the raw_ operation. */ - raw_cpu_write(watchdog_touch_ts, 0); + raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET); } notrace void touch_softlockup_watchdog(void) @@ -295,14 +297,14 @@ void touch_all_softlockup_watchdogs(void) * the softlockup check. */ for_each_cpu(cpu, &watchdog_allowed_mask) - per_cpu(watchdog_touch_ts, cpu) = 0; + per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET; wq_watchdog_touch(-1); } void touch_softlockup_watchdog_sync(void) { __this_cpu_write(softlockup_touch_sync, true); - __this_cpu_write(watchdog_touch_ts, 0); + __this_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET); } static int is_softlockup(unsigned long touch_ts) @@ -354,7 +356,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* .. 
and repeat */ hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); - if (touch_ts == 0) { + if (touch_ts == SOFTLOCKUP_RESET) { if (unlikely(__this_cpu_read(softlockup_touch_sync))) { /* * If the time stamp was touched atomically -- GitLab From 981bc763712ca0a42ec7323d974d07cea0ee6eff Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 30 Dec 2019 17:41:41 +0800 Subject: [PATCH 0180/1278] f2fs: fix memleak of kobject [ Upstream commit fe396ad8e7526f059f7b8c7290d33a1b84adacab ] If kobject_init_and_add() failed, caller needs to invoke kobject_put() to release kobject explicitly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/sysfs.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 79e45e760c20..a55919eec035 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -507,10 +507,12 @@ int __init f2fs_init_sysfs(void) ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype, NULL, "features"); - if (ret) + if (ret) { + kobject_put(&f2fs_feat); kset_unregister(&f2fs_kset); - else + } else { f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + } return ret; } @@ -531,8 +533,11 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &f2fs_sb_ktype, NULL, "%s", sb->s_id); - if (err) + if (err) { + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); return err; + } if (f2fs_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); -- GitLab From 9e09e071f7e0e75546071c5e1c1392641b8ab900 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Jan 2020 18:22:36 +0100 Subject: [PATCH 0181/1278] x86/mm: Fix NX bit clearing issue in kernel_map_pages_in_pgd [ Upstream commit 75fbef0a8b6b4bb19b9a91b5214f846c2dc5139e ] The following commit: 15f003d20782 ("x86/mm/pat: Don't implicitly allow _PAGE_RW in kernel_map_pages_in_pgd()") modified kernel_map_pages_in_pgd() to 
manage writable permissions of memory mappings in the EFI page table in a different way, but in the process, it removed the ability to clear NX attributes from read-only mappings, by clobbering the clear mask if _PAGE_RW is not being requested. Failure to remove the NX attribute from read-only mappings is unlikely to be a security issue, but it does prevent us from tightening the permissions in the EFI page tables going forward, so let's fix it now. Fixes: 15f003d20782 ("x86/mm/pat: Don't implicitly allow _PAGE_RW in kernel_map_pages_in_pgd()") Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-5-ardb@kernel.org Signed-off-by: Sasha Levin --- arch/x86/mm/pageattr.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 835620ab435f..eaee1a7ed0b5 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -2077,19 +2077,13 @@ int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, .pgd = pgd, .numpages = numpages, .mask_set = __pgprot(0), - .mask_clr = __pgprot(0), + .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)), .flags = 0, }; if (!(__supported_pte_mask & _PAGE_NX)) goto out; - if (!(page_flags & _PAGE_NX)) - cpa.mask_clr = __pgprot(_PAGE_NX); - - if (!(page_flags & _PAGE_RW)) - cpa.mask_clr = __pgprot(_PAGE_RW); - if (!(page_flags & _PAGE_ENC)) cpa.mask_clr = pgprot_encrypted(cpa.mask_clr); -- GitLab From 01289476f55d716dc705a50e5fd6296c73342912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 11 Nov 2019 10:03:54 +0100 Subject: [PATCH 0182/1278] pwm: omap-dmtimer: Remove PWM chip in .remove before making it unfunctional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 43efdc8f0e6d7088ec61bd55a73bf853f002d043 ] In the old code (e.g.) mutex_destroy() was called before pwmchip_remove().
Between these two calls it is possible that a PWM callback is used which tries to grab the mutex. Fixes: 6604c6556db9 ("pwm: Add PWM driver for OMAP using dual-mode timers") Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-omap-dmtimer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 5ad42f33e70c..2e15acf13893 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -337,6 +337,11 @@ static int pwm_omap_dmtimer_probe(struct platform_device *pdev) static int pwm_omap_dmtimer_remove(struct platform_device *pdev) { struct pwm_omap_dmtimer_chip *omap = platform_get_drvdata(pdev); + int ret; + + ret = pwmchip_remove(&omap->chip); + if (ret) + return ret; if (pm_runtime_active(&omap->dm_timer_pdev->dev)) omap->pdata->stop(omap->dm_timer); @@ -345,7 +350,7 @@ static int pwm_omap_dmtimer_remove(struct platform_device *pdev) mutex_destroy(&omap->mutex); - return pwmchip_remove(&omap->chip); + return 0; } static const struct of_device_id pwm_omap_dmtimer_of_match[] = { -- GitLab From 8f80d62a385ad6e44c3702f489bb3f7ace779f30 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 7 Jan 2020 16:04:41 +0300 Subject: [PATCH 0183/1278] cmd64x: potential buffer overflow in cmd64x_program_timings() [ Upstream commit 117fcc3053606d8db5cef8821dca15022ae578bb ] The "drive->dn" value is a u8 and it is controlled by root only, but it could be out of bounds here so let's check. Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/ide/cmd64x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c index b127ed60c733..9dde8390da09 100644 --- a/drivers/ide/cmd64x.c +++ b/drivers/ide/cmd64x.c @@ -65,6 +65,9 @@ static void cmd64x_program_timings(ide_drive_t *drive, u8 mode) struct ide_timing t; u8 arttim = 0; + if (drive->dn >= ARRAY_SIZE(drwtim_regs)) + return; + ide_timing_compute(drive, mode, &t, T, 0); /* -- GitLab From 3c353d76495fced50a0ab75ed3720353013a211e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 7 Jan 2020 16:06:07 +0300 Subject: [PATCH 0184/1278] ide: serverworks: potential overflow in svwks_set_pio_mode() [ Upstream commit ce1f31b4c0b9551dd51874dd5364654ed4ca13ae ] The "drive->dn" variable is a u8 controlled by root. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/ide/serverworks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c index a97affca18ab..0f57d45484d1 100644 --- a/drivers/ide/serverworks.c +++ b/drivers/ide/serverworks.c @@ -114,6 +114,9 @@ static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) struct pci_dev *dev = to_pci_dev(hwif->dev); const u8 pio = drive->pio_mode - XFER_PIO_0; + if (drive->dn >= ARRAY_SIZE(drive_pci)) + return; + pci_write_config_byte(dev, drive_pci[drive->dn], pio_modes[pio]); if (svwks_csb_check(dev)) { @@ -140,6 +143,9 @@ static void svwks_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) u8 ultra_enable = 0, ultra_timing = 0, dma_timing = 0; + if (drive->dn >= ARRAY_SIZE(drive_pci2)) + return; + pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing); pci_read_config_byte(dev, 0x54, &ultra_enable); -- GitLab From 8e1b70446107558ebfbae42761da003fed6f6793 Mon Sep 17 00:00:00 2001 From: yu kuai Date: Mon, 20 Jan 2020 19:51:43 +0800 Subject: [PATCH 0185/1278] pwm: Remove set but not set variable 'pwm' MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9871abffc81048e20f02e15d6aa4558a44ad53ea ] Fixes gcc '-Wunused-but-set-variable' warning: drivers/pwm/pwm-pca9685.c: In function ‘pca9685_pwm_gpio_free’: drivers/pwm/pwm-pca9685.c:162:21: warning: variable ‘pwm’ set but not used [-Wunused-but-set-variable] It is never used, and so can be removed. In that case, holding and releasing the lock 'pca->lock' can be removed as well, since nothing is done between them. Fixes: e926b12c611c ("pwm: Clear chip_data in pwm_put()") Signed-off-by: yu kuai Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-pca9685.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 567f5e2771c4..e1e5dfcb16f3 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -170,13 +170,9 @@ static void pca9685_pwm_gpio_set(struct gpio_chip *gpio, unsigned int offset, static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset) { struct pca9685 *pca = gpiochip_get_data(gpio); - struct pwm_device *pwm; pca9685_pwm_gpio_set(gpio, offset, 0); pm_runtime_put(pca->chip.dev); - mutex_lock(&pca->lock); - pwm = &pca->chip.pwms[offset]; - mutex_unlock(&pca->lock); } static int pca9685_pwm_gpio_get_direction(struct gpio_chip *chip, -- GitLab From ad915b3d80144a437dd3c08491a72e0bb4154a2a Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 5 Dec 2019 14:19:57 +0100 Subject: [PATCH 0186/1278] btrfs: fix possible NULL-pointer dereference in integrity checks [ Upstream commit 3dbd351df42109902fbcebf27104149226a4fcd9 ] A user reports a possible NULL-pointer dereference in btrfsic_process_superblock(). We are assigning state->fs_info to a local fs_info variable and afterwards checking for the presence of state.
While we would BUG_ON() a NULL state anyways, we can also just remove the local fs_info copy, as fs_info is only used once as the first argument for btrfs_num_copies(). There we can just pass in state->fs_info as well. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=205003 Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/check-integrity.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 7d5a9b51f0d7..4be07cf31d74 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -642,7 +642,6 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev, static int btrfsic_process_superblock(struct btrfsic_state *state, struct btrfs_fs_devices *fs_devices) { - struct btrfs_fs_info *fs_info = state->fs_info; struct btrfs_super_block *selected_super; struct list_head *dev_head = &fs_devices->devices; struct btrfs_device *device; @@ -713,7 +712,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, break; } - num_copies = btrfs_num_copies(fs_info, next_bytenr, + num_copies = btrfs_num_copies(state->fs_info, next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) pr_info("num_copies(log_bytenr=%llu) = %d\n", -- GitLab From 76d4e6aeac0b7cb830f72780e89b96ba3052118f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 8 Jan 2020 15:29:53 +0100 Subject: [PATCH 0187/1278] btrfs: safely advance counter when looking up bio csums [ Upstream commit 4babad10198fa73fe73239d02c2e99e3333f5f5c ] Dan's smatch tool reports fs/btrfs/file-item.c:295 btrfs_lookup_bio_sums() warn: should this be 'count == -1' which points to the while (count--) loop. With count == 0 the check itself could decrement it to -1. There's a WARN_ON a few lines below that has never been seen in practice though. 
It turns out that the value of page_bytes_left matches the count (by sectorsize multiples). The loop never reaches the state where count would go to -1, because page_bytes_left == 0 is found first and this breaks out. For clarity, use only plain check on count (and only for positive value), decrement safely inside the loop. Any other discrepancy after the whole bio list processing should be reported by the existing WARN_ON_ONCE as well. Reported-by: Dan Carpenter Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/file-item.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 702b3606ad0e..717d82d51bb1 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -288,7 +288,8 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio csum += count * csum_size; nblocks -= count; next: - while (count--) { + while (count > 0) { + count--; disk_bytenr += fs_info->sectorsize; offset += fs_info->sectorsize; page_bytes_left -= fs_info->sectorsize; -- GitLab From 36bd3298141ab084e6d68f6f54d8eefd80381fb0 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 10 Jan 2020 12:26:34 +0800 Subject: [PATCH 0188/1278] btrfs: device stats, log when stats are zeroed [ Upstream commit a69976bc69308aa475d0ba3b8b3efd1d013c0460 ] We had a report indicating that some read errors aren't reported by the device stats in the userland. It is important to have the errors reported in the device stat as user land scripts might depend on it to take the reasonable corrective actions. But to debug these issues we need to be really sure that request to reset the device stat did not come from the userland itself. So log an info message when device error reset happens. 
For example: BTRFS info (device sdc): device stats zeroed by btrfs(9223) Reported-by: philip@philip-seeger.de Link: https://www.spinics.net/lists/linux-btrfs/msg96528.html Reviewed-by: Josef Bacik Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 358e930df4ac..6d34842912e8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7227,6 +7227,8 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, else btrfs_dev_stat_reset(dev, i); } + btrfs_info(fs_info, "device stats zeroed by %s (%d)", + current->comm, task_pid_nr(current)); } else { for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) if (stats->nr_items > i) -- GitLab From e8e2344aa493382890e2f9fd3bff343b7116aaa2 Mon Sep 17 00:00:00 2001 From: Brandon Maier Date: Thu, 30 May 2019 17:52:23 -0500 Subject: [PATCH 0189/1278] remoteproc: Initialize rproc_class before use [ Upstream commit a8f40111d184098cd2b3dc0c7170c42250a5fa09 ] The remoteproc_core and remoteproc drivers all initialize with module_init(). However remoteproc drivers need the rproc_class during their probe. If one of the remoteproc drivers runs init and gets through probe before remoteproc_init() runs, a NULL pointer access of rproc_class's `glue_dirs` spinlock occurs. 
> Unable to handle kernel NULL pointer dereference at virtual address 000000dc > pgd = c0004000 > [000000dc] *pgd=00000000 > Internal error: Oops: 5 [#1] PREEMPT ARM > Modules linked in: > CPU: 0 PID: 1 Comm: swapper Tainted: G W 4.14.106-rt56 #1 > Hardware name: Generic OMAP36xx (Flattened Device Tree) > task: c6050000 task.stack: c604a000 > PC is at rt_spin_lock+0x40/0x6c > LR is at rt_spin_lock+0x28/0x6c > pc : [] lr : [] psr: 60000013 > sp : c604bdc0 ip : 00000000 fp : 00000000 > r10: 00000000 r9 : c61c7c10 r8 : c6269c20 > r7 : c0905888 r6 : c6269c20 r5 : 00000000 r4 : 000000d4 > r3 : 000000dc r2 : c6050000 r1 : 00000002 r0 : 000000d4 > Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none ... > [] (rt_spin_lock) from [] (get_device_parent+0x54/0x17c) > [] (get_device_parent) from [] (device_add+0xe0/0x5b4) > [] (device_add) from [] (rproc_add+0x18/0xd8) > [] (rproc_add) from [] (my_rproc_probe+0x158/0x204) > [] (my_rproc_probe) from [] (platform_drv_probe+0x34/0x70) > [] (platform_drv_probe) from [] (driver_probe_device+0x2c8/0x420) > [] (driver_probe_device) from [] (__driver_attach+0x100/0x11c) > [] (__driver_attach) from [] (bus_for_each_dev+0x7c/0xc0) > [] (bus_for_each_dev) from [] (bus_add_driver+0x1cc/0x264) > [] (bus_add_driver) from [] (driver_register+0x78/0xf8) > [] (driver_register) from [] (do_one_initcall+0x100/0x190) > [] (do_one_initcall) from [] (kernel_init_freeable+0x130/0x1d0) > [] (kernel_init_freeable) from [] (kernel_init+0x8/0x114) > [] (kernel_init) from [] (ret_from_fork+0x14/0x24) > Code: e2843008 e3c2203f f5d3f000 e5922010 (e193cf9f) > ---[ end trace 0000000000000002 ]--- Signed-off-by: Brandon Maier Link: https://lore.kernel.org/r/20190530225223.136420-1-brandon.maier@rockwellcollins.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/remoteproc/remoteproc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c 
index eab14b414bf0..cc733b89560a 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -1620,7 +1620,7 @@ static int __init remoteproc_init(void) return 0; } -module_init(remoteproc_init); +subsys_initcall(remoteproc_init); static void __exit remoteproc_exit(void) { -- GitLab From fb065f7bc0d378dda1b5e98d8afb22ccfc07e0aa Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 17 Jan 2020 01:38:43 +0800 Subject: [PATCH 0190/1278] irqchip/mbigen: Set driver .suppress_bind_attrs to avoid remove problems [ Upstream commit d6152e6ec9e2171280436f7b31a571509b9287e1 ] The following crash can be seen for setting CONFIG_DEBUG_TEST_DRIVER_REMOVE=y for DT FW (which some people still use): Hisilicon MBIGEN-V2 60080000.interrupt-controller: Failed to create mbi-gen irqdomain Hisilicon MBIGEN-V2: probe of 60080000.interrupt-controller failed with error -12 [...] Unable to handle kernel paging request at virtual address 0000000000005008 Mem abort info: ESR = 0x96000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=0000041fb9990000 [0000000000005008] pgd=0000000000000000 Internal error: Oops: 96000004 [#1] PREEMPT SMP Modules linked in: CPU: 7 PID: 1 Comm: swapper/0 Not tainted 5.5.0-rc6-00002-g3fc42638a506-dirty #1622 Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon D05 IT21 Nemo 2.0 RC0 04/18/2018 pstate: 40000085 (nZcv daIf -PAN -UAO) pc : mbigen_set_type+0x38/0x60 lr : __irq_set_trigger+0x6c/0x188 sp : ffff800014b4b400 x29: ffff800014b4b400 x28: 0000000000000007 x27: 0000000000000000 x26: 0000000000000000 x25: ffff041fd83bd0d4 x24: ffff041fd83bd188 x23: 0000000000000000 x22: ffff80001193ce00 x21: 0000000000000004 x20: 0000000000000000 x19: ffff041fd83bd000 x18: ffffffffffffffff x17: 0000000000000000 x16: 0000000000000000 x15: ffff8000119098c8 x14: ffff041fb94ec91c x13: ffff041fb94ec1a1 x12: 
0000000000000030 x11: 0101010101010101 x10: 0000000000000040 x9 : 0000000000000000 x8 : ffff041fb98c6680 x7 : ffff800014b4b380 x6 : ffff041fd81636c8 x5 : 0000000000000000 x4 : 000000000000025f x3 : 0000000000005000 x2 : 0000000000005008 x1 : 0000000000000004 x0 : 0000000080000000 Call trace: mbigen_set_type+0x38/0x60 __setup_irq+0x744/0x900 request_threaded_irq+0xe0/0x198 pcie_pme_probe+0x98/0x118 pcie_port_probe_service+0x38/0x78 really_probe+0xa0/0x3e0 driver_probe_device+0x58/0x100 __device_attach_driver+0x90/0xb0 bus_for_each_drv+0x64/0xc8 __device_attach+0xd8/0x138 device_initial_probe+0x10/0x18 bus_probe_device+0x90/0x98 device_add+0x4c4/0x770 device_register+0x1c/0x28 pcie_port_device_register+0x1e4/0x4f0 pcie_portdrv_probe+0x34/0xd8 local_pci_probe+0x3c/0xa0 pci_device_probe+0x128/0x1c0 really_probe+0xa0/0x3e0 driver_probe_device+0x58/0x100 __device_attach_driver+0x90/0xb0 bus_for_each_drv+0x64/0xc8 __device_attach+0xd8/0x138 device_attach+0x10/0x18 pci_bus_add_device+0x4c/0xb8 pci_bus_add_devices+0x38/0x88 pci_host_probe+0x3c/0xc0 pci_host_common_probe+0xf0/0x208 hisi_pcie_almost_ecam_probe+0x24/0x30 platform_drv_probe+0x50/0xa0 really_probe+0xa0/0x3e0 driver_probe_device+0x58/0x100 device_driver_attach+0x6c/0x90 __driver_attach+0x84/0xc8 bus_for_each_dev+0x74/0xc8 driver_attach+0x20/0x28 bus_add_driver+0x148/0x1f0 driver_register+0x60/0x110 __platform_driver_register+0x40/0x48 hisi_pcie_almost_ecam_driver_init+0x1c/0x24 The specific problem here is that the mbigen driver real probe has failed as the mbigen_of_create_domain()->of_platform_device_create() call fails, the reason for that being that we never destroyed the platform device created during the remove test dry run and there is some conflict. Since we generally would never want to unbind this driver, and to save adding a driver tear down path for that, just set the driver .suppress_bind_attrs member to avoid this possibility. 
Signed-off-by: John Garry Signed-off-by: Marc Zyngier Reviewed-by: Hanjun Guo Link: https://lore.kernel.org/r/1579196323-180137-1-git-send-email-john.garry@huawei.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-mbigen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 98b6e1d4b1a6..f7fdbf5d183b 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -381,6 +381,7 @@ static struct platform_driver mbigen_platform_driver = { .name = "Hisilicon MBIGEN-V2", .of_match_table = mbigen_of_match, .acpi_match_table = ACPI_PTR(mbigen_acpi_match), + .suppress_bind_attrs = true, }, .probe = mbigen_device_probe, }; -- GitLab From 02b675041fcd930fd7d95b4e7bb68ddea4b21d9e Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Mon, 20 Jan 2020 18:01:17 +0200 Subject: [PATCH 0191/1278] ALSA: hda/hdmi - add retry logic to parse_intel_hdmi() [ Upstream commit 2928fa0a97ebb9549cb877fdc99aed9b95438c3a ] The initial snd_hda_get_sub_node() can fail on certain devices (e.g. some Chromebook models using Intel GLK). The failure rate is very low, but as this is part of the probe process, end-user impact is high. In observed cases, related hardware status registers have expected values, but the node query still fails. Retrying the node query does seem to help, so fix the problem by adding retry logic to the query. This does not impact non-Intel platforms. 
BugLink: https://github.com/thesofproject/linux/issues/1642 Signed-off-by: Kai Vehmanen Reviewed-by: Takashi Iwai Link: https://lore.kernel.org/r/20200120160117.29130-4-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_hdmi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index f21405597215..12913368c231 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2574,9 +2574,12 @@ static int alloc_intel_hdmi(struct hda_codec *codec) /* parse and post-process for Intel codecs */ static int parse_intel_hdmi(struct hda_codec *codec) { - int err; + int err, retries = 3; + + do { + err = hdmi_parse_codec(codec); + } while (err < 0 && retries--); - err = hdmi_parse_codec(codec); if (err < 0) { generic_spec_free(codec); return err; -- GitLab From aca257592d0b571f86b08f0336229e0d8cc41bbc Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 22 Jan 2020 13:11:54 +0900 Subject: [PATCH 0192/1278] x86/decoder: Add TEST opcode to Group3-2 [ Upstream commit 8b7e20a7ba54836076ff35a28349dabea4cec48f ] Add TEST opcode to Group3-2 reg=001b as same as Group3-1 does. Commit 12a78d43de76 ("x86/decoder: Add new TEST instruction pattern") added a TEST opcode assignment to f6 XX/001/XXX (Group 3-1), but did not add f7 XX/001/XXX (Group 3-2). Actually, this TEST opcode variant (ModRM.reg /1) is not described in the Intel SDM Vol2 but in AMD64 Architecture Programmer's Manual Vol.3, Appendix A.2 Table A-6. ModRM.reg Extensions for the Primary Opcode Map. Without this fix, Randy found a warning by insn_decoder_test related to this issue as below. HOSTCC arch/x86/tools/insn_decoder_test HOSTCC arch/x86/tools/insn_sanity TEST posttest arch/x86/tools/insn_decoder_test: warning: Found an x86 instruction decoder bug, please report this. 
arch/x86/tools/insn_decoder_test: warning: ffffffff81000bf1: f7 0b 00 01 08 00 testl $0x80100,(%rbx) arch/x86/tools/insn_decoder_test: warning: objdump says 6 bytes, but insn_get_length() says 2 arch/x86/tools/insn_decoder_test: warning: Decoded and checked 11913894 instructions with 1 failures TEST posttest arch/x86/tools/insn_sanity: Success: decoded and checked 1000000 random instructions with 0 errors (seed:0x871ce29c) To fix this error, add the TEST opcode according to AMD64 APM Vol.3. [ bp: Massage commit message. ] Reported-by: Randy Dunlap Signed-off-by: Masami Hiramatsu Signed-off-by: Borislav Petkov Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lkml.kernel.org/r/157966631413.9580.10311036595431878351.stgit@devnote2 Signed-off-by: Sasha Levin --- arch/x86/lib/x86-opcode-map.txt | 2 +- tools/objtool/arch/x86/lib/x86-opcode-map.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 0a0e9112f284..5cb9f009f2be 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -909,7 +909,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt index 0a0e9112f284..5cb9f009f2be 100644 --- a/tools/objtool/arch/x86/lib/x86-opcode-map.txt +++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt @@ -909,7 +909,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev -- GitLab From cf3a133389666d42e79a41095ba4018616f6d186 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 10 Dec 2019 14:33:39 +0100 Subject: [PATCH 0193/1278] s390/ftrace: generate traced function stack frame [ Upstream commit 45f7a0da600d3c409b5ad8d5ddddacd98ddc8840 ] Currently backtrace from ftraced function does not contain ftraced function itself. e.g. 
for "path_openat": arch_stack_walk+0x15c/0x2d8 stack_trace_save+0x50/0x68 stack_trace_call+0x15e/0x3d8 ftrace_graph_caller+0x0/0x1c <-- ftrace code do_filp_open+0x7c/0xe8 <-- ftraced function caller do_open_execat+0x76/0x1b8 open_exec+0x52/0x78 load_elf_binary+0x180/0x1160 search_binary_handler+0x8e/0x288 load_script+0x2a8/0x2b8 search_binary_handler+0x8e/0x288 __do_execve_file.isra.39+0x6fa/0xb40 __s390x_sys_execve+0x56/0x68 system_call+0xdc/0x2d8 Ftraced function is expected in the backtrace by ftrace kselftests, which are now failing. It would also be nice to have it for clarity reasons. "ftrace_caller" itself is called without stack frame allocated for it and does not store its caller (ftraced function). Instead it simply allocates a stack frame for "ftrace_trace_function" and sets backchain to point to ftraced function stack frame (which contains ftraced function caller in saved r14). To fix this issue make "ftrace_caller" allocate a stack frame for itself just to store ftraced function for the stack unwinder. 
As a result backtrace looks like the following: arch_stack_walk+0x15c/0x2d8 stack_trace_save+0x50/0x68 stack_trace_call+0x15e/0x3d8 ftrace_graph_caller+0x0/0x1c <-- ftrace code path_openat+0x6/0xd60 <-- ftraced function do_filp_open+0x7c/0xe8 <-- ftraced function caller do_open_execat+0x76/0x1b8 open_exec+0x52/0x78 load_elf_binary+0x180/0x1160 search_binary_handler+0x8e/0x288 load_script+0x2a8/0x2b8 search_binary_handler+0x8e/0x288 __do_execve_file.isra.39+0x6fa/0xb40 __s390x_sys_execve+0x56/0x68 system_call+0xdc/0x2d8 Reported-by: Sven Schnelle Tested-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/mcount.S | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 27110f3294ed..0cfd5a83a1da 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -25,6 +25,12 @@ ENTRY(ftrace_stub) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) +#ifdef __PACK_STACK +/* allocate just enough for r14, r15 and backchain */ +#define TRACED_FUNC_FRAME_SIZE 24 +#else +#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD +#endif ENTRY(_mcount) BR_EX %r14 @@ -38,9 +44,16 @@ ENTRY(ftrace_caller) #ifndef CC_USING_HOTPATCH aghi %r0,MCOUNT_RETURN_FIXUP #endif - aghi %r15,-STACK_FRAME_SIZE + # allocate stack frame for ftrace_caller to contain traced function + aghi %r15,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) + stg %r0,(__SF_GPRS+8*8)(%r15) + stg %r15,(__SF_GPRS+9*8)(%r15) + # allocate pt_regs and stack frame for ftrace_trace_function + aghi %r15,-STACK_FRAME_SIZE stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + aghi %r1,-TRACED_FUNC_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) stg %r0,(STACK_PTREGS_PSW+8)(%r15) stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES -- GitLab From 
cb259e08cb91c893028ea19188e5fae8ea3d9959 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 16 Jan 2020 17:57:58 +0000 Subject: [PATCH 0194/1278] driver core: platform: fix u32 greater or equal to zero comparison [ Upstream commit 0707cfa5c3ef58effb143db9db6d6e20503f9dec ] Currently the check that a u32 variable i is >= 0 is always true because the unsigned variable will never be negative, causing the loop to run forever. Fix this by changing the pre-decrement check to a zero check on i followed by a decrement of i. Addresses-Coverity: ("Unsigned compared against 0") Fixes: 39cc539f90d0 ("driver core: platform: Prevent resouce overflow from causing infinite loops") Signed-off-by: Colin Ian King Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20200116175758.88396-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index e3d40c41c33b..bcb6519fe211 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -428,7 +428,7 @@ int platform_device_add(struct platform_device *pdev) pdev->id = PLATFORM_DEVID_AUTO; } - while (--i >= 0) { + while (i--) { struct resource *r = &pdev->resource[i]; if (r->parent) release_resource(r); -- GitLab From e0e88677fabe4e514e2ed27815ec3141120a4afa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Gro=C3=9Fe?= Date: Wed, 22 Jan 2020 19:01:06 +0100 Subject: [PATCH 0195/1278] ALSA: hda - Add docking station support for Lenovo Thinkpad T420s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ef7d84caa5928b40b1c93a26dbe5a3f12737c6ab ] Lenovo Thinkpad T420s uses the same codec as T420, so apply the same quirk to enable audio output on a docking station. 
Signed-off-by: Peter Große Link: https://lore.kernel.org/r/20200122180106.9351-1-pegro@friiks.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 382b6d2ed803..9cc9304ff21a 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -969,6 +969,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x215f, "Lenovo T510", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21ce, "Lenovo T420", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21cf, "Lenovo T520", CXT_PINCFG_LENOVO_TP410), + SND_PCI_QUIRK(0x17aa, 0x21d2, "Lenovo T420s", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD), -- GitLab From 77f804437a9cdfe4e782e31539f6cb411cf2188c Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 21 Aug 2019 16:26:53 +1000 Subject: [PATCH 0196/1278] powerpc/sriov: Remove VF eeh_dev state when disabling SR-IOV [ Upstream commit 1fb4124ca9d456656a324f1ee29b7bf942f59ac8 ] When disabling virtual functions on an SR-IOV adapter we currently do not correctly remove the EEH state for the now-dead virtual functions. When removing the pci_dn that was created for the VF when SR-IOV was enabled we free the corresponding eeh_dev without removing it from the child device list of the eeh_pe that contained it. This can result in crashes due to the use-after-free. 
Signed-off-by: Oliver O'Halloran Reviewed-by: Sam Bobroff Tested-by: Sam Bobroff Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190821062655.19735-1-oohall@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/pci_dn.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 0e395afbf0f4..0e45a446a8c7 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -261,9 +261,22 @@ void remove_dev_pci_data(struct pci_dev *pdev) continue; #ifdef CONFIG_EEH - /* Release EEH device for the VF */ + /* + * Release EEH state for this VF. The PCI core + * has already torn down the pci_dev for this VF, but + * we're responsible to removing the eeh_dev since it + * has the same lifetime as the pci_dn that spawned it. + */ edev = pdn_to_eeh_dev(pdn); if (edev) { + /* + * We allocate pci_dn's for the totalvfs count, + * but only only the vfs that were activated + * have a configured PE. + */ + if (edev->pe) + eeh_rmv_from_parent_pe(edev); + pdn->edev = NULL; kfree(edev); } -- GitLab From 6ad76bf5b160a17143dc9a995e289faa26a2fca8 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 4 Dec 2019 20:46:11 +0800 Subject: [PATCH 0197/1278] jbd2: switch to use jbd2_journal_abort() when failed to submit the commit record [ Upstream commit d0a186e0d3e7ac05cc77da7c157dae5aa59f95d9 ] We invoke jbd2_journal_abort() to abort the journal and record errno in the jbd2 superblock when committing journal transaction besides the failure on submitting the commit record. But there is no need for the case and we can also invoke jbd2_journal_abort() instead of __jbd2_journal_abort_hard(). 
Fixes: 818d276ceb83a ("ext4: Add the journal checksum feature") Signed-off-by: zhangyi (F) Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20191204124614.45424-2-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/jbd2/commit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index cb0da3d4adc0..1a4bd8d9636e 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -783,7 +783,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); } blk_finish_plug(&plug); @@ -876,7 +876,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); } if (cbh) err = journal_wait_on_commit_record(journal, cbh); -- GitLab From 111159e8e3232b3897f27409244764c5aaba3aa2 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 4 Dec 2019 20:46:13 +0800 Subject: [PATCH 0198/1278] jbd2: make sure ESHUTDOWN to be recorded in the journal superblock [ Upstream commit 0e98c084a21177ef136149c6a293b3d1eb33ff92 ] Commit fb7c02445c49 ("ext4: pass -ESHUTDOWN code to jbd2 layer") wants to allow the jbd2 layer to distinguish shutdown journal abort from other error cases. So the ESHUTDOWN should take precedence over any other errno which has already been recorded after EXT4_FLAGS_SHUTDOWN is set, but it only updates errno in the journal superblock now if the old errno is 0. 
Fixes: fb7c02445c49 ("ext4: pass -ESHUTDOWN code to jbd2 layer") Signed-off-by: zhangyi (F) Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20191204124614.45424-4-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/jbd2/journal.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index eae9ced846d5..6e054b368b5f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2119,8 +2119,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) if (journal->j_flags & JBD2_ABORT) { write_unlock(&journal->j_state_lock); - if (!old_errno && old_errno != -ESHUTDOWN && - errno == -ESHUTDOWN) + if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) jbd2_journal_update_sb_errno(journal); return; } -- GitLab From 26b1c4da29a4d33d31d63a5908dce5ee80d7a7f5 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 10 Jan 2020 13:37:59 +0100 Subject: [PATCH 0199/1278] ARM: 8951/1: Fix Kexec compilation issue. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 76950f7162cad51d2200ebd22c620c14af38f718 ] To perform the reserve_crashkernel() operation kexec uses SECTION_SIZE to find a memblock in a range. SECTION_SIZE is not defined for nommu systems. Trying to compile kexec in these conditions results in a build error: linux/arch/arm/kernel/setup.c: In function ‘reserve_crashkernel’: linux/arch/arm/kernel/setup.c:1016:25: error: ‘SECTION_SIZE’ undeclared (first use in this function); did you mean ‘SECTIONS_WIDTH’? crash_size, SECTION_SIZE); ^~~~~~~~~~~~ SECTIONS_WIDTH linux/arch/arm/kernel/setup.c:1016:25: note: each undeclared identifier is reported only once for each function it appears in linux/scripts/Makefile.build:265: recipe for target 'arch/arm/kernel/setup.o' failed Make KEXEC depend on MMU to fix the compilation issue. 
Signed-off-by: Vincenzo Frascino Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ba9325fc75b8..7a8fbe9a077b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2005,7 +2005,7 @@ config XIP_PHYS_ADDR config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on (!SMP || PM_SLEEP_SMP) - depends on !CPU_V7M + depends on MMU select KEXEC_CORE help kexec is a system call that implements the ability to shutdown your -- GitLab From 16563da40b950b6d6f2f3fe0205cc1cd60fb4966 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 18:15:46 -0700 Subject: [PATCH 0200/1278] hostap: Adjust indentation in prism2_hostapd_add_sta [ Upstream commit b61156fba74f659d0bc2de8f2dbf5bad9f4b8faf ] Clang warns: ../drivers/net/wireless/intersil/hostap/hostap_ap.c:2511:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if (sta->tx_supp_rates & WLAN_RATE_5M5) ^ ../drivers/net/wireless/intersil/hostap/hostap_ap.c:2509:2: note: previous statement is here if (sta->tx_supp_rates & WLAN_RATE_2M) ^ 1 warning generated. This warning occurs because there is a space before the tab on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. 
Fixes: ff1d2767d5a4 ("Add HostAP wireless driver.") Link: https://github.com/ClangBuiltLinux/linux/issues/813 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/intersil/hostap/hostap_ap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c index 1a8d8db80b05..486ca1ee306e 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ap.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c @@ -2568,7 +2568,7 @@ static int prism2_hostapd_add_sta(struct ap_data *ap, sta->supported_rates[0] = 2; if (sta->tx_supp_rates & WLAN_RATE_2M) sta->supported_rates[1] = 4; - if (sta->tx_supp_rates & WLAN_RATE_5M5) + if (sta->tx_supp_rates & WLAN_RATE_5M5) sta->supported_rates[2] = 11; if (sta->tx_supp_rates & WLAN_RATE_11M) sta->supported_rates[3] = 22; -- GitLab From 8fcb294f77535fc14b050879c7817f36df3a4a6f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 26 Jan 2020 00:09:54 +0000 Subject: [PATCH 0201/1278] iwlegacy: ensure loop counter addr does not wrap and cause an infinite loop [ Upstream commit c2f9a4e4a5abfc84c01b738496b3fd2d471e0b18 ] The loop counter addr is a u16 whereas the upper limit of the loop is an int. In the unlikely event that the il->cfg->eeprom_size is greater than 64K then we end up with an infinite loop since addr will wrap around and never reach the upper loop limit. Fix this by making addr an int. 
Addresses-Coverity: ("Infinite loop") Fixes: be663ab67077 ("iwlwifi: split the drivers for agn and legacy devices 3945/4965") Signed-off-by: Colin Ian King Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlegacy/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c index 8d5acda92a9b..6e6b124f0d5e 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.c +++ b/drivers/net/wireless/intel/iwlegacy/common.c @@ -717,7 +717,7 @@ il_eeprom_init(struct il_priv *il) u32 gp = _il_rd(il, CSR_EEPROM_GP); int sz; int ret; - u16 addr; + int addr; /* allocate eeprom */ sz = il->cfg->eeprom_size; -- GitLab From e534bec3712fc768f133defd6303353087bf90c8 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 22 Jan 2020 11:07:56 +1000 Subject: [PATCH 0202/1278] cifs: fix NULL dereference in match_prepath [ Upstream commit fe1292686333d1dadaf84091f585ee903b9ddb84 ] RHBZ: 1760879 Fix an oops in match_prepath() by making sure that the prepath string is not NULL before we pass it into strcmp(). 
This is similar to other checks we make for example in cifs_root_iget() Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/connect.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f0b1279a7de6..6e5ecf70996a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3047,8 +3047,10 @@ match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data) { struct cifs_sb_info *old = CIFS_SB(sb); struct cifs_sb_info *new = mnt_data->cifs_sb; - bool old_set = old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; - bool new_set = new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; + bool old_set = (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + old->prepath; + bool new_set = (new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + new->prepath; if (old_set && new_set && !strcmp(new->prepath, old->prepath)) return 1; -- GitLab From b817cc9f7d50bacbb6bd9b0f764432bdac68327c Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 10 Dec 2019 20:29:40 -0500 Subject: [PATCH 0203/1278] ceph: check availability of mds cluster on mount after wait timeout [ Upstream commit 97820058fb2831a4b203981fa2566ceaaa396103 ] If all the MDS daemons are down for some reason, then the first mount attempt will fail with EIO after the mount request times out. A mount attempt will also fail with EIO if all of the MDS's are laggy. This patch changes the code to return -EHOSTUNREACH in these situations and adds a pr_info error message to help the admin determine the cause. 
URL: https://tracker.ceph.com/issues/4386 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/mds_client.c | 3 +-- fs/ceph/super.c | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b968334f841e..f36ddfea4997 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2261,8 +2261,7 @@ static int __do_request(struct ceph_mds_client *mdsc, if (!(mdsc->fsc->mount_options->flags & CEPH_MOUNT_OPT_MOUNTWAIT) && !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) { - err = -ENOENT; - pr_info("probably no mds server is up\n"); + err = -EHOSTUNREACH; goto finish; } } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 088c4488b449..6b10b20bfe32 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1055,6 +1055,11 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, return res; out_splat: + if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { + pr_info("No mds server is up or the cluster is laggy\n"); + err = -EHOSTUNREACH; + } + ceph_mdsc_close_sessions(fsc->mdsc); deactivate_locked_super(sb); goto out_final; -- GitLab From 730dd07c6bb12218ccdc783a14185697e967cdb0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 16 Dec 2019 11:24:57 +0000 Subject: [PATCH 0204/1278] irqchip/gic-v3: Only provision redistributors that are enabled in ACPI [ Upstream commit 926b5dfa6b8dc666ff398044af6906b156e1d949 ] We currently allocate redistributor region structures for individual redistributors when ACPI doesn't present us with compact MMIO regions covering multiple redistributors. It turns out that we allocate these structures even when the redistributor is flagged as disabled by ACPI. It works fine until someone actually tries to parse one of these structures, and access the corresponding MMIO region. Instead, track the number of enabled redistributors, and only allocate what is required.
This makes sure that there is no invalid data to misuse. Signed-off-by: Marc Zyngier Reported-by: Heyi Guo Tested-by: Heyi Guo Link: https://lore.kernel.org/r/20191216062745.63397-1-guoheyi@huawei.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-gic-v3.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 3d7374655587..730b3c1cf7f6 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1253,6 +1253,7 @@ static struct struct redist_region *redist_regs; u32 nr_redist_regions; bool single_redist; + int enabled_rdists; u32 maint_irq; int maint_irq_mode; phys_addr_t vcpu_base; @@ -1347,8 +1348,10 @@ static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header, * If GICC is enabled and has valid gicr base address, then it means * GICR base is presented via GICC */ - if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) + if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) { + acpi_data.enabled_rdists++; return 0; + } /* * It's perfectly valid firmware can pass disabled GICC entry, driver @@ -1378,8 +1381,10 @@ static int __init gic_acpi_count_gicr_regions(void) count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, gic_acpi_match_gicc, 0); - if (count > 0) + if (count > 0) { acpi_data.single_redist = true; + count = acpi_data.enabled_rdists; + } return count; } -- GitLab From b06d001e8831d05e0d6d0d1549f489c19fffa52c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 28 Jan 2020 14:39:26 +1000 Subject: [PATCH 0205/1278] drm/nouveau/disp/nv50-: prevent oops when no channel method map provided [ Upstream commit 0e6176c6d286316e9431b4f695940cfac4ffe6c2 ] The implementations for most channel types contains a map of methods to priv registers in order to provide debugging info when a disp exception has been raised. 
This info is missing from the implementation of PIO channels as they're rather simplistic already, however, if an exception is raised by one of them, we'd end up triggering a NULL-pointer deref. Not ideal... Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206299 Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c index 0c0310498afd..cd9666583d4b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c @@ -73,6 +73,8 @@ nv50_disp_chan_mthd(struct nv50_disp_chan *chan, int debug) if (debug > subdev->debug) return; + if (!mthd) + return; for (i = 0; (list = mthd->data[i].mthd) != NULL; i++) { u32 base = chan->head * mthd->addr; -- GitLab From 9df00bc555e465cb38490ea986cf06191b25a979 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 24 Jan 2020 10:02:56 +0300 Subject: [PATCH 0206/1278] ftrace: fpid_next() should increase position index [ Upstream commit e4075e8bdffd93a9b6d6e1d52fabedceeca5a91b ] If the seq_file .next function does not change position index, read after some lseek can generate unexpected output. Without patch: # dd bs=4 skip=1 if=/sys/kernel/tracing/set_ftrace_pid dd: /sys/kernel/tracing/set_ftrace_pid: cannot skip to specified offset id no pid 2+1 records in 2+1 records out 10 bytes copied, 0.000213285 s, 46.9 kB/s Notice the "id" followed by "no pid". With the patch: # dd bs=4 skip=1 if=/sys/kernel/tracing/set_ftrace_pid dd: /sys/kernel/tracing/set_ftrace_pid: cannot skip to specified offset id 0+1 records in 0+1 records out 3 bytes copied, 0.000202112 s, 14.8 kB/s Notice that it only prints "id" and not the "no pid" afterward.
Link: http://lkml.kernel.org/r/4f87c6ad-f114-30bb-8506-c32274ce2992@virtuozzo.com https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/ftrace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8974ecbcca3c..8a8d92a8045b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6317,9 +6317,10 @@ static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) struct trace_array *tr = m->private; struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids); - if (v == FTRACE_NO_PIDS) + if (v == FTRACE_NO_PIDS) { + (*pos)++; return NULL; - + } return trace_pid_next(pid_list, v, pos); } -- GitLab From c156943230070d2c1240da3b3b051e64aad265b0 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 24 Jan 2020 10:03:06 +0300 Subject: [PATCH 0207/1278] trigger_next should increase position index [ Upstream commit 6722b23e7a2ace078344064a9735fb73e554e9ef ] If the seq_file .next function does not change position index, read after some lseek can generate unexpected output. Without patch: # dd bs=30 skip=1 if=/sys/kernel/tracing/events/sched/sched_switch/trigger dd: /sys/kernel/tracing/events/sched/sched_switch/trigger: cannot skip to specified offset n traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist # Available triggers: # traceon traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist 6+1 records in 6+1 records out 206 bytes copied, 0.00027916 s, 738 kB/s Notice the printing of "# Available triggers:..." after the line.
With the patch: # dd bs=30 skip=1 if=/sys/kernel/tracing/events/sched/sched_switch/trigger dd: /sys/kernel/tracing/events/sched/sched_switch/trigger: cannot skip to specified offset n traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist 2+1 records in 2+1 records out 88 bytes copied, 0.000526867 s, 167 kB/s It only prints the end of the file, and does not restart. Link: http://lkml.kernel.org/r/3c35ee24-dd3a-8119-9c19-552ed253388a@virtuozzo.com https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/trace_events_trigger.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index e2da180ca172..31e91efe243e 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -127,9 +127,10 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) { struct trace_event_file *event_file = event_file_data(m->private); - if (t == SHOW_AVAILABLE_TRIGGERS) + if (t == SHOW_AVAILABLE_TRIGGERS) { + (*pos)++; return NULL; - + } return seq_list_next(t, &event_file->triggers, pos); } -- GitLab From 3e85259525a152b9a2c09855722b106878eaeadf Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 Jan 2020 17:09:52 +0100 Subject: [PATCH 0208/1278] radeon: insert 10ms sleep in dce5_crtc_load_lut MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ec3d65082d7dabad6fa8f66a8ef166f2d522d6b2 ] Per at least one tester this is enough magic to recover the regression introduced for some people (but not all) in commit b8e2b0199cc377617dc238f5106352c06dcd3fa2 Author: Peter Rosin Date: Tue Jul 4 12:36:57 2017 +0200 drm/fb-helper: factor out pseudo-palette which for radeon had the side-effect of refactoring out a seemingly redundant writing of the color palette.
10ms in a fairly slow modeset path feels like an acceptable form of duct-tape, so maybe worth a shot and see what sticks. Cc: Alex Deucher Cc: Michel Dänzer References: https://bugzilla.kernel.org/show_bug.cgi?id=198123 Signed-off-by: Daniel Vetter Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 4f94b78cb464..d86110cdf085 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -119,6 +119,8 @@ static void dce5_crtc_load_lut(struct drm_crtc *crtc) DRM_DEBUG_KMS("%d\n", radeon_crtc->crtc_id); + msleep(10); + WREG32(NI_INPUT_CSC_CONTROL + radeon_crtc->crtc_offset, (NI_INPUT_CSC_GRPH_MODE(NI_INPUT_CSC_BYPASS) | NI_INPUT_CSC_OVL_MODE(NI_INPUT_CSC_BYPASS))); -- GitLab From f354a9e6cf2cb4725bebf3b2e254925f9136f24e Mon Sep 17 00:00:00 2001 From: wangyan Date: Thu, 30 Jan 2020 22:11:50 -0800 Subject: [PATCH 0209/1278] ocfs2: fix a NULL pointer dereference when call ocfs2_update_inode_fsync_trans() [ Upstream commit 9f16ca48fc818a17de8be1f75d08e7f4addc4497 ] I found a NULL pointer dereference in ocfs2_update_inode_fsync_trans(), handle->h_transaction may be NULL in this situation: ocfs2_file_write_iter ->__generic_file_write_iter ->generic_perform_write ->ocfs2_write_begin ->ocfs2_write_begin_nolock ->ocfs2_write_cluster_by_desc ->ocfs2_write_cluster ->ocfs2_mark_extent_written ->ocfs2_change_extent_flag ->ocfs2_split_extent ->ocfs2_try_to_merge_extent ->ocfs2_extend_rotate_transaction ->ocfs2_extend_trans ->jbd2_journal_restart ->jbd2__journal_restart // handle->h_transaction is NULL here ->handle->h_transaction = NULL; ->start_this_handle /* journal aborted due to storage network disconnection, return error */ ->return -EROFS; /* line 3806 in ocfs2_try_to_merge_extent (), it will ignore ret error. */ ->ret = 0; ->... 
->ocfs2_write_end ->ocfs2_write_end_nolock ->ocfs2_update_inode_fsync_trans // NULL pointer dereference ->oi->i_sync_tid = handle->h_transaction->t_tid; The information of NULL pointer dereference as follows: JBD2: Detected IO errors while flushing file data on dm-11-45 Aborting journal on device dm-11-45. JBD2: Error -5 detected when updating journal superblock for dm-11-45. (dd,22081,3):ocfs2_extend_trans:474 ERROR: status = -30 (dd,22081,3):ocfs2_try_to_merge_extent:3877 ERROR: status = -30 Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Mem abort info: ESR = 0x96000004 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000e74e1338 [0000000000000008] pgd=0000000000000000 Internal error: Oops: 96000004 [#1] SMP Process dd (pid: 22081, stack limit = 0x00000000584f35a9) CPU: 3 PID: 22081 Comm: dd Kdump: loaded Hardware name: Huawei TaiShan 2280 V2/BC82AMDD, BIOS 0.98 08/25/2019 pstate: 60400009 (nZCv daif +PAN -UAO) pc : ocfs2_write_end_nolock+0x2b8/0x550 [ocfs2] lr : ocfs2_write_end_nolock+0x2a0/0x550 [ocfs2] sp : ffff0000459fba70 x29: ffff0000459fba70 x28: 0000000000000000 x27: ffff807ccf7f1000 x26: 0000000000000001 x25: ffff807bdff57970 x24: ffff807caf1d4000 x23: ffff807cc79e9000 x22: 0000000000001000 x21: 000000006c6cd000 x20: ffff0000091d9000 x19: ffff807ccb239db0 x18: ffffffffffffffff x17: 000000000000000e x16: 0000000000000007 x15: ffff807c5e15bd78 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000001 x9 : 0000000000000228 x8 : 000000000000000c x7 : 0000000000000fff x6 : ffff807a308ed6b0 x5 : ffff7e01f10967c0 x4 : 0000000000000018 x3 : d0bc661572445600 x2 : 0000000000000000 x1 : 000000001b2e0200 x0 : 0000000000000000 Call trace: ocfs2_write_end_nolock+0x2b8/0x550 [ocfs2] ocfs2_write_end+0x4c/0x80 [ocfs2] 
generic_perform_write+0x108/0x1a8 __generic_file_write_iter+0x158/0x1c8 ocfs2_file_write_iter+0x668/0x950 [ocfs2] __vfs_write+0x11c/0x190 vfs_write+0xac/0x1c0 ksys_write+0x6c/0xd8 __arm64_sys_write+0x24/0x30 el0_svc_common+0x78/0x130 el0_svc_handler+0x38/0x78 el0_svc+0x8/0xc To prevent NULL pointer dereference in this situation, we use is_handle_aborted() before using handle->h_transaction->t_tid. Link: http://lkml.kernel.org/r/03e750ab-9ade-83aa-b000-b9e81e34e539@huawei.com Signed-off-by: Yan Wang Reviewed-by: Jun Piao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Gang He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/journal.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 497a4171ef61..bfb50fc51528 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -637,9 +637,11 @@ static inline void ocfs2_update_inode_fsync_trans(handle_t *handle, { struct ocfs2_inode_info *oi = OCFS2_I(inode); - oi->i_sync_tid = handle->h_transaction->t_tid; - if (datasync) - oi->i_datasync_tid = handle->h_transaction->t_tid; + if (!is_handle_aborted(handle)) { + oi->i_sync_tid = handle->h_transaction->t_tid; + if (datasync) + oi->i_datasync_tid = handle->h_transaction->t_tid; + } } #endif /* OCFS2_JOURNAL_H */ -- GitLab From 55f09a4b9d19a9e4549928cddeea4a23285edcbc Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 30 Jan 2020 22:16:37 -0800 Subject: [PATCH 0210/1278] lib/scatterlist.c: adjust indentation in __sg_alloc_table [ Upstream commit 4e456fee215677584cafa7f67298a76917e89c64 ] Clang warns: ../lib/scatterlist.c:314:5: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] return -ENOMEM; ^ ../lib/scatterlist.c:311:4: note: previous statement is here if (prv) ^ 1 warning generated. This warning occurs because there is a space before the tab on this line. 
Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. Link: http://lkml.kernel.org/r/20191218033606.11942-1-natechancellor@gmail.com Link: https://github.com/ClangBuiltLinux/linux/issues/830 Fixes: edce6820a9fd ("scatterlist: prevent invalid free when alloc fails") Signed-off-by: Nathan Chancellor Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/scatterlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 11fce289d116..834c846c5af8 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -317,7 +317,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, if (prv) table->nents = ++table->orig_nents; - return -ENOMEM; + return -ENOMEM; } sg_init_table(sg, alloc_size); -- GitLab From 8f75e384833b26ec44cb44e82d73ce706f591bdd Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Thu, 30 Jan 2020 22:17:26 -0800 Subject: [PATCH 0211/1278] reiserfs: prevent NULL pointer dereference in reiserfs_insert_item() [ Upstream commit aacee5446a2a1aa35d0a49dab289552578657fb4 ] The variable inode may be NULL in reiserfs_insert_item(), but there is no check before accessing the member of inode. Fix this by adding NULL pointer check before calling reiserfs_debug(). 
Link: http://lkml.kernel.org/r/79c5135d-ff25-1cc9-4e99-9f572b88cc00@huawei.com Signed-off-by: Yunfeng Ye Cc: zhengbin Cc: Hu Shiyuan Cc: Feilong Lin Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/reiserfs/stree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 0037aea97d39..2946713cb00d 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -2250,7 +2250,8 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, /* also releases the path */ unfix_nodes(&s_ins_balance); #ifdef REISERQUOTA_DEBUG - reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, + if (inode) + reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(ih)); #endif -- GitLab From e8518a5ce4035ff5094dca6150919e004ece54e5 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 1 Feb 2020 22:42:32 +0800 Subject: [PATCH 0212/1278] bcache: explicity type cast in bset_bkey_last() [ Upstream commit 7c02b0055f774ed9afb6e1c7724f33bf148ffdc0 ] In bset.h, macro bset_bkey_last() is defined as, bkey_idx((struct bkey *) (i)->d, (i)->keys) Parameter i can be variable type of data structure, the macro always works once the type of struct i has member 'd' and 'keys'. bset_bkey_last() is also used in macro csum_set() to calculate the checksum of a on-disk data structure. When csum_set() is used to calculate checksum of on-disk bcache super block, the parameter 'i' data type is struct cache_sb_disk. Inside struct cache_sb_disk (also in struct cache_sb) the member keys is __u16 type. But bkey_idx() expects unsigned int (a 32bit width), so there is problem when sending parameters via stack to call bkey_idx(). Sparse tool from Intel 0day kbuild system reports this incompatible problem. 
bkey_idx() is part of user space API, so the simplest fix is to cast the (i)->keys to unsigned int type in macro bset_bkey_last(). Reported-by: kbuild test robot Signed-off-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/bset.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 8d1964b472e7..0bfde500af19 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -381,7 +381,8 @@ void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *); /* Bkey utility code */ -#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys) +#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, \ + (unsigned int)(i)->keys) static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx) { -- GitLab From 74b16aa6ad15ab29286a09dd1d2595d6422119c4 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Mon, 2 Dec 2019 15:10:21 +0800 Subject: [PATCH 0213/1278] irqchip/gic-v3-its: Reference to its_invall_cmd descriptor when building INVALL [ Upstream commit 107945227ac5d4c37911c7841b27c64b489ce9a9 ] It looks like an obvious mistake to use its_mapc_cmd descriptor when building the INVALL command block. It so far worked by luck because both its_mapc_cmd.col and its_invall_cmd.col sit at the same offset of the ITS command descriptor, but we should not rely on it. 
Fixes: cc2d3216f53c ("irqchip: GICv3: ITS command queue") Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20191202071021.1251-1-yuzenghui@huawei.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 52238e6bed39..799df1e598db 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -527,7 +527,7 @@ static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { its_encode_cmd(cmd, GITS_CMD_INVALL); - its_encode_collection(cmd, desc->its_mapc_cmd.col->col_id); + its_encode_collection(cmd, desc->its_invall_cmd.col->col_id); its_fixup_cmd(cmd); -- GitLab From 16ccc575ff186a051dfe595db582e4f929e2356a Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Fri, 31 Jan 2020 15:45:24 +0200 Subject: [PATCH 0214/1278] iwlwifi: mvm: Fix thermal zone registration [ Upstream commit baa6cf8450b72dcab11f37c47efce7c5b9b8ad0f ] Use a unique name when registering a thermal zone. Otherwise, with multiple NICS, we hit the following warning during the unregistration. WARNING: CPU: 2 PID: 3525 at fs/sysfs/group.c:255 RIP: 0010:sysfs_remove_group+0x80/0x90 Call Trace: dpm_sysfs_remove+0x57/0x60 device_del+0x5a/0x350 ? 
sscanf+0x4e/0x70 device_unregister+0x1a/0x60 hwmon_device_unregister+0x4a/0xa0 thermal_remove_hwmon_sysfs+0x175/0x1d0 thermal_zone_device_unregister+0x188/0x1e0 iwl_mvm_thermal_exit+0xe7/0x100 [iwlmvm] iwl_op_mode_mvm_stop+0x27/0x180 [iwlmvm] _iwl_op_mode_stop.isra.3+0x2b/0x50 [iwlwifi] iwl_opmode_deregister+0x90/0xa0 [iwlwifi] __exit_compat+0x10/0x2c7 [iwlmvm] __x64_sys_delete_module+0x13f/0x270 do_syscall_64+0x5a/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c index 1232f63278eb..319103f4b432 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c @@ -739,7 +739,8 @@ static struct thermal_zone_device_ops tzone_ops = { static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm) { int i; - char name[] = "iwlwifi"; + char name[16]; + static atomic_t counter = ATOMIC_INIT(0); if (!iwl_mvm_is_tt_in_fw(mvm)) { mvm->tz_device.tzone = NULL; @@ -749,6 +750,7 @@ static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm) BUILD_BUG_ON(ARRAY_SIZE(name) >= THERMAL_NAME_LENGTH); + sprintf(name, "iwlwifi_%u", atomic_inc_return(&counter) & 0xFF); mvm->tz_device.tzone = thermal_zone_device_register(name, IWL_MAX_DTS_TRIPS, IWL_WRITABLE_TRIPS_MSK, -- GitLab From aa9c1f410398a2a70e03bd23f8c80c0699948259 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Sat, 11 Jan 2020 18:44:34 +0530 Subject: [PATCH 0215/1278] microblaze: Prevent the overflow of the start [ Upstream commit 061d2c1d593076424c910cb1b64ecdb5c9a6923f ] In case the start + cache size is more than the max int the start overflows. Prevent the same. 
Signed-off-by: Shubhrajyoti Datta Signed-off-by: Michal Simek Signed-off-by: Sasha Levin --- arch/microblaze/kernel/cpu/cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c index 0bde47e4fa69..dcba53803fa5 100644 --- a/arch/microblaze/kernel/cpu/cache.c +++ b/arch/microblaze/kernel/cpu/cache.c @@ -92,7 +92,8 @@ static inline void __disable_dcache_nomsr(void) #define CACHE_LOOP_LIMITS(start, end, cache_line_length, cache_size) \ do { \ int align = ~(cache_line_length - 1); \ - end = min(start + cache_size, end); \ + if (start < UINT_MAX - cache_size) \ + end = min(start + cache_size, end); \ start &= align; \ } while (0) -- GitLab From c911b13e8b2d3427061c2da6372da987353eddc1 Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Tue, 4 Feb 2020 19:30:20 +0800 Subject: [PATCH 0216/1278] brd: check and limit max_part par [ Upstream commit c8ab422553c81a0eb070329c63725df1cd1425bc ] In brd_init func, rd_nr num of brd_device are firstly allocated and add in brd_devices, then brd_devices are traversed to add each brd_device by calling add_disk func. When allocating brd_device, the disk->first_minor is set to i * max_part, if rd_nr * max_part is larger than MINORMASK, two different brd_device may have the same devt, then only one of them can be successfully added. when rmmod brd.ko, it will cause oops when calling brd_exit. Follow those steps: # modprobe brd rd_nr=3 rd_size=102400 max_part=1048576 # rmmod brd then, the oops will appear. 
Oops log: [ 726.613722] Call trace: [ 726.614175] kernfs_find_ns+0x24/0x130 [ 726.614852] kernfs_find_and_get_ns+0x44/0x68 [ 726.615749] sysfs_remove_group+0x38/0xb0 [ 726.616520] blk_trace_remove_sysfs+0x1c/0x28 [ 726.617320] blk_unregister_queue+0x98/0x100 [ 726.618105] del_gendisk+0x144/0x2b8 [ 726.618759] brd_exit+0x68/0x560 [brd] [ 726.619501] __arm64_sys_delete_module+0x19c/0x2a0 [ 726.620384] el0_svc_common+0x78/0x130 [ 726.621057] el0_svc_handler+0x38/0x78 [ 726.621738] el0_svc+0x8/0xc [ 726.622259] Code: aa0203f6 aa0103f7 aa1e03e0 d503201f (7940e260) Here, we add brd_check_and_reset_par func to check and limit max_part par. -- V5->V6: - remove useless code V4->V5:(suggested by Ming Lei) - make sure max_part is not larger than DISK_MAX_PARTS V3->V4:(suggested by Ming Lei) - remove useless change - add one limit of max_part V2->V3: (suggested by Ming Lei) - clear .minors when running out of consecutive minor space in brd_alloc - remove limit of rd_nr V1->V2: - add more checks in brd_check_par_valid as suggested by Ming Lei. Signed-off-by: Zhiqiang Liu Reviewed-by: Bob Liu Reviewed-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/brd.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 2d7178f7754e..0129b1921cb3 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -529,6 +529,25 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data) return kobj; } +static inline void brd_check_and_reset_par(void) +{ + if (unlikely(!max_part)) + max_part = 1; + + /* + * make sure 'max_part' can be divided exactly by (1U << MINORBITS), + * otherwise, it is possiable to get same dev_t when adding partitions. 
+ */ + if ((1U << MINORBITS) % max_part != 0) + max_part = 1UL << fls(max_part); + + if (max_part > DISK_MAX_PARTS) { + pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n", + DISK_MAX_PARTS, DISK_MAX_PARTS); + max_part = DISK_MAX_PARTS; + } +} + static int __init brd_init(void) { struct brd_device *brd, *next; @@ -552,8 +571,7 @@ static int __init brd_init(void) if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) return -EIO; - if (unlikely(!max_part)) - max_part = 1; + brd_check_and_reset_par(); for (i = 0; i < rd_nr; i++) { brd = brd_alloc(i); -- GitLab From 2329f0eded07c62dbe40d8b523001525e91b99b4 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 24 Jan 2020 09:10:47 +0300 Subject: [PATCH 0217/1278] help_next should increase position index [ Upstream commit 9f198a2ac543eaaf47be275531ad5cbd50db3edf ] If the seq_file .next function does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Mike Marshall Signed-off-by: Sasha Levin --- fs/orangefs/orangefs-debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 1c59dff530de..34d1cc98260d 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -305,6 +305,7 @@ static void *help_start(struct seq_file *m, loff_t *pos) static void *help_next(struct seq_file *m, void *v, loff_t *pos) { + (*pos)++; gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n"); return NULL; -- GitLab From a4ba26c8801b10e98a6026b71a49ff80146974a0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 6 Feb 2020 02:40:58 -0500 Subject: [PATCH 0218/1278] virtio_balloon: prevent pfn array overflow [ Upstream commit 6e9826e77249355c09db6ba41cd3f84e89f4b614 ] Make sure, at build time, that pfn array is big enough to hold a single page.
It happens to be true since the PAGE_SHIFT value at the moment is 20, which is 1M - exactly 256 4K balloon pages. Signed-off-by: Michael S. Tsirkin Reviewed-by: David Hildenbrand Signed-off-by: Sasha Levin --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 499531608fa2..71970773aad1 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -132,6 +132,8 @@ static void set_page_pfns(struct virtio_balloon *vb, { unsigned int i; + BUILD_BUG_ON(VIRTIO_BALLOON_PAGES_PER_PAGE > VIRTIO_BALLOON_ARRAY_PFNS_MAX); + /* * Set balloon pfns pointing at this page. * Note that the first pfn points at start of the page. -- GitLab From bc99bd6e85e1a08a1c75cd25e6175223083fe54a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 7 Feb 2020 19:26:28 +0200 Subject: [PATCH 0219/1278] mlxsw: spectrum_dpipe: Add missing error path [ Upstream commit 3a99cbb6fa7bca1995586ec2dc21b0368aad4937 ] In case devlink_dpipe_entry_ctx_prepare() failed, release RTNL that was previously taken and free the memory allocated by mlxsw_sp_erif_entry_prepare(). Fixes: 2ba5999f009d ("mlxsw: spectrum: Add Support for erif table entries access") Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 51e6846da72b..3c04f3d5de2d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -225,7 +225,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, start_again: err = devlink_dpipe_entry_ctx_prepare(dump_ctx); if (err) - return err; + goto err_ctx_prepare; j = 0; for (; i < rif_count; i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); @@ -257,6 +257,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, return 0; err_entry_append: err_entry_get: +err_ctx_prepare: rtnl_unlock(); devlink_dpipe_entry_clear(&entry); return err; -- GitLab From ee15cd14fa26e84607b735c6cf15648209081b02 Mon Sep 17 00:00:00 2001 From: Jaihind Yadav Date: Tue, 17 Dec 2019 17:25:47 +0530 Subject: [PATCH 0220/1278] selinux: ensure we cleanup the internal AVC counters on error in avc_update() [ Upstream commit 030b995ad9ece9fa2d218af4429c1c78c2342096 ] In AVC update we don't call avc_node_kill() when avc_xperms_populate() fails, resulting in the avc->avc_cache.active_nodes counter having a false value. In last patch this changes was missed , so correcting it. 
Fixes: fa1aa143ac4a ("selinux: extended permissions for ioctls") Signed-off-by: Jaihind Yadav Signed-off-by: Ravi Kumar Siddojigari [PM: merge fuzz, minor description cleanup] Signed-off-by: Paul Moore Signed-off-by: Sasha Levin --- security/selinux/avc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 2380b8d72cec..23f387b30ece 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -863,7 +863,7 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, if (orig->ae.xp_node) { rc = avc_xperms_populate(node, orig->ae.xp_node); if (rc) { - kmem_cache_free(avc_node_cachep, node); + avc_node_kill(node); goto out_unlock; } } -- GitLab From a90fd80a5c0d4b57f790518d5ed9c1b55e549801 Mon Sep 17 00:00:00 2001 From: Firo Yang Date: Wed, 12 Feb 2020 06:09:17 +0100 Subject: [PATCH 0221/1278] enic: prevent waking up stopped tx queues over watchdog reset [ Upstream commit 0f90522591fd09dd201065c53ebefdfe3c6b55cb ] Recent months, our customer reported several kernel crashes all preceding with following message: NETDEV WATCHDOG: eth2 (enic): transmit queue 0 timed out Error message of one of those crashes: BUG: unable to handle kernel paging request at ffffffffa007e090 After analyzing severl vmcores, I found that most of crashes are caused by memory corruption. And all the corrupted memory areas are overwritten by data of network packets. Moreover, I also found that the tx queues were enabled over watchdog reset. 
After going through the source code, I found that in enic_stop(), the tx queues stopped by netif_tx_disable() could be woken up over a small time window between netif_tx_disable() and the napi_disable() by the following code path: napi_poll-> enic_poll_msix_wq-> vnic_cq_service-> enic_wq_service-> netif_wake_subqueue(enic->netdev, q_number)-> test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state) In turn, upper network stack could queue skb to ENIC NIC through enic_hard_start_xmit(). And this might introduce some race condition. Our customer confirmed that this kind of kernel crash doesn't occur over 90 days since they applied this patch. Signed-off-by: Firo Yang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cisco/enic/enic_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 19f374b180fc..52a3b32390a9 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1972,10 +1972,10 @@ static int enic_stop(struct net_device *netdev) napi_disable(&enic->napi[i]); netif_carrier_off(netdev); - netif_tx_disable(netdev); if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) for (i = 0; i < enic->wq_count; i++) napi_disable(&enic->napi[enic_cq_wq(enic, i)]); + netif_tx_disable(netdev); if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) enic_dev_del_station_addr(enic); -- GitLab From b0f803378344e1a2dfec9818f2d42abcc5c989ed Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Thu, 13 Feb 2020 15:37:09 +0100 Subject: [PATCH 0222/1278] net: dsa: tag_qca: Make sure there is headroom for tag [ Upstream commit 04fb91243a853dbde216d829c79d9632e52aa8d9 ] Passing tag size to skb_cow_head will make sure there is enough headroom for the tag data. This change does not introduce any overhead in case there is already available headroom for tag.
Signed-off-by: Per Forlin Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dsa/tag_qca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index af3a12a36d88..f268c5c3eedb 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -41,7 +41,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); u16 *phdr, hdr; - if (skb_cow_head(skb, 0) < 0) + if (skb_cow_head(skb, QCA_HDR_LEN) < 0) return NULL; skb_push(skb, QCA_HDR_LEN); -- GitLab From 221a199d7c171c63e1ba6d0a9975b535d143bdb1 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 11 Feb 2020 19:33:39 +0100 Subject: [PATCH 0223/1278] net/sched: matchall: add missing validation of TCA_MATCHALL_FLAGS [ Upstream commit 1afa3cc90f8fb745c777884d79eaa1001d6927a6 ] unlike other classifiers that can be offloaded (i.e. users can set flags like 'skip_hw' and 'skip_sw'), 'cls_matchall' doesn't validate the size of netlink attribute 'TCA_MATCHALL_FLAGS' provided by user: add a proper entry to mall_policy. Fixes: b87f7936a932 ("net/sched: Add match-all classifier hw offloading.") Signed-off-by: Davide Caratti Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_matchall.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index d8fd152779c8..a985f91e8b47 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -136,6 +136,7 @@ static void *mall_get(struct tcf_proto *tp, u32 handle) static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { [TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC }, [TCA_MATCHALL_CLASSID] = { .type = NLA_U32 }, + [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 }, }; static int mall_set_parms(struct net *net, struct tcf_proto *tp, -- GitLab From 3fdba7cb6f45818086d597546761f2602f44502e Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 11 Feb 2020 19:33:40 +0100 Subject: [PATCH 0224/1278] net/sched: flower: add missing validation of TCA_FLOWER_FLAGS [ Upstream commit e2debf0852c4d66ba1a8bde12869b196094c70a7 ] unlike other classifiers that can be offloaded (i.e. users can set flags like 'skip_hw' and 'skip_sw'), 'cls_flower' doesn't validate the size of netlink attribute 'TCA_FLOWER_FLAGS' provided by user: add a proper entry to fl_policy. Fixes: 5b33f48842fa ("net/flower: Introduce hardware offload support") Signed-off-by: Davide Caratti Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_flower.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1879665e5a2b..80a5a6d503c8 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -445,6 +445,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_IP_TOS_MASK] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_IP_TTL] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_IP_TTL_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_FLAGS] = { .type = NLA_U32 }, }; static void fl_set_key_val(struct nlattr **tb, -- GitLab From 6e60e4463c2694207932bd0dda9f82536303466f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Feb 2020 11:36:13 -0800 Subject: [PATCH 0225/1278] net/smc: fix leak of kernel memory to user space [ Upstream commit 457fed775c97ac2c0cd1672aaf2ff2c8a6235e87 ] As nlmsg_put() does not clear the memory that is reserved, it is the caller's responsibility to make sure all of this memory will be written, in order to not reveal prior content. While we are at it, we can provide the socket cookie even if clcsock is not set.
syzbot reported : BUG: KMSAN: uninit-value in __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] BUG: KMSAN: uninit-value in __fswab32 include/uapi/linux/swab.h:59 [inline] BUG: KMSAN: uninit-value in __swab32p include/uapi/linux/swab.h:179 [inline] BUG: KMSAN: uninit-value in __be32_to_cpup include/uapi/linux/byteorder/little_endian.h:82 [inline] BUG: KMSAN: uninit-value in get_unaligned_be32 include/linux/unaligned/access_ok.h:30 [inline] BUG: KMSAN: uninit-value in ____bpf_skb_load_helper_32 net/core/filter.c:240 [inline] BUG: KMSAN: uninit-value in ____bpf_skb_load_helper_32_no_cache net/core/filter.c:255 [inline] BUG: KMSAN: uninit-value in bpf_skb_load_helper_32_no_cache+0x14a/0x390 net/core/filter.c:252 CPU: 1 PID: 5262 Comm: syz-executor.5 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] __fswab32 include/uapi/linux/swab.h:59 [inline] __swab32p include/uapi/linux/swab.h:179 [inline] __be32_to_cpup include/uapi/linux/byteorder/little_endian.h:82 [inline] get_unaligned_be32 include/linux/unaligned/access_ok.h:30 [inline] ____bpf_skb_load_helper_32 net/core/filter.c:240 [inline] ____bpf_skb_load_helper_32_no_cache net/core/filter.c:255 [inline] bpf_skb_load_helper_32_no_cache+0x14a/0x390 net/core/filter.c:252 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 kmsan_kmalloc_large+0x73/0xc0 mm/kmsan/kmsan_hooks.c:128 kmalloc_large_node_hook mm/slub.c:1406 [inline] kmalloc_large_node+0x282/0x2c0 mm/slub.c:3841 __kmalloc_node_track_caller+0x44b/0x1200 mm/slub.c:4368 __kmalloc_reserve net/core/skbuff.c:141 [inline] __alloc_skb+0x2fd/0xac0 
net/core/skbuff.c:209 alloc_skb include/linux/skbuff.h:1049 [inline] netlink_dump+0x44b/0x1ab0 net/netlink/af_netlink.c:2224 __netlink_dump_start+0xbb2/0xcf0 net/netlink/af_netlink.c:2352 netlink_dump_start include/linux/netlink.h:233 [inline] smc_diag_handler_dump+0x2ba/0x300 net/smc/smc_diag.c:242 sock_diag_rcv_msg+0x211/0x610 net/core/sock_diag.c:256 netlink_rcv_skb+0x451/0x650 net/netlink/af_netlink.c:2477 sock_diag_rcv+0x63/0x80 net/core/sock_diag.c:275 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0xf9e/0x1100 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x1248/0x14d0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] kernel_sendmsg+0x433/0x440 net/socket.c:679 sock_no_sendpage+0x235/0x300 net/core/sock.c:2740 kernel_sendpage net/socket.c:3776 [inline] sock_sendpage+0x1e1/0x2c0 net/socket.c:937 pipe_to_sendpage+0x38c/0x4c0 fs/splice.c:458 splice_from_pipe_feed fs/splice.c:512 [inline] __splice_from_pipe+0x539/0xed0 fs/splice.c:636 splice_from_pipe fs/splice.c:671 [inline] generic_splice_sendpage+0x1d5/0x2d0 fs/splice.c:844 do_splice_from fs/splice.c:863 [inline] do_splice fs/splice.c:1170 [inline] __do_sys_splice fs/splice.c:1447 [inline] __se_sys_splice+0x2380/0x3350 fs/splice.c:1427 __x64_sys_splice+0x6e/0x90 fs/splice.c:1427 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: f16a7dd5cf27 ("smc: netlink interface for SMC sockets") Signed-off-by: Eric Dumazet Cc: Ursula Braun Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/smc/smc_diag.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index d2d01cf70224..576c37d86051 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -38,15 +38,14 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) { struct smc_sock *smc = smc_sk(sk); + memset(r, 0, sizeof(*r)); r->diag_family = sk->sk_family; + sock_diag_save_cookie(sk, r->id.idiag_cookie); if (!smc->clcsock) return; r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); r->id.idiag_dport = smc->clcsock->sk->sk_dport; r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; - sock_diag_save_cookie(sk, r->id.idiag_cookie); - memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); - memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; } -- GitLab From 010e880595cb51dd8ba6da202761ecec6785753c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 13 Feb 2020 12:56:04 +0300 Subject: [PATCH 0226/1278] thunderbolt: Prevent crash if non-active NVMem file is read commit 03cd45d2e219301880cabc357e3cf478a500080f upstream. The driver does not populate .reg_read callback for the non-active NVMem because the file is supposed to be write-only. However, it turns out NVMem subsystem does not yet support this and expects that the .reg_read callback is provided. If user reads the binary attribute it triggers NULL pointer dereference like this one: BUG: kernel NULL pointer dereference, address: 0000000000000000 ... Call Trace: bin_attr_nvmem_read+0x64/0x80 kernfs_fop_read+0xa7/0x180 vfs_read+0xbd/0x170 ksys_read+0x5a/0xd0 do_syscall_64+0x43/0x150 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this in the driver by providing .reg_read callback that always returns an error. 
Reported-by: Nicholas Johnson Fixes: e6b245ccd524 ("thunderbolt: Add support for host and device NVM firmware upgrade") Signed-off-by: Mika Westerberg Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200213095604.1074-1-mika.westerberg@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index fe2384b019ec..9cfc65ca173d 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -240,6 +240,12 @@ static int tb_switch_nvm_read(void *priv, unsigned int offset, void *val, return dma_port_flash_read(sw->dma_port, offset, val, bytes); } +static int tb_switch_nvm_no_read(void *priv, unsigned int offset, void *val, + size_t bytes) +{ + return -EPERM; +} + static int tb_switch_nvm_write(void *priv, unsigned int offset, void *val, size_t bytes) { @@ -285,6 +291,7 @@ static struct nvmem_device *register_nvmem(struct tb_switch *sw, int id, config.read_only = true; } else { config.name = "nvm_non_active"; + config.reg_read = tb_switch_nvm_no_read; config.reg_write = tb_switch_nvm_write; config.root_only = true; } -- GitLab From 556098be62dd2fe6e90063f0fc7895a40c107807 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Feb 2020 20:04:21 -0800 Subject: [PATCH 0227/1278] USB: misc: iowarrior: add support for 2 OEMed devices commit 461d8deb26a7d70254bc0391feb4fd8a95e674e8 upstream. Add support for two OEM devices that are identical to existing IO-Warrior devices, except for the USB device id. 
Cc: Christoph Jung Cc: stable Link: https://lore.kernel.org/r/20200212040422.2991-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 7f226cc3ef8a..9c94ce4a413e 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -33,6 +33,10 @@ /* full speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW56 0x1503 +/* OEMed devices */ +#define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a +#define USB_DEVICE_ID_CODEMERCS_IOW56AM 0x158b + /* Get a minor range for your devices from the usb maintainer */ #ifdef CONFIG_USB_DYNAMIC_MINORS #define IOWARRIOR_MINOR_BASE 0 @@ -137,6 +141,8 @@ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV1)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV2)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); @@ -364,6 +370,7 @@ static ssize_t iowarrior_write(struct file *file, } switch (dev->product_id) { case USB_DEVICE_ID_CODEMERCS_IOW24: + case USB_DEVICE_ID_CODEMERCS_IOW24SAG: case USB_DEVICE_ID_CODEMERCS_IOWPV1: case USB_DEVICE_ID_CODEMERCS_IOWPV2: case USB_DEVICE_ID_CODEMERCS_IOW40: @@ -378,6 +385,7 @@ static ssize_t iowarrior_write(struct file *file, goto exit; break; case USB_DEVICE_ID_CODEMERCS_IOW56: + case USB_DEVICE_ID_CODEMERCS_IOW56AM: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ @@ -502,6 +510,7 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd, switch (cmd) { 
case IOW_WRITE: if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24 || + dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24SAG || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV1 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV2 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW40) { @@ -786,7 +795,8 @@ static int iowarrior_probe(struct usb_interface *interface, goto error; } - if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) { + if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { @@ -799,7 +809,8 @@ static int iowarrior_probe(struct usb_interface *interface, /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56)) + ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM))) /* IOWarrior56 has wMaxPacketSize different from report size */ dev->report_size = 7; -- GitLab From b949c86834e7aae6040723e074b9d15534ba8ff7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Feb 2020 20:04:22 -0800 Subject: [PATCH 0228/1278] USB: misc: iowarrior: add support for the 28 and 28L devices commit 5f6f8da2d7b5a431d3f391d0d73ace8edfb42af7 upstream. Add new device ids for the 28 and 28L devices. These have 4 interfaces instead of 2, but the driver binds the same, so the driver changes are minimal. 
Cc: Christoph Jung Cc: stable Link: https://lore.kernel.org/r/20200212040422.2991-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 9c94ce4a413e..c7e1d6b64705 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -32,6 +32,9 @@ #define USB_DEVICE_ID_CODEMERCS_IOWPV2 0x1512 /* full speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW56 0x1503 +/* fuller speed iowarrior */ +#define USB_DEVICE_ID_CODEMERCS_IOW28 0x1504 +#define USB_DEVICE_ID_CODEMERCS_IOW28L 0x1505 /* OEMed devices */ #define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a @@ -143,6 +146,8 @@ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); @@ -386,6 +391,8 @@ static ssize_t iowarrior_write(struct file *file, break; case USB_DEVICE_ID_CODEMERCS_IOW56: case USB_DEVICE_ID_CODEMERCS_IOW56AM: + case USB_DEVICE_ID_CODEMERCS_IOW28: + case USB_DEVICE_ID_CODEMERCS_IOW28L: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ @@ -796,7 +803,9 @@ static int iowarrior_probe(struct usb_interface *interface, } if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM)) { + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || + (dev->product_id == 
USB_DEVICE_ID_CODEMERCS_IOW28L)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { @@ -810,7 +819,9 @@ static int iowarrior_probe(struct usb_interface *interface, dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM))) + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L))) /* IOWarrior56 has wMaxPacketSize different from report size */ dev->report_size = 7; -- GitLab From b80f5a9b41aea2d6f7ad860721ec617275731405 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 14 Feb 2020 08:11:48 -0800 Subject: [PATCH 0229/1278] USB: misc: iowarrior: add support for the 100 device commit bab5417f5f0118ce914bc5b2f8381e959e891155 upstream. Add a new device id for the 100 devie. It has 4 interfaces like the 28 and 28L devices but a larger endpoint so more I/O pins. 
Cc: Christoph Jung Cc: stable Link: https://lore.kernel.org/r/20200214161148.GA3963518@kroah.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index c7e1d6b64705..1ec32e5aa004 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -35,6 +35,7 @@ /* fuller speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW28 0x1504 #define USB_DEVICE_ID_CODEMERCS_IOW28L 0x1505 +#define USB_DEVICE_ID_CODEMERCS_IOW100 0x1506 /* OEMed devices */ #define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a @@ -148,6 +149,7 @@ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW100)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); @@ -393,6 +395,7 @@ static ssize_t iowarrior_write(struct file *file, case USB_DEVICE_ID_CODEMERCS_IOW56AM: case USB_DEVICE_ID_CODEMERCS_IOW28: case USB_DEVICE_ID_CODEMERCS_IOW28L: + case USB_DEVICE_ID_CODEMERCS_IOW100: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ @@ -805,7 +808,8 @@ static int iowarrior_probe(struct usb_interface *interface, if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L)) { + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { @@ -821,7 
+825,8 @@ static int iowarrior_probe(struct usb_interface *interface, ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L))) + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100))) /* IOWarrior56 has wMaxPacketSize different from report size */ dev->report_size = 7; -- GitLab From e4c587650f64608fcd3506fa2cb47f81c0f88348 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 21 Feb 2020 12:43:35 -0800 Subject: [PATCH 0230/1278] floppy: check FDC index for errors before assigning it commit 2e90ca68b0d2f5548804f22f0dd61145516171e3 upstream. Jordy Zomer reported a KASAN out-of-bounds read in the floppy driver in wait_til_ready(). Which on the face of it can't happen, since as Willy Tarreau points out, the function does no particular memory access. Except through the FDCS macro, which just indexes a static allocation through the current fdc, which is always checked against N_FDC. Except the checking happens after we've already assigned the value. The floppy driver is a disgrace (a lot of it going back to my original horrid "design"), and has no real maintainer. Nobody has the hardware, and nobody really cares. But it still gets used in virtual environment because it's one of those things that everybody supports. The whole thing should be re-written, or at least parts of it should be seriously cleaned up. The 'current fdc' index, which is used by the FDCS macro, and which is often shadowed by a local 'fdc' variable, is a prime example of how not to write code. But because nobody has the hardware or the motivation, let's just fix up the immediate problem with a nasty band-aid: test the fdc index before actually assigning it to the static 'fdc' variable.
Reported-by: Jordy Zomer Cc: Willy Tarreau Cc: Dan Carpenter Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/floppy.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 5f1aa3197244..cbf74731cfce 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -848,14 +848,17 @@ static void reset_fdc_info(int mode) /* selects the fdc and drive, and enables the fdc's input/dma. */ static void set_fdc(int drive) { + unsigned int new_fdc = fdc; + if (drive >= 0 && drive < N_DRIVE) { - fdc = FDC(drive); + new_fdc = FDC(drive); current_drive = drive; } - if (fdc != 1 && fdc != 0) { + if (new_fdc >= N_FDC) { pr_info("bad fdc value\n"); return; } + fdc = new_fdc; set_dor(fdc, ~0, 8); #if N_FDC > 1 set_dor(1 - fdc, ~8, 0); -- GitLab From 00d04bd42fbc1e1f2a2b6e1c6db8b784f9a890c7 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 10 Feb 2020 09:11:30 +0100 Subject: [PATCH 0231/1278] vt: selection, handle pending signals in paste_selection commit 687bff0cd08f790d540cfb7b2349f0d876cdddec upstream. When pasting a selection to a vt, the task is set as INTERRUPTIBLE while waiting for a tty to unthrottle. But signals are not handled at all. Normally, this is not a problem as tty_ldisc_receive_buf receives all the goods and a user has no reason to interrupt the task. There are two scenarios where this matters: 1) when the tty is throttled and a signal is sent to the process, it spins on a CPU until the tty is unthrottled. schedule() does not really schedule, but returns immediately, of course. 2) when the sel_buffer becomes invalid, KASAN prevents any reads from it and the loop simply does not proceed and spins forever (causing the tty to throttle, but the code never sleeps, the same as above). This sometimes happens as there is a race in the sel_buffer handling code. So add signal handling to this ioctl (TIOCL_PASTESEL) and return -EINTR in case a signal is pending.
Signed-off-by: Jiri Slaby Cc: stable Link: https://lore.kernel.org/r/20200210081131.23572-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 7a4c8022c023..b157f17d2be2 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -27,6 +27,8 @@ #include #include +#include + /* Don't take this from : 011-015 on the screen aren't spaces */ #define isspace(c) ((c) == ' ') @@ -338,6 +340,7 @@ int paste_selection(struct tty_struct *tty) unsigned int count; struct tty_ldisc *ld; DECLARE_WAITQUEUE(wait, current); + int ret = 0; console_lock(); poke_blanked_console(); @@ -351,6 +354,10 @@ int paste_selection(struct tty_struct *tty) add_wait_queue(&vc->paste_wait, &wait); while (sel_buffer && sel_buffer_lth > pasted) { set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) { + ret = -EINTR; + break; + } if (tty_throttled(tty)) { schedule(); continue; @@ -366,5 +373,5 @@ int paste_selection(struct tty_struct *tty) tty_buffer_unlock_exclusive(&vc->port); tty_ldisc_deref(ld); - return 0; + return ret; } -- GitLab From 60ba005bbf5751c2c58ca23ccfc9289ae06782b7 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 27 Jan 2020 15:56:16 -0800 Subject: [PATCH 0232/1278] staging: android: ashmem: Disallow ashmem memory from being remapped commit 6d67b0290b4b84c477e6a2fc6e005e174d3c7786 upstream. When ashmem file is mmapped, the resulting vma->vm_file points to the backing shmem file with the generic fops that do not check ashmem permissions like fops of ashmem do. If an mremap is done on the ashmem region, then the permission checks will be skipped. Fix that by disallowing mapping operation on the backing shmem file. 
Reported-by: Jann Horn Signed-off-by: Suren Baghdasaryan Cc: stable # 4.4,4.9,4.14,4.18,5.4 Signed-off-by: Todd Kjos Reviewed-by: Joel Fernandes (Google) Link: https://lore.kernel.org/r/20200127235616.48920-1-tkjos@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 4151bb44a410..9481c0b23386 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -361,8 +361,23 @@ static inline vm_flags_t calc_vm_may_flags(unsigned long prot) _calc_vm_trans(prot, PROT_EXEC, VM_MAYEXEC); } +static int ashmem_vmfile_mmap(struct file *file, struct vm_area_struct *vma) +{ + /* do not allow to mmap ashmem backing shmem file directly */ + return -EPERM; +} + +static unsigned long +ashmem_vmfile_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); +} + static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) { + static struct file_operations vmfile_fops; struct ashmem_area *asma = file->private_data; int ret = 0; @@ -403,6 +418,19 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) } vmfile->f_mode |= FMODE_LSEEK; asma->file = vmfile; + /* + * override mmap operation of the vmfile so that it can't be + * remapped which would lead to creation of a new vma with no + * asma permission checks. Have to override get_unmapped_area + * as well to prevent VM_BUG_ON check for f_ops modification. 
+ */ + if (!vmfile_fops.mmap) { + vmfile_fops = *vmfile->f_op; + vmfile_fops.mmap = ashmem_vmfile_mmap; + vmfile_fops.get_unmapped_area = + ashmem_vmfile_get_unmapped_area; + } + vmfile->f_op = &vmfile_fops; } get_file(asma->file); -- GitLab From 72fb144918083f37110a8b9d762059068b381a5f Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Tue, 4 Feb 2020 19:34:02 +0000 Subject: [PATCH 0233/1278] staging: vt6656: fix sign of rx_dbm to bb_pre_ed_rssi. commit 93134df520f23f4e9998c425b8987edca7016817 upstream. bb_pre_ed_rssi is an u8 rx_dm always returns negative signed values add minus operator to always yield positive. fixes issue where rx sensitivity is always set to maximum because the unsigned numbers were always greater then 100. Fixes: 63b9907f58f1 ("staging: vt6656: mac80211 conversion: create rx function.") Cc: stable Signed-off-by: Malcolm Priestley Link: https://lore.kernel.org/r/aceac98c-6e69-3ce1-dfec-2bf27b980221@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/dpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/vt6656/dpc.c b/drivers/staging/vt6656/dpc.c index 655f0002f880..7b73fa2f8834 100644 --- a/drivers/staging/vt6656/dpc.c +++ b/drivers/staging/vt6656/dpc.c @@ -140,7 +140,7 @@ int vnt_rx_data(struct vnt_private *priv, struct vnt_rcb *ptr_rcb, vnt_rf_rssi_to_dbm(priv, *rssi, &rx_dbm); - priv->bb_pre_ed_rssi = (u8)rx_dbm + 1; + priv->bb_pre_ed_rssi = (u8)-rx_dbm + 1; priv->current_rssi = priv->bb_pre_ed_rssi; frame = skb_data + 8; -- GitLab From 04e2dcbed2136c3b332e10647beb8dc9a7a79e1a Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 10 Feb 2020 15:45:50 +0200 Subject: [PATCH 0234/1278] xhci: Force Maximum Packet size for Full-speed bulk devices to valid range. commit f148b9f402ef002b57bcff3964d45abc8ffb6c3f upstream. A Full-speed bulk USB audio device (DJ-Tech CTRL) with a invalid Maximum Packet Size of 4 causes a xHC "Parameter Error" at enumeration. 
This is because valid Maximum packet sizes for Full-speed bulk endpoints are 8, 16, 32 and 64 bytes. Hosts are not required to support other values than these. See usb 2 specs section 5.8.3 for details. The device starts working after forcing the maximum packet size to 8. This is most likely the case with other devices as well, so force the maximum packet size to a valid range. Cc: stable@vger.kernel.org Reported-by: Rene D Obermueller Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200210134553.9144-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index a80a57decda1..70452c881e56 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1479,9 +1479,15 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, /* Allow 3 retries for everything but isoc, set CErr = 3 */ if (!usb_endpoint_xfer_isoc(&ep->desc)) err_count = 3; - /* Some devices get this wrong */ - if (usb_endpoint_xfer_bulk(&ep->desc) && udev->speed == USB_SPEED_HIGH) - max_packet = 512; + /* HS bulk max packet should be 512, FS bulk supports 8, 16, 32 or 64 */ + if (usb_endpoint_xfer_bulk(&ep->desc)) { + if (udev->speed == USB_SPEED_HIGH) + max_packet = 512; + if (udev->speed == USB_SPEED_FULL) { + max_packet = rounddown_pow_of_two(max_packet); + max_packet = clamp_val(max_packet, 8, 64); + } + } /* xHCI 1.0 and 1.1 indicates that ctrl ep avg TRB Length should be 8 */ if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version >= 0x100) avg_trb_len = 8; -- GitLab From 3026b4972d406b05c9349a8cd1cf931614043d23 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 10 Feb 2020 15:45:52 +0200 Subject: [PATCH 0235/1278] xhci: fix runtime pm enabling for quirky Intel hosts commit 024d411e9c5d49eb96c825af52a3ce2682895676 upstream. 
Intel hosts that need the XHCI_PME_STUCK_QUIRK flag should enable runtime pm by calling xhci_pme_acpi_rtd3_enable() before usb_hcd_pci_probe() calls pci_dev_run_wake(). Otherwise usage count for the device won't be decreased, and runtime suspend is prevented. usb_hcd_pci_probe() only decreases the usage count if device can generate run-time wake-up events, i.e. when pci_dev_run_wake() returns true. This issue was exposed by pci_dev_run_wake() change in commit 8feaec33b986 ("PCI / PM: Always check PME wakeup capability for runtime wakeup support") and should be backported to kernels with that change Cc: # 4.13+ Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200210134553.9144-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 09f228279c01..42d368cb76ce 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -284,6 +284,9 @@ static int xhci_pci_setup(struct usb_hcd *hcd) if (!usb_hcd_is_primary_hcd(hcd)) return 0; + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) + xhci_pme_acpi_rtd3_enable(pdev); + xhci_dbg(xhci, "Got SBRN %u\n", (unsigned int) xhci->sbrn); /* Find any debug ports */ @@ -344,9 +347,6 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) HCC_MAX_PSA(xhci->hcc_params) >= 4) xhci->shared_hcd->can_do_streams = 1; - if (xhci->quirks & XHCI_PME_STUCK_QUIRK) - xhci_pme_acpi_rtd3_enable(dev); - /* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */ pm_runtime_put_noidle(&dev->dev); -- GitLab From e64eae745849c6db1d3497071cc75044126ad501 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 15 Nov 2019 18:50:00 +0200 Subject: [PATCH 0236/1278] usb: host: xhci: update event ring dequeue pointer on purpose commit dc0ffbea5729a3abafa577ebfce87f18b79e294b upstream. 
On some situations, the software handles TRB events slower than adding TRBs, then xhci_handle_event can't return zero long time, the xHC will consider the event ring is full, and trigger "Event Ring Full" error, but in fact, the software has already finished lots of events, just no chance to update ERDP (event ring dequeue pointer). In this commit, we force update ERDP if half of TRBS_PER_SEGMENT events have handled to avoid "Event Ring Full" error. Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/1573836603-10871-2-git-send-email-mathias.nyman@linux.intel.com Signed-off-by: Fabio Estevam Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 60 ++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 61fa3007a74a..868878f5b72b 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2758,6 +2758,42 @@ static int xhci_handle_event(struct xhci_hcd *xhci) return 1; } +/* + * Update Event Ring Dequeue Pointer: + * - When all events have finished + * - To avoid "Event Ring Full Error" condition + */ +static void xhci_update_erst_dequeue(struct xhci_hcd *xhci, + union xhci_trb *event_ring_deq) +{ + u64 temp_64; + dma_addr_t deq; + + temp_64 = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue); + /* If necessary, update the HW's version of the event ring deq ptr. */ + if (event_ring_deq != xhci->event_ring->dequeue) { + deq = xhci_trb_virt_to_dma(xhci->event_ring->deq_seg, + xhci->event_ring->dequeue); + if (deq == 0) + xhci_warn(xhci, "WARN something wrong with SW event ring dequeue ptr\n"); + /* + * Per 4.9.4, Software writes to the ERDP register shall + * always advance the Event Ring Dequeue Pointer value. 
+ */ + if ((temp_64 & (u64) ~ERST_PTR_MASK) == + ((u64) deq & (u64) ~ERST_PTR_MASK)) + return; + + /* Update HC event ring dequeue pointer */ + temp_64 &= ERST_PTR_MASK; + temp_64 |= ((u64) deq & (u64) ~ERST_PTR_MASK); + } + + /* Clear the event handler busy flag (RW1C) */ + temp_64 |= ERST_EHB; + xhci_write_64(xhci, temp_64, &xhci->ir_set->erst_dequeue); +} + /* * xHCI spec says we can get an interrupt, and if the HC has an error condition, * we might get bad data out of the event ring. Section 4.10.2.7 has a list of @@ -2769,9 +2805,9 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) union xhci_trb *event_ring_deq; irqreturn_t ret = IRQ_NONE; unsigned long flags; - dma_addr_t deq; u64 temp_64; u32 status; + int event_loop = 0; spin_lock_irqsave(&xhci->lock, flags); /* Check if the xHC generated the interrupt, or the irq is shared */ @@ -2825,24 +2861,14 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) /* FIXME this should be a delayed service routine * that clears the EHB. */ - while (xhci_handle_event(xhci) > 0) {} - - temp_64 = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue); - /* If necessary, update the HW's version of the event ring deq ptr. */ - if (event_ring_deq != xhci->event_ring->dequeue) { - deq = xhci_trb_virt_to_dma(xhci->event_ring->deq_seg, - xhci->event_ring->dequeue); - if (deq == 0) - xhci_warn(xhci, "WARN something wrong with SW event " - "ring dequeue ptr.\n"); - /* Update HC event ring dequeue pointer */ - temp_64 &= ERST_PTR_MASK; - temp_64 |= ((u64) deq & (u64) ~ERST_PTR_MASK); + while (xhci_handle_event(xhci) > 0) { + if (event_loop++ < TRBS_PER_SEGMENT / 2) + continue; + xhci_update_erst_dequeue(xhci, event_ring_deq); + event_loop = 0; } - /* Clear the event handler busy flag (RW1C); event ring is empty. 
*/ - temp_64 |= ERST_EHB; - xhci_write_64(xhci, temp_64, &xhci->ir_set->erst_dequeue); + xhci_update_erst_dequeue(xhci, event_ring_deq); ret = IRQ_HANDLED; out: -- GitLab From f6f227e878ff5c3f969cadea3dc4a833bc27e247 Mon Sep 17 00:00:00 2001 From: EJ Hsu Date: Thu, 30 Jan 2020 01:25:06 -0800 Subject: [PATCH 0237/1278] usb: uas: fix a plug & unplug racing commit 3e99862c05a9caa5a27969f41566b428696f5a9a upstream. When a uas disk is plugged into an external hub, uas_probe() will be called by the hub thread to do the probe. It will first create a SCSI host and then do the scan for this host. During the scan, it will probe the LUN using the SCSI INQUIRY command, which will be packed in the URB and submitted to uas disk. There might be a chance that this external hub with uas disk attached is unplugged during the scan. In this case, uas driver will fail to submit the URB (due to the NOTATTACHED state of uas device) and try to put this SCSI command back to request queue waiting for next chance to run. In normal case, this cycle will terminate when hub thread gets disconnection event and calls into uas_disconnect() accordingly. But in this case, uas_disconnect() will not be called because hub thread of external hub gets stuck waiting for the completion of this SCSI command. A deadlock happened. In this fix, uas will call scsi_scan_host() asynchronously to avoid the blocking of hub thread.
Signed-off-by: EJ Hsu Acked-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20200130092506.102760-1-ejh@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 8391a88cf90f..9d97543449e6 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -46,6 +46,7 @@ struct uas_dev_info { struct scsi_cmnd *cmnd[MAX_CMNDS]; spinlock_t lock; struct work_struct work; + struct work_struct scan_work; /* for async scanning */ }; enum { @@ -115,6 +116,17 @@ static void uas_do_work(struct work_struct *work) spin_unlock_irqrestore(&devinfo->lock, flags); } +static void uas_scan_work(struct work_struct *work) +{ + struct uas_dev_info *devinfo = + container_of(work, struct uas_dev_info, scan_work); + struct Scsi_Host *shost = usb_get_intfdata(devinfo->intf); + + dev_dbg(&devinfo->intf->dev, "starting scan\n"); + scsi_scan_host(shost); + dev_dbg(&devinfo->intf->dev, "scan complete\n"); +} + static void uas_add_work(struct uas_cmd_info *cmdinfo) { struct scsi_pointer *scp = (void *)cmdinfo; @@ -989,6 +1001,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) init_usb_anchor(&devinfo->data_urbs); spin_lock_init(&devinfo->lock); INIT_WORK(&devinfo->work, uas_do_work); + INIT_WORK(&devinfo->scan_work, uas_scan_work); result = uas_configure_endpoints(devinfo); if (result) @@ -1005,7 +1018,9 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) if (result) goto free_streams; - scsi_scan_host(shost); + /* Submit the delayed_work for SCSI-device scanning */ + schedule_work(&devinfo->scan_work); + return result; free_streams: @@ -1173,6 +1188,12 @@ static void uas_disconnect(struct usb_interface *intf) usb_kill_anchored_urbs(&devinfo->data_urbs); uas_zap_pending(devinfo, DID_NO_CONNECT); + /* + * Prevent SCSI scanning (if it hasn't 
started yet) + * or wait for the SCSI-scanning routine to stop. + */ + cancel_work_sync(&devinfo->scan_work); + scsi_remove_host(shost); uas_free_streams(devinfo); scsi_host_put(shost); -- GitLab From 2bc3e83764e0be8537d44e6e63260676f05cbee7 Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Wed, 12 Feb 2020 14:22:18 +0000 Subject: [PATCH 0238/1278] USB: Fix novation SourceControl XL after suspend commit b692056db8ecc7f452b934f016c17348282b7699 upstream. Currently, the SourceControl will stay in power-down mode after resuming from suspend. This patch resets the device after suspend to power it up. Signed-off-by: Richard Dodd Cc: stable Link: https://lore.kernel.org/r/20200212142220.36892-1-richard.o.dodd@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 19e819aa2419..ad8307140df8 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -291,6 +291,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, + /* novation SoundControl XL */ + { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME }, + { } /* terminating entry must be last */ }; -- GitLab From 37218ed6c9b268560000d38fd5da4013bfe8cf42 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 31 Jan 2020 10:39:26 -0500 Subject: [PATCH 0239/1278] USB: hub: Don't record a connect-change event during reset-resume commit 8099f58f1ecddf4f374f4828a3dff8397c7cbd74 upstream. Paul Zimmerman reports that his USB Bluetooth adapter sometimes crashes following system resume, when it receives a Get-Device-Descriptor request while it is busy doing something else. Such a request was added by commit a4f55d8b8c14 ("usb: hub: Check device descriptor before resusciation"). 
It gets sent when the hub driver's work thread checks whether a connect-change event on an enabled port really indicates a new device has been connected, as opposed to an old device momentarily disconnecting and then reconnecting (which can happen with xHCI host controllers, since they automatically enable connected ports). The same kind of thing occurs when a port's power session is lost during system suspend. When the system wakes up it sees a connect-change event on the port, and if the child device's persist_enabled flag was set then hub_activate() sets the device's reset_resume flag as well as the port's bit in hub->change_bits. The reset-resume code then takes responsibility for checking that the same device is still attached to the port, and it does this as part of the device's resume pathway. By the time the hub driver's work thread starts up again, the device has already been fully reinitialized and is busy doing its own thing. There's no need for the work thread to do the same check a second time, and in fact this unnecessary check is what caused the problem that Paul observed. Note that performing the unnecessary check is not actually a bug. Devices are supposed to be able to send descriptors back to the host even when they are busy doing something else. The underlying cause of Paul's problem lies in his Bluetooth adapter. Nevertheless, we shouldn't perform the same check twice in a row -- and as a nice side benefit, removing the extra check allows the Bluetooth adapter to work more reliably. The work thread performs its check when it sees that the port's bit is set in hub->change_bits. In this situation that bit is interpreted as though a connect-change event had occurred on the port _after_ the reset-resume, which is not what actually happened. One possible fix would be to make the reset-resume code clear the port's bit in hub->change_bits. But it seems simpler to just avoid setting the bit during hub_activate() in the first place. 
That's what this patch does. (Proving that the patch is correct when CONFIG_PM is disabled requires a little thought. In that setting hub_activate() will be called only for initialization and resets, since there won't be any resumes or reset-resumes. During initialization and hub resets the hub doesn't have any child devices, and so this code path never gets executed.) Reported-and-tested-by: Paul Zimmerman Signed-off-by: Alan Stern Link: https://marc.info/?t=157949360700001&r=1&w=2 CC: David Heinzelmann CC: Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.2001311037460.1577-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 7d5ecf36a33c..1bc826285343 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1189,11 +1189,6 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) #ifdef CONFIG_PM udev->reset_resume = 1; #endif - /* Don't set the change_bits when the device - * was powered off. - */ - if (test_bit(port1, hub->power_bits)) - set_bit(port1, hub->change_bits); } else { /* The power session is gone; tell hub_wq */ -- GitLab From 937008f2e1c167ec0e6eaa946bf355b5bae0772b Mon Sep 17 00:00:00 2001 From: Hardik Gajjar Date: Thu, 6 Feb 2020 12:49:23 +0100 Subject: [PATCH 0240/1278] USB: hub: Fix the broken detection of USB3 device in SMSC hub commit 1208f9e1d758c991b0a46a1bd60c616b906bbe27 upstream. Renesas R-Car H3ULCB + Kingfisher Infotainment Board is either not able to detect the USB3.0 mass storage devices or is detecting those as USB2.0 high speed devices. The explanation given by Renesas is that, due to a HW issue, the XHCI driver does not wake up after going to sleep on connecting a USB3.0 device. In order to mitigate that, disable the auto-suspend feature specifically for SMSC hubs from hub_probe() function, as a quirk. 
Renesas Kingfisher Infotainment Board has two USB3.0 ports (CN2) which are connected via USB5534B 4-port SuperSpeed/Hi-Speed, low-power, configurable hub controller. [1] SanDisk USB 3.0 device detected as USB-2.0 before the patch [ 74.036390] usb 5-1.1: new high-speed USB device number 4 using xhci-hcd [ 74.061598] usb 5-1.1: New USB device found, idVendor=0781, idProduct=5581, bcdDevice= 1.00 [ 74.069976] usb 5-1.1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 74.077303] usb 5-1.1: Product: Ultra [ 74.080980] usb 5-1.1: Manufacturer: SanDisk [ 74.085263] usb 5-1.1: SerialNumber: 4C530001110208116550 [2] SanDisk USB 3.0 device detected as USB-3.0 after the patch [ 34.565078] usb 6-1.1: new SuperSpeed Gen 1 USB device number 3 using xhci-hcd [ 34.588719] usb 6-1.1: New USB device found, idVendor=0781, idProduct=5581, bcdDevice= 1.00 [ 34.597098] usb 6-1.1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 34.604430] usb 6-1.1: Product: Ultra [ 34.608110] usb 6-1.1: Manufacturer: SanDisk [ 34.612397] usb 6-1.1: SerialNumber: 4C530001110208116550 Suggested-by: Alan Stern Signed-off-by: Hardik Gajjar Acked-by: Alan Stern Tested-by: Eugeniu Rosca Cc: stable Link: https://lore.kernel.org/r/1580989763-32291-1-git-send-email-hgajjar@de.adit-jv.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 15 +++++++++++++++ drivers/usb/core/hub.h | 1 + 2 files changed, 16 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 1bc826285343..ff1be6a6841b 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -36,7 +36,9 @@ #include "otg_whitelist.h" #define USB_VENDOR_GENESYS_LOGIC 0x05e3 +#define USB_VENDOR_SMSC 0x0424 #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01 +#define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02 /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can @@ -1680,6 +1682,10 @@ static void hub_disconnect(struct usb_interface 
*intf) kfree(hub->buffer); pm_suspend_ignore_children(&intf->dev, false); + + if (hub->quirk_disable_autosuspend) + usb_autopm_put_interface(intf); + kref_put(&hub->kref, hub_release); } @@ -1810,6 +1816,11 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND) hub->quirk_check_port_auto_suspend = 1; + if (id->driver_info & HUB_QUIRK_DISABLE_AUTOSUSPEND) { + hub->quirk_disable_autosuspend = 1; + usb_autopm_get_interface(intf); + } + if (hub_configure(hub, &desc->endpoint[0].desc) >= 0) return 0; @@ -5288,6 +5299,10 @@ static void hub_event(struct work_struct *work) } static const struct usb_device_id hub_id_table[] = { + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, + .idVendor = USB_VENDOR_SMSC, + .bInterfaceClass = USB_CLASS_HUB, + .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, .idVendor = USB_VENDOR_GENESYS_LOGIC, diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index 34c1a7e22aae..657bacfbe3a7 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -69,6 +69,7 @@ struct usb_hub { unsigned quiescing:1; unsigned disconnected:1; unsigned in_reset:1; + unsigned quirk_disable_autosuspend:1; unsigned quirk_check_port_auto_suspend:1; -- GitLab From 2851358d8f2b2aa1c3302abf47b6f179c074e056 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:30 -0600 Subject: [PATCH 0241/1278] staging: rtl8188eu: Fix potential security hole commit 499c405b2b80bb3a04425ba3541d20305e014d3e upstream. In routine rtw_hostapd_ioctl(), the user-controlled p->length is assumed to be at least the size of struct ieee_param size, but this assumption is never checked. This could result in out-of-bounds read/write on kernel heap in case a p->length less than the size of struct ieee_param is specified by the user. 
If p->length is allowed to be greater than the size of the struct, then a malicious user could be wasting kernel memory. Fixes commit a2c60d42d97c ("Add files for new driver - part 16"). Reported by: Pietro Oliva Cc: Pietro Oliva Cc: Stable Fixes: a2c60d42d97c ("staging: r8188eu: Add files for new driver - part 16") Signed-off-by: Larry Finger Link: https://lore.kernel.org/r/20200210180235.21691-2-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index 446310775e90..8c9c5cd03e86 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -2856,7 +2856,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) goto out; } - if (!p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } -- GitLab From b34152501205a893f8091a509cc876e9f4e3f4a0 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:32 -0600 Subject: [PATCH 0242/1278] staging: rtl8188eu: Fix potential overuse of kernel memory commit 4ddf8ab8d15ddbc52eefb44eb64e38466ce1f70f upstream. In routine wpa_supplicant_ioctl(), the user-controlled p->length is checked to be at least the size of struct ieee_param size, but the code does not detect the case where p->length is greater than the size of the struct, thus a malicious user could be wasting kernel memory. Fixes commit a2c60d42d97c ("Add files for new driver - part 16"). Reported by: Pietro Oliva Cc: Pietro Oliva Cc: Stable Fixes commit a2c60d42d97c ("Add files for new driver - part 16"). 
Signed-off-by: Larry Finger Link: https://lore.kernel.org/r/20200210180235.21691-4-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index 8c9c5cd03e86..184fc05a0f8b 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -2051,7 +2051,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) struct ieee_param *param; uint ret = 0; - if (p->length < sizeof(struct ieee_param) || !p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } -- GitLab From ad39c28f11943283429a290f216729af21e03c63 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:31 -0600 Subject: [PATCH 0243/1278] staging: rtl8723bs: Fix potential security hole commit ac33597c0c0d1d819dccfe001bcd0acef7107e7c upstream. In routine rtw_hostapd_ioctl(), the user-controlled p->length is assumed to be at least the size of struct ieee_param, but this assumption is never checked. This could result in out-of-bounds read/write on kernel heap in case a p->length less than the size of struct ieee_param is specified by the user. If p->length is allowed to be greater than the size of the struct, then a malicious user could be wasting kernel memory. Fixes commit 554c0a3abf216 ("staging: Add rtl8723bs sdio wifi driver"). Reported by: Pietro Oliva Cc: Pietro Oliva Cc: Stable Fixes: 554c0a3abf216 ("staging: Add rtl8723bs sdio wifi driver").
Signed-off-by: Larry Finger Link: https://lore.kernel.org/r/20200210180235.21691-3-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index 1b61da61690b..9dc4786fde75 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -4340,7 +4340,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) /* if (p->length < sizeof(struct ieee_param) || !p->pointer) { */ - if (!p->pointer) { + if (!p->pointer || p->length != sizeof(*param)) { ret = -EINVAL; goto out; } -- GitLab From df1c95e4c1217f09f7b7ac2eae718f331ed57089 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:33 -0600 Subject: [PATCH 0244/1278] staging: rtl8723bs: Fix potential overuse of kernel memory commit 23954cb078febfc63a755301fe77e06bccdb4d2a upstream. In routine wpa_supplicant_ioctl(), the user-controlled p->length is checked to be at least the size of struct ieee_param size, but the code does not detect the case where p->length is greater than the size of the struct, thus a malicious user could be wasting kernel memory. Fixes commit 554c0a3abf216 ("staging: Add rtl8723bs sdio wifi driver"). Reported by: Pietro Oliva Cc: Pietro Oliva Cc: Stable Fixes: 554c0a3abf216 ("staging: Add rtl8723bs sdio wifi driver"). 
Signed-off-by: Larry Finger Link: https://lore.kernel.org/r/20200210180235.21691-5-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index 9dc4786fde75..d51f6c452972 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -3495,7 +3495,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) /* down(&ieee->wx_sem); */ - if (p->length < sizeof(struct ieee_param) || !p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } -- GitLab From fcbfe89dd4003645887e7e7a47905031fc457052 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 4 Feb 2020 13:28:41 +0100 Subject: [PATCH 0245/1278] x86/mce/amd: Publish the bank pointer only after setup has succeeded commit 6e5cf31fbe651bed7ba1df768f2e123531132417 upstream. threshold_create_bank() creates a bank descriptor per MCA error thresholding counter which can be controlled over sysfs. It publishes the pointer to that bank in a per-CPU variable and then goes on to create additional thresholding blocks if the bank has such. However, that creation of additional blocks in allocate_threshold_blocks() can fail, leading to a use-after-free through the per-CPU pointer. Therefore, publish that pointer only after all blocks have been setup successfully. 
Fixes: 019f34fccfd5 ("x86, MCE, AMD: Move shared bank to node descriptor") Reported-by: Saar Amar Reported-by: Dan Carpenter Signed-off-by: Borislav Petkov Cc: Link: http://lkml.kernel.org/r/20200128140846.phctkvx5btiexvbx@kili.mountain Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 33 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index a8f47697276b..4fb28239e49c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -1151,8 +1151,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return buf_mcatype; } -static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, - unsigned int block, u32 address) +static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb, + unsigned int bank, unsigned int block, + u32 address) { struct threshold_block *b = NULL; u32 low, high; @@ -1196,16 +1197,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, INIT_LIST_HEAD(&b->miscj); - if (per_cpu(threshold_banks, cpu)[bank]->blocks) { - list_add(&b->miscj, - &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); - } else { - per_cpu(threshold_banks, cpu)[bank]->blocks = b; - } + if (tb->blocks) + list_add(&b->miscj, &tb->blocks->miscj); + else + tb->blocks = b; - err = kobject_init_and_add(&b->kobj, &threshold_ktype, - per_cpu(threshold_banks, cpu)[bank]->kobj, - get_name(bank, b)); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b)); if (err) goto out_free; recurse: @@ -1213,7 +1210,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, if (!address) return 0; - err = allocate_threshold_blocks(cpu, bank, block, address); + err = allocate_threshold_blocks(cpu, tb, bank, block, address); if (err) goto out_free; @@ -1298,8 +1295,6 @@ static int 
threshold_create_bank(unsigned int cpu, unsigned int bank) goto out_free; } - per_cpu(threshold_banks, cpu)[bank] = b; - if (is_shared_bank(bank)) { refcount_set(&b->cpus, 1); @@ -1310,9 +1305,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } } - err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank)); - if (!err) - goto out; + err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank)); + if (err) + goto out_free; + + per_cpu(threshold_banks, cpu)[bank] = b; + + return 0; out_free: kfree(b); -- GitLab From 1a7fb626c92beee0dd0283e2036f3289e7109ae6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Feb 2020 19:01:34 +0100 Subject: [PATCH 0246/1278] x86/mce/amd: Fix kobject lifetime commit 51dede9c05df2b78acd6dcf6a17d21f0877d2d7b upstream. Accessing the MCA thresholding controls in sysfs concurrently with CPU hotplug can lead to a couple of KASAN-reported issues: BUG: KASAN: use-after-free in sysfs_file_ops+0x155/0x180 Read of size 8 at addr ffff888367578940 by task grep/4019 and BUG: KASAN: use-after-free in show_error_count+0x15c/0x180 Read of size 2 at addr ffff888368a05514 by task grep/4454 for example. Both result from the fact that the threshold block creation/teardown code frees the descriptor memory itself instead of defining proper ->release function and leaving it to the driver core to take care of that, after all sysfs accesses have completed. Do that and get rid of the custom freeing code, fixing the above UAFs in the process. [ bp: write commit message. 
] Fixes: 95268664390b ("[PATCH] x86_64: mce_amd support for family 0x10 processors") Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20200214082801.13836-1-bp@alien8.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 4fb28239e49c..bbe94b682119 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -1116,9 +1116,12 @@ static const struct sysfs_ops threshold_ops = { .store = store, }; +static void threshold_block_release(struct kobject *kobj); + static struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, .default_attrs = default_attrs, + .release = threshold_block_release, }; static const char *get_name(unsigned int bank, struct threshold_block *b) @@ -1320,8 +1323,12 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) return err; } -static void deallocate_threshold_block(unsigned int cpu, - unsigned int bank) +static void threshold_block_release(struct kobject *kobj) +{ + kfree(to_block(kobj)); +} + +static void deallocate_threshold_block(unsigned int cpu, unsigned int bank) { struct threshold_block *pos = NULL; struct threshold_block *tmp = NULL; @@ -1331,13 +1338,11 @@ static void deallocate_threshold_block(unsigned int cpu, return; list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { - kobject_put(&pos->kobj); list_del(&pos->miscj); - kfree(pos); + kobject_put(&pos->kobj); } - kfree(per_cpu(threshold_banks, cpu)[bank]->blocks); - per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; + kobject_put(&head->blocks->kobj); } static void __threshold_remove_blocks(struct threshold_bank *b) -- GitLab From d5f2c5dd15051766b2ba8d141201d8c33a6257b8 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Mon, 10 Feb 2020 16:20:53 +0100 Subject: [PATCH 
0247/1278] tty/serial: atmel: manage shutdown in case of RS485 or ISO7816 mode commit 04b5bfe3dc94e64d0590c54045815cb5183fb095 upstream. In atmel_shutdown() we call atmel_stop_rx() and atmel_stop_tx() functions. Prevent the rx restart that is implemented in RS485 or ISO7816 modes when calling atmel_stop_tx() by using the atomic information tasklet_shutdown that is already in place for this purpose. Fixes: 98f2082c3ac4 ("tty/serial: atmel: enforce tasklet init and termination sequences") Signed-off-by: Nicolas Ferre Cc: stable Link: https://lore.kernel.org/r/20200210152053.8289-1-nicolas.ferre@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 367ce812743e..a00227d312d3 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -498,7 +498,8 @@ static void atmel_stop_tx(struct uart_port *port) atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); if (atmel_uart_is_half_duplex(port)) - atmel_start_rx(port); + if (!atomic_read(&atmel_port->tasklet_shutdown)) + atmel_start_rx(port); } -- GitLab From 75ca0a8f124bf40d55947fdef395c800d03c6831 Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Tue, 11 Feb 2020 14:16:01 +0800 Subject: [PATCH 0248/1278] tty: serial: imx: setup the correct sg entry for tx dma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f76707831829530ffdd3888bebc108aecefccaa0 upstream. There has oops as below happen on i.MX8MP EVK platform that has 6G bytes DDR memory. when (xmit->tail < xmit->head) && (xmit->head == 0), it setups one sg entry with sg->length is zero: sg_set_buf(sgl + 1, xmit->buf, xmit->head); if xmit->buf is allocated from >4G address space, and SDMA only support <4G address space, then dma_map_sg() will call swiotlb_map() to do bounce buffer copying and mapping. 
But swiotlb_map() don't allow sg entry's length is zero, otherwise report BUG_ON(). So the patch is to correct the tx DMA scatter list. Oops: [ 287.675715] kernel BUG at kernel/dma/swiotlb.c:497! [ 287.680592] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 287.686075] Modules linked in: [ 287.689133] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.3-00016-g3fdc4e0-dirty #10 [ 287.696872] Hardware name: FSL i.MX8MP EVK (DT) [ 287.701402] pstate: 80000085 (Nzcv daIf -PAN -UAO) [ 287.706199] pc : swiotlb_tbl_map_single+0x1fc/0x310 [ 287.711076] lr : swiotlb_map+0x60/0x148 [ 287.714909] sp : ffff800010003c00 [ 287.718221] x29: ffff800010003c00 x28: 0000000000000000 [ 287.723533] x27: 0000000000000040 x26: ffff800011ae0000 [ 287.728844] x25: ffff800011ae09f8 x24: 0000000000000000 [ 287.734155] x23: 00000001b7af9000 x22: 0000000000000000 [ 287.739465] x21: ffff000176409c10 x20: 00000000001f7ffe [ 287.744776] x19: ffff000176409c10 x18: 000000000000002e [ 287.750087] x17: 0000000000000000 x16: 0000000000000000 [ 287.755397] x15: 0000000000000000 x14: 0000000000000000 [ 287.760707] x13: ffff00017f334000 x12: 0000000000000001 [ 287.766018] x11: 00000000001fffff x10: 0000000000000000 [ 287.771328] x9 : 0000000000000003 x8 : 0000000000000000 [ 287.776638] x7 : 0000000000000000 x6 : 0000000000000000 [ 287.781949] x5 : 0000000000200000 x4 : 0000000000000000 [ 287.787259] x3 : 0000000000000001 x2 : 00000001b7af9000 [ 287.792570] x1 : 00000000fbfff000 x0 : 0000000000000000 [ 287.797881] Call trace: [ 287.800328] swiotlb_tbl_map_single+0x1fc/0x310 [ 287.804859] swiotlb_map+0x60/0x148 [ 287.808347] dma_direct_map_page+0xf0/0x130 [ 287.812530] dma_direct_map_sg+0x78/0xe0 [ 287.816453] imx_uart_dma_tx+0x134/0x2f8 [ 287.820374] imx_uart_dma_tx_callback+0xd8/0x168 [ 287.824992] vchan_complete+0x194/0x200 [ 287.828828] tasklet_action_common.isra.0+0x154/0x1a0 [ 287.833879] tasklet_action+0x24/0x30 [ 287.837540] __do_softirq+0x120/0x23c [ 287.841202] irq_exit+0xb8/0xd8 [ 287.844343] 
__handle_domain_irq+0x64/0xb8 [ 287.848438] gic_handle_irq+0x5c/0x148 [ 287.852185] el1_irq+0xb8/0x180 [ 287.855327] cpuidle_enter_state+0x84/0x360 [ 287.859508] cpuidle_enter+0x34/0x48 [ 287.863083] call_cpuidle+0x18/0x38 [ 287.866571] do_idle+0x1e0/0x280 [ 287.869798] cpu_startup_entry+0x20/0x40 [ 287.873721] rest_init+0xd4/0xe0 [ 287.876949] arch_call_rest_init+0xc/0x14 [ 287.880958] start_kernel+0x420/0x44c [ 287.884622] Code: 9124c021 9417aff8 a94363f7 17ffffd5 (d4210000) [ 287.890718] ---[ end trace 5bc44c4ab6b009ce ]--- [ 287.895334] Kernel panic - not syncing: Fatal exception in interrupt [ 287.901686] SMP: stopping secondary CPUs [ 288.905607] SMP: failed to stop secondary CPUs 0-1 [ 288.910395] Kernel Offset: disabled [ 288.913882] CPU features: 0x0002,2000200c [ 288.917888] Memory Limit: none [ 288.920944] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Reported-by: Eagle Zhou Tested-by: Eagle Zhou Signed-off-by: Fugang Duan Cc: stable Fixes: 7942f8577f2a ("serial: imx: TX DMA: clean up sg initialization") Reviewed-by: Uwe Kleine-König Link: https://lore.kernel.org/r/1581401761-6378-1-git-send-email-fugang.duan@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 969497599e88..630065b551f5 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -557,7 +557,7 @@ static void imx_dma_tx(struct imx_port *sport) sport->tx_bytes = uart_circ_chars_pending(xmit); - if (xmit->tail < xmit->head) { + if (xmit->tail < xmit->head || xmit->head == 0) { sport->dma_tx_nents = 1; sg_init_one(sgl, xmit->buf + xmit->tail, sport->tx_bytes); } else { -- GitLab From 0b86f288e79191a87733796fe161ca76bb391621 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 10 Feb 2020 15:57:30 +0100 Subject: [PATCH 0249/1278] serdev: ttyport: restore client ops on deregistration commit 
0c5aae59270fb1f827acce182786094c9ccf598e upstream. The serdev tty-port controller driver should reset the tty-port client operations also on deregistration to avoid a NULL-pointer dereference in case the port is later re-registered as a normal tty device. Note that this can only happen with tty drivers such as 8250 which have statically allocated port structures that can end up being reused and where a later registration would not register a serdev controller (e.g. due to registration errors or if the devicetree has been changed in between). Specifically, this can be an issue for any statically defined ports that would be registered by 8250 core when an 8250 driver is being unbound. Fixes: bed35c6dfa6a ("serdev: add a tty port controller driver") Cc: stable # 4.11 Reported-by: Loic Poulain Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20200210145730.22762-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serdev/serdev-ttyport.c | 6 ++---- drivers/tty/tty_port.c | 5 +++-- include/linux/tty.h | 2 ++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index 69fc6d9ab490..88cf520da739 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ -238,7 +238,6 @@ struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, struct tty_driver *drv, int idx) { - const struct tty_port_client_operations *old_ops; struct serdev_controller *ctrl; struct serport *serport; int ret; @@ -257,7 +256,6 @@ struct device *serdev_tty_port_register(struct tty_port *port, ctrl->ops = &ctrl_ops; - old_ops = port->client_ops; port->client_ops = &client_ops; port->client_data = ctrl; @@ -270,7 +268,7 @@ struct device *serdev_tty_port_register(struct tty_port *port, err_reset_data: port->client_data = NULL; - port->client_ops = old_ops; + port->client_ops = &tty_port_default_client_ops; serdev_controller_put(ctrl); 
return ERR_PTR(ret); @@ -285,8 +283,8 @@ int serdev_tty_port_unregister(struct tty_port *port) return -ENODEV; serdev_controller_remove(ctrl); - port->client_ops = NULL; port->client_data = NULL; + port->client_ops = &tty_port_default_client_ops; serdev_controller_put(ctrl); return 0; diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index c93a33701d32..dd12c3b86eb4 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -51,10 +51,11 @@ static void tty_port_default_wakeup(struct tty_port *port) } } -static const struct tty_port_client_operations default_client_ops = { +const struct tty_port_client_operations tty_port_default_client_ops = { .receive_buf = tty_port_default_receive_buf, .write_wakeup = tty_port_default_wakeup, }; +EXPORT_SYMBOL_GPL(tty_port_default_client_ops); void tty_port_init(struct tty_port *port) { @@ -67,7 +68,7 @@ void tty_port_init(struct tty_port *port) spin_lock_init(&port->lock); port->close_delay = (50 * HZ) / 100; port->closing_wait = (3000 * HZ) / 100; - port->client_ops = &default_client_ops; + port->client_ops = &tty_port_default_client_ops; kref_init(&port->kref); } EXPORT_SYMBOL(tty_port_init); diff --git a/include/linux/tty.h b/include/linux/tty.h index 0cd621d8c7f0..ead308e996c0 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -224,6 +224,8 @@ struct tty_port_client_operations { void (*write_wakeup)(struct tty_port *port); }; +extern const struct tty_port_client_operations tty_port_default_client_ops; + struct tty_port { struct tty_bufhead buf; /* Locked internally */ struct tty_struct *tty; /* Back pointer */ -- GitLab From 664817c8226582d2a16aa2a260fa4e4ab3b40a09 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 12 Feb 2020 18:04:33 +0200 Subject: [PATCH 0250/1278] MAINTAINERS: Update drm/i915 bug filing URL commit 96228b7df33f8eb9006f8ae96949400aed9bd303 upstream. We've moved from bugzilla to gitlab. 
Cc: stable@vger.kernel.org Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200212160434.6437-1-jani.nikula@intel.com (cherry picked from commit 3a6a4f0810c8ade6f1ff63c34aa9834176b9d88b) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 029f96c43250..e2dd302345c2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6877,7 +6877,7 @@ M: Joonas Lahtinen M: Rodrigo Vivi L: intel-gfx@lists.freedesktop.org W: https://01.org/linuxgraphics/ -B: https://01.org/linuxgraphics/documentation/how-report-bugs +B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs C: irc://chat.freenode.net/intel-gfx Q: http://patchwork.freedesktop.org/project/intel-gfx/ T: git git://anongit.freedesktop.org/drm-intel -- GitLab From 122f23f4bc5c7ea9b0f35cb1b04c9f7886eadb81 Mon Sep 17 00:00:00 2001 From: Ioanna Alifieraki Date: Thu, 20 Feb 2020 20:04:00 -0800 Subject: [PATCH 0251/1278] Revert "ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()" commit edf28f4061afe4c2d9eb1c3323d90e882c1d6800 upstream. This reverts commit a97955844807e327df11aa33869009d14d6b7de0. Commit a97955844807 ("ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()") removes a lock that is needed. This leads to a process looping infinitely in exit_sem() and can also lead to a crash. There is a reproducer available in [1] and with the commit reverted the issue does not reproduce anymore. Using the reproducer found in [1] is fairly easy to reach a point where one of the child processes is looping infinitely in exit_sem between for(;;) and if (semid == -1) block, while it's trying to free its last sem_undo structure which has already been freed by freeary(). Each sem_undo struct is on two lists: one per semaphore set (list_id) and one per process (list_proc). 
The list_id list tracks undos by semaphore set, and the list_proc by process. Undo structures are removed either by freeary() or by exit_sem(). The freeary function is invoked when the user invokes a syscall to remove a semaphore set. During this operation freeary() traverses the list_id associated with the semaphore set and removes the undo structures from both the list_id and list_proc lists. For this case, exit_sem() is called at process exit. Each process contains a struct sem_undo_list (referred to as "ulp") which contains the head for the list_proc list. When the process exits, exit_sem() traverses this list to remove each sem_undo struct. As in freeary(), whenever a sem_undo struct is removed from list_proc, it is also removed from the list_id list. Removing elements from list_id is safe for both exit_sem() and freeary() due to sem_lock(). Removing elements from list_proc is not safe; freeary() locks &un->ulp->lock when it performs list_del_rcu(&un->list_proc) but exit_sem() does not (locking was removed by commit a97955844807 ("ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()"). This can result in the following situation while executing the reproducer [1] : Consider a child process in exit_sem() and the parent in freeary() (because of semctl(sid[i], NSEM, IPC_RMID)). - The list_proc for the child contains the last two undo structs A and B (the rest have been removed either by exit_sem() or freeary()). - The semid for A is 1 and semid for B is 2. - exit_sem() removes A and at the same time freeary() removes B. - Since A and B have different semid sem_lock() will acquire different locks for each process and both can proceed. The bug is that they remove A and B from the same list_proc at the same time because only freeary() acquires the ulp lock. When exit_sem() removes A it makes ulp->list_proc.next to point at B and at the same time freeary() removes B setting B->semid=-1. At the next iteration of for(;;) loop exit_sem() will try to remove B. 
The only way to break from for(;;) is for (&un->list_proc == &ulp->list_proc) to be true which is not. Then exit_sem() will check if B->semid=-1 which is and will continue looping in for(;;) until the memory for B is reallocated and the value at B->semid is changed. At that point, exit_sem() will crash attempting to unlink B from the lists (this can be easily triggered by running the reproducer [1] a second time). To prove this scenario instrumentation was added to keep information about each sem_undo (un) struct that is removed per process and per semaphore set (sma). CPU0 CPU1 [caller holds sem_lock(sma for A)] ... freeary() exit_sem() ... ... ... sem_lock(sma for B) spin_lock(A->ulp->lock) ... list_del_rcu(un_A->list_proc) list_del_rcu(un_B->list_proc) Undo structures A and B have different semid and sem_lock() operations proceed. However they belong to the same list_proc list and they are removed at the same time. This results into ulp->list_proc.next pointing to the address of B which is already removed. After reverting commit a97955844807 ("ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()") the issue was no longer reproducible. [1] https://bugzilla.redhat.com/show_bug.cgi?id=1694779 Link: http://lkml.kernel.org/r/20191211191318.11860-1-ioanna-maria.alifieraki@canonical.com Fixes: a97955844807 ("ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()") Signed-off-by: Ioanna Alifieraki Acked-by: Manfred Spraul Acked-by: Herton R. 
Krzesinski Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Cc: Joel Fernandes (Google) Cc: Davidlohr Bueso Cc: Jay Vosburgh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/sem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ipc/sem.c b/ipc/sem.c index d6dd2dc9ddad..6adc245f3e02 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2248,11 +2248,9 @@ void exit_sem(struct task_struct *tsk) ipc_assert_locked_object(&sma->sem_perm); list_del(&un->list_id); - /* we are the last process using this ulp, acquiring ulp->lock - * isn't required. Besides that, we are also protected against - * IPC_RMID as we hold sma->sem_perm lock now - */ + spin_lock(&ulp->lock); list_del_rcu(&un->list_proc); + spin_unlock(&ulp->lock); /* perform adjustments registered in un */ for (i = 0; i < sma->sem_nsems; i++) { -- GitLab From e3880aaacc5fb7b75af72286158d12c1c54d630e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Feb 2020 20:04:24 -0800 Subject: [PATCH 0252/1278] mm/vmscan.c: don't round up scan size for online memory cgroup commit 76073c646f5f4999d763f471df9e38a5a912d70d upstream. Commit 68600f623d69 ("mm: don't miss the last page because of round-off error") makes the scan size round up to @denominator regardless of the memory cgroup's state, online or offline. This affects the overall reclaiming behavior: the corresponding LRU list is eligible for reclaiming only when its size logically right shifted by @sc->priority is bigger than zero in the former formula. For example, the inactive anonymous LRU list should have at least 0x4000 pages to be eligible for reclaiming when we have 60/12 for swappiness/priority and without taking scan/rotation ratio into account. After the roundup is applied, the inactive anonymous LRU list becomes eligible for reclaiming when its size is bigger than or equal to 0x1000 in the same condition. 
(0x4000 >> 12) * 60 / (60 + 140 + 1) = 1 (((0x1000 >> 12) * 60) + 200) / (60 + 140 + 1) = 1 aarch64 has 512MB huge page size when the base page size is 64KB. The memory cgroup that has a huge page is always eligible for reclaiming in that case. The reclaiming is likely to stop after the huge page is reclaimed, meaning the further iteration on @sc->priority and the sibling and child memory cgroups will be skipped. The overall behaviour has been changed. This fixes the issue by applying the roundup to offlined memory cgroups only, to give more preference to reclaim memory from offlined memory cgroup. It sounds reasonable as that memory is unlikely to be used by anyone. The issue was found by starting up 8 VMs on an Ampere Mustang machine, which has 8 CPUs and 16 GB memory. Each VM is given 2 vCPUs and 2GB memory. It took 264 seconds for all VMs to be completely up and 784MB swap is consumed after that. With this patch applied, it took 236 seconds and 60MB swap to do the same thing. So there is 10% performance improvement for my case. Note that KSM is disabled while THP is enabled in the testing. total used free shared buff/cache available Mem: 16196 10065 2049 16 4081 3749 Swap: 8175 784 7391 total used free shared buff/cache available Mem: 16196 11324 3656 24 1215 2936 Swap: 8175 60 8115 Link: http://lkml.kernel.org/r/20200211024514.8730-1-gshan@redhat.com Fixes: 68600f623d69 ("mm: don't miss the last page because of round-off error") Signed-off-by: Gavin Shan Acked-by: Roman Gushchin Cc: [4.20+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 0cc3c1eb15f5..c6962aa5ddb4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2369,10 +2369,13 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, /* * Scan types proportional to swappiness and * their relative recent reclaim efficiency.
- * Make sure we don't miss the last page - * because of a round-off error. + * Make sure we don't miss the last page on + * the offlined memory cgroups because of a + * round-off error. */ - scan = DIV64_U64_ROUND_UP(scan * fraction[file], + scan = mem_cgroup_online(memcg) ? + div64_u64(scan * fraction[file], denominator) : + DIV64_U64_ROUND_UP(scan * fraction[file], denominator); break; case SCAN_FILE: -- GitLab From edae04b8f4632b19acf1f11345e7d22264f73a86 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Feb 2020 01:46:16 -0500 Subject: [PATCH 0253/1278] drm/amdgpu/soc15: fix xclk for raven commit c657b936ea98630ef5ba4f130ab1ad5c534d0165 upstream. It's 25 Mhz (refclk / 4). This fixes the interpretation of the rlc clock counter. Acked-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index ff7d4827385e..7a2366bd1fba 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -279,7 +279,12 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev) } static u32 soc15_get_xclk(struct amdgpu_device *adev) { - return adev->clock.spll.reference_freq; + u32 reference_clock = adev->clock.spll.reference_freq; + + if (adev->asic_type == CHIP_RAVEN) + return reference_clock / 4; + + return reference_clock; } -- GitLab From f15a843b030022c4e54c6fb04a3102c5af8b0fba Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 14 Feb 2020 10:32:38 +0800 Subject: [PATCH 0254/1278] KVM: x86: don't notify userspace IOAPIC on edge-triggered interrupt EOI commit 7455a8327674e1a7c9a1f5dd1b0743ab6713f6d1 upstream. Commit 13db77347db1 ("KVM: x86: don't notify userspace IOAPIC on edge EOI") said, edge-triggered interrupts don't set a bit in TMR, which means that IOAPIC isn't notified on EOI. 
And var level indicates level-triggered interrupt. But commit 3159d36ad799 ("KVM: x86: use generic function for MSI parsing") replaced var level with irq.level by mistake. Fix it by changing irq.level to irq.trig_mode. Cc: stable@vger.kernel.org Fixes: 3159d36ad799 ("KVM: x86: use generic function for MSI parsing") Signed-off-by: Miaohe Lin Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/irq_comm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 3cc3b2d130a0..4d000aea05e0 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -427,7 +427,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, kvm_set_msi_irq(vcpu->kvm, entry, &irq); - if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0, + if (irq.trig_mode && kvm_apic_match_dest(vcpu, NULL, 0, irq.dest_id, irq.dest_mode)) __set_bit(irq.vector, ioapic_handled_vectors); } -- GitLab From 69b2384bf875b59e85ac38abe6a535440706987a Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 10 Feb 2020 15:45:53 +0200 Subject: [PATCH 0255/1278] xhci: apply XHCI_PME_STUCK_QUIRK to Intel Comet Lake platforms commit a3ae87dce3a5abe0b57c811bab02b2564b574106 upstream. Intel Comet Lake based platforms require the XHCI_PME_STUCK_QUIRK quirk as well. Without this xHC can not enter D3 in runtime suspend.
Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200210134553.9144-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 42d368cb76ce..908496ed3254 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -53,6 +53,7 @@ #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8 #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8 #define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0 +#define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba @@ -191,7 +192,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_CML_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && -- GitLab From 153ad3d8fa86c9c6db330d90b7fe87922891ef1f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 29 Sep 2017 12:34:13 -0400 Subject: [PATCH 0256/1278] VT_RESIZEX: get rid of field-by-field copyin [ Upstream commit 1b3bce4d6bf839304a90951b4b25a5863533bf2a ] Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- drivers/tty/vt/vt_ioctl.c | 68 ++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 41 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 7b34b0ddbf0e..be7990548afe 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -847,58 +847,44 @@ int vt_ioctl(struct tty_struct *tty, case VT_RESIZEX: { - struct vt_consize __user *vtconsize = up; - ushort ll,cc,vlin,clin,vcol,ccol; + struct 
vt_consize v; if (!perm) return -EPERM; - if (!access_ok(VERIFY_READ, vtconsize, - sizeof(struct vt_consize))) { - ret = -EFAULT; - break; - } + if (copy_from_user(&v, up, sizeof(struct vt_consize))) + return -EFAULT; /* FIXME: Should check the copies properly */ - __get_user(ll, &vtconsize->v_rows); - __get_user(cc, &vtconsize->v_cols); - __get_user(vlin, &vtconsize->v_vlin); - __get_user(clin, &vtconsize->v_clin); - __get_user(vcol, &vtconsize->v_vcol); - __get_user(ccol, &vtconsize->v_ccol); - vlin = vlin ? vlin : vc->vc_scan_lines; - if (clin) { - if (ll) { - if (ll != vlin/clin) { - /* Parameters don't add up */ - ret = -EINVAL; - break; - } - } else - ll = vlin/clin; + if (!v.v_vlin) + v.v_vlin = vc->vc_scan_lines; + if (v.v_clin) { + int rows = v.v_vlin/v.v_clin; + if (v.v_rows != rows) { + if (v.v_rows) /* Parameters don't add up */ + return -EINVAL; + v.v_rows = rows; + } } - if (vcol && ccol) { - if (cc) { - if (cc != vcol/ccol) { - ret = -EINVAL; - break; - } - } else - cc = vcol/ccol; + if (v.v_vcol && v.v_ccol) { + int cols = v.v_vcol/v.v_ccol; + if (v.v_cols != cols) { + if (v.v_cols) + return -EINVAL; + v.v_cols = cols; + } } - if (clin > 32) { - ret = -EINVAL; - break; - } - + if (v.v_clin > 32) + return -EINVAL; + for (i = 0; i < MAX_NR_CONSOLES; i++) { if (!vc_cons[i].d) continue; console_lock(); - if (vlin) - vc_cons[i].d->vc_scan_lines = vlin; - if (clin) - vc_cons[i].d->vc_font.height = clin; + if (v.v_vlin) + vc_cons[i].d->vc_scan_lines = v.v_vlin; + if (v.v_clin) + vc_cons[i].d->vc_font.height = v.v_clin; vc_cons[i].d->vc_resize_user = 1; - vc_resize(vc_cons[i].d, cc, ll); + vc_resize(vc_cons[i].d, v.v_cols, v.v_rows); console_unlock(); } break; -- GitLab From 69931c044c9de837602cfd4bcfc28123ce4987e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Feb 2020 11:07:21 -0800 Subject: [PATCH 0257/1278] vt: vt_ioctl: fix race in VT_RESIZEX [ Upstream commit 6cd1ed50efd88261298577cd92a14f2768eddeeb ] We need to make sure vc_cons[i].d is 
not NULL after grabbing console_lock(), or risk a crash. general protection fault, probably for non-canonical address 0xdffffc0000000068: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000340-0x0000000000000347] CPU: 1 PID: 19462 Comm: syz-executor.5 Not tainted 5.5.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:vt_ioctl+0x1f96/0x26d0 drivers/tty/vt/vt_ioctl.c:883 Code: 74 41 e8 bd a6 84 fd 48 89 d8 48 c1 e8 03 42 80 3c 28 00 0f 85 e4 04 00 00 48 8b 03 48 8d b8 40 03 00 00 48 89 fa 48 c1 ea 03 <42> 0f b6 14 2a 84 d2 74 09 80 fa 03 0f 8e b1 05 00 00 44 89 b8 40 RSP: 0018:ffffc900086d7bb0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffffffff8c34ee88 RCX: ffffc9001415c000 RDX: 0000000000000068 RSI: ffffffff83f0e6e3 RDI: 0000000000000340 RBP: ffffc900086d7cd0 R08: ffff888054ce0100 R09: fffffbfff16a2f6d R10: ffff888054ce0998 R11: ffff888054ce0100 R12: 000000000000001d R13: dffffc0000000000 R14: 1ffff920010daf79 R15: 000000000000ff7f FS: 00007f7d13c12700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd477e3c38 CR3: 0000000095d0a000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tty_ioctl+0xa37/0x14f0 drivers/tty/tty_io.c:2660 vfs_ioctl fs/ioctl.c:47 [inline] ksys_ioctl+0x123/0x180 fs/ioctl.c:763 __do_sys_ioctl fs/ioctl.c:772 [inline] __se_sys_ioctl fs/ioctl.c:770 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:770 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x45b399 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f7d13c11c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: 
ffffffffffffffda RBX: 00007f7d13c126d4 RCX: 000000000045b399 RDX: 0000000020000080 RSI: 000000000000560a RDI: 0000000000000003 RBP: 000000000075bf20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000000666 R14: 00000000004c7f04 R15: 000000000075bf2c Modules linked in: ---[ end trace 80970faf7a67eb77 ]--- RIP: 0010:vt_ioctl+0x1f96/0x26d0 drivers/tty/vt/vt_ioctl.c:883 Code: 74 41 e8 bd a6 84 fd 48 89 d8 48 c1 e8 03 42 80 3c 28 00 0f 85 e4 04 00 00 48 8b 03 48 8d b8 40 03 00 00 48 89 fa 48 c1 ea 03 <42> 0f b6 14 2a 84 d2 74 09 80 fa 03 0f 8e b1 05 00 00 44 89 b8 40 RSP: 0018:ffffc900086d7bb0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffffffff8c34ee88 RCX: ffffc9001415c000 RDX: 0000000000000068 RSI: ffffffff83f0e6e3 RDI: 0000000000000340 RBP: ffffc900086d7cd0 R08: ffff888054ce0100 R09: fffffbfff16a2f6d R10: ffff888054ce0998 R11: ffff888054ce0100 R12: 000000000000001d R13: dffffc0000000000 R14: 1ffff920010daf79 R15: 000000000000ff7f FS: 00007f7d13c12700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd477e3c38 CR3: 0000000095d0a000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Cc: stable Reported-by: syzbot Link: https://lore.kernel.org/r/20200210190721.200418-1-edumazet@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/vt/vt_ioctl.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index be7990548afe..c320fefab360 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -876,15 +876,20 @@ int vt_ioctl(struct tty_struct *tty, return -EINVAL; for (i = 0; i < MAX_NR_CONSOLES; i++) { + struct vc_data *vcp; + if 
(!vc_cons[i].d) continue; console_lock(); - if (v.v_vlin) - vc_cons[i].d->vc_scan_lines = v.v_vlin; - if (v.v_clin) - vc_cons[i].d->vc_font.height = v.v_clin; - vc_cons[i].d->vc_resize_user = 1; - vc_resize(vc_cons[i].d, v.v_cols, v.v_rows); + vcp = vc_cons[i].d; + if (vcp) { + if (v.v_vlin) + vcp->vc_scan_lines = v.v_vlin; + if (v.v_clin) + vcp->vc_font.height = v.v_clin; + vcp->vc_resize_user = 1; + vc_resize(vcp, v.v_cols, v.v_rows); + } console_unlock(); } break; -- GitLab From b34e5f9f94d6c05a2c170c235382bf602abf3d1a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 11 Feb 2020 15:55:59 +0200 Subject: [PATCH 0258/1278] serial: 8250: Check UPF_IRQ_SHARED in advance [ Upstream commit 7febbcbc48fc92e3f33863b32ed715ba4aff18c4 ] The commit 54e53b2e8081 ("tty: serial: 8250: pass IRQ shared flag to UART ports") nicely explained the problem: ---8<---8<--- On some systems IRQ lines between multiple UARTs might be shared. If so, the irqflags have to be configured accordingly. The reason is: The 8250 port startup code performs IRQ tests *before* the IRQ handler for that particular port is registered. This is performed in serial8250_do_startup(). This function checks whether IRQF_SHARED is configured and only then disables the IRQ line while testing. This test is performed upon each open() of the UART device. Imagine two UARTs share the same IRQ line: On is already opened and the IRQ is active. When the second UART is opened, the IRQ line has to be disabled while performing IRQ tests. Otherwise an IRQ might handler might be invoked, but the IRQ itself cannot be handled, because the corresponding handler isn't registered, yet. That's because the 8250 code uses a chain-handler and invokes the corresponding port's IRQ handling routines himself. Unfortunately this IRQF_SHARED flag isn't configured for UARTs probed via device tree even if the IRQs are shared. 
This way, the actual and shared IRQ line isn't disabled while performing tests and the kernel correctly detects a spurious IRQ. So, adding this flag to the DT probe solves the issue. Note: The UPF_SHARE_IRQ flag is configured unconditionally. Therefore, the IRQF_SHARED flag can be set unconditionally as well. Example stack trace by performing `echo 1 > /dev/ttyS2` on a non-patched system: |irq 85: nobody cared (try booting with the "irqpoll" option) | [...] |handlers: |[] irq_default_primary_handler threaded [] serial8250_interrupt |Disabling IRQ #85 ---8<---8<--- But unfortunately didn't fix the root cause. Let's try again here by moving IRQ flag assignment from serial_link_irq_chain() to serial8250_do_startup(). This should fix the similar issue reported for 8250_pnp case. Since this change we don't need to have custom solutions in 8250_aspeed_vuart and 8250_of drivers, thus, drop them. Fixes: 1c2f04937b3e ("serial: 8250: add IRQ trigger support") Reported-by: Li RongQing Cc: Kurt Kanzenbach Cc: Vikram Pandita Signed-off-by: Andy Shevchenko Cc: stable Acked-by: Kurt Kanzenbach Link: https://lore.kernel.org/r/20200211135559.85960-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_aspeed_vuart.c | 1 - drivers/tty/serial/8250/8250_core.c | 5 ++--- drivers/tty/serial/8250/8250_port.c | 4 ++++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index 33a801353114..0a89df390f24 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -256,7 +256,6 @@ static int aspeed_vuart_probe(struct platform_device *pdev) port.port.line = rc; port.port.irq = irq_of_parse_and_map(np, 0); - port.port.irqflags = IRQF_SHARED; port.port.iotype = UPIO_MEM; port.port.type = PORT_16550A; port.port.uartclk = clk; diff --git a/drivers/tty/serial/8250/8250_core.c 
b/drivers/tty/serial/8250/8250_core.c index c698ebab6d3b..5017a0f46b82 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -181,7 +181,7 @@ static int serial_link_irq_chain(struct uart_8250_port *up) struct hlist_head *h; struct hlist_node *n; struct irq_info *i; - int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0; + int ret; mutex_lock(&hash_mutex); @@ -216,9 +216,8 @@ static int serial_link_irq_chain(struct uart_8250_port *up) INIT_LIST_HEAD(&up->list); i->head = &up->list; spin_unlock_irq(&i->lock); - irq_flags |= up->port.irqflags; ret = request_irq(up->port.irq, serial8250_interrupt, - irq_flags, up->port.name, i); + up->port.irqflags, up->port.name, i); if (ret < 0) serial_do_unlink(i, up); } diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index a73d2bc4b685..90a93c001e16 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2258,6 +2258,10 @@ int serial8250_do_startup(struct uart_port *port) } } + /* Check if we need to have shared IRQs */ + if (port->irq && (up->port.flags & UPF_SHARE_IRQ)) + up->port.irqflags |= IRQF_SHARED; + if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { unsigned char iir1; /* -- GitLab From 719742a77e4733e99ea066363f4487fa7513bc5d Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Thu, 20 Feb 2020 20:04:30 -0800 Subject: [PATCH 0259/1278] lib/stackdepot.c: fix global out-of-bounds in stack_slabs [ Upstream commit 305e519ce48e935702c32241f07d393c3c8fed3e ] Walter Wu has reported a potential case in which init_stack_slab() is called after stack_slabs[STACK_ALLOC_MAX_SLABS - 1] has already been initialized. In that case init_stack_slab() will overwrite stack_slabs[STACK_ALLOC_MAX_SLABS], which may result in a memory corruption. Link: http://lkml.kernel.org/r/20200218102950.260263-1-glider@google.com Fixes: cd11016e5f521 ("mm, kasan: stackdepot implementation. 
Enable stackdepot for SLAB") Signed-off-by: Alexander Potapenko Reported-by: Walter Wu Cc: Dmitry Vyukov Cc: Matthias Brugger Cc: Thomas Gleixner Cc: Josh Poimboeuf Cc: Kate Stewart Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/stackdepot.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index f87d138e9672..759ff419fe61 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -92,15 +92,19 @@ static bool init_stack_slab(void **prealloc) return true; if (stack_slabs[depot_index] == NULL) { stack_slabs[depot_index] = *prealloc; + *prealloc = NULL; } else { - stack_slabs[depot_index + 1] = *prealloc; + /* If this is the last depot slab, do not touch the next one. */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { + stack_slabs[depot_index + 1] = *prealloc; + *prealloc = NULL; + } /* * This smp_store_release pairs with smp_load_acquire() from * |next_slab_inited| above and in depot_save_stack(). */ smp_store_release(&next_slab_inited, 1); } - *prealloc = NULL; return true; } -- GitLab From ee1238c0285f40d8297e70f3aaa0d8a981ea0b53 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 4 Feb 2020 15:26:29 -0800 Subject: [PATCH 0260/1278] KVM: nVMX: Don't emulate instructions in guest mode [ Upstream commit 07721feee46b4b248402133228235318199b05ec ] vmx_check_intercept is not yet fully implemented. To avoid emulating instructions disallowed by the L1 hypervisor, refuse to emulate instructions by default. 
Cc: stable@vger.kernel.org [Made commit, added commit msg - Oliver] Signed-off-by: Oliver Upton Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 86037cc11419..64837f07d955 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -12340,7 +12340,7 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, } /* TODO: check more intercepts... */ - return X86EMUL_CONTINUE; + return X86EMUL_UNHANDLEABLE; } #ifdef CONFIG_X86_64 -- GitLab From d63d922c9717df41e853df8ebb1756eea65eff1e Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 7 Feb 2020 09:29:11 -0500 Subject: [PATCH 0261/1278] ext4: fix a data race in EXT4_I(inode)->i_disksize commit 35df4299a6487f323b0aca120ea3f485dfee2ae3 upstream. EXT4_I(inode)->i_disksize could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in ext4_write_end [ext4] / ext4_writepages [ext4] write to 0xffff91c6713b00f8 of 8 bytes by task 49268 on cpu 127: ext4_write_end+0x4e3/0x750 [ext4] ext4_update_i_disksize at fs/ext4/ext4.h:3032 (inlined by) ext4_update_inode_size at fs/ext4/ext4.h:3046 (inlined by) ext4_write_end at fs/ext4/inode.c:1287 generic_perform_write+0x208/0x2a0 ext4_buffered_write_iter+0x11f/0x210 [ext4] ext4_file_write_iter+0xce/0x9e0 [ext4] new_sync_write+0x29c/0x3b0 __vfs_write+0x92/0xa0 vfs_write+0x103/0x260 ksys_write+0x9d/0x130 __x64_sys_write+0x4c/0x60 do_syscall_64+0x91/0xb47 entry_SYSCALL_64_after_hwframe+0x49/0xbe read to 0xffff91c6713b00f8 of 8 bytes by task 24872 on cpu 37: ext4_writepages+0x10ac/0x1d00 [ext4] mpage_map_and_submit_extent at fs/ext4/inode.c:2468 (inlined by) ext4_writepages at fs/ext4/inode.c:2772 do_writepages+0x5e/0x130 __writeback_single_inode+0xeb/0xb20 writeback_sb_inodes+0x429/0x900 __writeback_inodes_wb+0xc4/0x150 wb_writeback+0x4bd/0x870 wb_workfn+0x6b4/0x960 process_one_work+0x54c/0xbe0 worker_thread+0x80/0x650 kthread+0x1e0/0x200 
ret_from_fork+0x27/0x50 Reported by Kernel Concurrency Sanitizer on: CPU: 37 PID: 24872 Comm: kworker/u261:2 Tainted: G W O L 5.5.0-next-20200204+ #5 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 Workqueue: writeback wb_workfn (flush-7:0) Since only the read is operating as lockless (outside of the "i_data_sem"), load tearing could introduce a logic bug. Fix it by adding READ_ONCE() for the read and WRITE_ONCE() for the write. Signed-off-by: Qian Cai Link: https://lore.kernel.org/r/1581085751-31793-1-git-send-email-cai@lca.pw Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index fcee1f9c7fe3..f88be401befb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2867,7 +2867,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) !inode_is_locked(inode)); down_write(&EXT4_I(inode)->i_data_sem); if (newsize > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = newsize; + WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize); up_write(&EXT4_I(inode)->i_data_sem); } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 57118ba82929..8bfd1ea28234 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2564,7 +2564,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, * truncate are avoided by checking i_size under i_data_sem. */ disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; - if (disksize > EXT4_I(inode)->i_disksize) { + if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) { int err2; loff_t i_size; -- GitLab From 6538b08e720d93a92c2dcb1215e761db248931c9 Mon Sep 17 00:00:00 2001 From: Shijie Luo Date: Sat, 15 Feb 2020 03:02:06 -0500 Subject: [PATCH 0262/1278] ext4: add cond_resched() to __ext4_find_entry() commit 9424ef56e13a1f14c57ea161eed3ecfdc7b2770e upstream. 
We tested a soft lockup problem in linux 4.19 which could also be found in linux 5.x. When a dir inode takes up a large number of blocks, and if the directory is growing when we are searching, it's possible the restart branch could be called many times, and the do while loop could hold the CPU for a long time. Here is the call trace in linux 4.19. [ 473.756186] Call trace: [ 473.756196] dump_backtrace+0x0/0x198 [ 473.756199] show_stack+0x24/0x30 [ 473.756205] dump_stack+0xa4/0xcc [ 473.756210] watchdog_timer_fn+0x300/0x3e8 [ 473.756215] __hrtimer_run_queues+0x114/0x358 [ 473.756217] hrtimer_interrupt+0x104/0x2d8 [ 473.756222] arch_timer_handler_virt+0x38/0x58 [ 473.756226] handle_percpu_devid_irq+0x90/0x248 [ 473.756231] generic_handle_irq+0x34/0x50 [ 473.756234] __handle_domain_irq+0x68/0xc0 [ 473.756236] gic_handle_irq+0x6c/0x150 [ 473.756238] el1_irq+0xb8/0x140 [ 473.756286] ext4_es_lookup_extent+0xdc/0x258 [ext4] [ 473.756310] ext4_map_blocks+0x64/0x5c0 [ext4] [ 473.756333] ext4_getblk+0x6c/0x1d0 [ext4] [ 473.756356] ext4_bread_batch+0x7c/0x1f8 [ext4] [ 473.756379] ext4_find_entry+0x124/0x3f8 [ext4] [ 473.756402] ext4_lookup+0x8c/0x258 [ext4] [ 473.756407] __lookup_hash+0x8c/0xe8 [ 473.756411] filename_create+0xa0/0x170 [ 473.756413] do_mkdirat+0x6c/0x140 [ 473.756415] __arm64_sys_mkdirat+0x28/0x38 [ 473.756419] el0_svc_common+0x78/0x130 [ 473.756421] el0_svc_handler+0x38/0x78 [ 473.756423] el0_svc+0x8/0xc [ 485.755156] watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [tmp:5149] Add cond_resched() to avoid soft lockup and to improve system responsiveness.
Link: https://lore.kernel.org/r/20200215080206.13293-1-luoshijie1@huawei.com Signed-off-by: Shijie Luo Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a7b7e0783eed..3f7b3836166c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1430,6 +1430,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, /* * We deal with the read-ahead logic here. */ + cond_resched(); if (ra_ptr >= ra_max) { /* Refill the readahead buffer */ ra_ptr = 0; -- GitLab From 77ed838c16339be8ecb98e0da9b50b892820336c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Feb 2020 11:08:35 +0100 Subject: [PATCH 0263/1278] ext4: fix mount failure with quota configured as module commit 9db176bceb5c5df4990486709da386edadc6bd1d upstream. When CONFIG_QFMT_V2 is configured as a module, the test in ext4_feature_set_ok() fails and so mount of filesystems with quota or project features fails. Fix the test to use IS_ENABLED macro which works properly even for modules. 
Link: https://lore.kernel.org/r/20200221100835.9332-1-jack@suse.cz Fixes: d65d87a07476 ("ext4: improve explanation of a mount failure caused by a misconfigured kernel") Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5b9e7377f26e..8ce6b741cc5a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2863,7 +2863,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#if !defined(CONFIG_QUOTA) || !defined(CONFIG_QFMT_V2) +#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2) if (!readonly && (ext4_has_feature_quota(sb) || ext4_has_feature_project(sb))) { ext4_msg(sb, KERN_ERR, -- GitLab From a3294abaf9006db9ead394064952763d2e552bec Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 19 Feb 2020 10:30:46 -0800 Subject: [PATCH 0264/1278] ext4: rename s_journal_flag_rwsem to s_writepages_rwsem commit bbd55937de8f2754adc5792b0f8e5ff7d9c0420e upstream. In preparation for making s_journal_flag_rwsem synchronize ext4_writepages() with changes to both the EXTENTS and JOURNAL_DATA flags (rather than just JOURNAL_DATA as it does currently), rename it to s_writepages_rwsem. Link: https://lore.kernel.org/r/20200219183047.47417-2-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 10 +++++----- fs/ext4/super.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f88be401befb..1bffa484d8d1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1533,7 +1533,7 @@ struct ext4_sb_info { struct ratelimit_state s_msg_ratelimit_state; /* Barrier between changing inodes' journal flags and writepages ops. 
*/ - struct percpu_rw_semaphore s_journal_flag_rwsem; + struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev; }; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8bfd1ea28234..1e2edebd0929 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2744,7 +2744,7 @@ static int ext4_writepages(struct address_space *mapping, if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; - percpu_down_read(&sbi->s_journal_flag_rwsem); + percpu_down_read(&sbi->s_writepages_rwsem); trace_ext4_writepages(inode, wbc); if (dax_mapping(mapping)) { @@ -2974,7 +2974,7 @@ static int ext4_writepages(struct address_space *mapping, out_writepages: trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); - percpu_up_read(&sbi->s_journal_flag_rwsem); + percpu_up_read(&sbi->s_writepages_rwsem); return ret; } @@ -6050,7 +6050,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) } } - percpu_down_write(&sbi->s_journal_flag_rwsem); + percpu_down_write(&sbi->s_writepages_rwsem); jbd2_journal_lock_updates(journal); /* @@ -6067,7 +6067,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) err = jbd2_journal_flush(journal); if (err < 0) { jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); ext4_inode_resume_unlocked_dio(inode); return err; } @@ -6076,7 +6076,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ext4_set_aops(inode); jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); if (val) up_write(&EXT4_I(inode)->i_mmap_sem); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8ce6b741cc5a..09b443709bca 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -939,7 +939,7 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); 
percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(get_qf_name(sb, sbi, i)); @@ -4396,7 +4396,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, GFP_KERNEL); if (!err) - err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem); + err = percpu_init_rwsem(&sbi->s_writepages_rwsem); if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); @@ -4490,7 +4490,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); failed_mount5: ext4_ext_release(sb); ext4_release_system_zone(sb); -- GitLab From bcc1eab71a67c46b9e24544ac7923f44444174ce Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 19 Feb 2020 10:30:47 -0800 Subject: [PATCH 0265/1278] ext4: fix race between writepages and enabling EXT4_EXTENTS_FL commit cb85f4d23f794e24127f3e562cb3b54b0803f456 upstream. If EXT4_EXTENTS_FL is set on an inode while ext4_writepages() is running on it, the following warning in ext4_add_complete_io() can be hit: WARNING: CPU: 1 PID: 0 at fs/ext4/page-io.c:234 ext4_put_io_end_defer+0xf0/0x120 Here's a minimal reproducer (not 100% reliable) (root isn't required): while true; do sync done & while true; do rm -f file touch file chattr -e file echo X >> file chattr +e file done The problem is that in ext4_writepages(), ext4_should_dioread_nolock() (which only returns true on extent-based files) is checked once to set the number of reserved journal credits, and also again later to select the flags for ext4_map_blocks() and copy the reserved journal handle to ext4_io_end::handle. 
But if EXT4_EXTENTS_FL is being concurrently set, the first check can see dioread_nolock disabled while the later one can see it enabled, causing the reserved handle to unexpectedly be NULL. Since changing EXT4_EXTENTS_FL is uncommon, and there may be other races related to doing so as well, fix this by synchronizing changing EXT4_EXTENTS_FL with ext4_writepages() via the existing s_writepages_rwsem (previously called s_journal_flag_rwsem). This was originally reported by syzbot without a reproducer at https://syzkaller.appspot.com/bug?extid=2202a584a00fffd19fbf, but now that dioread_nolock is the default I also started seeing this when running syzkaller locally. Link: https://lore.kernel.org/r/20200219183047.47417-3-ebiggers@kernel.org Reported-by: syzbot+2202a584a00fffd19fbf@syzkaller.appspotmail.com Fixes: 6b523df4fb5a ("ext4: use transaction reservation for extent conversion in ext4_end_io") Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 5 ++++- fs/ext4/migrate.c | 27 +++++++++++++++++++-------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1bffa484d8d1..b162f602c430 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1532,7 +1532,10 @@ struct ext4_sb_info { struct ratelimit_state s_warning_ratelimit_state; struct ratelimit_state s_msg_ratelimit_state; - /* Barrier between changing inodes' journal flags and writepages ops. */ + /* + * Barrier between writepages ops and changing any inode's JOURNAL_DATA + * or EXTENTS flag. 
+ */ struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev; }; diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 78d45c7d3fa7..0d785868cc50 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -434,6 +434,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) int ext4_ext_migrate(struct inode *inode) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); handle_t *handle; int retval = 0, i; __le32 *i_data; @@ -458,6 +459,8 @@ int ext4_ext_migrate(struct inode *inode) */ return retval; + percpu_down_write(&sbi->s_writepages_rwsem); + /* * Worst case we can touch the allocation bitmaps, a bgd * block, and a block to link in the orphan list. We do need @@ -468,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(handle)) { retval = PTR_ERR(handle); - return retval; + goto out_unlock; } goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; @@ -479,7 +482,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(tmp_inode)) { retval = PTR_ERR(tmp_inode); ext4_journal_stop(handle); - return retval; + goto out_unlock; } i_size_write(tmp_inode, i_size_read(inode)); /* @@ -521,7 +524,7 @@ int ext4_ext_migrate(struct inode *inode) */ ext4_orphan_del(NULL, tmp_inode); retval = PTR_ERR(handle); - goto out; + goto out_tmp_inode; } ei = EXT4_I(inode); @@ -602,10 +605,11 @@ int ext4_ext_migrate(struct inode *inode) /* Reset the extent details */ ext4_ext_tree_init(handle, tmp_inode); ext4_journal_stop(handle); -out: +out_tmp_inode: unlock_new_inode(tmp_inode); iput(tmp_inode); - +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return retval; } @@ -615,7 +619,8 @@ int ext4_ext_migrate(struct inode *inode) int ext4_ind_migrate(struct inode *inode) { struct ext4_extent_header *eh; - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_super_block *es = sbi->s_es; struct ext4_inode_info *ei = 
EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; @@ -639,9 +644,13 @@ int ext4_ind_migrate(struct inode *inode) if (test_opt(inode->i_sb, DELALLOC)) ext4_alloc_da_blocks(inode); + percpu_down_write(&sbi->s_writepages_rwsem); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_unlock; + } down_write(&EXT4_I(inode)->i_data_sem); ret = ext4_ext_check_inode(inode); @@ -676,5 +685,7 @@ int ext4_ind_migrate(struct inode *inode) errout: ext4_journal_stop(handle); up_write(&EXT4_I(inode)->i_data_sem); +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return ret; } -- GitLab From 02a67798862f1c848e91859a7f4b291bcfdcf563 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Feb 2020 15:26:30 -0800 Subject: [PATCH 0266/1278] KVM: nVMX: Refactor IO bitmap checks into helper function commit e71237d3ff1abf9f3388337cfebf53b96df2020d upstream. Checks against the IO bitmap are useful for both instruction emulation and VM-exit reflection. Refactor the IO bitmap checks into a helper function. 
Signed-off-by: Oliver Upton Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 64837f07d955..1c0c0e87f7e6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4991,6 +4991,26 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) (ss.selector & SEGMENT_RPL_MASK)); } +static bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, + unsigned int port, int size); +static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + unsigned long exit_qualification; + unsigned int port; + int size; + + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + port = exit_qualification >> 16; + size = (exit_qualification & 7) + 1; + + return nested_vmx_check_io_bitmaps(vcpu, port, size); +} + /* * Check if guest state is valid. Returns true if valid, false if * not. @@ -8521,23 +8541,17 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { static const int kvm_vmx_max_exit_handlers = ARRAY_SIZE(kvm_vmx_exit_handlers); -static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) +/* + * Return true if an IO instruction with the specified port and size should cause + * a VM-exit into L1. 
+ */ +bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, + int size) { - unsigned long exit_qualification; + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); gpa_t bitmap, last_bitmap; - unsigned int port; - int size; u8 b; - if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) - return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - port = exit_qualification >> 16; - size = (exit_qualification & 7) + 1; - last_bitmap = (gpa_t)-1; b = -1; -- GitLab From 9effa0ed199c7a02ad4cfe50fcec772645664b18 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Feb 2020 15:26:31 -0800 Subject: [PATCH 0267/1278] KVM: nVMX: Check IO instruction VM-exit conditions commit 35a571346a94fb93b5b3b6a599675ef3384bc75c upstream. Consult the 'unconditional IO exiting' and 'use IO bitmaps' VM-execution controls when checking instruction interception. If the 'use IO bitmaps' VM-execution control is 1, check the instruction access against the IO bitmaps to determine if the instruction causes a VM-exit. 
Signed-off-by: Oliver Upton Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 59 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1c0c0e87f7e6..b5fa8fc0014d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4997,7 +4997,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { unsigned long exit_qualification; - unsigned int port; + unsigned short port; int size; if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) @@ -12335,6 +12335,39 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, to_vmx(vcpu)->nested.sync_shadow_vmcs = true; } +static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned short port; + bool intercept; + int size; + + if (info->intercept == x86_intercept_in || + info->intercept == x86_intercept_ins) { + port = info->src_val; + size = info->dst_bytes; + } else { + port = info->dst_val; + size = info->src_bytes; + } + + /* + * If the 'use IO bitmaps' VM-execution control is 0, IO instruction + * VM-exits depend on the 'unconditional IO exiting' VM-execution + * control. + * + * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps. + */ + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + intercept = nested_cpu_has(vmcs12, + CPU_BASED_UNCOND_IO_EXITING); + else + intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); + + return intercept ? 
X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; +} + static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage) @@ -12342,18 +12375,30 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + switch (info->intercept) { /* * RDPID causes #UD if disabled through secondary execution controls. * Because it is marked as EmulateOnUD, we need to intercept it here. */ - if (info->intercept == x86_intercept_rdtscp && - !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { - ctxt->exception.vector = UD_VECTOR; - ctxt->exception.error_code_valid = false; - return X86EMUL_PROPAGATE_FAULT; - } + case x86_intercept_rdtscp: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { + ctxt->exception.vector = UD_VECTOR; + ctxt->exception.error_code_valid = false; + return X86EMUL_PROPAGATE_FAULT; + } + break; + + case x86_intercept_in: + case x86_intercept_ins: + case x86_intercept_out: + case x86_intercept_outs: + return vmx_check_intercept_io(vcpu, info); /* TODO: check more intercepts... */ + default: + break; + } + return X86EMUL_UNHANDLEABLE; } -- GitLab From e6bff0cdf51c26b6216f45f88e39aa3eb16b2115 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 20 Feb 2020 18:22:05 +0100 Subject: [PATCH 0268/1278] KVM: nVMX: handle nested posted interrupts when apicv is disabled for L1 commit 91a5f413af596ad01097e59bf487eb07cb3f1331 upstream. Even when APICv is disabled for L1 it can (and, actually, is) still available for L2, this means we need to always call vmx_deliver_nested_posted_interrupt() when attempting an interrupt delivery. 
Suggested-by: Paolo Bonzini Signed-off-by: Vitaly Kuznetsov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/lapic.c | 5 +---- arch/x86/kvm/svm.c | 7 ++++++- arch/x86/kvm/vmx.c | 13 +++++++++---- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d0e17813a9b0..2cdf654ed132 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1006,7 +1006,7 @@ struct kvm_x86_ops { void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); - void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8715711f2755..d8c3fa015432 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -993,11 +993,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic_clear_vector(vector, apic->regs + APIC_TMR); } - if (vcpu->arch.apicv_active) - kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); - else { + if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) { kvm_lapic_set_irr(vector, apic); - kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 52edb8cf1c40..8e65a9b40c18 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4631,8 +4631,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) +static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int 
vec) { + if (!vcpu->arch.apicv_active) + return -1; + kvm_lapic_set_irr(vec, vcpu->arch.apic); smp_mb__after_atomic(); @@ -4641,6 +4644,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) kvm_cpu_get_apicid(vcpu->cpu)); else kvm_vcpu_wake_up(vcpu); + + return 0; } static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b5fa8fc0014d..acf72da288f9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5541,24 +5541,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, * 2. If target vcpu isn't running(root mode), kick it to pick up the * interrupt from PIR in next vmentry. */ -static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) { struct vcpu_vmx *vmx = to_vmx(vcpu); int r; r = vmx_deliver_nested_posted_interrupt(vcpu, vector); if (!r) - return; + return 0; + + if (!vcpu->arch.apicv_active) + return -1; if (pi_test_and_set_pir(vector, &vmx->pi_desc)) - return; + return 0; /* If a previous notification has sent the IPI, nothing to do. */ if (pi_test_and_set_on(&vmx->pi_desc)) - return; + return 0; if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); + + return 0; } /* -- GitLab From 647bdd69c205143bbbd77c1053f6baee1455f434 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 21 Feb 2020 22:04:46 +0800 Subject: [PATCH 0269/1278] KVM: apic: avoid calculating pending eoi from an uninitialized val commit 23520b2def95205f132e167cf5b25c609975e959 upstream. When pv_eoi_get_user() fails, 'val' may remain uninitialized and the return value of pv_eoi_get_pending() becomes random. Fix the issue by initializing the variable. 
Reviewed-by: Vitaly Kuznetsov Signed-off-by: Miaohe Lin Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d8c3fa015432..537c36b55b5d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -566,9 +566,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) { u8 val; - if (pv_eoi_get_user(vcpu, &val) < 0) + if (pv_eoi_get_user(vcpu, &val) < 0) { apic_debug("Can't read EOI MSR value: 0x%llx\n", (unsigned long long)vcpu->arch.pv_eoi.msr_val); + return false; + } return val & 0x1; } -- GitLab From 9b32172a1bc80715309cd28e624dc2babd0ed48d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 13 Feb 2020 10:47:31 -0500 Subject: [PATCH 0270/1278] btrfs: fix bytes_may_use underflow in prealloc error condtition commit b778cf962d71a0e737923d55d0432f3bd287258e upstream. I hit the following warning while running my error injection stress testing: WARNING: CPU: 3 PID: 1453 at fs/btrfs/space-info.h:108 btrfs_free_reserved_data_space_noquota+0xfd/0x160 [btrfs] RIP: 0010:btrfs_free_reserved_data_space_noquota+0xfd/0x160 [btrfs] Call Trace: btrfs_free_reserved_data_space+0x4f/0x70 [btrfs] __btrfs_prealloc_file_range+0x378/0x470 [btrfs] elfcorehdr_read+0x40/0x40 ? elfcorehdr_read+0x40/0x40 ? btrfs_commit_transaction+0xca/0xa50 [btrfs] ? dput+0xb4/0x2a0 ? btrfs_log_dentry_safe+0x55/0x70 [btrfs] ? btrfs_sync_file+0x30e/0x420 [btrfs] ? do_fsync+0x38/0x70 ? __x64_sys_fdatasync+0x13/0x20 ? do_syscall_64+0x5b/0x1b0 ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 This happens if we fail to insert our reserved file extent. At this point we've already converted our reservation from ->bytes_may_use to ->bytes_reserved. 
However once we break we will attempt to free everything from [cur_offset, end] from ->bytes_may_use, but our extent reservation will overlap part of this. Fix this problem by adding ins.offset (our extent allocation size) to cur_offset so we remove the actual remaining part from ->bytes_may_use. I validated this fix using my inject-error.py script python inject-error.py -o should_fail_bio -t cache_save_setup -t \ __btrfs_prealloc_file_range \ -t insert_reserved_file_extent.constprop.0 \ -r "-5" ./run-fsstress.sh where run-fsstress.sh simply mounts and runs fsstress on a disk. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index abecc4724a3b..2a196bb134d9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10639,6 +10639,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 cur_offset = start; + u64 clear_offset = start; u64 i_size; u64 cur_bytes; u64 last_alloc = (u64)-1; @@ -10673,6 +10674,15 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, btrfs_end_transaction(trans); break; } + + /* + * We've reserved this space, and thus converted it from + * ->bytes_may_use to ->bytes_reserved. Any error that happens + * from here on out we will only need to clear our reservation + * for the remaining unreserved area, so advance our + * clear_offset by our extent size. 
+ */ + clear_offset += ins.offset; btrfs_dec_block_group_reservations(fs_info, ins.objectid); last_alloc = ins.offset; @@ -10753,9 +10763,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, if (own_trans) btrfs_end_transaction(trans); } - if (cur_offset < end) - btrfs_free_reserved_data_space(inode, NULL, cur_offset, - end - cur_offset + 1); + if (clear_offset < end) + btrfs_free_reserved_data_space(inode, NULL, clear_offset, + end - clear_offset + 1); return ret; } -- GitLab From b3df7d2f336b69725ffc98096092ab6b2f281c1e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 13 Feb 2020 10:47:29 -0500 Subject: [PATCH 0271/1278] btrfs: do not check delayed items are empty for single transaction cleanup commit 1e90315149f3fe148e114a5de86f0196d1c21fa5 upstream. btrfs_assert_delayed_root_empty() will check if the delayed root is completely empty, but this is a filesystem-wide check. On cleanup we may have allowed other transactions to begin, for whatever reason, and thus the delayed root is not empty. So remove this check from btrfs_cleanup_one_transaction(). This however can stay in btrfs_cleanup_transaction(), because it checks only after all of the transactions have been properly cleaned up, and thus is valid.
CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Johannes Thumshirn Reviewed-by: Nikolay Borisov Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d8ab9c5a8b7d..6b4fee5c79f9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4394,7 +4394,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, wake_up(&fs_info->transaction_wait); btrfs_destroy_delayed_inodes(fs_info); - btrfs_assert_delayed_root_empty(fs_info); btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, EXTENT_DIRTY); -- GitLab From 7d08e0e026a9e3e2be99e18aaccf215aa37d34b1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 13 Feb 2020 12:29:50 +0000 Subject: [PATCH 0272/1278] Btrfs: fix btrfs_wait_ordered_range() so that it waits for all ordered extents commit e75fd33b3f744f644061a4f9662bd63f5434f806 upstream. In btrfs_wait_ordered_range() once we find an ordered extent that has finished with an error we exit the loop and don't wait for any other ordered extents that might be still in progress. All the users of btrfs_wait_ordered_range() expect that there are no more ordered extents in progress after that function returns. So past fixes such like the ones from the two following commits: ff612ba7849964 ("btrfs: fix panic during relocation after ENOSPC before writeback happens") 28aeeac1dd3080 ("Btrfs: fix panic when starting bg cache writeout after IO error") don't work when there are multiple ordered extents in the range. Fix that by making btrfs_wait_ordered_range() wait for all ordered extents even after it finds one that had an error. 
Link: https://github.com/kdave/btrfs-progs/issues/228#issuecomment-569777554 CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ordered-data.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a3aca495e33e..d2287ea9fc50 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -838,10 +838,15 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) } btrfs_start_ordered_extent(inode, ordered, 1); end = ordered->file_offset; + /* + * If the ordered extent had an error save the error but don't + * exit without waiting first for all other ordered extents in + * the range to complete. + */ if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) ret = -EIO; btrfs_put_ordered_extent(ordered); - if (ret || end == 0 || end == start) + if (end == 0 || end == start) break; end--; } -- GitLab From b982e7bc19217e2b7ab97533cbfb11331769ab63 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 12 Feb 2020 21:08:59 -0800 Subject: [PATCH 0273/1278] scsi: Revert "RDMA/isert: Fix a recently introduced regression related to logout" commit 76261ada16dcc3be610396a46d35acc3efbda682 upstream. Since commit 04060db41178 introduces soft lockups when toggling network interfaces, revert it. Link: https://marc.info/?l=target-devel&m=158157054906196 Cc: Rahul Kundu Cc: Mike Marciniszyn Cc: Sagi Grimberg Reported-by: Dakshaja Uppalapati Fixes: 04060db41178 ("scsi: RDMA/isert: Fix a recently introduced regression related to logout") Signed-off-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 12 ++++++++++++ drivers/target/iscsi/iscsi_target.c | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 9b5691f306a2..ee3f630c9217 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2582,6 +2582,17 @@ isert_wait4logout(struct isert_conn *isert_conn) } } +static void +isert_wait4cmds(struct iscsi_conn *conn) +{ + isert_info("iscsi_conn %p\n", conn); + + if (conn->sess) { + target_sess_cmd_list_set_waiting(conn->sess->se_sess); + target_wait_for_sess_cmds(conn->sess->se_sess); + } +} + /** * isert_put_unsol_pending_cmds() - Drop commands waiting for * unsolicitate dataout @@ -2629,6 +2640,7 @@ static void isert_wait_conn(struct iscsi_conn *conn) ib_drain_qp(isert_conn->qp); isert_put_unsol_pending_cmds(conn); + isert_wait4cmds(conn); isert_wait4logout(isert_conn); queue_work(isert_release_wq, &isert_conn->release_work); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 37d64acea5e1..21ce92ee1652 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4155,6 +4155,9 @@ int iscsit_close_connection( iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); + if (conn->conn_transport->iscsit_wait_conn) + conn->conn_transport->iscsit_wait_conn(conn); + /* * During Connection recovery drop unacknowledged out of order * commands for this connection, and prepare the other commands @@ -4240,9 +4243,6 @@ int iscsit_close_connection( target_sess_cmd_list_set_waiting(sess->se_sess); target_wait_for_sess_cmds(sess->se_sess); - if (conn->conn_transport->iscsit_wait_conn) - conn->conn_transport->iscsit_wait_conn(conn); - ahash_request_free(conn->conn_tx_hash); if (conn->conn_rx_hash) { struct crypto_ahash *tfm; -- GitLab From 
6b0cc7a954ab2774da34b6954f73ea9888458ebb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 12 Feb 2020 21:09:00 -0800 Subject: [PATCH 0274/1278] scsi: Revert "target: iscsi: Wait for all commands to finish before freeing a session" commit 807b9515b7d044cf77df31f1af9d842a76ecd5cb upstream. Since commit e9d3009cb936 introduced a regression and since the fix for that regression was not perfect, revert this commit. Link: https://marc.info/?l=target-devel&m=158157054906195 Cc: Rahul Kundu Cc: Mike Marciniszyn Cc: Sagi Grimberg Reported-by: Dakshaja Uppalapati Fixes: e9d3009cb936 ("scsi: target: iscsi: Wait for all commands to finish before freeing a session") Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 10 ++-------- include/scsi/iscsi_proto.h | 1 - 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 21ce92ee1652..fb7bd422e2e1 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1158,9 +1158,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length, conn->cid); - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0) - return iscsit_add_reject_cmd(cmd, - ISCSI_REASON_WAITING_FOR_LOGOUT, buf); + target_get_sess_cmd(&cmd->se_cmd, true); cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd, scsilun_to_int(&hdr->lun)); @@ -2006,9 +2004,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, conn->sess->se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG, cmd->sense_buffer + 2); - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0) - return iscsit_add_reject_cmd(cmd, - ISCSI_REASON_WAITING_FOR_LOGOUT, buf); + target_get_sess_cmd(&cmd->se_cmd, true); /* * TASK_REASSIGN for ERL=2 / connection stays inside of @@ -4240,8 +4236,6 @@ int iscsit_close_connection( * 
must wait until they have completed. */ iscsit_check_conn_usage_count(conn); - target_sess_cmd_list_set_waiting(sess->se_sess); - target_wait_for_sess_cmds(sess->se_sess); ahash_request_free(conn->conn_tx_hash); if (conn->conn_rx_hash) { diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h index f0a01a54bd15..df156f1d50b2 100644 --- a/include/scsi/iscsi_proto.h +++ b/include/scsi/iscsi_proto.h @@ -638,7 +638,6 @@ struct iscsi_reject { #define ISCSI_REASON_BOOKMARK_INVALID 9 #define ISCSI_REASON_BOOKMARK_NO_RESOURCES 10 #define ISCSI_REASON_NEGOTIATION_RESET 11 -#define ISCSI_REASON_WAITING_FOR_LOGOUT 12 /* Max. number of Key=Value pairs in a text message */ #define MAX_KEY_VALUE_PAIRS 8192 -- GitLab From 220d4966d22dc505688abb8664cbd2834b1a79eb Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Thu, 30 Jan 2020 19:10:35 -0800 Subject: [PATCH 0275/1278] usb: gadget: composite: Fix bMaxPower for SuperSpeedPlus commit c724417baf162bd3e035659e22cdf990cfb0d917 upstream. SuperSpeedPlus peripherals must report their bMaxPower of the configuration descriptor in units of 8mA as per the USB 3.2 specification. The current switch statement in encode_bMaxPower() only checks for USB_SPEED_SUPER but not USB_SPEED_SUPER_PLUS so the latter falls back to USB 2.0 encoding which uses 2mA units. Replace the switch with a simple if/else. 
Fixes: eae5820b852f ("usb: gadget: composite: Write SuperSpeedPlus config descriptors") Signed-off-by: Jack Pham Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index b29cd3979391..6e30b177aa22 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -440,12 +440,10 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, val = CONFIG_USB_GADGET_VBUS_DRAW; if (!val) return 0; - switch (speed) { - case USB_SPEED_SUPER: - return DIV_ROUND_UP(val, 8); - default: + if (speed < USB_SPEED_SUPER) return DIV_ROUND_UP(val, 2); - } + else + return DIV_ROUND_UP(val, 8); } static int config_buf(struct usb_configuration *config, -- GitLab From edd606c03aeaa06cff00ca9b80f6efdff71f2f6c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 26 Jan 2020 22:05:49 +0000 Subject: [PATCH 0276/1278] staging: rtl8723bs: fix copy of overlapping memory commit 8ae9a588ca35eb9c32dc03299c5e1f4a1e9a9617 upstream. Currently the rtw_sprintf prints the contents of thread_name onto thread_name and this can lead to a potential copy of a string over itself. Avoid this by printing the literal string RTWHALXT instead of the contents of thread_name.
Addresses-Coverity: ("copy of overlapping memory") Fixes: 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver") Signed-off-by: Colin Ian King Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20200126220549.9849-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c index d0b317077511..f92f9073c507 100644 --- a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c +++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c @@ -486,14 +486,13 @@ int rtl8723bs_xmit_thread(void *context) s32 ret; struct adapter *padapter; struct xmit_priv *pxmitpriv; - u8 thread_name[20] = "RTWHALXT"; - + u8 thread_name[20]; ret = _SUCCESS; padapter = context; pxmitpriv = &padapter->xmitpriv; - rtw_sprintf(thread_name, 20, "%s-"ADPT_FMT, thread_name, ADPT_ARG(padapter)); + rtw_sprintf(thread_name, 20, "RTWHALXT-" ADPT_FMT, ADPT_ARG(padapter)); thread_enter(thread_name); DBG_871X("start "FUNC_ADPT_FMT"\n", FUNC_ADPT_ARG(padapter)); -- GitLab From f39b1f511ef3febde59f51a49e9f3fb58c06202a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 5 Feb 2020 15:32:17 +0300 Subject: [PATCH 0277/1278] staging: greybus: use after free in gb_audio_manager_remove_all() commit b7db58105b80fa9232719c8329b995b3addfab55 upstream. When we call kobject_put() and it's the last reference to the kobject then it calls gb_audio_module_release() and frees module. We dereference "module" on the next line which is a use after free. 
Fixes: c77f85bbc91a ("greybus: audio: Fix incorrect counting of 'ida'") Signed-off-by: Dan Carpenter Acked-by: Viresh Kumar Reviewed-by: Vaibhav Agarwal Link: https://lore.kernel.org/r/20200205123217.jreendkyxulqsool@kili.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/audio_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/greybus/audio_manager.c b/drivers/staging/greybus/audio_manager.c index aa6508b44fab..ed7c32542cb3 100644 --- a/drivers/staging/greybus/audio_manager.c +++ b/drivers/staging/greybus/audio_manager.c @@ -90,8 +90,8 @@ void gb_audio_manager_remove_all(void) list_for_each_entry_safe(module, next, &modules_list, list) { list_del(&module->list); - kobject_put(&module->kobj); ida_simple_remove(&module_id, module->id); + kobject_put(&module->kobj); } is_empty = list_empty(&modules_list); -- GitLab From 591f3bc646edf4622f86f9266e4e215bde32538b Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Fri, 14 Feb 2020 12:21:01 -0600 Subject: [PATCH 0278/1278] ecryptfs: replace BUG_ON with error handling code commit 2c2a7552dd6465e8fde6bc9cccf8d66ed1c1eb72 upstream. In crypt_scatterlist, if the crypt_stat argument is not set up correctly, the kernel crashes. Instead, by returning an error code upstream, the error is handled safely. The issue is detected via a static analysis tool written by us. 
Fixes: 237fead619984 (ecryptfs: fs/Makefile and fs/Kconfig) Signed-off-by: Aditya Pakki Signed-off-by: Tyler Hicks Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/crypto.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index bd25ab837011..eed38ae86c6c 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -339,8 +339,10 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, struct extent_crypt_result ecr; int rc = 0; - BUG_ON(!crypt_stat || !crypt_stat->tfm - || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); + if (!crypt_stat || !crypt_stat->tfm + || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)) + return -EINVAL; + if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", crypt_stat->key_size); -- GitLab From 837f007098b2521b8b10ced2fdf07fd1d0af66f9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 10 Feb 2020 10:36:56 +0100 Subject: [PATCH 0279/1278] iommu/vt-d: Fix compile warning from intel-svm.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e7598fac323aad0e502415edeffd567315994dd6 upstream. 
The intel_svm_is_pasid_valid() needs to be marked inline, otherwise it causes the compile warning below: CC [M] drivers/dma/idxd/cdev.o In file included from drivers/dma/idxd/cdev.c:9:0: ./include/linux/intel-svm.h:125:12: warning: ‘intel_svm_is_pasid_valid’ defined but not used [-Wunused-function] static int intel_svm_is_pasid_valid(struct device *dev, int pasid) ^~~~~~~~~~~~~~~~~~~~~~~~ Reported-by: Borislav Petkov Fixes: 15060aba71711 ('iommu/vt-d: Helper function to query if a pasid has any active users') Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- include/linux/intel-svm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 99bc5b3ae26e..733eaf95e207 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -130,7 +130,7 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid) BUG(); } -static int intel_svm_is_pasid_valid(struct device *dev, int pasid) +static inline int intel_svm_is_pasid_valid(struct device *dev, int pasid) { return -EINVAL; } -- GitLab From d3daa3edcf879828fe6767f71b00fc44e24bdd6e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 12 Feb 2020 12:19:41 +0100 Subject: [PATCH 0280/1278] genirq/proc: Reject invalid affinity masks (again) commit cba6437a1854fde5934098ec3bd0ee83af3129f5 upstream. Qian Cai reported that the WARN_ON() in the x86/msi affinity setting code, which catches cases where the affinity setting is not done on the CPU which is the current target of the interrupt, triggers during CPU hotplug stress testing. It turns out that the warning which was added with the commit addressing the MSI affinity race unearthed yet another long standing bug. If user space writes a bogus affinity mask, i.e. it contains no online CPUs, then it calls irq_select_affinity_usr(). 
This was introduced for ALPHA in eee45269b0f5 ("[PATCH] Alpha: convert to generic irq framework (generic part)") and subsequently made available for all architectures in 18404756765c ("genirq: Expose default irq affinity mask (take 3)") which introduced the circumvention of the affinity setting restrictions for interrupts which cannot be moved in process context. The whole exercise is bogus in various aspects: 1) If the interrupt is already started up then there is absolutely no point to honour a bogus interrupt affinity setting from user space. The interrupt is already assigned to an online CPU and it does not make any sense to reassign it to some other randomly chosen online CPU. 2) If the interrupt is not yet started up then there is no point either. A subsequent startup of the interrupt will invoke irq_setup_affinity() anyway which will choose a valid target CPU. So the only correct solution is to just return -EINVAL in case user space wrote an affinity mask which does not contain any online CPUs, except for ALPHA which has its own magic sauce for this.
Fixes: 18404756765c ("genirq: Expose default irq affinity mask (take 3)") Reported-by: Qian Cai Signed-off-by: Thomas Gleixner Tested-by: Qian Cai Link: https://lkml.kernel.org/r/878sl8xdbm.fsf@nanos.tec.linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/irq/internals.h | 2 -- kernel/irq/manage.c | 18 ++---------------- kernel/irq/proc.c | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4ef7f3b820ce..5230c47fc43e 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -119,8 +119,6 @@ static inline void unregister_handler_proc(unsigned int irq, extern bool irq_can_set_affinity_usr(unsigned int irq); -extern int irq_select_affinity_usr(unsigned int irq); - extern void irq_set_thread_affinity(struct irq_desc *desc); extern int irq_do_set_affinity(struct irq_data *data, diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 9c86a3e45110..037e8fc1b008 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -382,23 +382,9 @@ int irq_setup_affinity(struct irq_desc *desc) { return irq_select_affinity(irq_desc_get_irq(desc)); } -#endif +#endif /* CONFIG_AUTO_IRQ_AFFINITY */ +#endif /* CONFIG_SMP */ -/* - * Called when a bogus affinity is set via /proc/irq - */ -int irq_select_affinity_usr(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&desc->lock, flags); - ret = irq_setup_affinity(desc); - raw_spin_unlock_irqrestore(&desc->lock, flags); - return ret; -} -#endif /** * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index c010cc0daf79..b031db9d56c6 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -117,6 +117,28 @@ static int irq_affinity_list_proc_show(struct seq_file *m, void *v) return show_irq_affinity(AFFINITY_LIST, m); } +#ifndef CONFIG_AUTO_IRQ_AFFINITY +static inline int 
irq_select_affinity_usr(unsigned int irq) +{ + /* + * If the interrupt is started up already then this fails. The + * interrupt is assigned to an online CPU already. There is no + * point to move it around randomly. Tell user space that the + * selected mask is bogus. + * + * If not then any change to the affinity is pointless because the + * startup code invokes irq_setup_affinity() which will select + * a online CPU anyway. + */ + return -EINVAL; +} +#else +/* ALPHA magic affinity auto selector. Keep it for historical reasons. */ +static inline int irq_select_affinity_usr(unsigned int irq) +{ + return irq_select_affinity(irq); +} +#endif static ssize_t write_irq_affinity(int type, struct file *file, const char __user *buffer, size_t count, loff_t *pos) -- GitLab From 720c4bc2245c6e48644d21e3c2a4773054758e96 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Feb 2020 12:13:16 +0100 Subject: [PATCH 0281/1278] ALSA: rawmidi: Avoid bit fields for state flags commit dfa9a5efe8b932a84b3b319250aa3ac60c20f876 upstream. The rawmidi state flags (opened, append, active_sensing) are stored in bit fields that can be potentially racy when concurrently accessed without any locks. Although the current code should be fine, there is also no any real benefit by keeping the bitfields for this kind of short number of members. This patch changes those bit fields flags to the simple bool fields. There should be no size increase of the snd_rawmidi_substream by this change. 
Reported-by: syzbot+576cc007eb9f2c968200@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20200214111316.26939-4-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- include/sound/rawmidi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index 6665cb29e1a2..c2a71fd8dfaf 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -92,9 +92,9 @@ struct snd_rawmidi_substream { struct list_head list; /* list of all substream for given stream */ int stream; /* direction */ int number; /* substream number */ - unsigned int opened: 1, /* open flag */ - append: 1, /* append flag (merge more streams) */ - active_sensing: 1; /* send active sensing when close */ + bool opened; /* open flag */ + bool append; /* append flag (merge more streams) */ + bool active_sensing; /* send active sensing when close */ int use_count; /* use counter (for output) */ size_t bytes; struct snd_rawmidi *rmidi; -- GitLab From 29238bccf63b8339a2b65bcbecb07c142f1d7073 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Feb 2020 12:13:14 +0100 Subject: [PATCH 0282/1278] ALSA: seq: Avoid concurrent access to queue flags commit bb51e669fa49feb5904f452b2991b240ef31bc97 upstream. The queue flags are represented in bit fields and the concurrent access may result in unexpected results. Although the current code should be mostly OK as it's only reading a field while writing other fields as KCSAN reported, it's safer to cover both with a proper spinlock protection. This patch fixes the possible concurrent read by protecting with q->owner_lock. Also the queue owner field is protected as well since it's the field to be protected by the lock itself. 
Reported-by: syzbot+65c6c92d04304d0a8efc@syzkaller.appspotmail.com Reported-by: syzbot+e60ddfa48717579799dd@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20200214111316.26939-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_queue.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index 1a6dc4ff44a6..a9a0b2f2708e 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -415,6 +415,7 @@ int snd_seq_queue_check_access(int queueid, int client) int snd_seq_queue_set_owner(int queueid, int client, int locked) { struct snd_seq_queue *q = queueptr(queueid); + unsigned long flags; if (q == NULL) return -EINVAL; @@ -424,8 +425,10 @@ int snd_seq_queue_set_owner(int queueid, int client, int locked) return -EPERM; } + spin_lock_irqsave(&q->owner_lock, flags); q->locked = locked ? 1 : 0; q->owner = client; + spin_unlock_irqrestore(&q->owner_lock, flags); queue_access_unlock(q); queuefree(q); @@ -564,15 +567,17 @@ void snd_seq_queue_client_termination(int client) unsigned long flags; int i; struct snd_seq_queue *q; + bool matched; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if ((q = queueptr(i)) == NULL) continue; spin_lock_irqsave(&q->owner_lock, flags); - if (q->owner == client) + matched = (q->owner == client); + if (matched) q->klocked = 1; spin_unlock_irqrestore(&q->owner_lock, flags); - if (q->owner == client) { + if (matched) { if (q->timer->running) snd_seq_timer_stop(q->timer); snd_seq_timer_reset(q->timer); @@ -764,6 +769,8 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry, int i, bpm; struct snd_seq_queue *q; struct snd_seq_timer *tmr; + bool locked; + int owner; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if ((q = queueptr(i)) == NULL) @@ -775,9 +782,14 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry, else bpm = 0; + spin_lock_irq(&q->owner_lock); + locked = 
q->locked; + owner = q->owner; + spin_unlock_irq(&q->owner_lock); + snd_iprintf(buffer, "queue %d: [%s]\n", q->queue, q->name); - snd_iprintf(buffer, "owned by client : %d\n", q->owner); - snd_iprintf(buffer, "lock status : %s\n", q->locked ? "Locked" : "Free"); + snd_iprintf(buffer, "owned by client : %d\n", owner); + snd_iprintf(buffer, "lock status : %s\n", locked ? "Locked" : "Free"); snd_iprintf(buffer, "queued time events : %d\n", snd_seq_prioq_avail(q->timeq)); snd_iprintf(buffer, "queued tick events : %d\n", snd_seq_prioq_avail(q->tickq)); snd_iprintf(buffer, "timer state : %s\n", tmr->running ? "Running" : "Stopped"); -- GitLab From c33c14e30f3437d419761048f70dd88b7ec797c8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Feb 2020 12:13:15 +0100 Subject: [PATCH 0283/1278] ALSA: seq: Fix concurrent access to queue current tick/time commit dc7497795e014d84699c3b8809ed6df35352dd74 upstream. snd_seq_check_queue() passes the current tick and time of the given queue as a pointer to snd_seq_prioq_cell_out(), but those might be updated concurrently by the seq timer update. Fix it by retrieving the current tick and time via the proper helper functions at first, and pass those values to snd_seq_prioq_cell_out() later in the loops. snd_seq_timer_get_cur_time() takes a new argument and adjusts with the current system time only when it's requested so; this update isn't needed for snd_seq_check_queue(), as it's called either from the interrupt handler or right after queuing. Also, snd_seq_timer_get_cur_tick() is changed to read the value in the spinlock for the concurrency, too. 
Reported-by: syzbot+fd5e0eaa1a32999173b2@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20200214111316.26939-3-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_clientmgr.c | 4 ++-- sound/core/seq/seq_queue.c | 9 ++++++--- sound/core/seq/seq_timer.c | 13 ++++++++++--- sound/core/seq/seq_timer.h | 3 ++- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 92b0d4523a07..6fe93d5f6f71 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -564,7 +564,7 @@ static int update_timestamp_of_queue(struct snd_seq_event *event, event->queue = queue; event->flags &= ~SNDRV_SEQ_TIME_STAMP_MASK; if (real_time) { - event->time.time = snd_seq_timer_get_cur_time(q->timer); + event->time.time = snd_seq_timer_get_cur_time(q->timer, true); event->flags |= SNDRV_SEQ_TIME_STAMP_REAL; } else { event->time.tick = snd_seq_timer_get_cur_tick(q->timer); @@ -1639,7 +1639,7 @@ static int snd_seq_ioctl_get_queue_status(struct snd_seq_client *client, tmr = queue->timer; status->events = queue->tickq->cells + queue->timeq->cells; - status->time = snd_seq_timer_get_cur_time(tmr); + status->time = snd_seq_timer_get_cur_time(tmr, true); status->tick = snd_seq_timer_get_cur_tick(tmr); status->running = tmr->running; diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index a9a0b2f2708e..ea1aa0796276 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -261,6 +261,8 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) { unsigned long flags; struct snd_seq_event_cell *cell; + snd_seq_tick_time_t cur_tick; + snd_seq_real_time_t cur_time; if (q == NULL) return; @@ -277,17 +279,18 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) __again: /* Process tick queue... 
*/ + cur_tick = snd_seq_timer_get_cur_tick(q->timer); for (;;) { - cell = snd_seq_prioq_cell_out(q->tickq, - &q->timer->tick.cur_tick); + cell = snd_seq_prioq_cell_out(q->tickq, &cur_tick); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); } /* Process time queue... */ + cur_time = snd_seq_timer_get_cur_time(q->timer, false); for (;;) { - cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time); + cell = snd_seq_prioq_cell_out(q->timeq, &cur_time); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index 0e1feb597586..bd5e5a5d52a8 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -436,14 +436,15 @@ int snd_seq_timer_continue(struct snd_seq_timer *tmr) } /* return current 'real' time. use timeofday() to get better granularity. */ -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr, + bool adjust_ktime) { snd_seq_real_time_t cur_time; unsigned long flags; spin_lock_irqsave(&tmr->lock, flags); cur_time = tmr->cur_time; - if (tmr->running) { + if (adjust_ktime && tmr->running) { struct timespec64 tm; ktime_get_ts64(&tm); @@ -460,7 +461,13 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) high PPQ values) */ snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr) { - return tmr->tick.cur_tick; + snd_seq_tick_time_t cur_tick; + unsigned long flags; + + spin_lock_irqsave(&tmr->lock, flags); + cur_tick = tmr->tick.cur_tick; + spin_unlock_irqrestore(&tmr->lock, flags); + return cur_tick; } diff --git a/sound/core/seq/seq_timer.h b/sound/core/seq/seq_timer.h index 9506b661fe5b..5d47d559465e 100644 --- a/sound/core/seq/seq_timer.h +++ b/sound/core/seq/seq_timer.h @@ -135,7 +135,8 @@ int snd_seq_timer_set_ppq(struct snd_seq_timer *tmr, int ppq); int snd_seq_timer_set_position_tick(struct snd_seq_timer *tmr, 
snd_seq_tick_time_t position); int snd_seq_timer_set_position_time(struct snd_seq_timer *tmr, snd_seq_real_time_t position); int snd_seq_timer_set_skew(struct snd_seq_timer *tmr, unsigned int skew, unsigned int base); -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr); +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr, + bool adjust_ktime); snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr); extern int seq_default_timer_class; -- GitLab From a86265edeb3314f9c3270a5bf18b4e72ebc65beb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 2 Feb 2020 20:30:53 -0800 Subject: [PATCH 0284/1278] netfilter: xt_hashlimit: limit the max size of hashtable commit 8d0015a7ab76b8b1e89a3e5f5710a6e5103f2dd5 upstream. The user-specified hashtable size is unbound, this could easily lead to an OOM or a hung task as we hold the global mutex while allocating and initializing the new hashtable. Add a max value to cap both cfg->size and cfg->max, as suggested by Florian. 
Reported-and-tested-by: syzbot+adf6c6c2be1c3a718121@syzkaller.appspotmail.com Signed-off-by: Cong Wang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/xt_hashlimit.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index fe8e8a1622b5..186f97f1c6c0 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -845,6 +845,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3); } +#define HASHLIMIT_MAX_SIZE 1048576 + static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, struct xt_hashlimit_htable **hinfo, struct hashlimit_cfg3 *cfg, @@ -855,6 +857,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, if (cfg->gc_interval == 0 || cfg->expire == 0) return -EINVAL; + if (cfg->size > HASHLIMIT_MAX_SIZE) { + cfg->size = HASHLIMIT_MAX_SIZE; + pr_info_ratelimited("size too large, truncated to %u\n", cfg->size); + } + if (cfg->max > HASHLIMIT_MAX_SIZE) { + cfg->max = HASHLIMIT_MAX_SIZE; + pr_info_ratelimited("max too large, truncated to %u\n", cfg->max); + } if (par->family == NFPROTO_IPV4) { if (cfg->srcmask > 32 || cfg->dstmask > 32) return -EINVAL; -- GitLab From 8b6934200c2469c0726a709f93ad108573550c80 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Sat, 25 Jan 2020 03:37:29 +0000 Subject: [PATCH 0285/1278] ata: ahci: Add shutdown to freeze hardware resources of ahci commit 10a663a1b15134a5a714aa515e11425a44d4fdf7 upstream. device_shutdown() called from reboot or power_shutdown expect all devices to be shutdown. Same is true for even ahci pci driver. As no ahci shutdown function is implemented, the ata subsystem always remains alive with DMA & interrupt support. File system related calls should not be honored after device_shutdown(). 
So defining ahci pci driver shutdown to freeze hardware (mask interrupt, stop DMA engine and free DMA resources). Signed-off-by: Prabhakar Kushwaha Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 7 +++++++ drivers/ata/libata-core.c | 21 +++++++++++++++++++++ include/linux/libata.h | 1 + 3 files changed, 29 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index f003e301723a..0905c07b8c7e 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -88,6 +88,7 @@ enum board_ids { static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); static void ahci_remove_one(struct pci_dev *dev); +static void ahci_shutdown_one(struct pci_dev *dev); static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline); static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, @@ -586,6 +587,7 @@ static struct pci_driver ahci_pci_driver = { .id_table = ahci_pci_tbl, .probe = ahci_init_one, .remove = ahci_remove_one, + .shutdown = ahci_shutdown_one, .driver = { .pm = &ahci_pci_pm_ops, }, @@ -1823,6 +1825,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; } +static void ahci_shutdown_one(struct pci_dev *pdev) +{ + ata_pci_shutdown_one(pdev); +} + static void ahci_remove_one(struct pci_dev *pdev) { pm_runtime_get_noresume(&pdev->dev); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 08f67c109429..33eb5e342a7a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6706,6 +6706,26 @@ void ata_pci_remove_one(struct pci_dev *pdev) ata_host_detach(host); } +void ata_pci_shutdown_one(struct pci_dev *pdev) +{ + struct ata_host *host = pci_get_drvdata(pdev); + int i; + + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; + + ap->pflags |= ATA_PFLAG_FROZEN; + + /* Disable port interrupts */ + if (ap->ops->freeze) + ap->ops->freeze(ap); + + /* Stop the port 
DMA engines */ + if (ap->ops->port_stop) + ap->ops->port_stop(ap); + } +} + /* move to PCI subsystem */ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits) { @@ -7326,6 +7346,7 @@ EXPORT_SYMBOL_GPL(ata_timing_cycle2mode); #ifdef CONFIG_PCI EXPORT_SYMBOL_GPL(pci_test_config_bits); +EXPORT_SYMBOL_GPL(ata_pci_shutdown_one); EXPORT_SYMBOL_GPL(ata_pci_remove_one); #ifdef CONFIG_PM EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend); diff --git a/include/linux/libata.h b/include/linux/libata.h index c5188dc389c8..93838d98e3f3 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1229,6 +1229,7 @@ struct pci_bits { }; extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits); +extern void ata_pci_shutdown_one(struct pci_dev *pdev); extern void ata_pci_remove_one(struct pci_dev *pdev); #ifdef CONFIG_PM -- GitLab From f02017e021758c6dc3da591cac78ed512813dbb1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Feb 2020 18:30:26 +0100 Subject: [PATCH 0286/1278] xen: Enable interrupts when calling _cond_resched() commit 8645e56a4ad6dcbf504872db7f14a2f67db88ef2 upstream. xen_maybe_preempt_hcall() is called from the exception entry point xen_do_hypervisor_callback with interrupts disabled. _cond_resched() evades the might_sleep() check in cond_resched() which would have caught that and schedule_debug() unfortunately lacks a check for irqs_disabled(). Enable interrupts around the call and use cond_resched() to catch future issues. 
Fixes: fdfd811ddde3 ("x86/xen: allow privcmd hypercalls to be preempted") Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/878skypjrh.fsf@nanos.tec.linutronix.de Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky Signed-off-by: Greg Kroah-Hartman --- drivers/xen/preempt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c index 08cb419eb4e6..5f6b77ea34fb 100644 --- a/drivers/xen/preempt.c +++ b/drivers/xen/preempt.c @@ -37,7 +37,9 @@ asmlinkage __visible void xen_maybe_preempt_hcall(void) * cpu. */ __this_cpu_write(xen_in_preemptible_hcall, false); - _cond_resched(); + local_irq_enable(); + cond_resched(); + local_irq_disable(); __this_cpu_write(xen_in_preemptible_hcall, true); } } -- GitLab From 2fc1b71474b926b1671c96265c6b9637f2b607f0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 13 Feb 2020 23:42:07 -0700 Subject: [PATCH 0287/1278] s390/mm: Explicitly compare PAGE_DEFAULT_KEY against zero in storage_key_init_range commit 380324734956c64cd060e1db4304f3117ac15809 upstream. Clang warns: In file included from ../arch/s390/purgatory/purgatory.c:10: In file included from ../include/linux/kexec.h:18: In file included from ../include/linux/crash_core.h:6: In file included from ../include/linux/elfcore.h:5: In file included from ../include/linux/user.h:1: In file included from ../arch/s390/include/asm/user.h:11: ../arch/s390/include/asm/page.h:45:6: warning: converting the result of '<<' to a boolean always evaluates to false [-Wtautological-constant-compare] if (PAGE_DEFAULT_KEY) ^ ../arch/s390/include/asm/page.h:23:44: note: expanded from macro 'PAGE_DEFAULT_KEY' #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) ^ 1 warning generated. Explicitly compare this against zero to silence the warning as it is intended to be used in a boolean context. 
Fixes: de3fa841e429 ("s390/mm: fix compile for PAGE_DEFAULT_KEY != 0") Link: https://github.com/ClangBuiltLinux/linux/issues/860 Link: https://lkml.kernel.org/r/20200214064207.10381-1-natechancellor@gmail.com Acked-by: Christian Borntraeger Signed-off-by: Nathan Chancellor Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 779c589b7089..5f2e272895ff 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -42,7 +42,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end); static inline void storage_key_init_range(unsigned long start, unsigned long end) { - if (PAGE_DEFAULT_KEY) + if (PAGE_DEFAULT_KEY != 0) __storage_key_init_range(start, end); } -- GitLab From 78d697fc93f98054e36a3ab76dca1a88802ba7be Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 28 Feb 2020 16:36:17 +0100 Subject: [PATCH 0288/1278] Linux 4.14.172 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f2657f4838db..6d3cecad7f1e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 171 +SUBLEVEL = 172 EXTRAVERSION = NAME = Petit Gorille -- GitLab From b05c992acd41b01629d2035d54ef6d4ed6334e0f Mon Sep 17 00:00:00 2001 From: "Daniel J. Ogorchock" Date: Sun, 29 Dec 2019 19:27:09 -0600 Subject: [PATCH 0289/1278] FROMLIST: HID: nintendo: add nintendo switch controller driver The hid-nintendo driver supports the Nintendo Switch Pro Controllers and the Joy-Cons. The Pro Controllers can be used over USB or Bluetooth. The Joy-Cons each create their own, independent input devices, so it is up to userspace to combine them if desired. Signed-off-by: Daniel J. 
Ogorchock Test: tested via custom test app Test: atest NintendoSwitchProTest Bug: 135136477 Link: https://patchwork.kernel.org/patch/11312547/ Link: https://lore.kernel.org/linux-input/20191230012720.2368987-2-djogorchock@gmail.com/ Change-Id: I179da1092faedc2ad25336224cf5ec8ff00e0d3f Signed-off-by: Siarhei Vishniakou --- MAINTAINERS | 6 + drivers/hid/Kconfig | 11 + drivers/hid/Makefile | 1 + drivers/hid/hid-core.c | 10 + drivers/hid/hid-ids.h | 3 + drivers/hid/hid-nintendo.c | 820 +++++++++++++++++++++++++++++++++++++ 6 files changed, 851 insertions(+) create mode 100644 drivers/hid/hid-nintendo.c diff --git a/MAINTAINERS b/MAINTAINERS index 77c68f63e29f..621734fe3603 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9591,6 +9591,12 @@ S: Maintained F: Documentation/scsi/NinjaSCSI.txt F: drivers/scsi/nsp32* +NINTENDO HID DRIVER +M: Daniel J. Ogorchock +L: linux-input@vger.kernel.org +S: Maintained +F: drivers/hid/hid-nintendo* + NIOS2 ARCHITECTURE M: Ley Foon Tan L: nios2-dev@lists.rocketboards.org (moderated for non-subscribers) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index e51c529035cb..d9185240239a 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -609,6 +609,17 @@ config HID_MULTITOUCH To compile this driver as a module, choose M here: the module will be called hid-multitouch. +config HID_NINTENDO + tristate "Nintendo Joy-Con and Pro Controller support" + depends on HID + help + Adds support for the Nintendo Switch Joy-Cons and Pro Controller. + All controllers support bluetooth, and the Pro Controller also supports + its USB mode. + + To compile this driver as a module, choose M here: the + module will be called hid-nintendo. 
+ config HID_NTI tristate "NTI keyboard adapters" ---help--- diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile index e146c257285a..aa7497748bb1 100644 --- a/drivers/hid/Makefile +++ b/drivers/hid/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_HID_MAYFLASH) += hid-mf.o obj-$(CONFIG_HID_MICROSOFT) += hid-microsoft.o obj-$(CONFIG_HID_MONTEREY) += hid-monterey.o obj-$(CONFIG_HID_MULTITOUCH) += hid-multitouch.o +obj-$(CONFIG_HID_NINTENDO) += hid-nintendo.o obj-$(CONFIG_HID_NTI) += hid-nti.o obj-$(CONFIG_HID_NTRIG) += hid-ntrig.o obj-$(CONFIG_HID_ORTEK) += hid-ortek.o diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 58b73eb9de3a..69229225a190 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2259,6 +2259,16 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE2) }, #endif +#if IS_ENABLED(CONFIG_HID_NINTENDO) + { HID_USB_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_PROCON) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_PROCON) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_JOYCONL) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_JOYCONR) }, +#endif #if IS_ENABLED(CONFIG_HID_NTI) { HID_USB_DEVICE(USB_VENDOR_ID_NTI, USB_DEVICE_ID_USB_SUN) }, #endif diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 245a0cff0b87..71342b5cb703 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -816,6 +816,9 @@ #define USB_VENDOR_ID_NINTENDO 0x057e #define USB_DEVICE_ID_NINTENDO_WIIMOTE 0x0306 #define USB_DEVICE_ID_NINTENDO_WIIMOTE2 0x0330 +#define USB_DEVICE_ID_NINTENDO_JOYCONL 0x2006 +#define USB_DEVICE_ID_NINTENDO_JOYCONR 0x2007 +#define USB_DEVICE_ID_NINTENDO_PROCON 0x2009 #define USB_VENDOR_ID_NOVATEK 0x0603 #define USB_DEVICE_ID_NOVATEK_PCT 0x0600 diff 
--git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c new file mode 100644 index 000000000000..3695b96694bd --- /dev/null +++ b/drivers/hid/hid-nintendo.c @@ -0,0 +1,820 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * HID driver for Nintendo Switch Joy-Cons and Pro Controllers + * + * Copyright (c) 2019 Daniel J. Ogorchock + * + * The following resources/projects were referenced for this driver: + * https://github.com/dekuNukem/Nintendo_Switch_Reverse_Engineering + * https://gitlab.com/pjranki/joycon-linux-kernel (Peter Rankin) + * https://github.com/FrotBot/SwitchProConLinuxUSB + * https://github.com/MTCKC/ProconXInput + * hid-wiimote kernel hid driver + * hid-logitech-hidpp driver + * + * This driver supports the Nintendo Switch Joy-Cons and Pro Controllers. The + * Pro Controllers can either be used over USB or Bluetooth. + * + * The driver will retrieve the factory calibration info from the controllers, + * so little to no user calibration should be required. + * + */ + +#include "hid-ids.h" +#include +#include +#include +#include +#include +#include + +/* + * Reference the url below for the following HID report defines: + * https://github.com/dekuNukem/Nintendo_Switch_Reverse_Engineering + */ + +/* Output Reports */ +static const u8 JC_OUTPUT_RUMBLE_AND_SUBCMD = 0x01; +static const u8 JC_OUTPUT_FW_UPDATE_PKT = 0x03; +static const u8 JC_OUTPUT_RUMBLE_ONLY = 0x10; +static const u8 JC_OUTPUT_MCU_DATA = 0x11; +static const u8 JC_OUTPUT_USB_CMD = 0x80; + +/* Subcommand IDs */ +static const u8 JC_SUBCMD_STATE /*= 0x00*/; +static const u8 JC_SUBCMD_MANUAL_BT_PAIRING = 0x01; +static const u8 JC_SUBCMD_REQ_DEV_INFO = 0x02; +static const u8 JC_SUBCMD_SET_REPORT_MODE = 0x03; +static const u8 JC_SUBCMD_TRIGGERS_ELAPSED = 0x04; +static const u8 JC_SUBCMD_GET_PAGE_LIST_STATE = 0x05; +static const u8 JC_SUBCMD_SET_HCI_STATE = 0x06; +static const u8 JC_SUBCMD_RESET_PAIRING_INFO = 0x07; +static const u8 JC_SUBCMD_LOW_POWER_MODE = 0x08; +static const u8 
JC_SUBCMD_SPI_FLASH_READ = 0x10; +static const u8 JC_SUBCMD_SPI_FLASH_WRITE = 0x11; +static const u8 JC_SUBCMD_RESET_MCU = 0x20; +static const u8 JC_SUBCMD_SET_MCU_CONFIG = 0x21; +static const u8 JC_SUBCMD_SET_MCU_STATE = 0x22; +static const u8 JC_SUBCMD_SET_PLAYER_LIGHTS = 0x30; +static const u8 JC_SUBCMD_GET_PLAYER_LIGHTS = 0x31; +static const u8 JC_SUBCMD_SET_HOME_LIGHT = 0x38; +static const u8 JC_SUBCMD_ENABLE_IMU = 0x40; +static const u8 JC_SUBCMD_SET_IMU_SENSITIVITY = 0x41; +static const u8 JC_SUBCMD_WRITE_IMU_REG = 0x42; +static const u8 JC_SUBCMD_READ_IMU_REG = 0x43; +static const u8 JC_SUBCMD_ENABLE_VIBRATION = 0x48; +static const u8 JC_SUBCMD_GET_REGULATED_VOLTAGE = 0x50; + +/* Input Reports */ +static const u8 JC_INPUT_BUTTON_EVENT = 0x3F; +static const u8 JC_INPUT_SUBCMD_REPLY = 0x21; +static const u8 JC_INPUT_IMU_DATA = 0x30; +static const u8 JC_INPUT_MCU_DATA = 0x31; +static const u8 JC_INPUT_USB_RESPONSE = 0x81; + +/* Feature Reports */ +static const u8 JC_FEATURE_LAST_SUBCMD = 0x02; +static const u8 JC_FEATURE_OTA_FW_UPGRADE = 0x70; +static const u8 JC_FEATURE_SETUP_MEM_READ = 0x71; +static const u8 JC_FEATURE_MEM_READ = 0x72; +static const u8 JC_FEATURE_ERASE_MEM_SECTOR = 0x73; +static const u8 JC_FEATURE_MEM_WRITE = 0x74; +static const u8 JC_FEATURE_LAUNCH = 0x75; + +/* USB Commands */ +static const u8 JC_USB_CMD_CONN_STATUS = 0x01; +static const u8 JC_USB_CMD_HANDSHAKE = 0x02; +static const u8 JC_USB_CMD_BAUDRATE_3M = 0x03; +static const u8 JC_USB_CMD_NO_TIMEOUT = 0x04; +static const u8 JC_USB_CMD_EN_TIMEOUT = 0x05; +static const u8 JC_USB_RESET = 0x06; +static const u8 JC_USB_PRE_HANDSHAKE = 0x91; +static const u8 JC_USB_SEND_UART = 0x92; + +/* SPI storage addresses of factory calibration data */ +static const u16 JC_CAL_DATA_START = 0x603d; +static const u16 JC_CAL_DATA_END = 0x604e; +#define JC_CAL_DATA_SIZE (JC_CAL_DATA_END - JC_CAL_DATA_START + 1) + + +/* The raw analog joystick values will be mapped in terms of this magnitude */ +static 
const u16 JC_MAX_STICK_MAG = 32767; +static const u16 JC_STICK_FUZZ = 250; +static const u16 JC_STICK_FLAT = 500; + +/* States for controller state machine */ +enum joycon_ctlr_state { + JOYCON_CTLR_STATE_INIT, + JOYCON_CTLR_STATE_READ, +}; + +struct joycon_stick_cal { + s32 max; + s32 min; + s32 center; +}; + +/* + * All the controller's button values are stored in a u32. + * They can be accessed with bitwise ANDs. + */ +static const u32 JC_BTN_Y = BIT(0); +static const u32 JC_BTN_X = BIT(1); +static const u32 JC_BTN_B = BIT(2); +static const u32 JC_BTN_A = BIT(3); +static const u32 JC_BTN_SR_R = BIT(4); +static const u32 JC_BTN_SL_R = BIT(5); +static const u32 JC_BTN_R = BIT(6); +static const u32 JC_BTN_ZR = BIT(7); +static const u32 JC_BTN_MINUS = BIT(8); +static const u32 JC_BTN_PLUS = BIT(9); +static const u32 JC_BTN_RSTICK = BIT(10); +static const u32 JC_BTN_LSTICK = BIT(11); +static const u32 JC_BTN_HOME = BIT(12); +static const u32 JC_BTN_CAP = BIT(13); /* capture button */ +static const u32 JC_BTN_DOWN = BIT(16); +static const u32 JC_BTN_UP = BIT(17); +static const u32 JC_BTN_RIGHT = BIT(18); +static const u32 JC_BTN_LEFT = BIT(19); +static const u32 JC_BTN_SR_L = BIT(20); +static const u32 JC_BTN_SL_L = BIT(21); +static const u32 JC_BTN_L = BIT(22); +static const u32 JC_BTN_ZL = BIT(23); + +enum joycon_msg_type { + JOYCON_MSG_TYPE_NONE, + JOYCON_MSG_TYPE_USB, + JOYCON_MSG_TYPE_SUBCMD, +}; + +struct joycon_subcmd_request { + u8 output_id; /* must be 0x01 for subcommand, 0x10 for rumble only */ + u8 packet_num; /* incremented every send */ + u8 rumble_data[8]; + u8 subcmd_id; + u8 data[0]; /* length depends on the subcommand */ +} __packed; + +struct joycon_subcmd_reply { + u8 ack; /* MSB 1 for ACK, 0 for NACK */ + u8 id; /* id of requested subcmd */ + u8 data[0]; /* will be at most 35 bytes */ +} __packed; + +struct joycon_input_report { + u8 id; + u8 timer; + u8 bat_con; /* battery and connection info */ + u8 button_status[3]; + u8 left_stick[3]; + u8 
right_stick[3]; + u8 vibrator_report; + + /* + * If support for firmware updates, gyroscope data, and/or NFC/IR + * are added in the future, this can be swapped for a union. + */ + struct joycon_subcmd_reply reply; +} __packed; + +#define JC_MAX_RESP_SIZE (sizeof(struct joycon_input_report) + 35) + +/* Each physical controller is associated with a joycon_ctlr struct */ +struct joycon_ctlr { + struct hid_device *hdev; + struct input_dev *input; + enum joycon_ctlr_state ctlr_state; + + /* The following members are used for synchronous sends/receives */ + enum joycon_msg_type msg_type; + u8 subcmd_num; + struct mutex output_mutex; + u8 input_buf[JC_MAX_RESP_SIZE]; + wait_queue_head_t wait; + bool received_resp; + u8 usb_ack_match; + u8 subcmd_ack_match; + + /* factory calibration data */ + struct joycon_stick_cal left_stick_cal_x; + struct joycon_stick_cal left_stick_cal_y; + struct joycon_stick_cal right_stick_cal_x; + struct joycon_stick_cal right_stick_cal_y; + +}; + +static int __joycon_hid_send(struct hid_device *hdev, u8 *data, size_t len) +{ + u8 *buf; + int ret; + + buf = kmemdup(data, len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + ret = hid_hw_output_report(hdev, buf, len); + kfree(buf); + if (ret < 0) + hid_dbg(hdev, "Failed to send output report ret=%d\n", ret); + return ret; +} + +static int joycon_hid_send_sync(struct joycon_ctlr *ctlr, u8 *data, size_t len) +{ + int ret; + + ret = __joycon_hid_send(ctlr->hdev, data, len); + if (ret < 0) { + memset(ctlr->input_buf, 0, JC_MAX_RESP_SIZE); + return ret; + } + + if (!wait_event_timeout(ctlr->wait, ctlr->received_resp, HZ)) { + hid_dbg(ctlr->hdev, "synchronous send/receive timed out\n"); + memset(ctlr->input_buf, 0, JC_MAX_RESP_SIZE); + return -ETIMEDOUT; + } + + ctlr->received_resp = false; + return 0; +} + +static int joycon_send_usb(struct joycon_ctlr *ctlr, u8 cmd) +{ + int ret; + u8 buf[2] = {JC_OUTPUT_USB_CMD}; + + buf[1] = cmd; + ctlr->usb_ack_match = cmd; + ctlr->msg_type = JOYCON_MSG_TYPE_USB; + 
ret = joycon_hid_send_sync(ctlr, buf, sizeof(buf)); + if (ret) + hid_dbg(ctlr->hdev, "send usb command failed; ret=%d\n", ret); + return ret; +} + +static int joycon_send_subcmd(struct joycon_ctlr *ctlr, + struct joycon_subcmd_request *subcmd, + size_t data_len) +{ + int ret; + + subcmd->output_id = JC_OUTPUT_RUMBLE_AND_SUBCMD; + subcmd->packet_num = ctlr->subcmd_num; + if (++ctlr->subcmd_num > 0xF) + ctlr->subcmd_num = 0; + ctlr->subcmd_ack_match = subcmd->subcmd_id; + ctlr->msg_type = JOYCON_MSG_TYPE_SUBCMD; + + ret = joycon_hid_send_sync(ctlr, (u8 *)subcmd, + sizeof(*subcmd) + data_len); + if (ret < 0) + hid_dbg(ctlr->hdev, "send subcommand failed; ret=%d\n", ret); + else + ret = 0; + return ret; +} + +/* Supply nibbles for flash and on. Ones correspond to active */ +static int joycon_set_player_leds(struct joycon_ctlr *ctlr, u8 flash, u8 on) +{ + struct joycon_subcmd_request *req; + u8 buffer[sizeof(*req) + 1] = { 0 }; + + req = (struct joycon_subcmd_request *)buffer; + req->subcmd_id = JC_SUBCMD_SET_PLAYER_LIGHTS; + req->data[0] = (flash << 4) | on; + + hid_dbg(ctlr->hdev, "setting player leds\n"); + return joycon_send_subcmd(ctlr, req, 1); +} + +static const u16 DFLT_STICK_CAL_CEN = 2000; +static const u16 DFLT_STICK_CAL_MAX = 3500; +static const u16 DFLT_STICK_CAL_MIN = 500; +static int joycon_request_calibration(struct joycon_ctlr *ctlr) +{ + struct joycon_subcmd_request *req; + u8 buffer[sizeof(*req) + 5] = { 0 }; + struct joycon_input_report *report; + struct joycon_stick_cal *cal_x; + struct joycon_stick_cal *cal_y; + s32 x_max_above; + s32 x_min_below; + s32 y_max_above; + s32 y_min_below; + u8 *data; + u8 *raw_cal; + int ret; + + req = (struct joycon_subcmd_request *)buffer; + req->subcmd_id = JC_SUBCMD_SPI_FLASH_READ; + data = req->data; + data[0] = 0xFF & JC_CAL_DATA_START; + data[1] = 0xFF & (JC_CAL_DATA_START >> 8); + data[2] = 0xFF & (JC_CAL_DATA_START >> 16); + data[3] = 0xFF & (JC_CAL_DATA_START >> 24); + data[4] = JC_CAL_DATA_SIZE; + + 
hid_dbg(ctlr->hdev, "requesting cal data\n"); + ret = joycon_send_subcmd(ctlr, req, 5); + if (ret) { + hid_warn(ctlr->hdev, + "Failed to read stick cal, using defaults; ret=%d\n", + ret); + + ctlr->left_stick_cal_x.center = DFLT_STICK_CAL_CEN; + ctlr->left_stick_cal_x.max = DFLT_STICK_CAL_MAX; + ctlr->left_stick_cal_x.min = DFLT_STICK_CAL_MIN; + + ctlr->left_stick_cal_y.center = DFLT_STICK_CAL_CEN; + ctlr->left_stick_cal_y.max = DFLT_STICK_CAL_MAX; + ctlr->left_stick_cal_y.min = DFLT_STICK_CAL_MIN; + + ctlr->right_stick_cal_x.center = DFLT_STICK_CAL_CEN; + ctlr->right_stick_cal_x.max = DFLT_STICK_CAL_MAX; + ctlr->right_stick_cal_x.min = DFLT_STICK_CAL_MIN; + + ctlr->right_stick_cal_y.center = DFLT_STICK_CAL_CEN; + ctlr->right_stick_cal_y.max = DFLT_STICK_CAL_MAX; + ctlr->right_stick_cal_y.min = DFLT_STICK_CAL_MIN; + + return ret; + } + + report = (struct joycon_input_report *)ctlr->input_buf; + raw_cal = &report->reply.data[5]; + + /* left stick calibration parsing */ + cal_x = &ctlr->left_stick_cal_x; + cal_y = &ctlr->left_stick_cal_y; + + x_max_above = hid_field_extract(ctlr->hdev, (raw_cal + 0), 0, 12); + y_max_above = hid_field_extract(ctlr->hdev, (raw_cal + 1), 4, 12); + cal_x->center = hid_field_extract(ctlr->hdev, (raw_cal + 3), 0, 12); + cal_y->center = hid_field_extract(ctlr->hdev, (raw_cal + 4), 4, 12); + x_min_below = hid_field_extract(ctlr->hdev, (raw_cal + 6), 0, 12); + y_min_below = hid_field_extract(ctlr->hdev, (raw_cal + 7), 4, 12); + cal_x->max = cal_x->center + x_max_above; + cal_x->min = cal_x->center - x_min_below; + cal_y->max = cal_y->center + y_max_above; + cal_y->min = cal_y->center - y_min_below; + + /* right stick calibration parsing */ + raw_cal += 9; + cal_x = &ctlr->right_stick_cal_x; + cal_y = &ctlr->right_stick_cal_y; + + cal_x->center = hid_field_extract(ctlr->hdev, (raw_cal + 0), 0, 12); + cal_y->center = hid_field_extract(ctlr->hdev, (raw_cal + 1), 4, 12); + x_min_below = hid_field_extract(ctlr->hdev, (raw_cal + 3), 0, 12); + 
y_min_below = hid_field_extract(ctlr->hdev, (raw_cal + 4), 4, 12); + x_max_above = hid_field_extract(ctlr->hdev, (raw_cal + 6), 0, 12); + y_max_above = hid_field_extract(ctlr->hdev, (raw_cal + 7), 4, 12); + cal_x->max = cal_x->center + x_max_above; + cal_x->min = cal_x->center - x_min_below; + cal_y->max = cal_y->center + y_max_above; + cal_y->min = cal_y->center - y_min_below; + + hid_dbg(ctlr->hdev, "calibration:\n" + "l_x_c=%d l_x_max=%d l_x_min=%d\n" + "l_y_c=%d l_y_max=%d l_y_min=%d\n" + "r_x_c=%d r_x_max=%d r_x_min=%d\n" + "r_y_c=%d r_y_max=%d r_y_min=%d\n", + ctlr->left_stick_cal_x.center, + ctlr->left_stick_cal_x.max, + ctlr->left_stick_cal_x.min, + ctlr->left_stick_cal_y.center, + ctlr->left_stick_cal_y.max, + ctlr->left_stick_cal_y.min, + ctlr->right_stick_cal_x.center, + ctlr->right_stick_cal_x.max, + ctlr->right_stick_cal_x.min, + ctlr->right_stick_cal_y.center, + ctlr->right_stick_cal_y.max, + ctlr->right_stick_cal_y.min); + + return 0; +} + +static int joycon_set_report_mode(struct joycon_ctlr *ctlr) +{ + struct joycon_subcmd_request *req; + u8 buffer[sizeof(*req) + 1] = { 0 }; + + req = (struct joycon_subcmd_request *)buffer; + req->subcmd_id = JC_SUBCMD_SET_REPORT_MODE; + req->data[0] = 0x30; /* standard, full report mode */ + + hid_dbg(ctlr->hdev, "setting controller report mode\n"); + return joycon_send_subcmd(ctlr, req, 1); +} + +static s32 joycon_map_stick_val(struct joycon_stick_cal *cal, s32 val) +{ + s32 center = cal->center; + s32 min = cal->min; + s32 max = cal->max; + s32 new_val; + + if (val > center) { + new_val = (val - center) * JC_MAX_STICK_MAG; + new_val /= (max - center); + } else { + new_val = (center - val) * -JC_MAX_STICK_MAG; + new_val /= (center - min); + } + new_val = clamp(new_val, (s32)-JC_MAX_STICK_MAG, (s32)JC_MAX_STICK_MAG); + return new_val; +} + +static void joycon_parse_report(struct joycon_ctlr *ctlr, + struct joycon_input_report *rep) +{ + struct input_dev *dev = ctlr->input; + u32 btns; + u32 id = 
ctlr->hdev->product; + + btns = hid_field_extract(ctlr->hdev, rep->button_status, 0, 24); + + if (id != USB_DEVICE_ID_NINTENDO_JOYCONR) { + u16 raw_x; + u16 raw_y; + s32 x; + s32 y; + + /* get raw stick values */ + raw_x = hid_field_extract(ctlr->hdev, rep->left_stick, 0, 12); + raw_y = hid_field_extract(ctlr->hdev, + rep->left_stick + 1, 4, 12); + /* map the stick values */ + x = joycon_map_stick_val(&ctlr->left_stick_cal_x, raw_x); + y = -joycon_map_stick_val(&ctlr->left_stick_cal_y, raw_y); + /* report sticks */ + input_report_abs(dev, ABS_X, x); + input_report_abs(dev, ABS_Y, y); + + /* report buttons */ + input_report_key(dev, BTN_TL, btns & JC_BTN_L); + input_report_key(dev, BTN_TL2, btns & JC_BTN_ZL); + if (id != USB_DEVICE_ID_NINTENDO_PROCON) { + /* Report the S buttons as the non-existent triggers */ + input_report_key(dev, BTN_TR, btns & JC_BTN_SL_L); + input_report_key(dev, BTN_TR2, btns & JC_BTN_SR_L); + } + input_report_key(dev, BTN_SELECT, btns & JC_BTN_MINUS); + input_report_key(dev, BTN_THUMBL, btns & JC_BTN_LSTICK); + input_report_key(dev, BTN_Z, btns & JC_BTN_CAP); + input_report_key(dev, BTN_DPAD_DOWN, btns & JC_BTN_DOWN); + input_report_key(dev, BTN_DPAD_UP, btns & JC_BTN_UP); + input_report_key(dev, BTN_DPAD_RIGHT, btns & JC_BTN_RIGHT); + input_report_key(dev, BTN_DPAD_LEFT, btns & JC_BTN_LEFT); + } + if (id != USB_DEVICE_ID_NINTENDO_JOYCONL) { + u16 raw_x; + u16 raw_y; + s32 x; + s32 y; + + /* get raw stick values */ + raw_x = hid_field_extract(ctlr->hdev, rep->right_stick, 0, 12); + raw_y = hid_field_extract(ctlr->hdev, + rep->right_stick + 1, 4, 12); + /* map stick values */ + x = joycon_map_stick_val(&ctlr->right_stick_cal_x, raw_x); + y = -joycon_map_stick_val(&ctlr->right_stick_cal_y, raw_y); + /* report sticks */ + input_report_abs(dev, ABS_RX, x); + input_report_abs(dev, ABS_RY, y); + + /* report buttons */ + input_report_key(dev, BTN_TR, btns & JC_BTN_R); + input_report_key(dev, BTN_TR2, btns & JC_BTN_ZR); + if (id != 
USB_DEVICE_ID_NINTENDO_PROCON) { + /* Report the S buttons as the non-existent triggers */ + input_report_key(dev, BTN_TL, btns & JC_BTN_SL_R); + input_report_key(dev, BTN_TL2, btns & JC_BTN_SR_R); + } + input_report_key(dev, BTN_START, btns & JC_BTN_PLUS); + input_report_key(dev, BTN_THUMBR, btns & JC_BTN_RSTICK); + input_report_key(dev, BTN_MODE, btns & JC_BTN_HOME); + input_report_key(dev, BTN_WEST, btns & JC_BTN_Y); + input_report_key(dev, BTN_NORTH, btns & JC_BTN_X); + input_report_key(dev, BTN_EAST, btns & JC_BTN_A); + input_report_key(dev, BTN_SOUTH, btns & JC_BTN_B); + } + + input_sync(dev); +} + + +static const unsigned int joycon_button_inputs_l[] = { + BTN_SELECT, BTN_Z, BTN_THUMBL, + BTN_DPAD_UP, BTN_DPAD_DOWN, BTN_DPAD_LEFT, BTN_DPAD_RIGHT, + BTN_TL, BTN_TL2, + 0 /* 0 signals end of array */ +}; + +static const unsigned int joycon_button_inputs_r[] = { + BTN_START, BTN_MODE, BTN_THUMBR, + BTN_SOUTH, BTN_EAST, BTN_NORTH, BTN_WEST, + BTN_TR, BTN_TR2, + 0 /* 0 signals end of array */ +}; + +static DEFINE_MUTEX(joycon_input_num_mutex); +static int joycon_input_create(struct joycon_ctlr *ctlr) +{ + struct hid_device *hdev; + static int input_num = 1; + const char *name; + int ret; + int i; + + hdev = ctlr->hdev; + + switch (hdev->product) { + case USB_DEVICE_ID_NINTENDO_PROCON: + name = "Nintendo Switch Pro Controller"; + break; + case USB_DEVICE_ID_NINTENDO_JOYCONL: + name = "Nintendo Switch Left Joy-Con"; + break; + case USB_DEVICE_ID_NINTENDO_JOYCONR: + name = "Nintendo Switch Right Joy-Con"; + break; + default: /* Should be impossible */ + hid_err(hdev, "Invalid hid product\n"); + return -EINVAL; + } + + ctlr->input = devm_input_allocate_device(&hdev->dev); + if (!ctlr->input) + return -ENOMEM; + ctlr->input->id.bustype = hdev->bus; + ctlr->input->id.vendor = hdev->vendor; + ctlr->input->id.product = hdev->product; + ctlr->input->id.version = hdev->version; + ctlr->input->name = name; + input_set_drvdata(ctlr->input, ctlr); + + + /* set up sticks */ + 
if (hdev->product != USB_DEVICE_ID_NINTENDO_JOYCONR) { + input_set_abs_params(ctlr->input, ABS_X, + -JC_MAX_STICK_MAG, JC_MAX_STICK_MAG, + JC_STICK_FUZZ, JC_STICK_FLAT); + input_set_abs_params(ctlr->input, ABS_Y, + -JC_MAX_STICK_MAG, JC_MAX_STICK_MAG, + JC_STICK_FUZZ, JC_STICK_FLAT); + } + if (hdev->product != USB_DEVICE_ID_NINTENDO_JOYCONL) { + input_set_abs_params(ctlr->input, ABS_RX, + -JC_MAX_STICK_MAG, JC_MAX_STICK_MAG, + JC_STICK_FUZZ, JC_STICK_FLAT); + input_set_abs_params(ctlr->input, ABS_RY, + -JC_MAX_STICK_MAG, JC_MAX_STICK_MAG, + JC_STICK_FUZZ, JC_STICK_FLAT); + } + + /* set up buttons */ + if (hdev->product != USB_DEVICE_ID_NINTENDO_JOYCONR) { + for (i = 0; joycon_button_inputs_l[i] > 0; i++) + input_set_capability(ctlr->input, EV_KEY, + joycon_button_inputs_l[i]); + } + if (hdev->product != USB_DEVICE_ID_NINTENDO_JOYCONL) { + for (i = 0; joycon_button_inputs_r[i] > 0; i++) + input_set_capability(ctlr->input, EV_KEY, + joycon_button_inputs_r[i]); + } + + ret = input_register_device(ctlr->input); + if (ret) + return ret; + + /* Set the default controller player leds based on controller number */ + mutex_lock(&joycon_input_num_mutex); + mutex_lock(&ctlr->output_mutex); + ret = joycon_set_player_leds(ctlr, 0, 0xF >> (4 - input_num)); + if (ret) + hid_warn(ctlr->hdev, "Failed to set leds; ret=%d\n", ret); + mutex_unlock(&ctlr->output_mutex); + if (++input_num > 4) + input_num = 1; + mutex_unlock(&joycon_input_num_mutex); + + return 0; +} + +/* Common handler for parsing inputs */ +static int joycon_ctlr_read_handler(struct joycon_ctlr *ctlr, u8 *data, + int size) +{ + int ret = 0; + + if (data[0] == JC_INPUT_SUBCMD_REPLY || data[0] == JC_INPUT_IMU_DATA || + data[0] == JC_INPUT_MCU_DATA) { + if (size >= 12) /* make sure it contains the input report */ + joycon_parse_report(ctlr, + (struct joycon_input_report *)data); + } + + return ret; +} + +static int joycon_ctlr_handle_event(struct joycon_ctlr *ctlr, u8 *data, + int size) +{ + int ret = 0; + bool match = 
false; + struct joycon_input_report *report; + + if (unlikely(mutex_is_locked(&ctlr->output_mutex)) && + ctlr->msg_type != JOYCON_MSG_TYPE_NONE) { + switch (ctlr->msg_type) { + case JOYCON_MSG_TYPE_USB: + if (size < 2) + break; + if (data[0] == JC_INPUT_USB_RESPONSE && + data[1] == ctlr->usb_ack_match) + match = true; + break; + case JOYCON_MSG_TYPE_SUBCMD: + if (size < sizeof(struct joycon_input_report) || + data[0] != JC_INPUT_SUBCMD_REPLY) + break; + report = (struct joycon_input_report *)data; + if (report->reply.id == ctlr->subcmd_ack_match) + match = true; + break; + default: + break; + } + + if (match) { + memcpy(ctlr->input_buf, data, + min(size, (int)JC_MAX_RESP_SIZE)); + ctlr->msg_type = JOYCON_MSG_TYPE_NONE; + ctlr->received_resp = true; + wake_up(&ctlr->wait); + + /* This message has been handled */ + return 1; + } + } + + if (ctlr->ctlr_state == JOYCON_CTLR_STATE_READ) + ret = joycon_ctlr_read_handler(ctlr, data, size); + + return ret; +} + +static int nintendo_hid_event(struct hid_device *hdev, + struct hid_report *report, u8 *raw_data, int size) +{ + struct joycon_ctlr *ctlr = hid_get_drvdata(hdev); + + if (size < 1) + return -EINVAL; + + return joycon_ctlr_handle_event(ctlr, raw_data, size); +} + +static int nintendo_hid_probe(struct hid_device *hdev, + const struct hid_device_id *id) +{ + int ret; + struct joycon_ctlr *ctlr; + + hid_dbg(hdev, "probe - start\n"); + + ctlr = devm_kzalloc(&hdev->dev, sizeof(*ctlr), GFP_KERNEL); + if (!ctlr) { + ret = -ENOMEM; + goto err; + } + + ctlr->hdev = hdev; + ctlr->ctlr_state = JOYCON_CTLR_STATE_INIT; + hid_set_drvdata(hdev, ctlr); + mutex_init(&ctlr->output_mutex); + init_waitqueue_head(&ctlr->wait); + + ret = hid_parse(hdev); + if (ret) { + hid_err(hdev, "HID parse failed\n"); + goto err; + } + + ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW); + if (ret) { + hid_err(hdev, "HW start failed\n"); + goto err; + } + + ret = hid_hw_open(hdev); + if (ret) { + hid_err(hdev, "cannot start hardware I/O\n"); + goto 
err_stop; + } + + hid_device_io_start(hdev); + + /* Initialize the controller */ + mutex_lock(&ctlr->output_mutex); + /* if handshake command fails, assume ble pro controller */ + if (hdev->product == USB_DEVICE_ID_NINTENDO_PROCON && + !joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE)) { + hid_dbg(hdev, "detected USB controller\n"); + /* set baudrate for improved latency */ + ret = joycon_send_usb(ctlr, JC_USB_CMD_BAUDRATE_3M); + if (ret) { + hid_err(hdev, "Failed to set baudrate; ret=%d\n", ret); + goto err_mutex; + } + /* handshake */ + ret = joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE); + if (ret) { + hid_err(hdev, "Failed handshake; ret=%d\n", ret); + goto err_mutex; + } + /* + * Set no timeout (to keep controller in USB mode). + * This doesn't send a response, so ignore the timeout. + */ + joycon_send_usb(ctlr, JC_USB_CMD_NO_TIMEOUT); + } + + /* get controller calibration data, and parse it */ + ret = joycon_request_calibration(ctlr); + if (ret) { + /* + * We can function with default calibration, but it may be + * inaccurate. Provide a warning, and continue on. 
+ */ + hid_warn(hdev, "Analog stick positions may be inaccurate\n"); + } + + /* Set the reporting mode to 0x30, which is the full report mode */ + ret = joycon_set_report_mode(ctlr); + if (ret) { + hid_err(hdev, "Failed to set report mode; ret=%d\n", ret); + goto err_mutex; + } + + mutex_unlock(&ctlr->output_mutex); + + ret = joycon_input_create(ctlr); + if (ret) { + hid_err(hdev, "Failed to create input device; ret=%d\n", ret); + goto err_close; + } + + ctlr->ctlr_state = JOYCON_CTLR_STATE_READ; + + hid_dbg(hdev, "probe - success\n"); + return 0; + +err_mutex: + mutex_unlock(&ctlr->output_mutex); +err_close: + hid_hw_close(hdev); +err_stop: + hid_hw_stop(hdev); +err: + hid_err(hdev, "probe - fail = %d\n", ret); + return ret; +} + +static void nintendo_hid_remove(struct hid_device *hdev) +{ + hid_dbg(hdev, "remove\n"); + hid_hw_close(hdev); + hid_hw_stop(hdev); +} + +static const struct hid_device_id nintendo_hid_devices[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_PROCON) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_PROCON) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_JOYCONL) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_JOYCONR) }, + { } +}; +MODULE_DEVICE_TABLE(hid, nintendo_hid_devices); + +static struct hid_driver nintendo_hid_driver = { + .name = "nintendo", + .id_table = nintendo_hid_devices, + .probe = nintendo_hid_probe, + .remove = nintendo_hid_remove, + .raw_event = nintendo_hid_event, +}; +module_hid_driver(nintendo_hid_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel J. Ogorchock "); +MODULE_DESCRIPTION("Driver for Nintendo Switch Controllers"); -- GitLab From 8c0adf270ac072b49b147babfa937ef6dfd40ad4 Mon Sep 17 00:00:00 2001 From: Siarhei Vishniakou Date: Tue, 17 Dec 2019 10:07:39 -0800 Subject: [PATCH 0290/1278] ANDROID: Enable HID_NINTENDO as y This config will enable the Nintendo Switch Pro controller driver. 
Change-Id: I50645a611566928e20a1afd4024f71803ed5fefa Signed-off-by: Siarhei Vishniakou Bug: 135136477 Test: tested via custom test app Test: atest NintendoSwitchProTest --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 8b5ebe05fe9f..4dd213de909e 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -361,6 +361,7 @@ CONFIG_HID_MAGICMOUSE=y CONFIG_HID_MICROSOFT=y CONFIG_HID_MONTEREY=y CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NINTENDO=y CONFIG_HID_NTRIG=y CONFIG_HID_ORTEK=y CONFIG_HID_PANTHERLORD=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 43c457eb9446..644a3391c246 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -385,6 +385,7 @@ CONFIG_HID_MAGICMOUSE=y CONFIG_HID_MICROSOFT=y CONFIG_HID_MONTEREY=y CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NINTENDO=y CONFIG_HID_NTRIG=y CONFIG_HID_ORTEK=y CONFIG_HID_PANTHERLORD=y -- GitLab From 25a911618ee5643f63c81fe69f9b67abd50c33f1 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 20 Feb 2020 17:50:12 -0800 Subject: [PATCH 0291/1278] ANDROID: cuttlefish_defconfig: Disable CONFIG_RT_GROUP_SCHED Disable CONFIG_RT_GROUP_SCHED to control RT cpu allowance globally. 
Bug: 149954332 Change-Id: I9487bd113502e52f19637e43109433cb13e97a23 Signed-off-by: Suren Baghdasaryan --- arch/arm64/configs/cuttlefish_defconfig | 1 - arch/x86/configs/x86_64_cuttlefish_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 4dd213de909e..70ee6a310ed6 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -12,7 +12,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_IKHEADERS=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 644a3391c246..a9979a58d769 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -17,7 +17,6 @@ CONFIG_CGROUPS=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y -- GitLab From 1cb89b322d016b6a227f38947369475fe2359b9d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 2 Mar 2020 09:30:14 +0100 Subject: [PATCH 0292/1278] ANDROID: fix build issue in security/selinux/avc.c This came up due to interaction with the backport of commit 6b6bc6205d98 ("selinux: wrap AVC state") and 4.14.172 causing a build error. 
Bug: 140252993 Signed-off-by: Greg Kroah-Hartman Change-Id: I894f4e8144c1ad28fbbea114ff830a730497b05d --- security/selinux/avc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index ce5465bb79e3..634b75e60669 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -895,7 +895,7 @@ static int avc_update_node(struct selinux_avc *avc, if (orig->ae.xp_node) { rc = avc_xperms_populate(node, orig->ae.xp_node); if (rc) { - avc_node_kill(node); + avc_node_kill(avc, node); goto out_unlock; } } -- GitLab From a13dee535e7c54f8e0ffcd934432eeb9e19f180d Mon Sep 17 00:00:00 2001 From: Evan Green Date: Mon, 2 Jul 2018 16:03:46 -0700 Subject: [PATCH 0293/1278] BACKPORT: loop: Add LOOP_SET_BLOCK_SIZE in compat ioctl This change adds LOOP_SET_BLOCK_SIZE as one of the supported ioctls in lo_compat_ioctl. It only takes an unsigned long argument, and in practice a 32-bit value works fine. Reviewed-by: Omar Sandoval Signed-off-by: Evan Green Signed-off-by: Jens Axboe (cherry picked from commit 9fea4b395260175de4016b42982f45a3e6e03d0b) [adelva: trivially backported around another backport conflict] Bug: 150622092 Change-Id: I98e14d3ab60ca638c3aebcea942e2207c738af95 Signed-off-by: Alistair Delva --- drivers/block/loop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 453e3728e657..ca912eecc74e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1615,6 +1615,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, arg = (unsigned long) compat_ptr(arg); case LOOP_SET_FD: case LOOP_CHANGE_FD: + case LOOP_SET_BLOCK_SIZE: case LOOP_SET_DIRECT_IO: err = lo_ioctl(bdev, mode, cmd, arg); break; -- GitLab From e5f689a1e13ab2f99e1488180d2d8099a9246582 Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Wed, 19 Feb 2020 10:16:33 +0900 Subject: [PATCH 0294/1278] FROMLIST: ufs: fix a bug on printing PRDT In some architectures, an unit of PRDTO and 
PRDTL in UFSHCI spec assume bytes, not double word specified in the spec. Without this patch, when the driver executes this, a kernel panic occurs because of abnormal accesses. Bug: 149797634 Link: https://lore.kernel.org/linux-scsi/20200218224307.8017-1-kwmad.kim@samsung.com/ Signed-off-by: Kiwoong Kim Signed-off-by: Greg Kroah-Hartman (cherry picked from android-mainline commit 8ec7bddd873f393ea94a3bc9dde9781e5e0fbfe1) Change-Id: I58ffa07535df8011b8d357135b80030833e725f9 Signed-off-by: Eric Biggers --- drivers/scsi/ufs/ufshcd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index d360d0669527..11ae1e689f41 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -403,8 +403,11 @@ void ufshcd_print_trs(struct ufs_hba *hba, unsigned long bitmap, bool pr_prdt) ufshcd_hex_dump("UPIU RSP: ", lrbp->ucd_rsp_ptr, sizeof(struct utp_upiu_rsp)); - prdt_length = le16_to_cpu( - lrbp->utr_descriptor_ptr->prd_table_length); + prdt_length = + le16_to_cpu(lrbp->utr_descriptor_ptr->prd_table_length); + if (hba->quirks & UFSHCD_QUIRK_PRDT_BYTE_GRAN) + prdt_length /= sizeof(struct ufshcd_sg_entry); + dev_err(hba->dev, "UPIU[%d] - PRDT - %d entries phys@0x%llx\n", tag, prdt_length, -- GitLab From 167db78a71a2518f53eb17871876393d704f3572 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 24 Feb 2020 14:37:12 -0800 Subject: [PATCH 0295/1278] ANDROID: scsi: ufs: allow ufs variants to override sg entry size Modify the UFSHCD core to allow 'struct ufshcd_sg_entry' to be variable-length. The default is the standard length, but variants can override ufs_hba::sg_entry_size with a larger value if there are vendor-specific fields following the standard ones. This is needed to support inline encryption with ufs-exynos (FMP). 
Bug: 129991660 Signed-off-by: Eric Biggers (cherry picked from android-mainline commit 8de80df7d7e407369d1f8c2971daf29348d1a643) (resolved trivial merge conflict in ufshcd_alloc_host()) Change-Id: I6ab9458d5c23331013e6b736d6fea378a6b5b86c Signed-off-by: Eric Biggers --- drivers/scsi/ufs/ufshcd.c | 32 +++++++++++++++++--------------- drivers/scsi/ufs/ufshcd.h | 2 ++ drivers/scsi/ufs/ufshci.h | 12 ++++++++++-- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 11ae1e689f41..74be1791e135 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -406,7 +406,7 @@ void ufshcd_print_trs(struct ufs_hba *hba, unsigned long bitmap, bool pr_prdt) prdt_length = le16_to_cpu(lrbp->utr_descriptor_ptr->prd_table_length); if (hba->quirks & UFSHCD_QUIRK_PRDT_BYTE_GRAN) - prdt_length /= sizeof(struct ufshcd_sg_entry); + prdt_length /= hba->sg_entry_size; dev_err(hba->dev, "UPIU[%d] - PRDT - %d entries phys@0x%llx\n", @@ -415,7 +415,7 @@ void ufshcd_print_trs(struct ufs_hba *hba, unsigned long bitmap, bool pr_prdt) if (pr_prdt) ufshcd_hex_dump("UPIU PRDT: ", lrbp->ucd_prdt_ptr, - sizeof(struct ufshcd_sg_entry) * prdt_length); + hba->sg_entry_size * prdt_length); } } @@ -1982,7 +1982,7 @@ ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) */ static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) { - struct ufshcd_sg_entry *prd_table; + struct ufshcd_sg_entry *prd; struct scatterlist *sg; struct scsi_cmnd *cmd; int sg_segments; @@ -1997,21 +1997,22 @@ static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) if (hba->quirks & UFSHCD_QUIRK_PRDT_BYTE_GRAN) lrbp->utr_descriptor_ptr->prd_table_length = cpu_to_le16((u16)(sg_segments * - sizeof(struct ufshcd_sg_entry))); + hba->sg_entry_size)); else lrbp->utr_descriptor_ptr->prd_table_length = cpu_to_le16((u16) (sg_segments)); - prd_table = (struct ufshcd_sg_entry *)lrbp->ucd_prdt_ptr; + prd = (struct 
ufshcd_sg_entry *)lrbp->ucd_prdt_ptr; scsi_for_each_sg(cmd, sg, sg_segments, i) { - prd_table[i].size = + prd->size = cpu_to_le32(((u32) sg_dma_len(sg))-1); - prd_table[i].base_addr = + prd->base_addr = cpu_to_le32(lower_32_bits(sg->dma_address)); - prd_table[i].upper_addr = + prd->upper_addr = cpu_to_le32(upper_32_bits(sg->dma_address)); - prd_table[i].reserved = 0; + prd->reserved = 0; + prd = (void *)prd + hba->sg_entry_size; } } else { lrbp->utr_descriptor_ptr->prd_table_length = 0; @@ -3247,7 +3248,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) size_t utmrdl_size, utrdl_size, ucdl_size; /* Allocate memory for UTP command descriptors */ - ucdl_size = (sizeof(struct utp_transfer_cmd_desc) * hba->nutrs); + ucdl_size = (sizeof_utp_transfer_cmd_desc(hba) * hba->nutrs); hba->ucdl_base_addr = dmam_alloc_coherent(hba->dev, ucdl_size, &hba->ucdl_dma_addr, @@ -3343,7 +3344,7 @@ static void ufshcd_host_memory_configure(struct ufs_hba *hba) prdt_offset = offsetof(struct utp_transfer_cmd_desc, prd_table); - cmd_desc_size = sizeof(struct utp_transfer_cmd_desc); + cmd_desc_size = sizeof_utp_transfer_cmd_desc(hba); cmd_desc_dma_addr = hba->ucdl_dma_addr; for (i = 0; i < hba->nutrs; i++) { @@ -3375,17 +3376,17 @@ static void ufshcd_host_memory_configure(struct ufs_hba *hba) hba->lrb[i].utr_descriptor_ptr = (utrdlp + i); hba->lrb[i].utrd_dma_addr = hba->utrdl_dma_addr + (i * sizeof(struct utp_transfer_req_desc)); - hba->lrb[i].ucd_req_ptr = - (struct utp_upiu_req *)(cmd_descp + i); + hba->lrb[i].ucd_req_ptr = (struct utp_upiu_req *)cmd_descp; hba->lrb[i].ucd_req_dma_addr = cmd_desc_element_addr; hba->lrb[i].ucd_rsp_ptr = - (struct utp_upiu_rsp *)cmd_descp[i].response_upiu; + (struct utp_upiu_rsp *)cmd_descp->response_upiu; hba->lrb[i].ucd_rsp_dma_addr = cmd_desc_element_addr + response_offset; hba->lrb[i].ucd_prdt_ptr = - (struct ufshcd_sg_entry *)cmd_descp[i].prd_table; + (struct ufshcd_sg_entry *)cmd_descp->prd_table; hba->lrb[i].ucd_prdt_dma_addr = 
cmd_desc_element_addr + prdt_offset; + cmd_descp = (void *)cmd_descp + cmd_desc_size; } } @@ -7921,6 +7922,7 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) hba->host = host; hba->dev = dev; *hba_handle = hba; + hba->sg_entry_size = sizeof(struct ufshcd_sg_entry); INIT_LIST_HEAD(&hba->clk_list_head); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index a755ad430a4f..73124e85dada 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -499,6 +499,7 @@ struct ufs_stats { * @ufs_version: UFS Version to which controller complies * @vops: pointer to variant specific operations * @priv: pointer to variant specific private data + * @sg_entry_size: size of struct ufshcd_sg_entry (may include variant fields) * @irq: Irq number of the controller * @active_uic_cmd: handle of active UIC command * @uic_cmd_mutex: mutex for uic command @@ -581,6 +582,7 @@ struct ufs_hba { struct ufs_hba_variant_ops *vops; void *priv; const struct ufs_hba_crypto_variant_ops *crypto_vops; + size_t sg_entry_size; unsigned int irq; bool is_irq_enabled; diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h index 97006b324ee7..8a032c557df9 100644 --- a/drivers/scsi/ufs/ufshci.h +++ b/drivers/scsi/ufs/ufshci.h @@ -417,20 +417,28 @@ struct ufshcd_sg_entry { __le32 upper_addr; __le32 reserved; __le32 size; + /* + * followed by variant-specific fields if + * hba->sg_entry_size != sizeof(struct ufshcd_sg_entry) + */ }; /** * struct utp_transfer_cmd_desc - UFS Command Descriptor structure * @command_upiu: Command UPIU Frame address * @response_upiu: Response UPIU Frame address - * @prd_table: Physical Region Descriptor + * @prd_table: Physical Region Descriptor: an array of SG_ALL struct + * ufshcd_sg_entry's. Variant-specific fields may be present after each. 
*/ struct utp_transfer_cmd_desc { u8 command_upiu[ALIGNED_UPIU_SIZE]; u8 response_upiu[ALIGNED_UPIU_SIZE]; - struct ufshcd_sg_entry prd_table[SG_ALL]; + u8 prd_table[]; }; +#define sizeof_utp_transfer_cmd_desc(hba) \ + (sizeof(struct utp_transfer_cmd_desc) + SG_ALL * (hba)->sg_entry_size) + /** * struct request_desc_header - Descriptor Header common to both UTRD and UTMRD * @dword0: Descriptor Header DW0 -- GitLab From a58373148e201645e9b23631845c695b9a66ce58 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 4 Mar 2020 13:58:07 -0800 Subject: [PATCH 0296/1278] ANDROID: cuttlefish: disable KPROBES Security teams from Google and Qualcomm request disabling due to "large attack vector". Bug: 149659981 Signed-off-by: Todd Kjos Change-Id: Iee4ace633f28cf4359cb0495be59546a8d3951cd --- arch/arm64/configs/cuttlefish_defconfig | 1 - arch/x86/configs/x86_64_cuttlefish_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 70ee6a310ed6..bd0df26e4416 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -38,7 +38,6 @@ CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set # CONFIG_SLAB_MERGE_DEFAULT is not set CONFIG_PROFILING=y -CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_CC_STACKPROTECTOR_STRONG=y CONFIG_LTO_CLANG=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index a9979a58d769..ad10c87900ca 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -38,7 +38,6 @@ CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=y -CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_CC_STACKPROTECTOR_STRONG=y CONFIG_REFCOUNT_FULL=y -- GitLab From a69ee7eebc234606238164f4f952b612c331507d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Feb 2020 14:02:32 -0800 Subject: [PATCH 0297/1278] FROMLIST: lib: 
test_stackinit.c: XFAIL switch variable init tests The tests for initializing a variable defined between a switch statement's test and its first "case" statement are currently not initialized in Clang[1] nor the proposed auto-initialization feature in GCC. We should retain the test (so that we can evaluate compiler fixes), but mark it as an "expected fail". The rest of the kernel source will be adjusted to avoid this corner case. Also disable -Wswitch-unreachable for the test so that the intentionally broken code won't trigger warnings for GCC (nor future Clang) when initialization happens this unhandled place. [1] https://bugs.llvm.org/show_bug.cgi?id=44916 Suggested-by: Alexander Potapenko Signed-off-by: Kees Cook [adelva: cherry picking to avoid boot test flakes] Bug: 144999193 Link: https://lore.kernel.org/lkml/202002191358.2897A07C6@keescook/ Change-Id: I0e691f2299ab42526ea306a92551a1188c469136 Signed-off-by: Alistair Delva --- lib/Makefile | 1 + lib/test_stackinit.c | 28 ++++++++++++++++++---------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index d200f404946b..d3ab213bc1f0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -78,6 +78,7 @@ obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o +CFLAGS_test_stackinit.o += $(call cc-disable-warning, switch-unreachable) obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c index b7e586d559ee..c589bfa120f3 100644 --- a/lib/test_stackinit.c +++ b/lib/test_stackinit.c @@ -92,8 +92,9 @@ static bool range_contains(char *haystack_start, size_t haystack_size, * @var_type: type to be tested for zeroing initialization * @which: is this a SCALAR, STRING, or STRUCT type? 
* @init_level: what kind of initialization is performed + * @xfail: is this test expected to fail? */ -#define DEFINE_TEST_DRIVER(name, var_type, which) \ +#define DEFINE_TEST_DRIVER(name, var_type, which, xfail) \ /* Returns 0 on success, 1 on failure. */ \ static noinline __init int test_ ## name (void) \ { \ @@ -139,13 +140,14 @@ static noinline __init int test_ ## name (void) \ for (sum = 0, i = 0; i < target_size; i++) \ sum += (check_buf[i] == 0xFF); \ \ - if (sum == 0) \ + if (sum == 0) { \ pr_info(#name " ok\n"); \ - else \ - pr_warn(#name " FAIL (uninit bytes: %d)\n", \ - sum); \ - \ - return (sum != 0); \ + return 0; \ + } else { \ + pr_warn(#name " %sFAIL (uninit bytes: %d)\n", \ + (xfail) ? "X" : "", sum); \ + return (xfail) ? 0 : 1; \ + } \ } #define DEFINE_TEST(name, var_type, which, init_level) \ /* no-op to force compiler into ignoring "uninitialized" vars */\ @@ -189,7 +191,7 @@ static noinline __init int leaf_ ## name(unsigned long sp, \ \ return (int)buf[0] | (int)buf[sizeof(buf) - 1]; \ } \ -DEFINE_TEST_DRIVER(name, var_type, which) +DEFINE_TEST_DRIVER(name, var_type, which, 0) /* Structure with no padding. */ struct test_packed { @@ -327,8 +329,14 @@ static noinline __init int leaf_switch_2_none(unsigned long sp, bool fill, return __leaf_switch_none(2, fill); } -DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR); -DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR); +/* + * These are expected to fail for most configurations because neither + * GCC nor Clang have a way to perform initialization of variables in + * non-code areas (i.e. in a switch statement before the first "case"). 
+ * https://bugs.llvm.org/show_bug.cgi?id=44916 + */ +DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR, 1); +DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, 1); static int __init test_stackinit_init(void) { -- GitLab From 0e97eac233791a20b588ba35b53073b9a915cc7d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 21 Feb 2020 19:01:24 +0100 Subject: [PATCH 0298/1278] UPSTREAM: binder: prevent UAF for binderfs devices On binder_release(), binder_defer_work(proc, BINDER_DEFERRED_RELEASE) is called which punts the actual cleanup operation to a workqueue. At some point, binder_deferred_func() will be called which will end up calling binder_deferred_release() which will retrieve and cleanup the binder_context attach to this struct binder_proc. If we trace back where this binder_context is attached to binder_proc we see that it is set in binder_open() and is taken from the struct binder_device it is associated with. This obviously assumes that the struct binder_device that context is attached to is _never_ freed. While that might be true for devtmpfs binder devices it is most certainly wrong for binderfs binder devices. So, assume binder_open() is called on a binderfs binder devices. We now stash away the struct binder_context associated with that struct binder_devices: proc->context = &binder_dev->context; /* binderfs stashes devices in i_private */ if (is_binderfs_device(nodp)) { binder_dev = nodp->i_private; info = nodp->i_sb->s_fs_info; binder_binderfs_dir_entry_proc = info->proc_log_dir; } else { . . . proc->context = &binder_dev->context; Now let's assume that the binderfs instance for that binder devices is shutdown via umount() and/or the mount namespace associated with it goes away. As long as there is still an fd open for that binderfs binder device things are fine. But let's assume we now close the last fd for that binderfs binder device. Now binder_release() is called and punts to the workqueue. 
Assume that the workqueue has quite a bit of stuff to do and doesn't get to cleaning up the struct binder_proc and the associated struct binder_context with it for that binderfs binder device right away. In the meantime, the VFS is killing the super block and is ultimately calling sb->evict_inode() which means it will call binderfs_evict_inode() which does: static void binderfs_evict_inode(struct inode *inode) { struct binder_device *device = inode->i_private; struct binderfs_info *info = BINDERFS_I(inode); clear_inode(inode); if (!S_ISCHR(inode->i_mode) || !device) return; mutex_lock(&binderfs_minors_mutex); --info->device_count; ida_free(&binderfs_minors, device->miscdev.minor); mutex_unlock(&binderfs_minors_mutex); kfree(device->context.name); kfree(device); } thereby freeing the struct binder_device including struct binder_context. Now the workqueue finally has time to get around to cleaning up struct binder_proc and is now trying to access the associated struct binder_context. Since it's already freed it will oops. Fix this by holding an additional reference to the inode that is only released once the workqueue is done cleaning up struct binder_proc. This is an easy alternative to introducing separate refcounting on struct binder_device which we can always do later if it becomes necessary. This is an alternative fix to 51d8a7eca677 ("binder: prevent UAF read in print_binder_transaction_log_entry()"). 
Fixes: 3ad20fe393b3 ("binder: implement binderfs") Fixes: 03e2e07e3814 ("binder: Make transaction_log available in binderfs") Related: 51d8a7eca677 ("binder: prevent UAF read in print_binder_transaction_log_entry()") Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner Acked-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 2669b8b0c798fbe1a31d49e07aa33233d469ad9b) Signed-off-by: Greg Kroah-Hartman Change-Id: I047a1e360b4146872bbc1d206dce7a864bb4588b --- drivers/android/binder.c | 5 ++++- drivers/android/binder_internal.h | 13 +++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 920b1ca35bf0..f4b32cd8b274 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5185,7 +5185,7 @@ static int binder_open(struct inode *nodp, struct file *filp) /* binderfs stashes devices in i_private */ if (is_binderfs_device(nodp)) { - binder_dev = nodp->i_private; + binder_dev = binderfs_device_get(nodp->i_private); info = nodp->i_sb->s_fs_info; binder_binderfs_dir_entry_proc = info->proc_log_dir; } else { @@ -5369,6 +5369,7 @@ static int binder_node_release(struct binder_node *node, int refs) static void binder_deferred_release(struct binder_proc *proc) { struct binder_context *context = proc->context; + struct binder_device *device; struct rb_node *n; int threads, nodes, incoming_refs, outgoing_refs, active_transactions; @@ -5450,6 +5451,8 @@ static void binder_deferred_release(struct binder_proc *proc) outgoing_refs, active_transactions); binder_proc_dec_tmpref(proc); + device = container_of(proc->context, struct binder_device, context); + binderfs_device_put(device); } static void binder_deferred_func(struct work_struct *work) diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index bd47f7f72075..8d0f73c39116 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -35,6 +35,19 @@ struct 
binder_device { struct inode *binderfs_inode; }; +static inline struct binder_device *binderfs_device_get(struct binder_device *dev) +{ + if (dev->binderfs_inode) + ihold(dev->binderfs_inode); + return dev; +} + +static inline void binderfs_device_put(struct binder_device *dev) +{ + if (dev->binderfs_inode) + iput(dev->binderfs_inode); +} + /** * binderfs_mount_opts - mount options for binderfs * @max: maximum number of allocatable binderfs binder devices -- GitLab From bbfd216527f774dbb1a1084d163af97050962fca Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 3 Mar 2020 17:43:40 +0100 Subject: [PATCH 0299/1278] UPSTREAM: binder: prevent UAF for binderfs devices II This is a necessary follow up to the first fix I proposed and we merged in 2669b8b0c79 ("binder: prevent UAF for binderfs devices"). I have been overly optimistic that the simple fix I proposed would work. But alas, ihold() + iput() won't work since the inodes won't survive the destruction of the superblock. So all we get with my prior fix is a different race with a tinier race-window but it doesn't solve the issue. Fwiw, the problem lies with generic_shutdown_super(). It even has this cozy Al-style comment: if (!list_empty(&sb->s_inodes)) { printk("VFS: Busy inodes after unmount of %s. " "Self-destruct in 5 seconds. Have a nice day...\n", sb->s_id); } On binder_release(), binder_defer_work(proc, BINDER_DEFERRED_RELEASE) is called which punts the actual cleanup operation to a workqueue. At some point, binder_deferred_func() will be called which will end up calling binder_deferred_release() which will retrieve and cleanup the binder_context attach to this struct binder_proc. If we trace back where this binder_context is attached to binder_proc we see that it is set in binder_open() and is taken from the struct binder_device it is associated with. This obviously assumes that the struct binder_device that context is attached to is _never_ freed. 
While that might be true for devtmpfs binder devices it is most certainly wrong for binderfs binder devices. So, assume binder_open() is called on a binderfs binder device. We now stash away the struct binder_context associated with that struct binder_device: proc->context = &binder_dev->context; /* binderfs stashes devices in i_private */ if (is_binderfs_device(nodp)) { binder_dev = nodp->i_private; info = nodp->i_sb->s_fs_info; binder_binderfs_dir_entry_proc = info->proc_log_dir; } else { . . . proc->context = &binder_dev->context; Now let's assume that the binderfs instance for that binder device is shut down via umount() and/or the mount namespace associated with it goes away. As long as there is still an fd open for that binderfs binder device things are fine. But let's assume we now close the last fd for that binderfs binder device. Now binder_release() is called and punts to the workqueue. Assume that the workqueue has quite a bit of stuff to do and doesn't get to cleaning up the struct binder_proc and the associated struct binder_context with it for that binderfs binder device right away. In the meantime, the VFS is killing the super block and is ultimately calling sb->evict_inode() which means it will call binderfs_evict_inode() which does: static void binderfs_evict_inode(struct inode *inode) { struct binder_device *device = inode->i_private; struct binderfs_info *info = BINDERFS_I(inode); clear_inode(inode); if (!S_ISCHR(inode->i_mode) || !device) return; mutex_lock(&binderfs_minors_mutex); --info->device_count; ida_free(&binderfs_minors, device->miscdev.minor); mutex_unlock(&binderfs_minors_mutex); kfree(device->context.name); kfree(device); } thereby freeing the struct binder_device including struct binder_context. Now the workqueue finally has time to get around to cleaning up struct binder_proc and is now trying to access the associated struct binder_context. Since it's already freed it will oops.
Fix this by introducing a refcount on binder devices. This is an alternative fix to 51d8a7eca677 ("binder: prevent UAF read in print_binder_transaction_log_entry()"). Fixes: 3ad20fe393b3 ("binder: implement binderfs") Fixes: 2669b8b0c798 ("binder: prevent UAF for binderfs devices") Fixes: 03e2e07e3814 ("binder: Make transaction_log available in binderfs") Related: 51d8a7eca677 ("binder: prevent UAF read in print_binder_transaction_log_entry()") Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner Acked-by: Todd Kjos Link: https://lore.kernel.org/r/20200303164340.670054-1-christian.brauner@ubuntu.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f0fe2c0f050d31babcad7d65f1d550d462a40064) Signed-off-by: Greg Kroah-Hartman Change-Id: I54a6c910002bf1077ba0c34c48fb96f4ffbf012e Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 12 +++++++++--- drivers/android/binder_internal.h | 15 ++------------- drivers/android/binderfs.c | 7 +++++-- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index f4b32cd8b274..7bd038edc1f7 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5185,13 +5185,14 @@ static int binder_open(struct inode *nodp, struct file *filp) /* binderfs stashes devices in i_private */ if (is_binderfs_device(nodp)) { - binder_dev = binderfs_device_get(nodp->i_private); + binder_dev = nodp->i_private; info = nodp->i_sb->s_fs_info; binder_binderfs_dir_entry_proc = info->proc_log_dir; } else { binder_dev = container_of(filp->private_data, struct binder_device, miscdev); } + refcount_inc(&binder_dev->ref); proc->context = &binder_dev->context; binder_alloc_init(&proc->alloc); @@ -5388,6 +5389,12 @@ static void binder_deferred_release(struct binder_proc *proc) context->binder_context_mgr_node = NULL; } mutex_unlock(&context->context_mgr_node_lock); + device = container_of(proc->context, struct binder_device, context); + if
(refcount_dec_and_test(&device->ref)) { + kfree(context->name); + kfree(device); + } + proc->context = NULL; binder_inner_proc_lock(proc); /* * Make sure proc stays alive after we @@ -5451,8 +5458,6 @@ static void binder_deferred_release(struct binder_proc *proc) outgoing_refs, active_transactions); binder_proc_dec_tmpref(proc); - device = container_of(proc->context, struct binder_device, context); - binderfs_device_put(device); } static void binder_deferred_func(struct work_struct *work) @@ -6061,6 +6066,7 @@ static int __init init_binder_device(const char *name) binder_device->miscdev.minor = MISC_DYNAMIC_MINOR; binder_device->miscdev.name = name; + refcount_set(&binder_device->ref, 1); binder_device->context.binder_context_mgr_uid = INVALID_UID; binder_device->context.name = name; mutex_init(&binder_device->context.context_mgr_node_lock); diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 8d0f73c39116..8d0bffcc9e27 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -33,21 +34,9 @@ struct binder_device { struct miscdevice miscdev; struct binder_context context; struct inode *binderfs_inode; + refcount_t ref; }; -static inline struct binder_device *binderfs_device_get(struct binder_device *dev) -{ - if (dev->binderfs_inode) - ihold(dev->binderfs_inode); - return dev; -} - -static inline void binderfs_device_put(struct binder_device *dev) -{ - if (dev->binderfs_inode) - iput(dev->binderfs_inode); -} - /** * binderfs_mount_opts - mount options for binderfs * @max: maximum number of allocatable binderfs binder devices diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index a4f73af4fa4e..b0bad544052f 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -154,6 +154,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode, if (!name) goto err; + 
refcount_set(&device->ref, 1); device->binderfs_inode = inode; device->context.binder_context_mgr_uid = INVALID_UID; device->context.name = name; @@ -257,8 +258,10 @@ static void binderfs_evict_inode(struct inode *inode) ida_remove(&binderfs_minors, device->miscdev.minor); mutex_unlock(&binderfs_minors_mutex); - kfree(device->context.name); - kfree(device); + if (refcount_dec_and_test(&device->ref)) { + kfree(device->context.name); + kfree(device); + } } /** -- GitLab From 4583acebefaf9923617bfa7c9741db69d8ea6a24 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 9 Mar 2020 14:22:07 -0700 Subject: [PATCH 0300/1278] FROMLIST: f2fs: fix wrong check on F2FS_IOC_FSSETXATTR This fixes the incorrect failure when enabling project quota on casefold-enabled file. Cc: Daniel Rosenberg Cc: kernel-team@android.com Signed-off-by: Jaegeuk Kim Link: https://lore.kernel.org/linux-f2fs-devel/20200307002440.GA7944@google.com/ Signed-off-by: Daniel Rosenberg Change-Id: Ib2f8db5e20aabf5a4e6e9ebbc071166e8bd3220f --- fs/f2fs/file.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f5724c62334f..f77fa8430d6f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1801,12 +1801,15 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) { struct f2fs_inode_info *fi = F2FS_I(inode); + u32 masked_flags = fi->i_flags & mask; + + f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask)); /* Is it quota file? 
Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) return -EPERM; - if ((iflags ^ fi->i_flags) & F2FS_CASEFOLD_FL) { + if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) { if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) return -EOPNOTSUPP; if (!f2fs_empty_dir(inode)) @@ -1820,9 +1823,9 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) return -EINVAL; } - if ((iflags ^ fi->i_flags) & F2FS_COMPR_FL) { + if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { if (S_ISREG(inode->i_mode) && - (fi->i_flags & F2FS_COMPR_FL || i_size_read(inode) || + (masked_flags & F2FS_COMPR_FL || i_size_read(inode) || F2FS_HAS_BLOCKS(inode))) return -EINVAL; if (iflags & F2FS_NOCOMP_FL) @@ -1839,8 +1842,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) set_compress_context(inode); } } - if ((iflags ^ fi->i_flags) & F2FS_NOCOMP_FL) { - if (fi->i_flags & F2FS_COMPR_FL) + if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) { + if (masked_flags & F2FS_COMPR_FL) return -EINVAL; } -- GitLab From 864ef06aaef5b164669a5292eb822c454c46dd69 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 29 Feb 2020 04:54:53 +0530 Subject: [PATCH 0301/1278] iwlwifi: pcie: fix rb_allocator workqueue allocation commit 8188a18ee2e48c9a7461139838048363bfce3fef upstream We don't handle failures in the rb_allocator workqueue allocation correctly. To fix that, move the code earlier so the cleanup is easier and we don't have to undo all the interrupt allocations in this case. 
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho [Ajay: Modified to apply on v4.19.y and v4.14.y] Signed-off-by: Ajay Kaher Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index dffa697d71e0..8a074a516fb2 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3023,6 +3023,15 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, spin_lock_init(&trans_pcie->reg_lock); mutex_init(&trans_pcie->mutex); init_waitqueue_head(&trans_pcie->ucode_write_waitq); + + trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!trans_pcie->rba.alloc_wq) { + ret = -ENOMEM; + goto out_free_trans; + } + INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); + trans_pcie->tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page); if (!trans_pcie->tso_hdr_page) { ret = -ENOMEM; @@ -3195,10 +3204,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, trans_pcie->inta_mask = CSR_INI_SET_MASK; } - trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", - WQ_HIGHPRI | WQ_UNBOUND, 1); - INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); - #ifdef CONFIG_IWLWIFI_PCIE_RTPM trans->runtime_pm_mode = IWL_PLAT_PM_MODE_D0I3; #else @@ -3211,6 +3216,8 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, iwl_pcie_free_ict(trans); out_no_pci: free_percpu(trans_pcie->tso_hdr_page); + destroy_workqueue(trans_pcie->rba.alloc_wq); +out_free_trans: iwl_trans_free(trans); return ERR_PTR(ret); } -- GitLab From da79135a05ca0b601cf4ce485cf7c2b37636955a Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Fri, 28 Feb 2020 00:57:38 +0000 Subject: [PATCH 0302/1278] netfilter: nf_conntrack: resolve clash for matching conntracks [ Upstream 
commit ed07d9a021df6da53456663a76999189badc432a ] This patch enables the clash resolution for NAT (disabled in "590b52e10d41") if clashing conntracks match (i.e. both tuples are equal) and a protocol allows it. The clash might happen for a connectionless protocol (e.g. UDP) when two threads write in parallel to the same socket and consequent calls to "get_unique_tuple" return the same tuples (incl. reply tuples). In this case it is safe to perform the resolution, as the losing CT describes the same mangling as the winning CT, so no modifications to the packet are needed, and the result of rules traversal for the loser's packet stays valid. Signed-off-by: Martynas Pumputis Signed-off-by: Pablo Neira Ayuso Signed-off-by: Andy Strohman Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_core.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2e65271bed01..a79f5a89cab1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -543,6 +543,18 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, net_eq(net, nf_ct_net(ct)); } +static inline bool +nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2) +{ + return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) && + nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple, + &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) && + nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) && + nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) && + net_eq(nf_ct_net(ct1), nf_ct_net(ct2)); +} + /* caller must hold rcu readlock and none of the nf_conntrack_locks */ static void nf_ct_gc_expired(struct nf_conn *ct) { @@ -736,19 +748,21 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, /* This is the conntrack entry already in hashes that won race.
*/ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); const struct nf_conntrack_l4proto *l4proto; + enum ip_conntrack_info oldinfo; + struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto->allow_clash && - ((ct->status & IPS_NAT_DONE_MASK) == 0) && !nf_ct_is_dying(ct) && atomic_inc_not_zero(&ct->ct_general.use)) { - enum ip_conntrack_info oldinfo; - struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); - - nf_ct_acct_merge(ct, ctinfo, loser_ct); - nf_conntrack_put(&loser_ct->ct_general); - nf_ct_set(skb, ct, oldinfo); - return NF_ACCEPT; + if (((ct->status & IPS_NAT_DONE_MASK) == 0) || + nf_ct_match(ct, loser_ct)) { + nf_ct_acct_merge(ct, ctinfo, loser_ct); + nf_conntrack_put(&loser_ct->ct_general); + nf_ct_set(skb, ct, oldinfo); + return NF_ACCEPT; + } + nf_ct_put(ct); } NF_CT_STAT_INC(net, drop); return NF_DROP; -- GitLab From 2afeb56881da66c09e8d2b5f6965eabeb75ce834 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 28 Feb 2020 16:48:15 -0800 Subject: [PATCH 0303/1278] ext4: fix potential race between online resizing and write operations commit 1d0c3924a92e69bfa91163bda83c12a994b4d106 upstream. During an online resize an array of pointers to buffer heads gets replaced so it can get enlarged. If there is a racing block allocation or deallocation which uses the old array, and the old array has gotten reused this can lead to a GPF or some other random kernel memory getting modified. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443 Link: https://lore.kernel.org/r/20200221053458.730016-2-tytso@mit.edu Reported-by: Suraj Jitindar Singh Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.14.x Signed-off-by: Sasha Levin --- fs/ext4/balloc.c | 14 +++++++++--- fs/ext4/ext4.h | 20 +++++++++++++++++- fs/ext4/resize.c | 55 ++++++++++++++++++++++++++++++++++++++---------- fs/ext4/super.c | 31 +++++++++++++++++++-------- 4 files changed, 96 insertions(+), 24 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 70266a3355dc..fb38f20f869e 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -280,6 +280,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, ext4_group_t ngroups = ext4_get_groups_count(sb); struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); + struct buffer_head *bh_p; if (block_group >= ngroups) { ext4_error(sb, "block_group >= groups_count - block_group = %u," @@ -290,7 +291,14 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); - if (!sbi->s_group_desc[group_desc]) { + bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc); + /* + * sbi_array_rcu_deref returns with rcu unlocked, this is ok since + * the pointer being dereferenced won't be dereferenced again. By + * looking at the usage in add_new_gdb() the value isn't modified, + * just the pointer, and so it remains valid. 
+ */ + if (!bh_p) { ext4_error(sb, "Group descriptor not loaded - " "block_group = %u, group_desc = %u, desc = %u", block_group, group_desc, offset); @@ -298,10 +306,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, } desc = (struct ext4_group_desc *)( - (__u8 *)sbi->s_group_desc[group_desc]->b_data + + (__u8 *)bh_p->b_data + offset * EXT4_DESC_SIZE(sb)); if (bh) - *bh = sbi->s_group_desc[group_desc]; + *bh = bh_p; return desc; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b162f602c430..94f4f6d55c1a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1382,7 +1382,7 @@ struct ext4_sb_info { loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ - struct buffer_head **s_group_desc; + struct buffer_head * __rcu *s_group_desc; unsigned int s_mount_opt; unsigned int s_mount_opt2; unsigned int s_mount_flags; @@ -1556,6 +1556,23 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } +/* + * Returns: sbi->field[index] + * Used to access an array element from the following sbi fields which require + * rcu protection to avoid dereferencing an invalid pointer due to reassignment + * - s_group_desc + * - s_group_info + * - s_flex_group + */ +#define sbi_array_rcu_deref(sbi, field, index) \ +({ \ + typeof(*((sbi)->field)) _v; \ + rcu_read_lock(); \ + _v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \ + rcu_read_unlock(); \ + _v; \ +}) + /* * Inode dynamic state flags */ @@ -2569,6 +2586,7 @@ extern int ext4_generic_delete_entry(handle_t *handle, extern bool ext4_empty_dir(struct inode *inode); /* resize.c */ +extern void ext4_kvfree_array_rcu(void *to_free); extern int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input); extern int ext4_group_extend(struct super_block *sb, diff --git 
a/fs/ext4/resize.c b/fs/ext4/resize.c index 4f7cd78d0364..16e3830da548 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -17,6 +17,33 @@ #include "ext4_jbd2.h" +struct ext4_rcu_ptr { + struct rcu_head rcu; + void *ptr; +}; + +static void ext4_rcu_ptr_callback(struct rcu_head *head) +{ + struct ext4_rcu_ptr *ptr; + + ptr = container_of(head, struct ext4_rcu_ptr, rcu); + kvfree(ptr->ptr); + kfree(ptr); +} + +void ext4_kvfree_array_rcu(void *to_free) +{ + struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); + + if (ptr) { + ptr->ptr = to_free; + call_rcu(&ptr->rcu, ext4_rcu_ptr_callback); + return; + } + synchronize_rcu(); + kvfree(to_free); +} + int ext4_resize_begin(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -545,8 +572,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb, brelse(gdb); goto out; } - memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, - gdb->b_size); + memcpy(gdb->b_data, sbi_array_rcu_deref(sbi, + s_group_desc, j)->b_data, gdb->b_size); set_buffer_uptodate(gdb); err = ext4_handle_dirty_metadata(handle, NULL, gdb); @@ -854,13 +881,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, } brelse(dind); - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); err = ext4_handle_dirty_super(handle, sb); @@ -904,9 +933,11 @@ static int add_new_gdb_meta_bg(struct super_block *sb, return err; } - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, 
o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; BUFFER_TRACE(gdb_bh, "get_write_access"); @@ -917,9 +948,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb, return err; } - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); return err; } @@ -1183,7 +1214,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, * use non-sparse filesystems anymore. This is already checked above. */ if (gdb_off) { - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); @@ -1265,7 +1297,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, /* * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). 
*/ - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num); /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)(gdb_bh->b_data + gdb_off * EXT4_DESC_SIZE(sb)); @@ -1492,7 +1524,8 @@ static int ext4_flex_group_add(struct super_block *sb, for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); if (old_gdb == gdb_bh->b_blocknr) continue; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 09b443709bca..b14a0c5638e7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -900,6 +900,7 @@ static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + struct buffer_head **group_desc; int aborted = 0; int i, err; @@ -931,9 +932,12 @@ static void ext4_put_super(struct super_block *sb) if (!sb_rdonly(sb)) ext4_commit_super(sb, 1); + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < sbi->s_gdb_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); + brelse(group_desc[i]); + kvfree(group_desc); + rcu_read_unlock(); kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); @@ -3489,7 +3493,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) { struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); char *orig_data = kstrdup(data, GFP_KERNEL); - struct buffer_head *bh; + struct buffer_head *bh, **group_desc; struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); ext4_fsblk_t block; @@ -4104,9 +4108,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } } - sbi->s_group_desc = kvmalloc(db_count * + 
rcu_assign_pointer(sbi->s_group_desc, + kvmalloc_array(db_count, sizeof(struct buffer_head *), - GFP_KERNEL); + GFP_KERNEL)); if (sbi->s_group_desc == NULL) { ext4_msg(sb, KERN_ERR, "not enough memory"); ret = -ENOMEM; @@ -4122,14 +4127,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } for (i = 0; i < db_count; i++) { + struct buffer_head *bh; + block = descriptor_loc(sb, logical_sb_block, i); - sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); - if (!sbi->s_group_desc[i]) { + bh = sb_bread_unmovable(sb, block); + if (!bh) { ext4_msg(sb, KERN_ERR, "can't read group descriptor %d", i); db_count = i; goto failed_mount2; } + rcu_read_lock(); + rcu_dereference(sbi->s_group_desc)[i] = bh; + rcu_read_unlock(); } sbi->s_gdb_count = db_count; if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { @@ -4521,9 +4531,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < db_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); + brelse(group_desc[i]); + kvfree(group_desc); + rcu_read_unlock(); failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); -- GitLab From 4d830e847378b37294652da7dd4746019991f37a Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 28 Feb 2020 16:48:17 -0800 Subject: [PATCH 0304/1278] ext4: fix potential race between s_flex_groups online resizing and access commit 7c990728b99ed6fbe9c75fc202fce1172d9916da upstream. During an online resize an array of s_flex_groups structures gets replaced so it can get enlarged. If there is a concurrent access to the array and this memory has been reused then this can lead to an invalid memory access. The s_flex_group array has been converted into an array of pointers rather than an array of structures. 
This is to ensure that the information contained in the structures cannot get out of sync during a resize due to an accessor updating the value in the old structure after it has been copied but before the array pointer is updated. Since the structures themselves are no longer copied but only the pointers to them this case is mitigated. Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443 Link: https://lore.kernel.org/r/20200221053458.730016-4-tytso@mit.edu Signed-off-by: Suraj Jitindar Singh Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.14.x Signed-off-by: Sasha Levin --- fs/ext4/ext4.h | 2 +- fs/ext4/ialloc.c | 23 +++++++++------ fs/ext4/mballoc.c | 9 ++++-- fs/ext4/resize.c | 7 +++-- fs/ext4/super.c | 72 ++++++++++++++++++++++++++++++++--------------- 5 files changed, 76 insertions(+), 37 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 94f4f6d55c1a..8b55abdd7249 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1492,7 +1492,7 @@ struct ext4_sb_info { unsigned int s_extent_max_zeroout_kb; unsigned int s_log_groups_per_flex; - struct flex_groups *s_flex_groups; + struct flex_groups * __rcu *s_flex_groups; ext4_group_t s_flex_groups_allocated; /* workqueue for reserved extent conversions (buffered io) */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 2f46564d3fca..2a480c0ef1bc 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -333,11 +333,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) percpu_counter_inc(&sbi->s_freeinodes_counter); if (sbi->s_log_groups_per_flex) { - ext4_group_t f = ext4_flex_group(sbi, block_group); + struct flex_groups *fg; - atomic_inc(&sbi->s_flex_groups[f].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, + ext4_flex_group(sbi, block_group)); + atomic_inc(&fg->free_inodes); if (is_directory) - atomic_dec(&sbi->s_flex_groups[f].used_dirs); + atomic_dec(&fg->used_dirs); } BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); fatal = ext4_handle_dirty_metadata(handle,
NULL, bh2); @@ -378,12 +380,13 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, int flex_size, struct orlov_stats *stats) { struct ext4_group_desc *desc; - struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; if (flex_size > 1) { - stats->free_inodes = atomic_read(&flex_group[g].free_inodes); - stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); - stats->used_dirs = atomic_read(&flex_group[g].used_dirs); + struct flex_groups *fg = sbi_array_rcu_deref(EXT4_SB(sb), + s_flex_groups, g); + stats->free_inodes = atomic_read(&fg->free_inodes); + stats->free_clusters = atomic64_read(&fg->free_clusters); + stats->used_dirs = atomic_read(&fg->used_dirs); return; } @@ -1062,7 +1065,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, if (sbi->s_log_groups_per_flex) { ext4_group_t f = ext4_flex_group(sbi, group); - atomic_inc(&sbi->s_flex_groups[f].used_dirs); + atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups, + f)->used_dirs); } } if (ext4_has_group_desc_csum(sb)) { @@ -1085,7 +1089,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, if (sbi->s_log_groups_per_flex) { flex_group = ext4_flex_group(sbi, group); - atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); + atomic_dec(&sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_inodes); } inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3ba9a4ae4eac..fb865216edb9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3052,7 +3052,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); atomic64_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -4947,7 +4948,8 @@ void ext4_free_blocks(handle_t *handle, struct inode 
*inode, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(count_clusters, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) @@ -5092,7 +5094,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } ext4_mb_unload_buddy(&e4b); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 16e3830da548..d42f7471fd5b 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1425,11 +1425,14 @@ static void ext4_update_super(struct super_block *sb, percpu_counter_read(&sbi->s_freeclusters_counter)); if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) { ext4_group_t flex_group; + struct flex_groups *fg; + flex_group = ext4_flex_group(sbi, group_data[0].group); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), - &sbi->s_flex_groups[flex_group].free_clusters); + &fg->free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, - &sbi->s_flex_groups[flex_group].free_inodes); + &fg->free_inodes); } /* diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b14a0c5638e7..f1c1c180d267 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -901,6 +901,7 @@ static void ext4_put_super(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct buffer_head **group_desc; + struct flex_groups **flex_groups; int aborted = 0; int i, err; @@ -937,8 +938,13 @@ static void ext4_put_super(struct super_block *sb) for (i = 0; i < sbi->s_gdb_count; i++) brelse(group_desc[i]); kvfree(group_desc); + flex_groups = 
rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } rcu_read_unlock(); - kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -2231,8 +2237,8 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) { struct ext4_sb_info *sbi = EXT4_SB(sb); - struct flex_groups *new_groups; - int size; + struct flex_groups **old_groups, **new_groups; + int size, i; if (!sbi->s_log_groups_per_flex) return 0; @@ -2241,22 +2247,37 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) if (size <= sbi->s_flex_groups_allocated) return 0; - size = roundup_pow_of_two(size * sizeof(struct flex_groups)); - new_groups = kvzalloc(size, GFP_KERNEL); + new_groups = kvzalloc(roundup_pow_of_two(size * + sizeof(*sbi->s_flex_groups)), GFP_KERNEL); if (!new_groups) { - ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", - size / (int) sizeof(struct flex_groups)); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex group pointers", size); return -ENOMEM; } - - if (sbi->s_flex_groups) { - memcpy(new_groups, sbi->s_flex_groups, - (sbi->s_flex_groups_allocated * - sizeof(struct flex_groups))); - kvfree(sbi->s_flex_groups); + for (i = sbi->s_flex_groups_allocated; i < size; i++) { + new_groups[i] = kvzalloc(roundup_pow_of_two( + sizeof(struct flex_groups)), + GFP_KERNEL); + if (!new_groups[i]) { + for (i--; i >= sbi->s_flex_groups_allocated; i--) + kvfree(new_groups[i]); + kvfree(new_groups); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex groups", size); + return -ENOMEM; + } } - sbi->s_flex_groups = new_groups; - sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); + rcu_read_lock(); + old_groups = 
rcu_dereference(sbi->s_flex_groups); + if (old_groups) + memcpy(new_groups, old_groups, + (sbi->s_flex_groups_allocated * + sizeof(struct flex_groups *))); + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_flex_groups, new_groups); + sbi->s_flex_groups_allocated = size; + if (old_groups) + ext4_kvfree_array_rcu(old_groups); return 0; } @@ -2264,6 +2285,7 @@ static int ext4_fill_flex_info(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = NULL; + struct flex_groups *fg; ext4_group_t flex_group; int i, err; @@ -2281,12 +2303,11 @@ static int ext4_fill_flex_info(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); flex_group = ext4_flex_group(sbi, i); - atomic_add(ext4_free_inodes_count(sb, gdp), - &sbi->s_flex_groups[flex_group].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); + atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes); atomic64_add(ext4_free_group_clusters(sb, gdp), - &sbi->s_flex_groups[flex_group].free_clusters); - atomic_add(ext4_used_dirs_count(sb, gdp), - &sbi->s_flex_groups[flex_group].used_dirs); + &fg->free_clusters); + atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs); } return 1; @@ -3496,6 +3517,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) struct buffer_head *bh, **group_desc; struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + struct flex_groups **flex_groups; ext4_fsblk_t block; ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; @@ -4494,8 +4516,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_unregister_li_request(sb); failed_mount6: ext4_mb_release(sb); - if (sbi->s_flex_groups) - kvfree(sbi->s_flex_groups); + rcu_read_lock(); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + 
} + rcu_read_unlock(); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); -- GitLab From 453cc452833f4225443689e89e0500c419697b9c Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 18 Feb 2020 19:08:50 -0800 Subject: [PATCH 0305/1278] ext4: fix potential race between s_group_info online resizing and access [ Upstream commit df3da4ea5a0fc5d115c90d5aa6caa4dd433750a7 ] During an online resize an array of pointers to s_group_info gets replaced so it can get enlarged. If there is a concurrent access to the array in ext4_get_group_info() and this memory has been reused then this can lead to an invalid memory access. Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443 Link: https://lore.kernel.org/r/20200221053458.730016-3-tytso@mit.edu Signed-off-by: Suraj Jitindar Singh Signed-off-by: Theodore Ts'o Reviewed-by: Balbir Singh Cc: stable@kernel.org Signed-off-by: Sasha Levin --- fs/ext4/ext4.h | 8 ++++---- fs/ext4/mballoc.c | 52 +++++++++++++++++++++++++++++++---------------- 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8b55abdd7249..4aa0f8f7d9a0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1442,7 +1442,7 @@ struct ext4_sb_info { #endif /* for buddy allocator */ - struct ext4_group_info ***s_group_info; + struct ext4_group_info ** __rcu *s_group_info; struct inode *s_buddy_cache; spinlock_t s_md_lock; unsigned short *s_mb_offsets; @@ -2832,13 +2832,13 @@ static inline struct ext4_group_info *ext4_get_group_info(struct super_block *sb, ext4_group_t group) { - struct ext4_group_info ***grp_info; + struct ext4_group_info **grp_info; long indexv, indexh; BUG_ON(group >= EXT4_SB(sb)->s_groups_count); - grp_info = EXT4_SB(sb)->s_group_info; indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); - return grp_info[indexv][indexh]; + grp_info = 
sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); + return grp_info[indexh]; } /* diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index fb865216edb9..745a89d30a57 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2389,7 +2389,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned size; - struct ext4_group_info ***new_groupinfo; + struct ext4_group_info ***old_groupinfo, ***new_groupinfo; size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); @@ -2402,13 +2402,16 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); return -ENOMEM; } - if (sbi->s_group_info) { - memcpy(new_groupinfo, sbi->s_group_info, + rcu_read_lock(); + old_groupinfo = rcu_dereference(sbi->s_group_info); + if (old_groupinfo) + memcpy(new_groupinfo, old_groupinfo, sbi->s_group_info_size * sizeof(*sbi->s_group_info)); - kvfree(sbi->s_group_info); - } - sbi->s_group_info = new_groupinfo; + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_group_info, new_groupinfo); sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); + if (old_groupinfo) + ext4_kvfree_array_rcu(old_groupinfo); ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", sbi->s_group_info_size); return 0; @@ -2420,6 +2423,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, { int i; int metalen = 0; + int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb); struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_info **meta_group_info; struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2438,12 +2442,12 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, "for a buddy group"); goto exit_meta_group_info; } - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = - meta_group_info; + rcu_read_lock(); + rcu_dereference(sbi->s_group_info)[idx] = meta_group_info; + 
rcu_read_unlock(); } - meta_group_info = - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; + meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx); i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS); @@ -2491,8 +2495,13 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, exit_group_info: /* If a meta_group_info table has been allocated, release it now */ if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { - kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL; + struct ext4_group_info ***group_info; + + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); + kfree(group_info[idx]); + group_info[idx] = NULL; + rcu_read_unlock(); } exit_meta_group_info: return -ENOMEM; @@ -2505,6 +2514,7 @@ static int ext4_mb_init_backend(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); int err; struct ext4_group_desc *desc; + struct ext4_group_info ***group_info; struct kmem_cache *cachep; err = ext4_mb_alloc_groupinfo(sb, ngroups); @@ -2539,11 +2549,16 @@ static int ext4_mb_init_backend(struct super_block *sb) while (i-- > 0) kmem_cache_free(cachep, ext4_get_group_info(sb, i)); i = sbi->s_group_info_size; + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); while (i-- > 0) - kfree(sbi->s_group_info[i]); + kfree(group_info[i]); + rcu_read_unlock(); iput(sbi->s_buddy_cache); err_freesgi: - kvfree(sbi->s_group_info); + rcu_read_lock(); + kvfree(rcu_dereference(sbi->s_group_info)); + rcu_read_unlock(); return -ENOMEM; } @@ -2733,7 +2748,7 @@ int ext4_mb_release(struct super_block *sb) ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int num_meta_group_infos; - struct ext4_group_info *grinfo; + struct ext4_group_info *grinfo, ***group_info; struct ext4_sb_info *sbi = EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2751,9 +2766,12 
@@ int ext4_mb_release(struct super_block *sb) num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); for (i = 0; i < num_meta_group_infos; i++) - kfree(sbi->s_group_info[i]); - kvfree(sbi->s_group_info); + kfree(group_info[i]); + kvfree(group_info); + rcu_read_unlock(); } kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); -- GitLab From 45004f2ef0837d7bc1098ee12cc88f4e94c598e6 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 23 Dec 2019 10:42:19 -0600 Subject: [PATCH 0306/1278] ipmi:ssif: Handle a possible NULL pointer reference [ Upstream commit 6b8526d3abc02c08a2f888e8c20b7ac9e5776dfe ] In error cases a NULL can be passed to memcpy. The length will always be zero, so it doesn't really matter, but go ahead and check for NULL, anyway, to be more precise and avoid static analysis errors. Reported-by: kbuild test robot Signed-off-by: Corey Minyard Signed-off-by: Sasha Levin --- drivers/char/ipmi/ipmi_ssif.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 941bffd9b49c..0146bc3252c5 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -750,10 +750,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, flags = ipmi_ssif_lock_cond(ssif_info, &oflags); msg = ssif_info->curr_msg; if (msg) { + if (data) { + if (len > IPMI_MAX_MSG_LENGTH) + len = IPMI_MAX_MSG_LENGTH; + memcpy(msg->rsp, data, len); + } else { + len = 0; + } msg->rsp_size = len; - if (msg->rsp_size > IPMI_MAX_MSG_LENGTH) - msg->rsp_size = IPMI_MAX_MSG_LENGTH; - memcpy(msg->rsp, data, msg->rsp_size); ssif_info->curr_msg = NULL; } -- GitLab From 313810964cacf555c88c77b47f837d5cf232992b Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Tue, 21 Jan 2020 11:18:48 -0800 Subject: [PATCH 0307/1278] drm/msm: Set dma maximum segment size for mdss [ Upstream commit 
db735fc4036bbe1fbe606819b5f0ff26cc76cdff ] Turning on CONFIG_DMA_API_DEBUG_SG results in the following error: [ 12.078665] msm ae00000.mdss: DMA-API: mapping sg segment longer than device claims to support [len=3526656] [max=65536] [ 12.089870] WARNING: CPU: 6 PID: 334 at /mnt/host/source/src/third_party/kernel/v4.19/kernel/dma/debug.c:1301 debug_dma_map_sg+0x1dc/0x318 [ 12.102655] Modules linked in: joydev [ 12.106442] CPU: 6 PID: 334 Comm: frecon Not tainted 4.19.0 #2 [ 12.112450] Hardware name: Google Cheza (rev3+) (DT) [ 12.117566] pstate: 60400009 (nZCv daif +PAN -UAO) [ 12.122506] pc : debug_dma_map_sg+0x1dc/0x318 [ 12.126995] lr : debug_dma_map_sg+0x1dc/0x318 [ 12.131487] sp : ffffff800cc3ba80 [ 12.134913] x29: ffffff800cc3ba80 x28: 0000000000000000 [ 12.140395] x27: 0000000000000004 x26: 0000000000000004 [ 12.145868] x25: ffffff8008e55b18 x24: 0000000000000000 [ 12.151337] x23: 00000000ffffffff x22: ffffff800921c000 [ 12.156809] x21: ffffffc0fa75b080 x20: ffffffc0f7195090 [ 12.162280] x19: ffffffc0f1c53280 x18: 0000000000000000 [ 12.167749] x17: 0000000000000000 x16: 0000000000000000 [ 12.173218] x15: 0000000000000000 x14: 0720072007200720 [ 12.178689] x13: 0720072007200720 x12: 0720072007200720 [ 12.184161] x11: 0720072007200720 x10: 0720072007200720 [ 12.189641] x9 : ffffffc0f1fc6b60 x8 : 0000000000000000 [ 12.195110] x7 : ffffff8008132ce0 x6 : 0000000000000000 [ 12.200585] x5 : 0000000000000000 x4 : ffffff8008134734 [ 12.206058] x3 : ffffff800cc3b830 x2 : ffffffc0f1fc6240 [ 12.211532] x1 : 25045a74f48a7400 x0 : 25045a74f48a7400 [ 12.217006] Call trace: [ 12.219535] debug_dma_map_sg+0x1dc/0x318 [ 12.223671] get_pages+0x19c/0x20c [ 12.227177] msm_gem_fault+0x64/0xfc [ 12.230874] __do_fault+0x3c/0x140 [ 12.234383] __handle_mm_fault+0x70c/0xdb8 [ 12.238603] handle_mm_fault+0xac/0xc4 [ 12.242473] do_page_fault+0x1bc/0x3d4 [ 12.246342] do_translation_fault+0x54/0x88 [ 12.250652] do_mem_abort+0x60/0xf0 [ 12.254250] el0_da+0x20/0x24 [ 12.257317] irq event stamp: 
67260 [ 12.260828] hardirqs last enabled at (67259): [] console_unlock+0x214/0x608 [ 12.269693] hardirqs last disabled at (67260): [] do_debug_exception+0x5c/0x178 [ 12.278820] softirqs last enabled at (67256): [] __do_softirq+0x4d4/0x520 [ 12.287510] softirqs last disabled at (67249): [] irq_exit+0xa8/0x100 [ 12.295742] ---[ end trace e63cfc40c313ffab ]--- The root of the problem is that the default segment size for sgt is (UINT_MAX & PAGE_MASK), and the default segment size for device dma is 64K. As such, if you compare the 2, you would deduce that the sg segment will overflow the device's capacity. In reality, the hardware can accommodate the larger sg segments, it's just not initializing its max segment properly. This patch initializes the max segment size for the mdss device, which gets rid of that pesky warning. Reported-by: Stephen Boyd Tested-by: Stephen Boyd Tested-by: Sai Prakash Ranjan Reviewed-by: Rob Clark Signed-off-by: Sean Paul Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20200121111813.REPOST.1.I92c66a35fb13f368095b05287bdabdbe88ca6922@changeid Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/msm_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 77c45a2ebd83..d9c0687435a0 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -420,6 +420,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) if (ret) goto fail; + if (!dev->dma_parms) { + dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), + GFP_KERNEL); + if (!dev->dma_parms) + return -ENOMEM; + } + dma_set_max_seg_size(dev, DMA_BIT_MASK(32)); + msm_gem_shrinker_init(ddev); switch (get_mdp_ver(pdev)) { -- GitLab From de020833f8f4bd95a4e73ef59a7b01ce3c78447b Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Wed, 5 Feb 2020 14:15:58 -0500 Subject: [PATCH 0308/1278] dax: pass NOWAIT flag to iomap_apply [ Upstream commit 
96222d53842dfe54869ec4e1b9d4856daf9105a2 ] fstests generic/471 reports a failure when run with MOUNT_OPTIONS="-o dax". The reason is that the initial pwrite to an empty file with the RWF_NOWAIT flag set does not return -EAGAIN. It turns out that dax_iomap_rw doesn't pass that flag through to iomap_apply. With this patch applied, generic/471 passes for me. Signed-off-by: Jeff Moyer Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/x49r1z86e1d.fsf@segfault.boston.devel.redhat.com Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- fs/dax.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index ddb4981ae32e..34a55754164f 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1057,6 +1057,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, lockdep_assert_held(&inode->i_rwsem); } + if (iocb->ki_flags & IOCB_NOWAIT) + flags |= IOMAP_NOWAIT; + while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, iter, dax_iomap_actor); -- GitLab From 518b947b807c6a1c34c07a90840ae91915628992 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 31 Jan 2020 13:12:58 +0200 Subject: [PATCH 0309/1278] mac80211: consider more elements in parsing CRC [ Upstream commit a04564c99bb4a92f805a58e56b2d22cc4978f152 ] We only use the parsing CRC for checking if a beacon changed, and elements with an ID > 63 cannot be represented in the filter. Thus, like we did before with WMM and Cisco vendor elements, just statically add these forgotten items to the CRC: - WLAN_EID_VHT_OPERATION - WLAN_EID_OPMODE_NOTIF I guess that in most cases when VHT/HE operation change, the HT operation also changed, and so the change was picked up, but we did notice that pure operating mode notification changes were ignored. 
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-22-luca@coelho.fi [restrict to VHT for the mac80211 branch] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/util.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 81f120466c38..cd3cdd1a0b57 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -944,16 +944,22 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elem_parse_failed = true; break; case WLAN_EID_VHT_OPERATION: - if (elen >= sizeof(struct ieee80211_vht_operation)) + if (elen >= sizeof(struct ieee80211_vht_operation)) { elems->vht_operation = (void *)pos; - else - elem_parse_failed = true; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = true; break; case WLAN_EID_OPMODE_NOTIF: - if (elen > 0) + if (elen > 0) { elems->opmode_notif = pos; - else - elem_parse_failed = true; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = true; break; case WLAN_EID_MESH_ID: elems->mesh_id = pos; -- GitLab From 6306a605b36139c883900548f189fc04a4a1615c Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 3 Feb 2020 10:56:50 +0000 Subject: [PATCH 0310/1278] cfg80211: check wiphy driver existence for drvinfo report [ Upstream commit bfb7bac3a8f47100ebe7961bd14e924c96e21ca7 ] When preparing ethtool drvinfo, check if wiphy driver is defined before dereferencing it. Driver may not exist, e.g. if wiphy is attached to a virtual platform device. 
Signed-off-by: Sergey Matyukevich Link: https://lore.kernel.org/r/20200203105644.28875-1-sergey.matyukevich.os@quantenna.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/ethtool.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index a9c0f368db5d..24e18405cdb4 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -7,9 +7,13 @@ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct device *pdev = wiphy_dev(wdev->wiphy); - strlcpy(info->driver, wiphy_dev(wdev->wiphy)->driver->name, - sizeof(info->driver)); + if (pdev->driver) + strlcpy(info->driver, pdev->driver->name, + sizeof(info->driver)); + else + strlcpy(info->driver, "N/A", sizeof(info->driver)); strlcpy(info->version, init_utsname()->release, sizeof(info->version)); -- GitLab From 83b3ac0eebb521124a097a613df7fe50efcb8f8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sat, 8 Feb 2020 15:50:36 +0100 Subject: [PATCH 0311/1278] qmi_wwan: re-add DW5821e pre-production variant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 88bf54603f6f2c137dfee1abf6436ceac3528d2d ] Commit f25e1392fdb5 removed the support for the pre-production variant of the Dell DW5821e to avoid probing another USB interface unnecessarily. However, the pre-production samples are found in the wild, and this lack of support is causing problems for users of such samples. It is therefore necessary to support both variants. Matching on both interfaces 0 and 1 is not expected to cause any problem with either variant, as only the QMI function will be probed successfully on either. 
Interface 1 will be rejected based on the HID class for the production variant: T: Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 2 P: Vendor=413c ProdID=81d7 Rev=03.18 S: Manufacturer=DELL S: Product=DW5821e Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option And interface 0 will be rejected based on too few endpoints for the pre-production variant: T: Bus=01 Lev=02 Prnt=02 Port=03 Cnt=03 Dev#= 7 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 2 P: Vendor=413c ProdID=81d7 Rev= 3.18 S: Manufacturer=DELL S: Product=DW5821e Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver= I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option Fixes: f25e1392fdb5 ("qmi_wwan: fix interface number for DW5821e production firmware") Link: https://whrl.pl/Rf0vNk Reported-by: Lars Melin Cc: Aleksander Morgado Signed-off-by: Bjørn Mork Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index db70d4c5778a..08215a9f6145 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1287,6 +1287,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ + {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e preproduction config */ {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ -- GitLab From c4806d0e9b56b3836478f60a2acca971028c4684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sat, 8 Feb 2020 16:55:04 +0100 Subject: [PATCH 0312/1278] qmi_wwan: unconditionally reject 2 ep interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 00516d13d4cfa56ce39da144db2dbf08b09b9357 ] We have been using the fact that the QMI and DIAG functions usually are the only ones with class/subclass/protocol being ff/ff/ff on Quectel modems. This has allowed us to match the QMI function without knowing the exact interface number, which can vary depending on firmware configuration. The ability to silently reject the DIAG function, which is usually handled by the option driver, is important for this method to work. This is done based on the knowledge that it has exactly 2 bulk endpoints. QMI function control interfaces will have either 3 or 1 endpoint. This rule is universal so the quirk condition can be removed. 
The fixed layouts known from the Gobi1k and Gobi2k modems have been gradually replaced by more dynamic layouts, and many vendors now use configurable layouts without changing device IDs. Renaming the class/subclass/protocol matching macro makes it more obvious that this is now not Quectel specific anymore. Cc: Kristian Evensen Cc: Aleksander Morgado Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 42 ++++++++++++++------------------------ 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 08215a9f6145..189715438328 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -63,7 +63,6 @@ enum qmi_wwan_flags { enum qmi_wwan_quirks { QMI_WWAN_QUIRK_DTR = 1 << 0, /* needs "set DTR" request */ - QMI_WWAN_QUIRK_QUECTEL_DYNCFG = 1 << 1, /* check num. endpoints */ }; struct qmimux_hdr { @@ -853,16 +852,6 @@ static const struct driver_info qmi_wwan_info_quirk_dtr = { .data = QMI_WWAN_QUIRK_DTR, }; -static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = { - .description = "WWAN/QMI device", - .flags = FLAG_WWAN | FLAG_SEND_ZLP, - .bind = qmi_wwan_bind, - .unbind = qmi_wwan_unbind, - .manage_power = qmi_wwan_manage_power, - .rx_fixup = qmi_wwan_rx_fixup, - .data = QMI_WWAN_QUIRK_DTR | QMI_WWAN_QUIRK_QUECTEL_DYNCFG, -}; - #define HUAWEI_VENDOR_ID 0x12D1 /* map QMI/wwan function by a fixed interface number */ @@ -883,14 +872,18 @@ static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = { #define QMI_GOBI_DEVICE(vend, prod) \ QMI_FIXED_INTF(vend, prod, 0) -/* Quectel does not use fixed interface numbers on at least some of their - * devices. We need to check the number of endpoints to ensure that we bind to - * the correct interface. +/* Many devices have QMI and DIAG functions which are distinguishable + * from other vendor specific functions by class, subclass and + * protocol all being 0xff. 
The DIAG function has exactly 2 endpoints + * and is silently rejected when probed. + * + * This makes it possible to match dynamically numbered QMI functions + * as seen on e.g. many Quectel modems. */ -#define QMI_QUIRK_QUECTEL_DYNCFG(vend, prod) \ +#define QMI_MATCH_FF_FF_FF(vend, prod) \ USB_DEVICE_AND_INTERFACE_INFO(vend, prod, USB_CLASS_VENDOR_SPEC, \ USB_SUBCLASS_VENDOR_SPEC, 0xff), \ - .driver_info = (unsigned long)&qmi_wwan_info_quirk_quectel_dyncfg + .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr static const struct usb_device_id products[] = { /* 1. CDC ECM like devices match on the control interface */ @@ -996,10 +989,10 @@ static const struct usb_device_id products[] = { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7), .driver_info = (unsigned long)&qmi_wwan_info, }, - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ /* 3. Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ @@ -1379,7 +1372,6 @@ static int qmi_wwan_probe(struct usb_interface *intf, { struct usb_device_id *id = (struct usb_device_id *)prod; struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc; - const struct driver_info *info; /* Workaround to enable dynamic IDs. This disables usbnet * blacklisting functionality. Which, if required, can be @@ -1415,12 +1407,8 @@ static int qmi_wwan_probe(struct usb_interface *intf, * different. 
Ignore the current interface if the number of endpoints * equals the number for the diag interface (two). */ - info = (void *)id->driver_info; - - if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) { - if (desc->bNumEndpoints == 2) - return -ENODEV; - } + if (desc->bNumEndpoints == 2) + return -ENODEV; return usbnet_probe(intf, id); } -- GitLab From 670f5c64a37751cd54fa298d38f8adc9d00a7d9d Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:40 +0000 Subject: [PATCH 0313/1278] net: ena: fix potential crash when rxfh key is NULL [ Upstream commit 91a65b7d3ed8450f31ab717a65dcb5f9ceb5ab02 ] When ethtool -X is called without an hkey, ena_com_fill_hash_function() is called with key=NULL, which is passed to memcpy causing a crash. This commit fixes this issue by checking key is not NULL. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_com.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 10e6053f6671..f2dde1ab424a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -2069,15 +2069,16 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, switch (func) { case ENA_ADMIN_TOEPLITZ: - if (key_len > sizeof(hash_key->key)) { - pr_err("key len (%hu) is bigger than the max supported (%zu)\n", - key_len, sizeof(hash_key->key)); - return -EINVAL; + if (key) { + if (key_len != sizeof(hash_key->key)) { + pr_err("key len (%hu) doesn't equal the supported size (%zu)\n", + key_len, sizeof(hash_key->key)); + return -EINVAL; + } + memcpy(hash_key->key, key, key_len); + rss->hash_init_val = init_val; + hash_key->keys_num = key_len >> 2; } - - memcpy(hash_key->key, key, key_len); - 
rss->hash_init_val = init_val; - hash_key->keys_num = key_len >> 2; break; case ENA_ADMIN_CRC32: rss->hash_init_val = init_val; -- GitLab From 482c613e4fb841a149e117555d59255dd1de03d1 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:41 +0000 Subject: [PATCH 0314/1278] net: ena: fix uses of round_jiffies() [ Upstream commit 2a6e5fa2f4c25b66c763428a3e65363214946931 ] From the documentation of round_jiffies(): "Rounds a time delta in the future (in jiffies) up or down to (approximately) full seconds. This is useful for timers for which the exact time they fire does not matter too much, as long as they fire approximately every X seconds. By rounding these timers to whole seconds, all such timers will fire at the same time, rather than at various times spread out. The goal of this is to have the CPU wake up less, which saves power." There are 2 parts to this patch: ================================ Part 1: ------- In our case we need timer_service to be called approximately every X=1 seconds, and the exact time does not matter, so using round_jiffies() is the right way to go. Therefore we add round_jiffies() to the mod_timer() in ena_timer_service(). Part 2: ------- round_jiffies() is used in check_for_missing_keep_alive() when getting the jiffies of the expiration of the keep_alive timeout. Here it is actually a mistake to use round_jiffies() because we want the exact time when keep_alive should expire and not an approximate rounded time, which can cause early, false positive, timeouts. Therefore we remove round_jiffies() in the calculation of keep_alive_expired in check_for_missing_keep_alive(). Fixes: 82ef30f13be0 ("net: ena: add hardware hints capability to the driver") Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Arthur Kiyanovski Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 518ff393a026..d9ece9ac6f53 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2803,8 +2803,8 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter) if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) return; - keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies + - adapter->keep_alive_timeout); + keep_alive_expired = adapter->last_keep_alive_jiffies + + adapter->keep_alive_timeout; if (unlikely(time_is_before_jiffies(keep_alive_expired))) { netif_err(adapter, drv, adapter->netdev, "Keep alive watchdog timeout.\n"); @@ -2906,7 +2906,7 @@ static void ena_timer_service(unsigned long data) } /* Reset the timer */ - mod_timer(&adapter->timer_service, jiffies + HZ); + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); } static int ena_calc_io_queue_num(struct pci_dev *pdev, -- GitLab From d13a5be10364c7985ea4a987a64b5f00768f1ec8 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:42 +0000 Subject: [PATCH 0315/1278] net: ena: add missing ethtool TX timestamping indication [ Upstream commit cf6d17fde93bdda23c9b02dd5906a12bf8c55209 ] Current implementation of the driver calls skb_tx_timestamp() to add a software tx timestamp to the skb; however, the software-transmit capability is not reported in ethtool -T. This commit updates the ethtool structure to report the software-transmit capability in ethtool -T using the standard ethtool_op_get_ts_info(). This function reports all software timestamping capabilities (tx and rx), as well as setting phc_index = -1. phc_index is the index of the PTP hardware clock device that will be used for hardware timestamps.
Since we don't have such a device in ENA, using the default -1 value is the correct setting. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Ezequiel Lara Gomez Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index a2f02c23fe14..c58ad6190951 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -816,6 +816,7 @@ static const struct ethtool_ops ena_ethtool_ops = { .get_channels = ena_get_channels, .get_tunable = ena_get_tunable, .set_tunable = ena_set_tunable, + .get_ts_info = ethtool_op_get_ts_info, }; void ena_set_ethtool_ops(struct net_device *netdev) -- GitLab From 304b62b5f4c816b67080c1e21ce3a2f56952a3cb Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:43 +0000 Subject: [PATCH 0316/1278] net: ena: fix incorrect default RSS key [ Upstream commit 0d1c3de7b8c78a5e44b74b62ede4a63629f5d811 ] Bug description: When running "ethtool -x " the key shows up as all zeros. When we use "ethtool -X hfunc toeplitz hkey " to set the key and then try to retrieve it using "ethtool -x " then we return the correct key because we return the one we saved. Bug cause: We don't fetch the key from the device but instead return the key that we have saved internally which is by default set to zero upon allocation. Fix: This commit fixes the issue by initializing the key to a random value using netdev_rss_key_fill(). Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_com.c | 15 +++++++++++++++ drivers/net/ethernet/amazon/ena/ena_com.h | 1 + 2 files changed, 16 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index f2dde1ab424a..c5df80f31005 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -843,6 +843,19 @@ static int ena_com_get_feature(struct ena_com_dev *ena_dev, 0); } +static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev) +{ + struct ena_admin_feature_rss_flow_hash_control *hash_key = + (ena_dev->rss).hash_key; + + netdev_rss_key_fill(&hash_key->key, sizeof(hash_key->key)); + /* The key is stored in the device in u32 array + * as well as the API requires the key to be passed in this + * format. Thus the size of our array should be divided by 4 + */ + hash_key->keys_num = sizeof(hash_key->key) / sizeof(u32); +} + static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) { struct ena_rss *rss = &ena_dev->rss; @@ -2403,6 +2416,8 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size) if (unlikely(rc)) goto err_hash_key; + ena_com_hash_key_fill_default_key(ena_dev); + rc = ena_com_hash_ctrl_init(ena_dev); if (unlikely(rc)) goto err_hash_ctrl; diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 7b784f8a06a6..90fce5c0ca48 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -42,6 +42,7 @@ #include #include #include +#include #include "ena_common_defs.h" #include "ena_admin_defs.h" -- GitLab From 092a63301b92e0452433df1f6d6e314d6944cf1f Mon Sep 17 00:00:00 2001 From: Sameeh Jubran Date: Tue, 11 Feb 2020 15:17:45 +0000 Subject: [PATCH 0317/1278] net: ena: rss: fix failure to get indirection table [ Upstream commit 0c8923c0a64fb5d14bebb9a9065d2dc25ac5e600 ] On old hardware, getting / 
setting the hash function is not supported while getting / setting the indirection table is. This commit enables us to still show the indirection table on older hardwares by setting the hash function and key to NULL. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index c58ad6190951..7ca7bade1c09 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -660,7 +660,21 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, if (rc) return rc; + /* We call this function in order to check if the device + * supports getting/setting the hash function. + */ rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key); + + if (rc) { + if (rc == -EOPNOTSUPP) { + key = NULL; + hfunc = NULL; + rc = 0; + } + + return rc; + } + if (rc) return rc; -- GitLab From ef28d60a09846f564f2bae01fa8820c028016c26 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:46 +0000 Subject: [PATCH 0318/1278] net: ena: rss: store hash function as values and not bits [ Upstream commit 4844470d472d660c26149ad764da2406adb13423 ] The device receives, stores and retrieves the hash function value as bits and not as their enum value. The bug: * In ena_com_set_hash_function() we set cmd.u.flow_hash_func.selected_func to the bit value of rss->hash_func. (1 << rss->hash_func) * In ena_com_get_hash_function() we retrieve the hash function and store it's bit value in rss->hash_func.
(Now the bit value of rss->hash_func is stored in rss->hash_func instead of it's enum value) The fix: This commit fixes the issue by converting the retrieved hash function values from the device to the matching enum value of the set bit using ffs(). ffs() finds the first set bit's index in a word. Since the function returns 1 for the LSB's index, we need to subtract 1 from the returned value (note that BIT(0) is 1). Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_com.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index c5df80f31005..552db5399503 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -2128,7 +2128,11 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev, if (unlikely(rc)) return rc; - rss->hash_func = get_resp.u.flow_hash_func.selected_func; + /* ffs() returns 1 in case the lsb is set */ + rss->hash_func = ffs(get_resp.u.flow_hash_func.selected_func); + if (rss->hash_func) + rss->hash_func--; + if (func) *func = rss->hash_func; -- GitLab From 135c5af29b5a4a7da0949251afb722079afb5a77 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:47 +0000 Subject: [PATCH 0319/1278] net: ena: fix incorrectly saving queue numbers when setting RSS indirection table [ Upstream commit 92569fd27f5cb0ccbdf7c7d70044b690e89a0277 ] The indirection table has the indices of the Rx queues. When we store it during set indirection operation, we convert the indices to our internal representation of the indices. Our internal representation of the indices is: even indices for Tx and uneven indices for Rx, where every Tx/Rx pair are in a consecutive order starting from 0. 
For example if the driver has 3 queues (3 for Tx and 3 for Rx) then the indices are as follows: 0 1 2 3 4 5 Tx Rx Tx Rx Tx Rx The BUG: The issue is that when we satisfy a get request for the indirection table, we don't convert the indices back to the original representation. The FIX: Simply apply the inverse function for the indices of the indirection table after we set it. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 24 ++++++++++++++++++- drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 ++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 7ca7bade1c09..9601ddc27427 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -648,6 +648,28 @@ static u32 ena_get_rxfh_key_size(struct net_device *netdev) return ENA_HASH_KEY_SIZE; } +static int ena_indirection_table_get(struct ena_adapter *adapter, u32 *indir) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int i, rc; + + if (!indir) + return 0; + + rc = ena_com_indirect_table_get(ena_dev, indir); + if (rc) + return rc; + + /* Our internal representation of the indices is: even indices + * for Tx and uneven indices for Rx. 
We need to convert the Rx + * indices to be consecutive + */ + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) + indir[i] = ENA_IO_RXQ_IDX_TO_COMBINED_IDX(indir[i]); + + return rc; +} + static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { @@ -656,7 +678,7 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 func; int rc; - rc = ena_com_indirect_table_get(adapter->ena_dev, indir); + rc = ena_indirection_table_get(adapter, indir); if (rc) return rc; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 3404376c28ca..5a72267b858b 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -113,6 +113,8 @@ #define ENA_IO_TXQ_IDX(q) (2 * (q)) #define ENA_IO_RXQ_IDX(q) (2 * (q) + 1) +#define ENA_IO_TXQ_IDX_TO_COMBINED_IDX(q) ((q) / 2) +#define ENA_IO_RXQ_IDX_TO_COMBINED_IDX(q) (((q) - 1) / 2) #define ENA_MGMNT_IRQ_IDX 0 #define ENA_IO_IRQ_FIRST_IDX 1 -- GitLab From ce54eb55ee12733fa2cd63c5479d548b69e45ed8 Mon Sep 17 00:00:00 2001 From: Sameeh Jubran Date: Tue, 11 Feb 2020 15:17:50 +0000 Subject: [PATCH 0320/1278] net: ena: ethtool: use correct value for crc32 hash [ Upstream commit 886d2089276e40d460731765083a741c5c762461 ] Up till kernel 4.11 there was no enum defined for crc32 hash in ethtool, thus the xor enum was used for supporting crc32. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 9601ddc27427..22238f25e071 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -705,7 +705,7 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, func = ETH_RSS_HASH_TOP; break; case ENA_ADMIN_CRC32: - func = ETH_RSS_HASH_XOR; + func = ETH_RSS_HASH_CRC32; break; default: netif_err(adapter, drv, netdev, @@ -751,7 +751,7 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, case ETH_RSS_HASH_TOP: func = ENA_ADMIN_TOEPLITZ; break; - case ETH_RSS_HASH_XOR: + case ETH_RSS_HASH_CRC32: func = ENA_ADMIN_CRC32; break; default: -- GitLab From 3f4d2bba01b6ac0fd36e61be06a7b844f18bd89c Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:51 +0000 Subject: [PATCH 0321/1278] net: ena: ena-com.c: prevent NULL pointer dereference [ Upstream commit c207979f5ae10ed70aff1bb13f39f0736973de99 ] comp_ctx can be NULL in a very rare case when an admin command is executed during the execution of ena_remove(). The bug scenario is as follows: * ena_destroy_device() sets the comp_ctx to be NULL * An admin command is executed before executing unregister_netdev(), this can still happen because our device can still receive callbacks from the netdev infrastructure such as ethtool commands. * When attempting to access the comp_ctx, the bug occurs since it's set to NULL Fix: Added a check that comp_ctx is not NULL Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_com.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 552db5399503..31e0cf144201 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -199,6 +199,11 @@ static inline void comp_ctxt_release(struct ena_com_admin_queue *queue, static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue, u16 command_id, bool capture) { + if (unlikely(!queue->comp_ctx)) { + pr_err("Completion context is NULL\n"); + return NULL; + } + if (unlikely(command_id >= queue->q_depth)) { pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n", command_id, queue->q_depth); -- GitLab From 125ccba46eca5150045418cd7ee88a9aa4daa8e4 Mon Sep 17 00:00:00 2001 From: Frank Sorenson Date: Wed, 12 Feb 2020 15:31:48 -0600 Subject: [PATCH 0322/1278] cifs: Fix mode output in debugging statements [ Upstream commit f52aa79df43c4509146140de0241bc21a4a3b4c7 ] A number of the debug statements output file or directory mode in hex. Change these to print using octal. 
Signed-off-by: Frank Sorenson Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/cifsacl.c | 4 ++-- fs/cifs/connect.c | 2 +- fs/cifs/inode.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index b98436f5c7c7..73d428af97a9 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -603,7 +603,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode, ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) *pmode |= (S_IXUGO & (*pbits_to_set)); - cifs_dbg(NOISY, "access flags 0x%x mode now 0x%x\n", flags, *pmode); + cifs_dbg(NOISY, "access flags 0x%x mode now %04o\n", flags, *pmode); return; } @@ -632,7 +632,7 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use, if (mode & S_IXUGO) *pace_flags |= SET_FILE_EXEC_RIGHTS; - cifs_dbg(NOISY, "mode: 0x%x, access flags now 0x%x\n", + cifs_dbg(NOISY, "mode: %04o, access flags now 0x%x\n", mode, *pace_flags); return; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 6e5ecf70996a..697edc92dff2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3521,7 +3521,7 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, cifs_sb->mnt_gid = pvolume_info->linux_gid; cifs_sb->mnt_file_mode = pvolume_info->file_mode; cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; - cifs_dbg(FYI, "file mode: 0x%hx dir mode: 0x%hx\n", + cifs_dbg(FYI, "file mode: %04ho dir mode: %04ho\n", cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); cifs_sb->actimeo = pvolume_info->actimeo; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a35c14105906..3a10d405362e 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1581,7 +1581,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) struct TCP_Server_Info *server; char *full_path; - cifs_dbg(FYI, "In cifs_mkdir, mode = 0x%hx inode = 0x%p\n", + cifs_dbg(FYI, "In cifs_mkdir, mode = %04ho inode = 0x%p\n", mode, inode); cifs_sb = CIFS_SB(inode->i_sb); -- GitLab From 
ffe09a48bd96094786c303f1cf29b32a29aa6dc8 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Thu, 13 Feb 2020 13:16:16 +0000 Subject: [PATCH 0323/1278] cfg80211: add missing policy for NL80211_ATTR_STATUS_CODE [ Upstream commit ea75080110a4c1fa011b0a73cb8f42227143ee3e ] The nl80211_policy is missing for NL80211_ATTR_STATUS_CODE attribute. As a result, for strictly validated commands, it's assumed to not be supported. Signed-off-by: Sergey Matyukevich Link: https://lore.kernel.org/r/20200213131608.10541-2-sergey.matyukevich.os@quantenna.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index df8c5312f26a..b248578aeb7b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -321,6 +321,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 }, [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG }, [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, + [NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, [NL80211_ATTR_PID] = { .type = NLA_U32 }, -- GitLab From 28178a88942819c19d9bc15e3a0703f60de1f2bb Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 11 Jan 2019 13:45:15 +0100 Subject: [PATCH 0324/1278] sysrq: Restore original console_loglevel when sysrq disabled commit 075e1a0c50f59ea210561d0d0fedbd945615df78 upstream. The sysrq header line is printed with an increased loglevel to provide users some positive feedback. The original loglevel is not restored when the sysrq operation is disabled. This bug was introduced in 2.6.12 (pre-git-history) by the commit ("Allow admin to enable only some of the Magic-Sysrq functions"). 
Signed-off-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Reviewed-by: Steven Rostedt (VMware) Cc: Tommi Rantala Signed-off-by: Greg Kroah-Hartman --- drivers/tty/sysrq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 377b3592384e..a5516523f816 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -560,6 +560,7 @@ void __handle_sysrq(int key, bool check_mask) op_p->handler(key); } else { pr_cont("This sysrq operation is disabled.\n"); + console_loglevel = orig_log_level; } } else { pr_cont("HELP : "); -- GitLab From ac5ea065eefd755173d5d51ca45f8bf837d0485b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 11 Jan 2019 17:20:37 +0100 Subject: [PATCH 0325/1278] sysrq: Remove duplicated sysrq message commit c3fee60908db4a8594f2e4a2131998384b8fa006 upstream. The commit 97f5f0cd8cd0a0544 ("Input: implement SysRq as a separate input handler") added pr_fmt() definition. It caused a duplicated message prefix in the sysrq header messages, for example: [ 177.053931] sysrq: SysRq : Show backtrace of all active CPUs [ 742.864776] sysrq: SysRq : HELP : loglevel(0-9) reboot(b) crash(c) Fixes: 97f5f0cd8cd0a05 ("Input: implement SysRq as a separate input handler") Signed-off-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Cc: Tommi Rantala Signed-off-by: Greg Kroah-Hartman --- drivers/tty/sysrq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index a5516523f816..4c716ddd6599 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -546,7 +546,6 @@ void __handle_sysrq(int key, bool check_mask) */ orig_log_level = console_loglevel; console_loglevel = CONSOLE_LOGLEVEL_DEFAULT; - pr_info("SysRq : "); op_p = __sysrq_get_key_op(key); if (op_p) { @@ -555,15 +554,15 @@ void __handle_sysrq(int key, bool check_mask) * should not) and is the invoked operation enabled? 
*/ if (!check_mask || sysrq_on_mask(op_p->enable_mask)) { - pr_cont("%s\n", op_p->action_msg); + pr_info("%s\n", op_p->action_msg); console_loglevel = orig_log_level; op_p->handler(key); } else { - pr_cont("This sysrq operation is disabled.\n"); + pr_info("This sysrq operation is disabled.\n"); console_loglevel = orig_log_level; } } else { - pr_cont("HELP : "); + pr_info("HELP : "); /* Only print the help msg once per handler */ for (i = 0; i < ARRAY_SIZE(sysrq_key_table); i++) { if (sysrq_key_table[i]) { -- GitLab From 4c5b7276d7d1b13f04915900ca02ac7e1b670f67 Mon Sep 17 00:00:00 2001 From: Jethro Beekman Date: Wed, 12 Feb 2020 16:43:41 +0100 Subject: [PATCH 0326/1278] net: fib_rules: Correctly set table field when table number exceeds 8 bits [ Upstream commit 540e585a79e9d643ede077b73bcc7aa2d7b4d919 ] In 709772e6e06564ed94ba740de70185ac3d792773, RT_TABLE_COMPAT was added to allow legacy software to deal with routing table numbers >= 256, but the same change to FIB rule queries was overlooked. Signed-off-by: Jethro Beekman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/fib_rules.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9a6d97c1d810..9bb321df0869 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -799,7 +799,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh = nlmsg_data(nlh); frh->family = ops->family; - frh->table = rule->table; + frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT; if (nla_put_u32(skb, FRA_TABLE, rule->table)) goto nla_put_failure; if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen)) -- GitLab From 385b8a8b0618d94a6d968f616bff93f6a2419fa1 Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Fri, 14 Feb 2020 13:47:46 -0800 Subject: [PATCH 0327/1278] net: phy: restore mdio regs in the iproc mdio driver commit 6f08e98d62799e53c89dbf2c9a49d77e20ca648c upstream. 
The mii management register in iproc mdio block does not have a retention register so it is lost on suspend. Save and restore value of register while resuming from suspend. Fixes: bb1a619735b4 ("net: phy: Initialize mdio clock at probe function") Signed-off-by: Arun Parameswaran Signed-off-by: Scott Branden Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/mdio-bcm-iproc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c index 46fe1ae919a3..51ce3ea17fb3 100644 --- a/drivers/net/phy/mdio-bcm-iproc.c +++ b/drivers/net/phy/mdio-bcm-iproc.c @@ -188,6 +188,23 @@ static int iproc_mdio_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +int iproc_mdio_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct iproc_mdio_priv *priv = platform_get_drvdata(pdev); + + /* restore the mii clock configuration */ + iproc_mdio_config_clk(priv->base); + + return 0; +} + +static const struct dev_pm_ops iproc_mdio_pm_ops = { + .resume = iproc_mdio_resume +}; +#endif /* CONFIG_PM_SLEEP */ + static const struct of_device_id iproc_mdio_of_match[] = { { .compatible = "brcm,iproc-mdio", }, { /* sentinel */ }, @@ -198,6 +215,9 @@ static struct platform_driver iproc_mdio_driver = { .driver = { .name = "iproc-mdio", .of_match_table = iproc_mdio_of_match, +#ifdef CONFIG_PM_SLEEP + .pm = &iproc_mdio_pm_ops, +#endif }, .probe = iproc_mdio_probe, .remove = iproc_mdio_remove, -- GitLab From a1229fc80189667f6f5e6bf4a8c271ed3b973958 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 19 Feb 2020 18:01:22 +0300 Subject: [PATCH 0328/1278] nfc: pn544: Fix occasional HW initialization failure [ Upstream commit c3331d2fe3fd4d5e321f2467d01f72de7edfb5d0 ] The PN544 driver checks the "enable" polarity during of driver's probe and it's doing that by turning ON and 
OFF NFC with different polarities until enabling succeeds. It takes some time for the hardware to power-down, and thus, to deassert the IRQ that is raised by turning ON the hardware. Since the delay after last power-down of the polarity-checking process is missed in the code, the interrupt may trigger immediately after installing the IRQ handler (right after the checking is done), which results in IRQ handler trying to touch the disabled HW and ends with marking NFC as 'DEAD' during of the driver's probe: pn544_hci_i2c 1-002a: NFC: nfc_en polarity : active high pn544_hci_i2c 1-002a: NFC: invalid len byte shdlc: llc_shdlc_recv_frame: NULL Frame -> link is dead This patch fixes the occasional NFC initialization failure on Nexus 7 device. Signed-off-by: Dmitry Osipenko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/pn544/i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index 4b14740edb67..8ba5a6d6329e 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -236,6 +236,7 @@ static void pn544_hci_i2c_platform_init(struct pn544_i2c_phy *phy) out: gpiod_set_value_cansleep(phy->gpiod_en, !phy->en_polarity); + usleep_range(10000, 15000); } static void pn544_hci_i2c_enable_mode(struct pn544_i2c_phy *phy, int run_mode) -- GitLab From 4dfb4833a549dfdf3b6771d722c08140fbe9c137 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 18 Feb 2020 12:07:53 +0800 Subject: [PATCH 0329/1278] sctp: move the format error check out of __sctp_sf_do_9_1_abort [ Upstream commit 245709ec8be89af46ea7ef0444c9c80913999d99 ] When T2 timer is to be stopped, the asoc should also be deleted, otherwise, there will be no chance to call sctp_association_free and the asoc could last in memory forever. 
However, in sctp_sf_shutdown_sent_abort(), after adding the cmd SCTP_CMD_TIMER_STOP for T2 timer, it may return error due to the format error from __sctp_sf_do_9_1_abort() and miss adding SCTP_CMD_ASSOC_FAILED where the asoc will be deleted. This patch is to fix it by moving the format error check out of __sctp_sf_do_9_1_abort(), and do it before adding the cmd SCTP_CMD_TIMER_STOP for T2 timer. Thanks Hangbin for reporting this issue by the fuzz testing. v1->v2: - improve the comment in the code as Marcelo's suggestion. Fixes: 96ca468b86b0 ("sctp: check invalid value of length parameter in error cause") Reported-by: Hangbin Liu Acked-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_statefuns.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a2e058127ef7..ba29d782af30 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -182,6 +182,16 @@ static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk, return true; } +/* Check for format error in an ABORT chunk */ +static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk) +{ + struct sctp_errhdr *err; + + sctp_walk_errors(err, chunk->chunk_hdr); + + return (void *)err == (void *)chunk->chunk_end; +} + /********************************************************** * These are the state functions for handling chunk events. 
**********************************************************/ @@ -2202,6 +2212,9 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2245,6 +2258,9 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Stop the T2-shutdown timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); @@ -2512,6 +2528,9 @@ enum sctp_disposition sctp_sf_do_9_1_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2529,16 +2548,8 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort( /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); - if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) { - struct sctp_errhdr *err; - - sctp_walk_errors(err, chunk->chunk_hdr); - if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, - commands); - + if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) error = ((struct sctp_errhdr *)chunk->skb->data)->cause; - } sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); /* ASSOC_FAILED will DELETE_TCB. 
*/ -- GitLab From 14d5cc5ca2cef873b7caa991c3d7d38799a38ffa Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 12 Feb 2020 10:41:07 +0900 Subject: [PATCH 0330/1278] ipv6: Fix nlmsg_flags when splitting a multipath route [ Upstream commit afecdb376bd81d7e16578f0cfe82a1aec7ae18f3 ] When splitting an RTA_MULTIPATH request into multiple routes and adding the second and later components, we must not simply remove NLM_F_REPLACE but instead replace it by NLM_F_CREATE. Otherwise, it may look like the netlink message was malformed. For example, ip route add 2001:db8::1/128 dev dummy0 ip route change 2001:db8::1/128 nexthop via fe80::30:1 dev dummy0 \ nexthop via fe80::30:2 dev dummy0 results in the following warnings: [ 1035.057019] IPv6: RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE [ 1035.057517] IPv6: NLM_F_CREATE should be set when creating new route This patch makes the nlmsg sequence look equivalent for __ip6_ins_rt() to what it would get if the multipath route had been added in multiple netlink operations: ip route add 2001:db8::1/128 dev dummy0 ip route change 2001:db8::1/128 nexthop via fe80::30:1 dev dummy0 ip route append 2001:db8::1/128 nexthop via fe80::30:2 dev dummy0 Fixes: 27596472473a ("ipv6: fix ECMP route replacement") Signed-off-by: Benjamin Poirier Reviewed-by: Michal Kubecek Reviewed-by: David Ahern Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b81522bcf223..a4079ed56803 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3283,6 +3283,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, */ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_REPLACE); + cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; nhn++; } -- GitLab From 84c4d6642d7bb79e19458bb337c982209c3a46ac Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 12 Feb 2020 10:41:06 +0900 Subject: [PATCH 0331/1278] ipv6: Fix route replacement with dev-only route [ Upstream commit e404b8c7cfb31654c9024d497cec58a501501692 ] After commit 27596472473a ("ipv6: fix ECMP route replacement") it is no longer possible to replace an ECMP-able route by a non ECMP-able route. For example, ip route add 2001:db8::1/128 via fe80::1 dev dummy0 ip route replace 2001:db8::1/128 dev dummy0 does not work as expected. Tweak the replacement logic so that point 3 in the log of the above commit becomes: 3. If the new route is not ECMP-able, and no matching non-ECMP-able route exists, replace matching ECMP-able route (if any) or add the new route. We can now summarize the entire replace semantics to: When doing a replace, prefer replacing a matching route of the same "ECMP-able-ness" as the replace argument. If there is no such candidate, fallback to the first route found. Fixes: 27596472473a ("ipv6: fix ECMP route replacement") Signed-off-by: Benjamin Poirier Reviewed-by: Michal Kubecek Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e5308d7cbd75..d43abeb1e415 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -893,8 +893,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, found++; break; } - if (rt_can_ecmp) - fallback_ins = fallback_ins ?: ins; + fallback_ins = fallback_ins ?: ins; goto next_iter; } @@ -934,7 +933,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, } if (fallback_ins && !found) { - /* No ECMP-able route found, replace first non-ECMP one */ + /* No matching route with same ecmp-able-ness found, replace + * first matching route + */ ins = fallback_ins; iter = *ins; found++; -- GitLab From eca56a6d55e82256426df9328215d4c67fe4d271 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 17 Feb 2020 13:37:18 +0200 Subject: [PATCH 0332/1278] qede: Fix race between rdma destroy workqueue and link change event [ Upstream commit af6565adb02d3129d3fae4d9d5da945abaf4417a ] If an event is added while the rdma workqueue is being destroyed it could lead to several races, list corruption, null pointer dereference during queue_work or init_queue. This fixes the race between the two flows which can occur during shutdown. A kref object and a completion object are added to the rdma_dev structure, these are initialized before the workqueue is created. The refcnt is used to indicate work is being added to the workqueue and ensures the cleanup flow won't start while we're in the middle of adding the event. Once the work is added, the refcnt is decreased and the cleanup flow is safe to run. Fixes: cee9fbd8e2e ("qede: Add qedr framework") Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qede/qede.h | 2 ++ drivers/net/ethernet/qlogic/qede/qede_rdma.c | 29 +++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index adb700512baa..a80531b5aecc 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -156,6 +156,8 @@ struct qede_rdma_dev { struct list_head entry; struct list_head rdma_event_list; struct workqueue_struct *rdma_wq; + struct kref refcnt; + struct completion event_comp; }; struct qede_ptp; diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index 1900bf7e67d1..cd12fb919ad5 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -57,6 +57,9 @@ static void _qede_rdma_dev_add(struct qede_dev *edev) static int qede_rdma_create_wq(struct qede_dev *edev) { INIT_LIST_HEAD(&edev->rdma_info.rdma_event_list); + kref_init(&edev->rdma_info.refcnt); + init_completion(&edev->rdma_info.event_comp); + edev->rdma_info.rdma_wq = create_singlethread_workqueue("rdma_wq"); if (!edev->rdma_info.rdma_wq) { DP_NOTICE(edev, "qedr: Could not create workqueue\n"); @@ -81,8 +84,23 @@ static void qede_rdma_cleanup_event(struct qede_dev *edev) } } +static void qede_rdma_complete_event(struct kref *ref) +{ + struct qede_rdma_dev *rdma_dev = + container_of(ref, struct qede_rdma_dev, refcnt); + + /* no more events will be added after this */ + complete(&rdma_dev->event_comp); +} + static void qede_rdma_destroy_wq(struct qede_dev *edev) { + /* Avoid race with add_event flow, make sure it finishes before + * we start accessing the list and cleaning up the work + */ + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); + wait_for_completion(&edev->rdma_info.event_comp); + qede_rdma_cleanup_event(edev); 
destroy_workqueue(edev->rdma_info.rdma_wq); } @@ -287,15 +305,24 @@ static void qede_rdma_add_event(struct qede_dev *edev, if (!edev->rdma_info.qedr_dev) return; + /* We don't want the cleanup flow to start while we're allocating and + * scheduling the work + */ + if (!kref_get_unless_zero(&edev->rdma_info.refcnt)) + return; /* already being destroyed */ + event_node = qede_rdma_get_free_event_node(edev); if (!event_node) - return; + goto out; event_node->event = event; event_node->ptr = edev; INIT_WORK(&event_node->work, qede_rdma_handle_event); queue_work(edev->rdma_info.rdma_wq, &event_node->work); + +out: + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); } void qede_rdma_dev_event_open(struct qede_dev *edev) -- GitLab From 7c9fbd9447bcfc59d58ebcd9de92567569b78190 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 17 Feb 2020 15:38:09 -0500 Subject: [PATCH 0333/1278] net: sched: correct flower port blocking [ Upstream commit 8a9093c79863b58cc2f9874d7ae788f0d622a596 ] tc flower rules that are based on src or dst port blocking are sometimes ineffective due to uninitialized stack data. __skb_flow_dissect() extracts ports from the skb for tc flower to match against. However, the port dissection is not done when the FLOW_DIS_IS_FRAGMENT bit is set in key_control->flags. All callers of __skb_flow_dissect() zero out the key_control field except for fl_classify() as used by the flower classifier. Thus, the FLOW_DIS_IS_FRAGMENT may be set on entry to __skb_flow_dissect(), since key_control is allocated on the stack and may not be initialized. Since key_basic and key_control are present for all flow keys, let's make sure they are initialized. Fixes: 62230715fd24 ("flow_dissector: do not dissect l4 ports for fragments") Co-developed-by: Eric Dumazet Signed-off-by: Eric Dumazet Acked-by: Cong Wang Signed-off-by: Jason Baron Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/flow_dissector.h | 9 +++++++++ net/sched/cls_flower.c | 1 + 2 files changed, 10 insertions(+) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 227dc0a84172..ddf916e5e57d 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -5,6 +5,7 @@ #include #include #include +#include #include /** @@ -282,4 +283,12 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec return ((char *)target_container) + flow_dissector->offset[key_id]; } +static inline void +flow_dissector_init_keys(struct flow_dissector_key_control *key_control, + struct flow_dissector_key_basic *key_basic) +{ + memset(key_control, 0, sizeof(*key_control)); + memset(key_basic, 0, sizeof(*key_basic)); +} + #endif diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 80a5a6d503c8..8974bd25c71e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -160,6 +160,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (!atomic_read(&head->ht.nelems)) return -1; + flow_dissector_init_keys(&skb_key.control, &skb_key.basic); fl_clear_masked_range(&skb_key, &head->mask); info = skb_tunnel_info(skb); -- GitLab From 267e0a91b898619f9e747f944bb80e198910f7b4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Feb 2020 12:22:56 +0300 Subject: [PATCH 0334/1278] ext4: potential crash on allocation error in ext4_alloc_flex_bg_array() commit 37b0b6b8b99c0e1c1f11abbe7cf49b6d03795b3f upstream. If sbi->s_flex_groups_allocated is zero and the first allocation fails then this code will crash. The problem is that "i--" will set "i" to -1 but when we compare "i >= sbi->s_flex_groups_allocated" then the -1 is type promoted to unsigned and becomes UINT_MAX. Since UINT_MAX is more than zero, the condition is true so we call kvfree(new_groups[-1]). The loop will carry on freeing invalid memory until it crashes. 
Fixes: 7c990728b99e ("ext4: fix potential race between s_flex_groups online resizing and access") Reviewed-by: Suraj Jitindar Singh Signed-off-by: Dan Carpenter Cc: stable@kernel.org Link: https://lore.kernel.org/r/20200228092142.7irbc44yaz3by7nb@kili.mountain Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f1c1c180d267..f5646bcad770 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2238,7 +2238,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct flex_groups **old_groups, **new_groups; - int size, i; + int size, i, j; if (!sbi->s_log_groups_per_flex) return 0; @@ -2259,8 +2259,8 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) sizeof(struct flex_groups)), GFP_KERNEL); if (!new_groups[i]) { - for (i--; i >= sbi->s_flex_groups_allocated; i--) - kvfree(new_groups[i]); + for (j = sbi->s_flex_groups_allocated; j < i; j++) + kvfree(new_groups[j]); kvfree(new_groups); ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", size); -- GitLab From edde9fcd5f41b2e22e455250214de0c4b126b255 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Sat, 22 Feb 2020 20:36:47 -0500 Subject: [PATCH 0335/1278] audit: fix error handling in audit_data_to_entry() commit 2ad3e17ebf94b7b7f3f64c050ff168f9915345eb upstream. Commit 219ca39427bf ("audit: use union for audit_field values since they are mutually exclusive") combined a number of separate fields in the audit_field struct into a single union. Generally this worked just fine because they are generally mutually exclusive. 
Unfortunately in audit_data_to_entry() the overlap can be a problem when a specific error case is triggered that causes the error path code to attempt to clean up an audit_field struct and the cleanup involves attempting to free a stored LSM string (the lsm_str field). Currently the code always has a non-NULL value in the audit_field.lsm_str field as the top of the for-loop transfers a value into audit_field.val (both .lsm_str and .val are part of the same union); if audit_data_to_entry() fails and the audit_field struct is specified to contain an LSM string, but the audit_field.lsm_str has not yet been properly set, the error handling code will attempt to free the bogus audit_field.lsm_str value that was set with audit_field.val at the top of the for-loop. This patch corrects this by ensuring that the audit_field.val is only set when needed (it is cleared when the audit_field struct is allocated with kcalloc()). It also corrects a few other issues to ensure that in case of error the proper error code is returned.
Cc: stable@vger.kernel.org Fixes: 219ca39427bf ("audit: use union for audit_field values since they are mutually exclusive") Reported-by: syzbot+1f4d90ead370d72e450b@syzkaller.appspotmail.com Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- kernel/auditfilter.c | 71 ++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 215c6e1ee026..16cf396ea738 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -435,6 +435,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, bufp = data->buf; for (i = 0; i < data->field_count; i++) { struct audit_field *f = &entry->rule.fields[i]; + u32 f_val; err = -EINVAL; @@ -443,12 +444,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, goto exit_free; f->type = data->fields[i]; - f->val = data->values[i]; + f_val = data->values[i]; /* Support legacy tests for a valid loginuid */ - if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) { + if ((f->type == AUDIT_LOGINUID) && (f_val == AUDIT_UID_UNSET)) { f->type = AUDIT_LOGINUID_SET; - f->val = 0; + f_val = 0; entry->rule.pflags |= AUDIT_LOGINUID_LEGACY; } @@ -464,7 +465,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_SUID: case AUDIT_FSUID: case AUDIT_OBJ_UID: - f->uid = make_kuid(current_user_ns(), f->val); + f->uid = make_kuid(current_user_ns(), f_val); if (!uid_valid(f->uid)) goto exit_free; break; @@ -473,12 +474,13 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_SGID: case AUDIT_FSGID: case AUDIT_OBJ_GID: - f->gid = make_kgid(current_user_ns(), f->val); + f->gid = make_kgid(current_user_ns(), f_val); if (!gid_valid(f->gid)) goto exit_free; break; case AUDIT_SESSIONID: case AUDIT_ARCH: + f->val = f_val; entry->rule.arch_f = f; break; case AUDIT_SUBJ_USER: @@ -491,11 +493,13 @@ static struct audit_entry 
*audit_data_to_entry(struct audit_rule_data *data, case AUDIT_OBJ_TYPE: case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - + } + entry->rule.buflen += f_val; + f->lsm_str = str; err = security_audit_rule_init(f->type, f->op, str, (void **)&f->lsm_rule); /* Keep currently invalid fields around in case they @@ -504,68 +508,71 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, pr_warn("audit rule for LSM \'%s\' is invalid\n", str); err = 0; - } - if (err) { - kfree(str); + } else if (err) goto exit_free; - } else - f->lsm_str = str; break; case AUDIT_WATCH: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - - err = audit_to_watch(&entry->rule, str, f->val, f->op); + } + err = audit_to_watch(&entry->rule, str, f_val, f->op); if (err) { kfree(str); goto exit_free; } + entry->rule.buflen += f_val; break; case AUDIT_DIR: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - + } err = audit_make_tree(&entry->rule, str, f->op); kfree(str); if (err) goto exit_free; + entry->rule.buflen += f_val; break; case AUDIT_INODE: + f->val = f_val; err = audit_to_inode(&entry->rule, f); if (err) goto exit_free; break; case AUDIT_FILTERKEY: - if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN) + if (entry->rule.filterkey || f_val > AUDIT_MAX_KEY_LEN) goto exit_free; - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = 
PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; + } + entry->rule.buflen += f_val; entry->rule.filterkey = str; break; case AUDIT_EXE: - if (entry->rule.exe || f->val > PATH_MAX) + if (entry->rule.exe || f_val > PATH_MAX) goto exit_free; - str = audit_unpack_string(&bufp, &remain, f->val); + str = audit_unpack_string(&bufp, &remain, f_val); if (IS_ERR(str)) { err = PTR_ERR(str); goto exit_free; } - entry->rule.buflen += f->val; - - audit_mark = audit_alloc_mark(&entry->rule, str, f->val); + audit_mark = audit_alloc_mark(&entry->rule, str, f_val); if (IS_ERR(audit_mark)) { kfree(str); err = PTR_ERR(audit_mark); goto exit_free; } + entry->rule.buflen += f_val; entry->rule.exe = audit_mark; break; + default: + f->val = f_val; + break; } } -- GitLab From 0a94e100b4fe9bd250e2c1f7624a70de2cdf4bc8 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 12 Feb 2020 17:59:39 +0300 Subject: [PATCH 0336/1278] ACPICA: Introduce ACPI_ACCESS_BYTE_WIDTH() macro commit 1dade3a7048ccfc675650cd2cf13d578b095e5fb upstream. Sometimes it is useful to find the access_width field value in bytes and not in bits so add a helper that can be used for this purpose. Suggested-by: Jean Delvare Signed-off-by: Mika Westerberg Reviewed-by: Jean Delvare Cc: 4.16+ # 4.16+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- include/acpi/actypes.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 4f077edb9b81..71fadbe77e21 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -556,11 +556,12 @@ typedef u64 acpi_integer; #define ACPI_MAKE_RSDP_SIG(dest) (memcpy (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8)) /* - * Algorithm to obtain access bit width. + * Algorithm to obtain access bit or byte width. * Can be used with access_width of struct acpi_generic_address and access_size of * struct acpi_resource_generic_register. 
*/ #define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) +#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1)) /******************************************************************************* * -- GitLab From dbaf976ebef18921b88e2f360f91c5fd3ef89f8f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 12 Feb 2020 17:59:40 +0300 Subject: [PATCH 0337/1278] ACPI: watchdog: Fix gas->access_width usage commit 2ba33a4e9e22ac4dda928d3e9b5978a3a2ded4e0 upstream. ACPI Generic Address Structure (GAS) access_width field is not in bytes as the driver seems to expect in few places so fix this by using the newly introduced macro ACPI_ACCESS_BYTE_WIDTH(). Fixes: b1abf6fc4982 ("ACPI / watchdog: Fix off-by-one error at resource assignment") Fixes: 058dfc767008 ("ACPI / watchdog: Add support for WDAT hardware watchdog") Reported-by: Jean Delvare Signed-off-by: Mika Westerberg Reviewed-by: Jean Delvare Cc: 4.16+ # 4.16+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_watchdog.c | 3 +-- drivers/watchdog/wdat_wdt.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index 95600309ce42..23cde3d8e8fb 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -129,12 +129,11 @@ void __init acpi_watchdog_init(void) gas = &entries[i].register_region; res.start = gas->address; + res.end = res.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { res.flags = IORESOURCE_MEM; - res.end = res.start + ALIGN(gas->access_width, 4) - 1; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { res.flags = IORESOURCE_IO; - res.end = res.start + gas->access_width - 1; } else { pr_warn("Unsupported address space: %u\n", gas->space_id); diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index 0da9943d405f..c310e841561c 100644 --- a/drivers/watchdog/wdat_wdt.c +++ 
b/drivers/watchdog/wdat_wdt.c @@ -392,7 +392,7 @@ static int wdat_wdt_probe(struct platform_device *pdev) memset(&r, 0, sizeof(r)); r.start = gas->address; - r.end = r.start + gas->access_width - 1; + r.end = r.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { r.flags = IORESOURCE_MEM; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { -- GitLab From 634572f8c0aa43f63bb203fd4627e9b487393562 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Sat, 29 Feb 2020 11:30:14 -0800 Subject: [PATCH 0338/1278] KVM: VMX: check descriptor table exits on instruction emulation commit 86f7e90ce840aa1db407d3ea6e9b3a52b2ce923c upstream. KVM emulates UMIP on hardware that doesn't support it by setting the 'descriptor table exiting' VM-execution control and performing instruction emulation. When running nested, this emulation is broken as KVM refuses to emulate L2 instructions by default. Correct this regression by allowing the emulation of descriptor table instructions if L1 hasn't requested 'descriptor table exiting'. Fixes: 07721feee46b ("KVM: nVMX: Don't emulate instructions in guest mode") Reported-by: Jan Kiszka Cc: stable@vger.kernel.org Cc: Paolo Bonzini Cc: Jim Mattson Signed-off-by: Oliver Upton Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index acf72da288f9..f85680b86524 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -12370,6 +12370,7 @@ static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, else intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ return intercept ? 
X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; } @@ -12399,6 +12400,20 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, case x86_intercept_outs: return vmx_check_intercept_io(vcpu, info); + case x86_intercept_lgdt: + case x86_intercept_lidt: + case x86_intercept_lldt: + case x86_intercept_ltr: + case x86_intercept_sgdt: + case x86_intercept_sidt: + case x86_intercept_sldt: + case x86_intercept_str: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC)) + return X86EMUL_CONTINUE; + + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ + break; + /* TODO: check more intercepts... */ default: break; -- GitLab From b011c72eda318297e84af2eec1dd32c1d06bd1b8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 1 Feb 2020 12:56:48 +0100 Subject: [PATCH 0339/1278] HID: ite: Only bind to keyboard USB interface on Acer SW5-012 keyboard dock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit beae56192a2570578ae45050e73c5ff9254f63e6 upstream. Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock") added the USB id for the Acer SW5-012's keyboard dock to the hid-ite driver to fix the rfkill driver not working. Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the "Wireless Radio Control" bits which need the special hid-ite driver on the second USB interface (the mouse interface) and their touchpad only supports mouse emulation, so using generic hid-input handling for anything but the "Wireless Radio Control" bits is fine. On these devices we simply bind to all USB interfaces. But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10 (SW5-012)'s touchpad not only does mouse emulation it also supports HID-multitouch and all the keys including the "Wireless Radio Control" bits have been moved to the first USB interface (the keyboard intf). 
So we need hid-ite to handle the first (keyboard) USB interface and have it NOT bind to the second (mouse) USB interface so that that can be handled by hid-multitouch.c and we get proper multi-touch support. This commit changes the hid_device_id for the SW5-012 keyboard dock to only match on hid devices from the HID_GROUP_GENERIC group, this way hid-ite will not bind to the mouse/multi-touch interface which has HID_GROUP_MULTITOUCH_WIN_8 as group. This fixes the regression to mouse-emulation mode introduced by adding the keyboard dock USB id. Cc: stable@vger.kernel.org Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock") Reported-by: Zdeněk Rampas Signed-off-by: Hans de Goede Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ite.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 2ce1eb0c9212..f2e23f81601e 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -44,8 +44,9 @@ static const struct hid_device_id ite_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) }, { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) }, /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */ - { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); -- GitLab From 67e43711b143dd933c98934747d4167657966af9 Mon Sep 17 00:00:00 2001 From: Johan Korsnes Date: Fri, 17 Jan 2020 13:08:35 +0100 Subject: [PATCH 0340/1278] HID: core: fix off-by-one memset in hid_report_raw_event() commit 5ebdffd25098898aff1249ae2f7dbfddd76d8f8f upstream. In case a report is greater than HID_MAX_BUFFER_SIZE, it is truncated, but the report-number byte is not correctly handled.
This results in an off-by-one in the following memset, causing a kernel Oops and ensuing system crash. Note: With commit 8ec321e96e05 ("HID: Fix slab-out-of-bounds read in hid_field_extract") I no longer hit the kernel Oops as we instead fail "controlled" at probe if there is a report too long in the HID report-descriptor. hid_report_raw_event() is an exported symbol, so presumably we cannot always rely on this being the case. Fixes: 966922f26c7f ("HID: fix a crash in hid_report_raw_event() function.") Signed-off-by: Johan Korsnes Cc: Armando Visconti Cc: Jiri Kosina Cc: Alan Stern Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 2d089d3954e3..75b0a337114d 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1567,7 +1567,9 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, rsize = ((report->size - 1) >> 3) + 1; - if (rsize > HID_MAX_BUFFER_SIZE) + if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE) + rsize = HID_MAX_BUFFER_SIZE - 1; + else if (rsize > HID_MAX_BUFFER_SIZE) rsize = HID_MAX_BUFFER_SIZE; if (csize < rsize) { -- GitLab From 33533185aa222d30946017f8f689fa7abc9384ff Mon Sep 17 00:00:00 2001 From: Johan Korsnes Date: Fri, 17 Jan 2020 13:08:36 +0100 Subject: [PATCH 0341/1278] HID: core: increase HID report buffer size to 8KiB commit 84a4062632462c4320704fcdf8e99e89e94c0aba upstream. We have a HID touch device that reports its opens and shorts test results in HID buffers of size 8184 bytes. The maximum size of the HID buffer is currently set to 4096 bytes, causing probe of this device to fail. With this patch we increase the maximum size of the HID buffer to 8192 bytes, making device probe and acquisition of said buffers succeed.
Signed-off-by: Johan Korsnes Cc: Alan Stern Cc: Armando Visconti Cc: Jiri Kosina Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/hid.h b/include/linux/hid.h index 3656a04d764b..ba1f67559831 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -477,7 +477,7 @@ struct hid_report_enum { }; #define HID_MIN_BUFFER_SIZE 64 /* make sure there is at least a packet size of space */ -#define HID_MAX_BUFFER_SIZE 4096 /* 4kb */ +#define HID_MAX_BUFFER_SIZE 8192 /* 8kb */ #define HID_CONTROL_FIFO_SIZE 256 /* to init devices with >100 reports */ #define HID_OUTPUT_FIFO_SIZE 64 -- GitLab From 581695e615571f68d9c63d82d88a8172ef160d62 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 20 Feb 2020 15:38:01 -0500 Subject: [PATCH 0342/1278] tracing: Disable trace_printk() on post poned tests commit 78041c0c9e935d9ce4086feeff6c569ed88ddfd4 upstream. The tracing selftests check various aspects of the tracing infrastructure, and one is filtering. If trace_printk() is active during a self test, it can cause the filtering to fail, which will disable that part of the trace. To keep the selftests from failing because of trace_printk() calls, trace_printk() checks the variable tracing_selftest_running, and if set, it does not write to the tracing buffer. As some tracers were registered earlier in boot, the selftest they triggered would fail because not all the infrastructure was set up for the full selftest. Thus, some of the tests were postponed to when their infrastructure was ready (namely file system code). The postpone code did not set the tracing_selftest_running variable, and could fail if a trace_printk() was added and executed during their run.
Cc: stable@vger.kernel.org Fixes: 9afecfbb95198 ("tracing: Postpone tracer start-up tests till the system is more robust") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c456c2b06277..207d7c35214f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1543,6 +1543,7 @@ static __init int init_trace_selftests(void) pr_info("Running postponed tracer tests:\n"); + tracing_selftest_running = true; list_for_each_entry_safe(p, n, &postponed_selftests, list) { ret = run_tracer_selftest(p->type); /* If the test fails, then warn and remove from available_tracers */ @@ -1561,6 +1562,7 @@ static __init int init_trace_selftests(void) list_del(&p->list); kfree(p); } + tracing_selftest_running = false; out: mutex_unlock(&trace_types_lock); -- GitLab From 596e029dfcc6bfa8236f4e6e64b00dab8e19051a Mon Sep 17 00:00:00 2001 From: Orson Zhai Date: Fri, 21 Feb 2020 01:37:04 +0800 Subject: [PATCH 0343/1278] Revert "PM / devfreq: Modify the device name as devfreq(X) for sysfs" commit 66d0e797bf095d407479c89952d42b1d96ef0a7f upstream. This reverts commit 4585fbcb5331fc910b7e553ad3efd0dd7b320d14. The name changing as devfreq(X) breaks some user space applications, such as Android HAL from Unisoc and Hikey [1]. The device name will be changed unexpectedly after every boot depending on module init sequence. This makes it troublesome to set up some system configuration like selinux for Android. So we'd like to revert it back to the old naming rule until a better way is found.
[1] https://lkml.org/lkml/2018/5/8/1042 Cc: John Stultz Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Signed-off-by: Orson Zhai Acked-by: Greg Kroah-Hartman Signed-off-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/devfreq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 58ec3abfd321..b05e6a15221c 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -513,7 +513,6 @@ struct devfreq *devfreq_add_device(struct device *dev, { struct devfreq *devfreq; struct devfreq_governor *governor; - static atomic_t devfreq_no = ATOMIC_INIT(-1); int err = 0; if (!dev || !profile || !governor_name) { @@ -556,8 +555,7 @@ struct devfreq *devfreq_add_device(struct device *dev, mutex_lock(&devfreq->lock); } - dev_set_name(&devfreq->dev, "devfreq%d", - atomic_inc_return(&devfreq_no)); + dev_set_name(&devfreq->dev, "%s", dev_name(dev)); err = device_register(&devfreq->dev); if (err) { mutex_unlock(&devfreq->lock); -- GitLab From 9539c5cd71b0a506d556f056284403cff5bf8e72 Mon Sep 17 00:00:00 2001 From: "dan.carpenter@oracle.com" Date: Wed, 15 Jan 2020 20:46:28 +0300 Subject: [PATCH 0344/1278] HID: hiddev: Fix race in in hiddev_disconnect() commit 5c02c447eaeda29d3da121a2e17b97ccaf579b51 upstream. Syzbot reports that "hiddev" is used after it's free in hiddev_disconnect(). The hiddev_disconnect() function sets "hiddev->exist = 0;" so hiddev_release() can free it as soon as we drop the "existancelock" lock. This patch moves the mutex_unlock(&hiddev->existancelock) until after we have finished using it. 
Reported-by: syzbot+784ccb935f9900cc7c9e@syzkaller.appspotmail.com Fixes: 7f77897ef2b6 ("HID: hiddev: fix potential use-after-free") Suggested-by: Alan Stern Signed-off-by: Dan Carpenter Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/usbhid/hiddev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index bccd97cdc53f..d9602f3a359e 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -954,9 +954,9 @@ void hiddev_disconnect(struct hid_device *hid) hiddev->exist = 0; if (hiddev->open) { - mutex_unlock(&hiddev->existancelock); hid_hw_close(hiddev->hid); wake_up_interruptible(&hiddev->wait); + mutex_unlock(&hiddev->existancelock); } else { mutex_unlock(&hiddev->existancelock); kfree(hiddev); -- GitLab From dea270c28f815590cf4376da29e4e8341dd5b98d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 2 Feb 2020 21:19:22 +0100 Subject: [PATCH 0345/1278] MIPS: VPE: Fix a double free and a memory leak in 'release_vpe()' commit bef8e2dfceed6daeb6ca3e8d33f9c9d43b926580 upstream. Pointer on the memory allocated by 'alloc_progmem()' is stored in 'v->load_addr'. So this is this memory that should be freed by 'release_progmem()'. 'release_progmem()' is only a call to 'kfree()'. With the current code, there is both a double free and a memory leak. Fix it by passing the correct pointer to 'release_progmem()'. Fixes: e01402b115ccc ("More AP / SP bits for the 34K, the Malta bits and things. 
Still wants") Signed-off-by: Christophe JAILLET Signed-off-by: Paul Burton Cc: ralf@linux-mips.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: kernel-janitors@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/vpe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 544ea21bfef9..b2683aca401f 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -134,7 +134,7 @@ void release_vpe(struct vpe *v) { list_del(&v->list); if (v->load_addr) - release_progmem(v); + release_progmem(v->load_addr); kfree(v); } -- GitLab From ee850f01b3a36d172624f178a20ca434d33240d8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 11 Feb 2020 08:47:04 -0600 Subject: [PATCH 0346/1278] i2c: altera: Fix potential integer overflow commit 54498e8070e19e74498a72c7331348143e7e1f8c upstream. Factor out 100 from the equation and do 32-bit arithmetic (3 * clk_mhz / 10) instead of 64-bit. Notice that clk_mhz is MHz, so the multiplication will never wrap 32 bits and there is no need for div_u64(). Addresses-Coverity: 1458369 ("Unintentional integer overflow") Fixes: 0560ad576268 ("i2c: altera: Add Altera I2C Controller driver") Suggested-by: David Laight Signed-off-by: Gustavo A. R. 
Silva Reviewed-by: Thor Thayer Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-altera.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-altera.c b/drivers/i2c/busses/i2c-altera.c index f5e1941e65b5..a1cdcfc74acf 100644 --- a/drivers/i2c/busses/i2c-altera.c +++ b/drivers/i2c/busses/i2c-altera.c @@ -182,7 +182,7 @@ static void altr_i2c_init(struct altr_i2c_dev *idev) /* SCL Low Time */ writel(t_low, idev->base + ALTR_I2C_SCL_LOW); /* SDA Hold Time, 300ns */ - writel(div_u64(300 * clk_mhz, 1000), idev->base + ALTR_I2C_SDA_HOLD); + writel(3 * clk_mhz / 10, idev->base + ALTR_I2C_SDA_HOLD); /* Mask all master interrupt bits */ altr_i2c_int_enable(idev, ALTR_I2C_ALL_IRQ, false); -- GitLab From 60493d08647814112f2941c37643109b8aa8f6c3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 12 Feb 2020 10:35:30 +0100 Subject: [PATCH 0347/1278] i2c: jz4780: silence log flood on txabrt commit 9e661cedcc0a072d91a32cb88e0515ea26e35711 upstream. The printout for txabrt is way too talkative and is highly annoying with scanning programs like 'i2cdetect'. Reduce it to the minimum, the rest can be gained by I2C core debugging and datasheet information. Also, make it a debug printout, it won't help the regular user. Fixes: ba92222ed63a ("i2c: jz4780: Add i2c bus controller driver for Ingenic JZ4780") Reported-by: H. Nikolaus Schaller Tested-by: H. 
Nikolaus Schaller Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-jz4780.c | 36 ++------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index 30132c3957cd..41ca9ff7b5da 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -82,25 +82,6 @@ #define JZ4780_I2C_STA_TFNF BIT(1) #define JZ4780_I2C_STA_ACT BIT(0) -static const char * const jz4780_i2c_abrt_src[] = { - "ABRT_7B_ADDR_NOACK", - "ABRT_10ADDR1_NOACK", - "ABRT_10ADDR2_NOACK", - "ABRT_XDATA_NOACK", - "ABRT_GCALL_NOACK", - "ABRT_GCALL_READ", - "ABRT_HS_ACKD", - "SBYTE_ACKDET", - "ABRT_HS_NORSTRT", - "SBYTE_NORSTRT", - "ABRT_10B_RD_NORSTRT", - "ABRT_MASTER_DIS", - "ARB_LOST", - "SLVFLUSH_TXFIFO", - "SLV_ARBLOST", - "SLVRD_INTX", -}; - #define JZ4780_I2C_INTST_IGC BIT(11) #define JZ4780_I2C_INTST_ISTT BIT(10) #define JZ4780_I2C_INTST_ISTP BIT(9) @@ -538,21 +519,8 @@ static irqreturn_t jz4780_i2c_irq(int irqno, void *dev_id) static void jz4780_i2c_txabrt(struct jz4780_i2c *i2c, int src) { - int i; - - dev_err(&i2c->adap.dev, "txabrt: 0x%08x\n", src); - dev_err(&i2c->adap.dev, "device addr=%x\n", - jz4780_i2c_readw(i2c, JZ4780_I2C_TAR)); - dev_err(&i2c->adap.dev, "send cmd count:%d %d\n", - i2c->cmd, i2c->cmd_buf[i2c->cmd]); - dev_err(&i2c->adap.dev, "receive data count:%d %d\n", - i2c->cmd, i2c->data_buf[i2c->cmd]); - - for (i = 0; i < 16; i++) { - if (src & BIT(i)) - dev_dbg(&i2c->adap.dev, "I2C TXABRT[%d]=%s\n", - i, jz4780_i2c_abrt_src[i]); - } + dev_dbg(&i2c->adap.dev, "txabrt: 0x%08x, cmd: %d, send: %d, recv: %d\n", + src, i2c->cmd, i2c->cmd_buf[i2c->cmd], i2c->data_buf[i2c->cmd]); } static inline int jz4780_i2c_xfer_read(struct jz4780_i2c *i2c, -- GitLab From b67d1c342e1329d7a44d3306242e4dfe3a601552 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 21 Feb 2020 10:32:34 +0800 Subject: [PATCH 0348/1278] drm/i915/gvt: Separate display 
reset from ALL_ENGINES reset commit 3eb55e6f753a379e293395de8d5f3be28351a7f8 upstream. ALL_ENGINES reset doesn't clobber display with the current gvt-g supported platforms. Thus ALL_ENGINES reset shouldn't reset the display engine registers emulated by gvt-g. This fixes guest warnings like: [ 14.622026] [drm] Initialized i915 1.6.0 20200114 for 0000:00:03.0 on minor 0 [ 14.967917] fbcon: i915drmfb (fb0) is primary device [ 25.100188] [drm:drm_atomic_helper_wait_for_dependencies [drm_kms_helper]] ERROR [CRTC:51:pipe A] flip_done timed out [ 25.100860] -----------[ cut here ]----------- [ 25.100861] pll on state mismatch (expected 0, found 1) [ 25.101024] WARNING: CPU: 1 PID: 30 at drivers/gpu/drm/i915/display/intel_display.c:14382 verify_single_dpll_state.isra.115+0x28f/0x320 [i915] [ 25.101025] Modules linked in: intel_rapl_msr intel_rapl_common kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel i915 aesni_intel crypto_simd cryptd glue_helper cec rc_core video drm_kms_helper joydev drm input_leds i2c_algo_bit serio_raw fb_sys_fops syscopyarea sysfillrect sysimgblt mac_hid qemu_fw_cfg sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 e1000 psmouse i2c_piix4 pata_acpi floppy [ 25.101052] CPU: 1 PID: 30 Comm: kworker/u4:1 Not tainted 5.5.0+ #1 [ 25.101053] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58 04/01/2014 [ 25.101055] Workqueue: events_unbound async_run_entry_fn [ 25.101092] RIP: 0010:verify_single_dpll_state.isra.115+0x28f/0x320 [i915] [ 25.101093] Code: e0 d9 ff e9 a3 fe ff ff 80 3d e9 c2 11 00 00 44 89 f6 48 c7 c7 c0 9d 88 c0 75 3b e8 eb df d9 ff e9 c7 fe ff ff e8 d1 e0 ae c4 <0f> 0b e9 7a fe ff ff 80 3d c0 c2 11 00 00 8d 71 41 89 c2 48 c7 c7 [ 25.101093] RSP: 0018:ffffb1de80107878 EFLAGS: 00010286 [ 25.101094] RAX: 0000000000000000 RBX: ffffb1de80107884 RCX: 0000000000000007 [ 25.101095] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff94fdfdd19740 [ 25.101095] RBP:
ffffb1de80107938 R08: 0000000d6bfdc7b4 R09: 000000000000002b [ 25.101096] R10: ffff94fdf82dc000 R11: 0000000000000225 R12: 00000000000001f8 [ 25.101096] R13: ffff94fdb3ca6a90 R14: ffff94fdb3ca0000 R15: 0000000000000000 [ 25.101097] FS: 0000000000000000(0000) GS:ffff94fdfdd00000(0000) knlGS:0000000000000000 [ 25.101098] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 25.101098] CR2: 00007fbc3e2be9c8 CR3: 000000003339a003 CR4: 0000000000360ee0 [ 25.101101] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 25.101101] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 25.101102] Call Trace: [ 25.101139] intel_atomic_commit_tail+0xde4/0x1520 [i915] [ 25.101141] ? flush_workqueue_prep_pwqs+0xfa/0x130 [ 25.101142] ? flush_workqueue+0x198/0x3c0 [ 25.101174] intel_atomic_commit+0x2ad/0x320 [i915] [ 25.101209] drm_atomic_commit+0x4a/0x50 [drm] [ 25.101220] drm_client_modeset_commit_atomic+0x1c4/0x200 [drm] [ 25.101231] drm_client_modeset_commit_force+0x47/0x170 [drm] [ 25.101250] drm_fb_helper_restore_fbdev_mode_unlocked+0x4e/0xa0 [drm_kms_helper] [ 25.101255] drm_fb_helper_set_par+0x2d/0x60 [drm_kms_helper] [ 25.101287] intel_fbdev_set_par+0x1a/0x40 [i915] [ 25.101289] ? con_is_visible+0x2e/0x60 [ 25.101290] fbcon_init+0x378/0x600 [ 25.101292] visual_init+0xd5/0x130 [ 25.101296] do_bind_con_driver+0x217/0x430 [ 25.101297] do_take_over_console+0x7d/0x1b0 [ 25.101298] do_fbcon_takeover+0x5c/0xb0 [ 25.101299] fbcon_fb_registered+0x199/0x1a0 [ 25.101301] register_framebuffer+0x22c/0x330 [ 25.101306] __drm_fb_helper_initial_config_and_unlock+0x31a/0x520 [drm_kms_helper] [ 25.101311] drm_fb_helper_initial_config+0x35/0x40 [drm_kms_helper] [ 25.101341] intel_fbdev_initial_config+0x18/0x30 [i915] [ 25.101342] async_run_entry_fn+0x3c/0x150 [ 25.101343] process_one_work+0x1fd/0x3f0 [ 25.101344] worker_thread+0x34/0x410 [ 25.101346] kthread+0x121/0x140 [ 25.101346] ? process_one_work+0x3f0/0x3f0 [ 25.101347] ?
kthread_park+0x90/0x90 [ 25.101350] ret_from_fork+0x35/0x40 [ 25.101351] --[ end trace b5b47d44cd998ba1 ]-- Fixes: 6294b61ba769 ("drm/i915/gvt: add missing display part reset for vGPU reset") Signed-off-by: Tina Zhang Reviewed-by: Zhenyu Wang Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200221023234.28635-1-tina.zhang@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gvt/vgpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 02c61a1ad56a..e9f9063dbf63 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -513,9 +513,9 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, intel_vgpu_reset_mmio(vgpu, dmlr); populate_pvinfo_page(vgpu); - intel_vgpu_reset_display(vgpu); if (dmlr) { + intel_vgpu_reset_display(vgpu); intel_vgpu_reset_cfg_space(vgpu); /* only reset the failsafe mode when dmlr reset */ vgpu->failsafe = false; -- GitLab From ebc406a766d51552ab4a5486d46c5780f0a60a09 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Sat, 1 Feb 2020 14:13:44 +0800 Subject: [PATCH 0349/1278] usb: charger: assign specific number for enum value commit ca4b43c14cd88d28cfc6467d2fa075aad6818f1d upstream. To work properly on every architecture and compiler, the enum values need to be specific numbers.
Suggested-by: Greg KH Signed-off-by: Peter Chen Link: https://lore.kernel.org/r/1580537624-10179-1-git-send-email-peter.chen@nxp.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/charger.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/usb/charger.h b/include/uapi/linux/usb/charger.h index 5f72af35b3ed..ad22079125bf 100644 --- a/include/uapi/linux/usb/charger.h +++ b/include/uapi/linux/usb/charger.h @@ -14,18 +14,18 @@ * ACA (Accessory Charger Adapters) */ enum usb_charger_type { - UNKNOWN_TYPE, - SDP_TYPE, - DCP_TYPE, - CDP_TYPE, - ACA_TYPE, + UNKNOWN_TYPE = 0, + SDP_TYPE = 1, + DCP_TYPE = 2, + CDP_TYPE = 3, + ACA_TYPE = 4, }; /* USB charger state */ enum usb_charger_state { - USB_CHARGER_DEFAULT, - USB_CHARGER_PRESENT, - USB_CHARGER_ABSENT, + USB_CHARGER_DEFAULT = 0, + USB_CHARGER_PRESENT = 1, + USB_CHARGER_ABSENT = 2, }; #endif /* _UAPI__LINUX_USB_CHARGER_H */ -- GitLab From 35dc93efe51eae9abc7a031d33f139768d685e02 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 2 Mar 2020 13:39:13 -0700 Subject: [PATCH 0350/1278] ecryptfs: Fix up bad backport of fe2e082f5da5b4a0a92ae32978f81507ef37ec66 When doing the 4.9 merge into certain Android trees, I noticed a warning from Android's deprecated GCC 4.9.4, which causes a build failure in those trees due to basically -Werror: fs/ecryptfs/keystore.c: In function 'ecryptfs_parse_packet_set': fs/ecryptfs/keystore.c:1357:2: warning: 'auth_tok_list_item' may be used uninitialized in this function [-Wmaybe-uninitialized] memset(auth_tok_list_item, 0, ^ fs/ecryptfs/keystore.c:1260:38: note: 'auth_tok_list_item' was declared here struct ecryptfs_auth_tok_list_item *auth_tok_list_item; ^ GCC 9.2.0 was not able to pick up this warning when I tested it. 
Turns out that Clang warns as well when -Wuninitialized is used, which is not the case in older stable trees at the moment (but shows value in potentially backporting the various warning fixes currently in upstream to get more coverage). fs/ecryptfs/keystore.c:1284:6: warning: variable 'auth_tok_list_item' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (data[(*packet_size)++] != ECRYPTFS_TAG_1_PACKET_TYPE) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fs/ecryptfs/keystore.c:1360:4: note: uninitialized use occurs here auth_tok_list_item); ^~~~~~~~~~~~~~~~~~ fs/ecryptfs/keystore.c:1284:2: note: remove the 'if' if its condition is always false if (data[(*packet_size)++] != ECRYPTFS_TAG_1_PACKET_TYPE) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fs/ecryptfs/keystore.c:1260:56: note: initialize the variable 'auth_tok_list_item' to silence this warning struct ecryptfs_auth_tok_list_item *auth_tok_list_item; ^ = NULL 1 warning generated. Somehow, commit fe2e082f5da5 ("ecryptfs: fix a memory leak bug in parse_tag_1_packet()") upstream was not applied in the correct if block in 4.4.215, 4.9.215, and 4.14.172, which will indeed lead to use of uninitialized memory. Fix it up by undoing the bad backport in those trees then reapplying the patch in the proper location. 
Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/keystore.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 3f3ec50bf773..b134315fb69d 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1285,7 +1285,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, printk(KERN_ERR "Enter w/ first byte != 0x%.2x\n", ECRYPTFS_TAG_1_PACKET_TYPE); rc = -EINVAL; - goto out_free; + goto out; } /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or * at end of function upon failure */ @@ -1335,7 +1335,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, printk(KERN_WARNING "Tag 1 packet contains key larger " "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES"); rc = -EINVAL; - goto out; + goto out_free; } memcpy((*new_auth_tok)->session_key.encrypted_key, &data[(*packet_size)], (body_size - (ECRYPTFS_SIG_SIZE + 2))); -- GitLab From 04fb39b7539f2df050efd4972e9f7fe675489f48 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 Aug 2018 21:57:03 -0700 Subject: [PATCH 0351/1278] include/linux/bitops.h: introduce BITS_PER_TYPE commit 9144d75e22cad3c89e6b2ccab551db9ee28d250a upstream. net_dim.h has a rather useful extension to BITS_PER_BYTE to compute the number of bits in a type (BITS_PER_BYTE * sizeof(T)), so promote the macro to bitops.h, alongside BITS_PER_BYTE, for wider usage. Link: http://lkml.kernel.org/r/20180706094458.14116-1-chris@chris-wilson.co.uk Signed-off-by: Chris Wilson Reviewed-by: Jani Nikula Cc: Randy Dunlap Cc: Andy Gospodarek Cc: David S. 
Miller Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [only take the bitops.h portion for stable kernels - gregkh] Signed-off-by: Greg Kroah-Hartman --- include/linux/bitops.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index b767c7ad65c6..c51574fab0b0 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -4,7 +4,8 @@ #include #include -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); -- GitLab From c05073e1c0f85e7f8de51789b1f61f0373973c40 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 20 Feb 2020 16:42:13 +0200 Subject: [PATCH 0352/1278] net: netlink: cap max groups which will be considered in netlink_bind() commit 3a20773beeeeadec41477a5ba872175b778ff752 upstream. Since nl_groups is a u32 we can't bind more groups via ->bind (netlink_bind) call, but netlink has supported more groups via setsockopt() for a long time and thus nlk->ngroups could be over 32. Recently I added support for per-vlan notifications and increased the groups to 33 for NETLINK_ROUTE which exposed an old bug in the netlink_bind() code causing out-of-bounds access on archs where unsigned long is 32 bits via test_bit() on a local variable. Fix this by capping the maximum groups in netlink_bind() to BITS_PER_TYPE(u32), effectively capping them at 32 which is the minimum of allocated groups and the maximum groups which can be bound via netlink_bind(). CC: Christophe Leroy CC: Richard Guy Briggs Fixes: 4f520900522f ("netlink: have netlink per-protocol bind function return an error code.") Reported-by: Erhard F. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c67abda5d639..07924559cb10 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -997,7 +997,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nlk->netlink_bind && groups) { int group; - for (group = 0; group < nlk->ngroups; group++) { + /* nl_groups is a u32, so cap the maximum groups we can bind */ + for (group = 0; group < BITS_PER_TYPE(u32); group++) { if (!test_bit(group, &groups)) continue; err = nlk->netlink_bind(net, group + 1); @@ -1016,7 +1017,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_undo_bind(nlk->ngroups, groups, sk); + netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk); goto unlock; } } -- GitLab From 31909a6e9d6866dbe6590c4492b99d8875bba2dc Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Fri, 14 Feb 2020 18:44:56 +0300 Subject: [PATCH 0353/1278] net: atlantic: fix potential error handling commit 380ec5b9af7f0d57dbf6ac067fd9f33cff2fef71 upstream. Code inspection found that in case of a mapping error we return the current 'ret' value. But besides signalling an error, 'ret' is also used to count the number of descriptors allocated for the packet. In that case the map_skb function could return '1'. Change it to return zero (the number of mapped descriptors for the skb). Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index a69f5f1ad32a..7a900f76c9ac 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -519,8 +519,10 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, dx_buff->len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) + if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) { + ret = 0; goto exit; + } first = dx_buff; dx_buff->len_pkt = skb->len; -- GitLab From 713f6323190ca78f656eb8abf87c9d36988995c3 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:49 +0000 Subject: [PATCH 0354/1278] net: ena: make ena rxfh support ETH_RSS_HASH_NO_CHANGE commit 470793a78ce344bd53d31e0c2d537f71ba957547 upstream. As the name suggests ETH_RSS_HASH_NO_CHANGE is received upon changing the key or indirection table using ethtool while keeping the same hash function. Also add a function for retrieving the current hash function from the ena-com layer. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Saeed Bshara Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amazon/ena/ena_com.c | 5 +++++ drivers/net/ethernet/amazon/ena/ena_com.h | 8 ++++++++ drivers/net/ethernet/amazon/ena/ena_ethtool.c | 3 +++ 3 files changed, 16 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 31e0cf144201..dc9149a32f41 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -861,6 +861,11 @@ static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev) hash_key->keys_num = sizeof(hash_key->key) / sizeof(u32); } +int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev) +{ + return ena_dev->rss.hash_func; +} + static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) { struct ena_rss *rss = &ena_dev->rss; diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 90fce5c0ca48..7272fb0d858d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -632,6 +632,14 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size); */ void ena_com_rss_destroy(struct ena_com_dev *ena_dev); +/* ena_com_get_current_hash_function - Get RSS hash function + * @ena_dev: ENA communication layer struct + * + * Return the current hash function. + * @return: 0 or one of the ena_admin_hash_functions values. 
+ */ +int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev); + /* ena_com_fill_hash_function - Fill RSS hash function * @ena_dev: ENA communication layer struct * @func: The hash function (Toeplitz or crc) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 22238f25e071..d29e256bf610 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -748,6 +748,9 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, } switch (hfunc) { + case ETH_RSS_HASH_NO_CHANGE: + func = ena_com_get_current_hash_function(ena_dev); + break; case ETH_RSS_HASH_TOP: func = ENA_ADMIN_TOEPLITZ; break; -- GitLab From 7345a19c1ac2baac28c765eddd83a71dc296cf30 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sat, 7 Dec 2019 01:13:26 +1100 Subject: [PATCH 0355/1278] namei: only return -ECHILD from follow_dotdot_rcu() commit 2b98149c2377bff12be5dd3ce02ae0506e2dd613 upstream. It's over-zealous to return hard errors under RCU-walk here, given that a REF-walk will be triggered for all other cases handling ".." under RCU. The original purpose of this check was to ensure that if a rename occurs such that a directory is moved outside of the bind-mount which the resolution started in, it would be detected and blocked to avoid being able to mess with paths outside of the bind-mount. However, triggering a new REF-walk is just as effective a solution. Cc: "Eric W. 
Biederman" Fixes: 397d425dc26d ("vfs: Test for and handle paths that are unreachable from their mnt_root") Suggested-by: Al Viro Signed-off-by: Aleksa Sarai Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index f421f8d80f4d..c5ebe33984f4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1382,7 +1382,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) nd->path.dentry = parent; nd->seq = seq; if (unlikely(!path_connected(&nd->path))) - return -ENOENT; + return -ECHILD; break; } else { struct mount *mnt = real_mount(nd->path.mnt); -- GitLab From 3b30e1920821ac5cd12cd7b993bec601278d9b75 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 6 Dec 2019 11:45:35 -0800 Subject: [PATCH 0356/1278] mwifiex: drop most magic numbers from mwifiex_process_tdls_action_frame() commit 70e5b8f445fd27fde0c5583460e82539a7242424 upstream. Before commit 1e58252e334d ("mwifiex: Fix heap overflow in mmwifiex_process_tdls_action_frame()"), mwifiex_process_tdls_action_frame() already had too many magic numbers. But this commit just added a ton more, in the name of checking for buffer overflows. That seems like a really bad idea. Let's make these magic numbers a little less magic, by (a) factoring out 'pos[1]' as 'ie_len' (b) using 'sizeof' on the appropriate source or destination fields where possible, instead of bare numbers (c) dropping redundant checks, per below. Regarding redundant checks: the beginning of the loop has this: if (pos + 2 + pos[1] > end) break; but then individual 'case's include stuff like this: if (pos > end - 3) return; if (pos[1] != 1) return; Note that the second 'return' (validating the length, pos[1]) combined with the above condition (ensuring 'pos + 2 + length' doesn't exceed 'end'), makes the first 'return' (whose 'if' can be reworded as 'pos > end - pos[1] - 2') redundant. Rather than unwind the magic numbers there, just drop those conditions. 
Fixes: 1e58252e334d ("mwifiex: Fix heap overflow in mmwifiex_process_tdls_action_frame()") Signed-off-by: Brian Norris Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/tdls.c | 75 ++++++++------------- 1 file changed, 28 insertions(+), 47 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c index b5340af9fa5e..80d20fb6f348 100644 --- a/drivers/net/wireless/marvell/mwifiex/tdls.c +++ b/drivers/net/wireless/marvell/mwifiex/tdls.c @@ -897,7 +897,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, u8 *peer, *pos, *end; u8 i, action, basic; u16 cap = 0; - int ie_len = 0; + int ies_len = 0; if (len < (sizeof(struct ethhdr) + 3)) return; @@ -919,7 +919,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, pos = buf + sizeof(struct ethhdr) + 4; /* payload 1+ category 1 + action 1 + dialog 1 */ cap = get_unaligned_le16(pos); - ie_len = len - sizeof(struct ethhdr) - TDLS_REQ_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_REQ_FIX_LEN; pos += 2; break; @@ -929,7 +929,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, /* payload 1+ category 1 + action 1 + dialog 1 + status code 2*/ pos = buf + sizeof(struct ethhdr) + 6; cap = get_unaligned_le16(pos); - ie_len = len - sizeof(struct ethhdr) - TDLS_RESP_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_RESP_FIX_LEN; pos += 2; break; @@ -937,7 +937,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, if (len < (sizeof(struct ethhdr) + TDLS_CONFIRM_FIX_LEN)) return; pos = buf + sizeof(struct ethhdr) + TDLS_CONFIRM_FIX_LEN; - ie_len = len - sizeof(struct ethhdr) - TDLS_CONFIRM_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_CONFIRM_FIX_LEN; break; default: mwifiex_dbg(priv->adapter, ERROR, "Unknown TDLS frame type.\n"); @@ -950,33 +950,33 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, 
sta_ptr->tdls_cap.capab = cpu_to_le16(cap); - for (end = pos + ie_len; pos + 1 < end; pos += 2 + pos[1]) { - if (pos + 2 + pos[1] > end) + for (end = pos + ies_len; pos + 1 < end; pos += 2 + pos[1]) { + u8 ie_len = pos[1]; + + if (pos + 2 + ie_len > end) break; switch (*pos) { case WLAN_EID_SUPP_RATES: - if (pos[1] > 32) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates)) return; - sta_ptr->tdls_cap.rates_len = pos[1]; - for (i = 0; i < pos[1]; i++) + sta_ptr->tdls_cap.rates_len = ie_len; + for (i = 0; i < ie_len; i++) sta_ptr->tdls_cap.rates[i] = pos[i + 2]; break; case WLAN_EID_EXT_SUPP_RATES: - if (pos[1] > 32) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates)) return; basic = sta_ptr->tdls_cap.rates_len; - if (pos[1] > 32 - basic) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates) - basic) return; - for (i = 0; i < pos[1]; i++) + for (i = 0; i < ie_len; i++) sta_ptr->tdls_cap.rates[basic + i] = pos[i + 2]; - sta_ptr->tdls_cap.rates_len += pos[1]; + sta_ptr->tdls_cap.rates_len += ie_len; break; case WLAN_EID_HT_CAPABILITY: - if (pos > end - sizeof(struct ieee80211_ht_cap) - 2) - return; - if (pos[1] != sizeof(struct ieee80211_ht_cap)) + if (ie_len != sizeof(struct ieee80211_ht_cap)) return; /* copy the ie's value into ht_capb*/ memcpy((u8 *)&sta_ptr->tdls_cap.ht_capb, pos + 2, @@ -984,59 +984,45 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, sta_ptr->is_11n_enabled = 1; break; case WLAN_EID_HT_OPERATION: - if (pos > end - - sizeof(struct ieee80211_ht_operation) - 2) - return; - if (pos[1] != sizeof(struct ieee80211_ht_operation)) + if (ie_len != sizeof(struct ieee80211_ht_operation)) return; /* copy the ie's value into ht_oper*/ memcpy(&sta_ptr->tdls_cap.ht_oper, pos + 2, sizeof(struct ieee80211_ht_operation)); break; case WLAN_EID_BSS_COEX_2040: - if (pos > end - 3) - return; - if (pos[1] != 1) + if (ie_len != sizeof(pos[2])) return; sta_ptr->tdls_cap.coex_2040 = pos[2]; break; case WLAN_EID_EXT_CAPABILITY: - if (pos > end - sizeof(struct 
ieee_types_header)) - return; - if (pos[1] < sizeof(struct ieee_types_header)) + if (ie_len < sizeof(struct ieee_types_header)) return; - if (pos[1] > 8) + if (ie_len > 8) return; memcpy((u8 *)&sta_ptr->tdls_cap.extcap, pos, sizeof(struct ieee_types_header) + - min_t(u8, pos[1], 8)); + min_t(u8, ie_len, 8)); break; case WLAN_EID_RSN: - if (pos > end - sizeof(struct ieee_types_header)) + if (ie_len < sizeof(struct ieee_types_header)) return; - if (pos[1] < sizeof(struct ieee_types_header)) - return; - if (pos[1] > IEEE_MAX_IE_SIZE - + if (ie_len > IEEE_MAX_IE_SIZE - sizeof(struct ieee_types_header)) return; memcpy((u8 *)&sta_ptr->tdls_cap.rsn_ie, pos, sizeof(struct ieee_types_header) + - min_t(u8, pos[1], IEEE_MAX_IE_SIZE - + min_t(u8, ie_len, IEEE_MAX_IE_SIZE - sizeof(struct ieee_types_header))); break; case WLAN_EID_QOS_CAPA: - if (pos > end - 3) - return; - if (pos[1] != 1) + if (ie_len != sizeof(pos[2])) return; sta_ptr->tdls_cap.qos_info = pos[2]; break; case WLAN_EID_VHT_OPERATION: if (priv->adapter->is_hw_11ac_capable) { - if (pos > end - - sizeof(struct ieee80211_vht_operation) - 2) - return; - if (pos[1] != + if (ie_len != sizeof(struct ieee80211_vht_operation)) return; /* copy the ie's value into vhtoper*/ @@ -1046,10 +1032,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, break; case WLAN_EID_VHT_CAPABILITY: if (priv->adapter->is_hw_11ac_capable) { - if (pos > end - - sizeof(struct ieee80211_vht_cap) - 2) - return; - if (pos[1] != sizeof(struct ieee80211_vht_cap)) + if (ie_len != sizeof(struct ieee80211_vht_cap)) return; /* copy the ie's value into vhtcap*/ memcpy((u8 *)&sta_ptr->tdls_cap.vhtcap, pos + 2, @@ -1059,9 +1042,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, break; case WLAN_EID_AID: if (priv->adapter->is_hw_11ac_capable) { - if (pos > end - 4) - return; - if (pos[1] != 2) + if (ie_len != sizeof(u16)) return; sta_ptr->tdls_cap.aid = get_unaligned_le16((pos + 2)); -- GitLab From 
e12afc7d76d63eabadc444f620bcf6be4306f0d6 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 9 Jan 2020 17:42:16 -0600 Subject: [PATCH 0357/1278] KVM: SVM: Override default MMIO mask if memory encryption is enabled commit 52918ed5fcf05d97d257f4131e19479da18f5d16 upstream. The KVM MMIO support uses bit 51 as the reserved bit to cause nested page faults when a guest performs MMIO. The AMD memory encryption support uses a CPUID function to define the encryption bit position. Given this, it is possible that these bits can conflict. Use svm_hardware_setup() to override the MMIO mask if memory encryption support is enabled. Various checks are performed to ensure that the mask is properly defined and rsvd_bits() is used to generate the new mask (as was done prior to the change that necessitated this patch). Fixes: 28a1f3ac1d0c ("kvm: x86: Set highest physical address bits in non-present/reserved SPTEs") Suggested-by: Sean Christopherson Reviewed-by: Sean Christopherson Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8e65a9b40c18..d63621386418 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1088,6 +1088,47 @@ static int avic_ga_log_notifier(u32 ga_tag) return 0; } +/* + * The default MMIO mask is a single bit (excluding the present bit), + * which could conflict with the memory encryption bit. Check for + * memory encryption support and override the default MMIO mask if + * memory encryption is enabled. 
+ */ +static __init void svm_adjust_mmio_mask(void) +{ + unsigned int enc_bit, mask_bit; + u64 msr, mask; + + /* If there is no memory encryption support, use existing mask */ + if (cpuid_eax(0x80000000) < 0x8000001f) + return; + + /* If memory encryption is not enabled, use existing mask */ + rdmsrl(MSR_K8_SYSCFG, msr); + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + return; + + enc_bit = cpuid_ebx(0x8000001f) & 0x3f; + mask_bit = boot_cpu_data.x86_phys_bits; + + /* Increment the mask bit if it is the same as the encryption bit */ + if (enc_bit == mask_bit) + mask_bit++; + + /* + * If the mask bit location is below 52, then some bits above the + * physical addressing limit will always be reserved, so use the + * rsvd_bits() function to generate the mask. This mask, along with + * the present bit, will be used to generate a page fault with + * PFER.RSV = 1. + * + * If the mask bit location is 52 (or above), then clear the mask. + */ + mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0; + + kvm_mmu_set_mmio_spte_mask(mask, mask); +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1123,6 +1164,8 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); } + svm_adjust_mmio_mask(); + for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) -- GitLab From 0bdc63911545438223d5e44f869e3b1d9981a08b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 9 Jan 2020 15:56:18 -0800 Subject: [PATCH 0358/1278] KVM: Check for a bad hva before dropping into the ghc slow path commit fcfbc617547fc6d9552cb6c1c563b6a90ee98085 upstream. When reading/writing using the guest/host cache, check for a bad hva before checking for a NULL memslot, which triggers the slow path for handling cross-page accesses.
Because the memslot is nullified on error by __kvm_gfn_to_hva_cache_init(), if the bad hva is encountered after crossing into a new page, then the kvm_{read,write}_guest() slow path could potentially write/access the first chunk prior to detecting the bad hva. Arguably, performing a partial access is semantically correct from an architectural perspective, but that behavior is certainly not intended. In the original implementation, memslot was not explicitly nullified and therefore the partial access behavior varied based on whether the memslot itself was null, or if the hva was simply bad. The current behavior was introduced as a seemingly unintentional side effect in commit f1b9dd5eb86c ("kvm: Disallow wraparound in kvm_gfn_to_hva_cache_init"), which justified the change with "since some callers don't check the return code from this function, it sit seems prudent to clear ghc->memslot in the event of an error". Regardless of intent, the partial access is dependent on _not_ checking the result of the cache initialization, which is arguably a bug in its own right, at best simply weird. 
Fixes: 8f964525a121 ("KVM: Allow cross page reads and writes from cached translations.") Cc: Jim Mattson Cc: Andrew Honig Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/kvm_main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 745ee09083dd..71f77ae6c2a6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2027,12 +2027,12 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, if (slots->generation != ghc->generation) __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - if (unlikely(!ghc->memslot)) - return kvm_write_guest(kvm, gpa, data, len); - if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_write_guest(kvm, gpa, data, len); + r = __copy_to_user((void __user *)ghc->hva + offset, data, len); if (r) return -EFAULT; @@ -2060,12 +2060,12 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, if (slots->generation != ghc->generation) __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - if (unlikely(!ghc->memslot)) - return kvm_read_guest(kvm, ghc->gpa, data, len); - if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_read_guest(kvm, ghc->gpa, data, len); + r = __copy_from_user(data, (void __user *)ghc->hva, len); if (r) return -EFAULT; -- GitLab From 380a732d1792ab887669f6b5ab438ccad6cc1adb Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 22 May 2018 14:21:04 +0800 Subject: [PATCH 0359/1278] tuntap: correctly set SOCKWQ_ASYNC_NOSPACE commit 2f3ab6221e4c87960347d65c7cab9bd917d1f637 upstream. When link is down, writes to the device might fail with -EIO. Userspace needs an indication when the status is resolved. As a fix, tun_net_open() attempts to wake up writers - but that is only effective if SOCKWQ_ASYNC_NOSPACE has been set in the past. 
This is not the case of vhost_net which only poll for EPOLLOUT after it meets errors during sendmsg(). This patch fixes this by making sure SOCKWQ_ASYNC_NOSPACE is set when socket is not writable or device is down to guarantee EPOLLOUT will be raised in either tun_chr_poll() or tun_sock_write_space() after device is up. Cc: Hannes Frederic Sowa Cc: Eric Dumazet Fixes: 1bd4978a88ac2 ("tun: honor IFF_UP in tun_get_user()") Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Tommi Rantala --- drivers/net/tun.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3086211829a7..ba34f61d70de 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1134,6 +1134,13 @@ static void tun_net_init(struct net_device *dev) dev->max_mtu = MAX_MTU - dev->hard_header_len; } +static bool tun_sock_writeable(struct tun_struct *tun, struct tun_file *tfile) +{ + struct sock *sk = tfile->socket.sk; + + return (tun->dev->flags & IFF_UP) && sock_writeable(sk); +} + /* Character device part */ /* Poll */ @@ -1156,10 +1163,14 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) if (!skb_array_empty(&tfile->tx_array)) mask |= POLLIN | POLLRDNORM; - if (tun->dev->flags & IFF_UP && - (sock_writeable(sk) || - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && - sock_writeable(sk)))) + /* Make sure SOCKWQ_ASYNC_NOSPACE is set if not writable to + * guarantee EPOLLOUT to be raised by either here or + * tun_sock_write_space(). Then process could get notification + * after it writes to a down device and meets -EIO. 
+ */ + if (tun_sock_writeable(tun, tfile) || + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && + tun_sock_writeable(tun, tfile))) mask |= POLLOUT | POLLWRNORM; if (tun->dev->reg_state != NETREG_REGISTERED) -- GitLab From cf66af9d9305c3caead5446ced6ad7af762e3e86 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Jan 2020 11:44:29 +0100 Subject: [PATCH 0360/1278] drivers: net: xgene: Fix the order of the arguments of 'alloc_etherdev_mqs()' commit 5a44c71ccda60a50073c5d7fe3f694cdfa3ab0c2 upstream. 'alloc_etherdev_mqs()' expects first 'tx', then 'rx'. The semantic here looks reversed. Reorder the arguments passed to 'alloc_etherdev_mqs()' in order to keep the correct semantic. In fact, this is a no-op because both XGENE_NUM_[RT]X_RING are 8. Fixes: 107dec2749fe ("drivers: net: xgene: Add support for multiple queues") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 50dd6bf176d0..3a489b2b99c9 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -2034,7 +2034,7 @@ static int xgene_enet_probe(struct platform_device *pdev) int ret; ndev = alloc_etherdev_mqs(sizeof(struct xgene_enet_pdata), - XGENE_NUM_RX_RING, XGENE_NUM_TX_RING); + XGENE_NUM_TX_RING, XGENE_NUM_RX_RING); if (!ndev) return -ENOMEM; -- GitLab From b996b668dafbff61a630be55479a2f9d5c5a5b19 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 27 Nov 2019 14:57:04 +0900 Subject: [PATCH 0361/1278] kprobes: Set unoptimized flag after unoptimizing code commit f66c0447cca1281116224d474cdb37d6a18e4b5b upstream. Set the unoptimized flag after confirming the code is completely unoptimized. 
Without this fix, when a kprobe hits the intermediate modified instruction (the first byte is replaced by an INT3, but later bytes can still be a jump address operand) while unoptimizing, it can return to the middle byte of the modified code, which causes an invalid instruction exception in the kernel. Usually, this is a rare case, but if we put a probe on the function call while text patching, it always causes a kernel panic as below: # echo p text_poke+5 > kprobe_events # echo 1 > events/kprobes/enable # echo 0 > events/kprobes/enable invalid opcode: 0000 [#1] PREEMPT SMP PTI RIP: 0010:text_poke+0x9/0x50 Call Trace: arch_unoptimize_kprobe+0x22/0x28 arch_unoptimize_kprobes+0x39/0x87 kprobe_optimizer+0x6e/0x290 process_one_work+0x2a0/0x610 worker_thread+0x28/0x3d0 ? process_one_work+0x610/0x610 kthread+0x10d/0x130 ? kthread_park+0x80/0x80 ret_from_fork+0x3a/0x50 text_poke() is used for patching the code in optprobes. This can happen even if we blacklist text_poke() and other functions, because there is a small time window during which we show the intermediate code to other CPUs. [ mingo: Edited the changelog. 
] Tested-by: Alexei Starovoitov Signed-off-by: Masami Hiramatsu Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: bristot@redhat.com Fixes: 6274de4984a6 ("kprobes: Support delayed unoptimizing") Link: https://lkml.kernel.org/r/157483422375.25881.13508326028469515760.stgit@devnote2 Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/kprobes.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d0fe20a5475f..48bf93bbb22e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -523,6 +523,8 @@ static void do_unoptimize_kprobes(void) arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); /* Loop free_list for disarming */ list_for_each_entry_safe(op, tmp, &freeing_list, list) { + /* Switching from detour code to origin */ + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; /* Disarm probes if marked disabled */ if (kprobe_disabled(&op->kp)) arch_disarm_kprobe(&op->kp); @@ -662,6 +664,7 @@ static void force_unoptimize_kprobe(struct optimized_kprobe *op) { lockdep_assert_cpus_held(); arch_unoptimize_kprobe(op); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; if (kprobe_disabled(&op->kp)) arch_disarm_kprobe(&op->kp); } @@ -689,7 +692,6 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) return; } - op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; if (!list_empty(&op->list)) { /* Dequeue from the optimization queue */ list_del_init(&op->list); -- GitLab From 28a5ca261d1140b4a22da701e9ab59c8aa81adbf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Dec 2019 13:22:33 -0300 Subject: [PATCH 0362/1278] perf hists browser: Restore ESC as "Zoom out" of DSO/thread/etc commit 3f7774033e6820d25beee5cf7aefa11d4968b951 upstream. We need to set actions->ms.map since 599a2f38a989 ("perf hists browser: Check sort keys before hot key actions"), as in that patch we bail out if map is NULL. 
Reviewed-by: Jiri Olsa Cc: Adrian Hunter Cc: Namhyung Kim Fixes: 599a2f38a989 ("perf hists browser: Check sort keys before hot key actions") Link: https://lkml.kernel.org/n/tip-wp1ssoewy6zihwwexqpohv0j@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/ui/browsers/hists.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 628ad5f7eddb..49a87fb64156 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3142,6 +3142,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; } + actions->ms.map = map; top = pstack__peek(browser->pstack); if (top == &browser->hists->dso_filter) { /* -- GitLab From b200a5dded6fc266cbcf79ade856ea69e3633817 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 30 Jan 2020 22:14:29 -0800 Subject: [PATCH 0363/1278] mm/huge_memory.c: use head to check huge zero page commit cb829624867b5ab10bc6a7036d183b1b82bfe9f8 upstream. The page could be a tail page, if this is the case, this BUG_ON will never be triggered. Link: http://lkml.kernel.org/r/20200110032610.26499-1-richardw.yang@linux.intel.com Fixes: e9b61f19858a ("thp: reintroduce split_huge_page()") Signed-off-by: Wei Yang Acked-by: Kirill A. 
Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 92915cc87549..98fb0b4be7b1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2561,7 +2561,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) unsigned long flags; pgoff_t end; - VM_BUG_ON_PAGE(is_huge_zero_page(page), page); + VM_BUG_ON_PAGE(is_huge_zero_page(head), head); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageCompound(page), page); -- GitLab From 2630ea39bca2aa6c05f795035cf17e338178bbaf Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 30 Jan 2020 22:14:48 -0800 Subject: [PATCH 0364/1278] mm, thp: fix defrag setting if newline is not used commit f42f25526502d851d0e3ca1e46297da8aafce8a7 upstream. If thp defrag setting "defer" is used and a newline is *not* used when writing to the sysfs file, this is interpreted as the "defer+madvise" option. This is because we do prefix matching and if five characters are written without a newline, the current code ends up comparing to the first five bytes of the "defer+madvise" option and using that instead. Use the more appropriate sysfs_streq() that handles the trailing newline for us. Since this doubles as a nice cleanup, do it in enabled_store() as well. The current implementation relies on prefix matching: the number of bytes compared is either the number of bytes written or the length of the option being compared. With a newline, "defer\n" does not match "defer+madvise"; without a newline, however, "defer" is considered to match "defer+madvise" (prefix matching is only comparing the first five bytes). End result is that writing "defer" is broken unless it has an additional trailing character. This means that writing "madv" in the past would match and set "madvise". 
With strict checking, that no longer is the case but it is unlikely anybody is currently doing this. Link: http://lkml.kernel.org/r/alpine.DEB.2.21.2001171411020.56385@chino.kir.corp.google.com Fixes: 21440d7eb904 ("mm, thp: add new defer+madvise defrag option") Signed-off-by: David Rientjes Suggested-by: Andrew Morton Acked-by: Vlastimil Babka Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 98fb0b4be7b1..283963032eff 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -163,16 +163,13 @@ static ssize_t enabled_store(struct kobject *kobj, { ssize_t ret = count; - if (!memcmp("always", buf, - min(sizeof("always")-1, count))) { + if (sysfs_streq(buf, "always")) { clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("madvise", buf, - min(sizeof("madvise")-1, count))) { + } else if (sysfs_streq(buf, "madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("never", buf, - min(sizeof("never")-1, count))) { + } else if (sysfs_streq(buf, "never")) { clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); } else @@ -236,32 +233,27 @@ static ssize_t defrag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - if (!memcmp("always", buf, - min(sizeof("always")-1, count))) { + if (sysfs_streq(buf, "always")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); 
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("defer+madvise", buf, - min(sizeof("defer+madvise")-1, count))) { + } else if (sysfs_streq(buf, "defer+madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("defer", buf, - min(sizeof("defer")-1, count))) { + } else if (sysfs_streq(buf, "defer")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("madvise", buf, - min(sizeof("madvise")-1, count))) { + } else if (sysfs_streq(buf, "madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("never", buf, - min(sizeof("never")-1, count))) { + } else if (sysfs_streq(buf, "never")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); -- GitLab From 0abb54ae42e7b3b11ff044c7c59e4d7ca1eef7e4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman 
Date: Thu, 5 Mar 2020 15:48:03 +0100 Subject: [PATCH 0365/1278] Revert "char/random: silence a lockdep splat with printk()" This reverts commit 28820c5802f9f83c655ab09ccae8289103ce1490 which is commit 1b710b1b10eff9d46666064ea25f079f70bc67a8 upstream. It causes problems here just like it did in 4.19.y and odds are it will be reverted upstream as well. Reported-by: Guenter Roeck Cc: Sergey Senozhatsky Cc: Qian Cai Cc: Theodore Ts'o Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 50d5846acf48..e6efa07e9f9e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1598,9 +1598,8 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, print_once = true; #endif if (__ratelimit(&unseeded_warning)) - printk_deferred(KERN_NOTICE "random: %s called from %pS " - "with crng_init=%d\n", func_name, caller, - crng_init); + pr_notice("random: %s called from %pS with crng_init=%d\n", + func_name, caller, crng_init); } /* -- GitLab From c7cba03b2bdced33715a7167cb9c5c8733cd31c3 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 24 Feb 2020 16:38:57 -0500 Subject: [PATCH 0366/1278] audit: always check the netlink payload length in audit_receive_msg() [ Upstream commit 756125289285f6e55a03861bf4b6257aa3d19a93 ] This patch ensures that we always check the netlink payload length in audit_receive_msg() before we take any action on the payload itself. 
Cc: stable@vger.kernel.org Reported-by: syzbot+399c44bf1f43b8747403@syzkaller.appspotmail.com Reported-by: syzbot+e4b12d8d202701f08b6d@syzkaller.appspotmail.com Signed-off-by: Paul Moore Signed-off-by: Sasha Levin --- kernel/audit.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index d301276bca58..b21a8910f765 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1067,13 +1067,11 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature audit_log_end(ab); } -static int audit_set_feature(struct sk_buff *skb) +static int audit_set_feature(struct audit_features *uaf) { - struct audit_features *uaf; int i; BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > ARRAY_SIZE(audit_feature_names)); - uaf = nlmsg_data(nlmsg_hdr(skb)); /* if there is ever a version 2 we should handle that here */ @@ -1141,6 +1139,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { u32 seq; void *data; + int data_len; int err; struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; @@ -1154,6 +1153,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) seq = nlh->nlmsg_seq; data = nlmsg_data(nlh); + data_len = nlmsg_len(nlh); switch (msg_type) { case AUDIT_GET: { @@ -1177,7 +1177,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct audit_status s; memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh))); + memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); if (s.mask & AUDIT_STATUS_ENABLED) { err = audit_set_enabled(s.enabled); if (err < 0) @@ -1281,7 +1281,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; break; case AUDIT_SET_FEATURE: - err = audit_set_feature(skb); + if (data_len < sizeof(struct audit_features)) + return -EINVAL; + err = audit_set_feature(data); if (err) return err; break; @@ 
-1293,6 +1295,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) err = audit_filter(msg_type, AUDIT_FILTER_USER); if (err == 1) { /* match or error */ + char *str = data; + err = 0; if (msg_type == AUDIT_USER_TTY) { err = tty_audit_push(); @@ -1300,26 +1304,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; } audit_log_common_recv_msg(&ab, msg_type); - if (msg_type != AUDIT_USER_TTY) + if (msg_type != AUDIT_USER_TTY) { + /* ensure NULL termination */ + str[data_len - 1] = '\0'; audit_log_format(ab, " msg='%.*s'", AUDIT_MESSAGE_TEXT_MAX, - (char *)data); - else { - int size; - + str); + } else { audit_log_format(ab, " data="); - size = nlmsg_len(nlh); - if (size > 0 && - ((unsigned char *)data)[size - 1] == '\0') - size--; - audit_log_n_untrustedstring(ab, data, size); + if (data_len > 0 && str[data_len - 1] == '\0') + data_len--; + audit_log_n_untrustedstring(ab, str, data_len); } audit_log_end(ab); } break; case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: - if (nlmsg_len(nlh) < sizeof(struct audit_rule_data)) + if (data_len < sizeof(struct audit_rule_data)) return -EINVAL; if (audit_enabled == AUDIT_LOCKED) { audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE); @@ -1327,7 +1329,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) audit_log_end(ab); return -EPERM; } - err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh)); + err = audit_rule_change(msg_type, seq, data, data_len); break; case AUDIT_LIST_RULES: err = audit_list_rules_send(skb, seq); @@ -1341,7 +1343,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_MAKE_EQUIV: { void *bufp = data; u32 sizes[2]; - size_t msglen = nlmsg_len(nlh); + size_t msglen = data_len; char *old, *new; err = -EINVAL; @@ -1417,7 +1419,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, 
min_t(size_t, sizeof(s), nlmsg_len(nlh))); + memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); /* check if new data is valid */ if ((s.enabled != 0 && s.enabled != 1) || (s.log_passwd != 0 && s.log_passwd != 1)) -- GitLab From ff8e12b0cfe277a54edbab525f068b39c7ed0de3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 5 Mar 2020 17:30:05 +0100 Subject: [PATCH 0367/1278] vhost: Check socket sk_family instead of call getname MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 42d84c8490f9f0931786f1623191fcab397c3d64 upstream. Doing so, we save one call to get data we already have in the struct. Also, since there is no guarantee that getname use sockaddr_ll parameter beyond its size, we add a little bit of security here. It should not go beyond MAX_ADDR_LEN, but syzbot found that ax25_getname writes more (72 bytes, the size of full_sockaddr_ax25, versus 20 + 32 bytes of sockaddr_ll + MAX_ADDR_LEN in syzbot repro). Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server") Reported-by: syzbot+f2a62d07a5198c819c7b@syzkaller.appspotmail.com Signed-off-by: Eugenio Pérez Acked-by: Michael S. Tsirkin Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman [jwang: backport to 4.14] Signed-off-by: Jack Wang Signed-off-by: Sasha Levin --- drivers/vhost/net.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4d11152e60c1..8fe07622ae59 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1025,11 +1025,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) static struct socket *get_raw_socket(int fd) { - struct { - struct sockaddr_ll sa; - char buf[MAX_ADDR_LEN]; - } uaddr; - int uaddr_len = sizeof uaddr, r; + int r; struct socket *sock = sockfd_lookup(fd, &r); if (!sock) @@ -1041,12 +1037,7 @@ static struct socket *get_raw_socket(int fd) goto err; } - r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, - &uaddr_len, 0); - if (r) - goto err; - - if (uaddr.sa.sll_family != AF_PACKET) { + if (sock->sk->sk_family != AF_PACKET) { r = -EPFNOSUPPORT; goto err; } -- GitLab From 7a4139ccd2ffe87c5125eb476b57c3db1b7b70d1 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 5 Mar 2020 17:30:06 +0100 Subject: [PATCH 0368/1278] x86/mce: Handle varying MCA bank counts [ Upstream commit 006c077041dc73b9490fffc4c6af5befe0687110 ] Linux reads MCG_CAP[Count] to find the number of MCA banks visible to a CPU. Currently, this number is the same for all CPUs and a warning is shown if there is a difference. The number of banks is overwritten with the MCG_CAP[Count] value of each following CPU that boots. According to the Intel SDM and AMD APM, the MCG_CAP[Count] value gives the number of banks that are available to a "processor implementation". The AMD BKDGs/PPRs further clarify that this value is per core. This value has historically been the same for every core in the system, but that is not an architectural requirement. Future AMD systems may have different MCG_CAP[Count] values per core, so the assumption that all CPUs will have the same MCG_CAP[Count] value will no longer be valid. 
Also, the first CPU to boot will allocate the struct mce_banks[] array using the number of banks based on its MCG_CAP[Count] value. The machine check handler and other functions use the global number of banks to iterate and index into the mce_banks[] array. So it's possible to use an out-of-bounds index on an asymmetric system where a following CPU sees a MCG_CAP[Count] value greater than its predecessors. Thus, allocate the mce_banks[] array to the maximum number of banks. This will avoid the potential out-of-bounds index since the value of mca_cfg.banks is capped to MAX_NR_BANKS. Set the value of mca_cfg.banks equal to the max of the previous value and the value for the current CPU. This way mca_cfg.banks will always represent the max number of banks detected on any CPU in the system. This will ensure that all CPUs will access all the banks that are visible to them. A CPU that can access fewer than the max number of banks will find the registers of the extra banks to be read-as-zero. Furthermore, print the resulting number of MCA banks in use. Do this in mcheck_late_init() so that the final value is printed after all CPUs have been initialized. Finally, get bank count from target CPU when doing injection with mce-inject module. [ bp: Remove out-of-bounds example, passify and cleanup commit message. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: Pu Wen Cc: Thomas Gleixner Cc: Tony Luck Cc: Vishal Verma Cc: x86-ml Link: https://lkml.kernel.org/r/20180727214009.78289-1-Yazen.Ghannam@amd.com Signed-off-by: Sasha Levin [jwang: cherry-pick to fix boot warning in arch/x86/kernel/cpu/mcheck/mce.c:1549 in epyc rome server] Signed-off-by: Jack Wang Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/mcheck/mce-inject.c | 14 +++++++------- arch/x86/kernel/cpu/mcheck/mce.c | 22 +++++++--------------- 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index f12141ba9a76..e57b59762f9f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -46,8 +46,6 @@ static struct mce i_mce; static struct dentry *dfs_inj; -static u8 n_banks; - #define MAX_FLAG_OPT_SIZE 4 #define NBCFG 0x44 @@ -570,9 +568,15 @@ static void do_inject(void) static int inj_bank_set(void *data, u64 val) { struct mce *m = (struct mce *)data; + u8 n_banks; + u64 cap; + + /* Get bank count on target CPU so we can handle non-uniform values. 
*/ + rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap); + n_banks = cap & MCG_BANKCNT_MASK; if (val >= n_banks) { - pr_err("Non-existent MCE bank: %llu\n", val); + pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu); return -EINVAL; } @@ -665,10 +669,6 @@ static struct dfs_node { static int __init debugfs_init(void) { unsigned int i; - u64 cap; - - rdmsrl(MSR_IA32_MCG_CAP, cap); - n_banks = cap & MCG_BANKCNT_MASK; dfs_inj = debugfs_create_dir("mce-inject", NULL); if (!dfs_inj) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0b0e44f85393..95c09db1bba2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1499,13 +1499,12 @@ EXPORT_SYMBOL_GPL(mce_notify_irq); static int __mcheck_cpu_mce_banks_init(void) { int i; - u8 num_banks = mca_cfg.banks; - mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL); + mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL); if (!mce_banks) return -ENOMEM; - for (i = 0; i < num_banks; i++) { + for (i = 0; i < MAX_NR_BANKS; i++) { struct mce_bank *b = &mce_banks[i]; b->ctl = -1ULL; @@ -1519,28 +1518,19 @@ static int __mcheck_cpu_mce_banks_init(void) */ static int __mcheck_cpu_cap_init(void) { - unsigned b; u64 cap; + u8 b; rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - if (!mca_cfg.banks) - pr_info("CPU supports %d MCE banks\n", b); - - if (b > MAX_NR_BANKS) { - pr_warn("Using only %u machine check banks out of %u\n", - MAX_NR_BANKS, b); + if (WARN_ON_ONCE(b > MAX_NR_BANKS)) b = MAX_NR_BANKS; - } - /* Don't support asymmetric configurations today */ - WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks); - mca_cfg.banks = b; + mca_cfg.banks = max(mca_cfg.banks, b); if (!mce_banks) { int err = __mcheck_cpu_mce_banks_init(); - if (err) return err; } @@ -2470,6 +2460,8 @@ EXPORT_SYMBOL_GPL(mcsafe_key); static int __init mcheck_late_init(void) { + pr_info("Using %d MCE banks\n", mca_cfg.banks); + if 
(mca_cfg.recovery) static_branch_inc(&mcsafe_key); -- GitLab From 5b5295b5c60d6048db2112f4bb691c9cf97631f0 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 5 Mar 2020 17:30:07 +0100 Subject: [PATCH 0369/1278] EDAC/amd64: Set grain per DIMM [ Upstream commit 466503d6b1b33be46ab87c6090f0ade6c6011cbc ] The following commit introduced a warning on error reports without a non-zero grain value. 3724ace582d9 ("EDAC/mc: Fix grain_bits calculation") The amd64_edac_mod module does not provide a value, so the warning will be given on the first reported memory error. Set the grain per DIMM to cacheline size (64 bytes). This is the current recommendation. Fixes: 3724ace582d9 ("EDAC/mc: Fix grain_bits calculation") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: "linux-edac@vger.kernel.org" Cc: James Morse Cc: Mauro Carvalho Chehab Cc: Robert Richter Cc: Tony Luck Link: https://lkml.kernel.org/r/20191022203448.13962-7-Yazen.Ghannam@amd.com [jwang: backport to 4.14 for fix warning during memory error. 
] Signed-off-by: Jack Wang Signed-off-by: Sasha Levin --- drivers/edac/amd64_edac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 40fb0e7ff8fd..b36abd253786 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2863,6 +2863,7 @@ static int init_csrows(struct mem_ctl_info *mci) dimm = csrow->channels[j]->dimm; dimm->mtype = pvt->dram_type; dimm->edac_mode = edac_mode; + dimm->grain = 64; } } -- GitLab From e1e85c041e00897bfa2e68d46971177f4c0df4f2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 24 Feb 2020 15:56:32 -0800 Subject: [PATCH 0370/1278] net: dsa: bcm_sf2: Forcibly configure IMP port for 1Gb/sec [ Upstream commit 98c5f7d44fef309e692c24c6d71131ee0f0871fb ] We are still experiencing some packet loss with the existing advanced congestion buffering (ACB) settings with the IMP port configured for 2Gb/sec, so revert to conservative link speeds that do not produce packet loss until this is resolved. Fixes: 8f1880cbe8d0 ("net: dsa: bcm_sf2: Configure IMP port for 2Gb/sec") Fixes: de34d7084edd ("net: dsa: bcm_sf2: Only 7278 supports 2Gb/sec IMP port") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/bcm_sf2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 747062f04bb5..6bca42e34a53 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -138,8 +138,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) /* Force link status for IMP port */ reg = core_readl(priv, offset); reg |= (MII_SW_OR | LINK_STS); - if (priv->type == BCM7278_DEVICE_ID) - reg |= GMII_SPEED_UP_2G; + reg &= ~GMII_SPEED_UP_2G; core_writel(priv, reg, offset); /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ -- GitLab From c18a10efb04a7166831383b168c64d158425c54d Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 27 Feb 2020 14:57:28 +0200 Subject: [PATCH 0371/1278] RDMA/core: Fix pkey and port assignment in get_new_pps [ Upstream commit 801b67f3eaafd3f2ec8b65d93142d4ffedba85df ] When port is part of the modify mask, then we should take it from the qp_attr and not from the old pps. Same for PKEY. Otherwise there are panics in some configurations: RIP: 0010:get_pkey_idx_qp_list+0x50/0x80 [ib_core] Code: c7 18 e8 13 04 30 ef 0f b6 43 06 48 69 c0 b8 00 00 00 48 03 85 a0 04 00 00 48 8b 50 20 48 8d 48 20 48 39 ca 74 1a 0f b7 73 04 <66> 39 72 10 75 08 eb 10 66 39 72 10 74 0a 48 8b 12 48 39 ca 75 f2 RSP: 0018:ffffafb3480932f0 EFLAGS: 00010203 RAX: ffff98059ababa10 RBX: ffff980d926e8cc0 RCX: ffff98059ababa30 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff98059ababa28 RBP: ffff98059b940000 R08: 00000000000310c0 R09: ffff97fe47c07480 R10: 0000000000000036 R11: 0000000000000200 R12: 0000000000000071 R13: ffff98059b940000 R14: ffff980d87f948a0 R15: 0000000000000000 FS: 00007f88deb31740(0000) GS:ffff98059f600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 0000000853e26001 CR4: 00000000001606e0 Call Trace: port_pkey_list_insert+0x3d/0x1b0 [ib_core] ? 
kmem_cache_alloc_trace+0x215/0x220 ib_security_modify_qp+0x226/0x3a0 [ib_core] _ib_modify_qp+0xcf/0x390 [ib_core] ipoib_init_qp+0x7f/0x200 [ib_ipoib] ? rvt_modify_port+0xd0/0xd0 [rdmavt] ? ib_find_pkey+0x99/0xf0 [ib_core] ipoib_ib_dev_open_default+0x1a/0x200 [ib_ipoib] ipoib_ib_dev_open+0x96/0x130 [ib_ipoib] ipoib_open+0x44/0x130 [ib_ipoib] __dev_open+0xd1/0x160 __dev_change_flags+0x1ab/0x1f0 dev_change_flags+0x23/0x60 do_setlink+0x328/0xe30 ? __nla_validate_parse+0x54/0x900 __rtnl_newlink+0x54e/0x810 ? __alloc_pages_nodemask+0x17d/0x320 ? page_fault+0x30/0x50 ? _cond_resched+0x15/0x30 ? kmem_cache_alloc_trace+0x1c8/0x220 rtnl_newlink+0x43/0x60 rtnetlink_rcv_msg+0x28f/0x350 ? kmem_cache_alloc+0x1fb/0x200 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x24d/0x2d0 ? rtnl_calcit.isra.31+0x120/0x120 netlink_rcv_skb+0xcb/0x100 netlink_unicast+0x1e0/0x340 netlink_sendmsg+0x317/0x480 ? __check_object_size+0x48/0x1d0 sock_sendmsg+0x65/0x80 ____sys_sendmsg+0x223/0x260 ? copy_msghdr_from_user+0xdc/0x140 ___sys_sendmsg+0x7c/0xc0 ? skb_dequeue+0x57/0x70 ? __inode_wait_for_writeback+0x75/0xe0 ? fsnotify_grab_connector+0x45/0x80 ? 
__dentry_kill+0x12c/0x180 __sys_sendmsg+0x58/0xa0 do_syscall_64+0x5b/0x200 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f88de467f10 Link: https://lore.kernel.org/r/20200227125728.100551-1-leon@kernel.org Cc: Fixes: 1dd017882e01 ("RDMA/core: Fix protection fault in get_pkey_idx_qp_list") Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Tested-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/security.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 9b8276691329..61aff69e9f67 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -339,11 +339,15 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, return NULL; if (qp_attr_mask & IB_QP_PORT) - new_pps->main.port_num = - (qp_pps) ? qp_pps->main.port_num : qp_attr->port_num; + new_pps->main.port_num = qp_attr->port_num; + else if (qp_pps) + new_pps->main.port_num = qp_pps->main.port_num; + if (qp_attr_mask & IB_QP_PKEY_INDEX) - new_pps->main.pkey_index = (qp_pps) ? qp_pps->main.pkey_index : - qp_attr->pkey_index; + new_pps->main.pkey_index = qp_attr->pkey_index; + else if (qp_pps) + new_pps->main.pkey_index = qp_pps->main.pkey_index; + if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) new_pps->main.state = IB_PORT_PKEY_VALID; -- GitLab From b2c775307de06372dd2f6d3b66386e434a66f8d2 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 17 Feb 2020 13:43:18 -0700 Subject: [PATCH 0372/1278] RDMA/core: Fix use of logical OR in get_new_pps [ Upstream commit 4ca501d6aaf21de31541deac35128bbea8427aa6 ] Clang warns: ../drivers/infiniband/core/security.c:351:41: warning: converting the enum constant to a boolean [-Wint-in-bool-context] if (!(qp_attr_mask & (IB_QP_PKEY_INDEX || IB_QP_PORT)) && qp_pps) { ^ 1 warning generated. A bitwise OR should have been used instead. 
Fixes: 1dd017882e01 ("RDMA/core: Fix protection fault in get_pkey_idx_qp_list") Link: https://lore.kernel.org/r/20200217204318.13609-1-natechancellor@gmail.com Link: https://github.com/ClangBuiltLinux/linux/issues/889 Reported-by: Dan Carpenter Signed-off-by: Nathan Chancellor Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/security.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 61aff69e9f67..ce8e3009344a 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -351,7 +351,7 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) new_pps->main.state = IB_PORT_PKEY_VALID; - if (!(qp_attr_mask & (IB_QP_PKEY_INDEX || IB_QP_PORT)) && qp_pps) { + if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) { new_pps->main.port_num = qp_pps->main.port_num; new_pps->main.pkey_index = qp_pps->main.pkey_index; if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) -- GitLab From 53647b8201accbde651a54e65a2c8c66892a35da Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 7 Jan 2020 23:42:24 +0900 Subject: [PATCH 0373/1278] kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic [ Upstream commit e4add247789e4ba5e08ad8256183ce2e211877d4 ] optimize_kprobe() and unoptimize_kprobe() cancels if a given kprobe is on the optimizing_list or unoptimizing_list already. However, since the following commit: f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code") modified the update timing of the KPROBE_FLAG_OPTIMIZED, it doesn't work as expected anymore. The optimized_kprobe could be in the following states: - [optimizing]: Before inserting jump instruction op.kp->flags has KPROBE_FLAG_OPTIMIZED and op->list is not empty. 
- [optimized]: jump inserted op.kp->flags has KPROBE_FLAG_OPTIMIZED and op->list is empty. - [unoptimizing]: Before removing jump instruction (including unused optprobe) op.kp->flags has KPROBE_FLAG_OPTIMIZED and op->list is not empty. - [unoptimized]: jump removed op.kp->flags doesn't have KPROBE_FLAG_OPTIMIZED and op->list is empty. Current code mis-expects [unoptimizing] state doesn't have KPROBE_FLAG_OPTIMIZED, and that can cause incorrect results. To fix this, introduce optprobe_queued_unopt() to distinguish [optimizing] and [unoptimizing] states and fixes the logic in optimize_kprobe() and unoptimize_kprobe(). [ mingo: Cleaned up the changelog and the code a bit. ] Signed-off-by: Masami Hiramatsu Reviewed-by: Steven Rostedt (VMware) Cc: Alexei Starovoitov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bristot@redhat.com Fixes: f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code") Link: https://lkml.kernel.org/r/157840814418.7181.13478003006386303481.stgit@devnote2 Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/kprobes.c | 67 +++++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 48bf93bbb22e..66f1818d4762 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -625,6 +625,18 @@ void wait_for_kprobe_optimizer(void) mutex_unlock(&kprobe_mutex); } +static bool optprobe_queued_unopt(struct optimized_kprobe *op) +{ + struct optimized_kprobe *_op; + + list_for_each_entry(_op, &unoptimizing_list, list) { + if (op == _op) + return true; + } + + return false; +} + /* Optimize kprobe if p is ready to be optimized */ static void optimize_kprobe(struct kprobe *p) { @@ -646,17 +658,21 @@ static void optimize_kprobe(struct kprobe *p) return; /* Check if it is already optimized. */ - if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) + if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) { + if (optprobe_queued_unopt(op)) { + /* This is under unoptimizing. 
Just dequeue the probe */ + list_del_init(&op->list); + } return; + } op->kp.flags |= KPROBE_FLAG_OPTIMIZED; - if (!list_empty(&op->list)) - /* This is under unoptimizing. Just dequeue the probe */ - list_del_init(&op->list); - else { - list_add(&op->list, &optimizing_list); - kick_kprobe_optimizer(); - } + /* On unoptimizing/optimizing_list, op must have OPTIMIZED flag */ + if (WARN_ON_ONCE(!list_empty(&op->list))) + return; + + list_add(&op->list, &optimizing_list); + kick_kprobe_optimizer(); } /* Short cut to direct unoptimizing */ @@ -678,30 +694,33 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) return; /* This is not an optprobe nor optimized */ op = container_of(p, struct optimized_kprobe, kp); - if (!kprobe_optimized(p)) { - /* Unoptimized or unoptimizing case */ - if (force && !list_empty(&op->list)) { - /* - * Only if this is unoptimizing kprobe and forced, - * forcibly unoptimize it. (No need to unoptimize - * unoptimized kprobe again :) - */ - list_del_init(&op->list); - force_unoptimize_kprobe(op); - } + if (!kprobe_optimized(p)) return; - } if (!list_empty(&op->list)) { - /* Dequeue from the optimization queue */ - list_del_init(&op->list); + if (optprobe_queued_unopt(op)) { + /* Queued in unoptimizing queue */ + if (force) { + /* + * Forcibly unoptimize the kprobe here, and queue it + * in the freeing list for release afterwards. 
+ */ + force_unoptimize_kprobe(op); + list_move(&op->list, &freeing_list); + } + } else { + /* Dequeue from the optimizing queue */ + list_del_init(&op->list); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; + } return; } + /* Optimized kprobe case */ - if (force) + if (force) { /* Forcibly update the code: this is a special case */ force_unoptimize_kprobe(op); - else { + } else { list_add(&op->list, &unoptimizing_list); kick_kprobe_optimizer(); } -- GitLab From 6d53f29dd99bde2cf093f4246a6f602f4507f552 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Fri, 7 Feb 2020 11:53:35 +0200 Subject: [PATCH 0374/1278] serial: ar933x_uart: set UART_CS_{RX,TX}_READY_ORIDE [ Upstream commit 87c5cbf71ecbb9e289d60a2df22eb686c70bf196 ] On AR934x this UART is usually not initialized by the bootloader as it is only used as a secondary serial port while the primary UART is a newly introduced NS16550-compatible. In order to make use of the ar933x-uart on AR934x without RTS/CTS hardware flow control, one needs to set the UART_CS_{RX,TX}_READY_ORIDE bits as other than on AR933x where this UART is used as primary/console, the bootloader on AR934x typically doesn't set those bits. Setting them explicitly on AR933x should not do any harm, so just set them unconditionally.
Tested-by: Chuanhong Guo Signed-off-by: Daniel Golle Link: https://lore.kernel.org/r/20200207095335.GA179836@makrotopia.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/ar933x_uart.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c index ed545a61413c..ac56a5131a9c 100644 --- a/drivers/tty/serial/ar933x_uart.c +++ b/drivers/tty/serial/ar933x_uart.c @@ -289,6 +289,10 @@ static void ar933x_uart_set_termios(struct uart_port *port, ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, AR933X_UART_CS_HOST_INT_EN); + /* enable RX and TX ready overide */ + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE); + /* reenable the UART */ ar933x_uart_rmw(up, AR933X_UART_CS_REG, AR933X_UART_CS_IF_MODE_M << AR933X_UART_CS_IF_MODE_S, @@ -421,6 +425,10 @@ static int ar933x_uart_startup(struct uart_port *port) ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, AR933X_UART_CS_HOST_INT_EN); + /* enable RX and TX ready overide */ + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE); + /* Enable RX interrupts */ up->ier = AR933X_UART_INT_RX_VALID; ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier); -- GitLab From dd3a97bd15ea278c8347a9a68c5bb6b3355b1240 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 6 Feb 2020 09:40:52 +0100 Subject: [PATCH 0375/1278] selftests: fix too long argument [ Upstream commit c363eb48ada5cf732b3f489fab799fc881097842 ] With some shells, the command construed for install of bpf selftests becomes too large due to long list of files: make[1]: execvp: /bin/sh: Argument list too long make[1]: *** [../lib.mk:73: install] Error 127 Currently, each of the file lists is replicated three times in the command: in the shell 'if' condition, in the 'echo' and in the 'rsync'. 
Reduce that by one instance by using make conditionals and separate the echo and rsync into two shell commands. (One would be inclined to just remove the '@' at the beginning of the rsync command and let 'make' echo it by itself; unfortunately, it appears that the '@' in the front of mkdir silences output also for the following commands.) Also, separate handling of each of the lists to its own shell command. The semantics of the makefile is unchanged before and after the patch. The ability of individual test directories to override INSTALL_RULE is retained. Reported-by: Yauheni Kaliuta Tested-by: Yauheni Kaliuta Signed-off-by: Jiri Benc Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/lib.mk | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 5bef05d6ba39..c9be64dc681d 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -54,17 +54,20 @@ else $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS)) endif +define INSTALL_SINGLE_RULE + $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH)) + $(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) + $(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) +endef + define INSTALL_RULE - @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ - mkdir -p ${INSTALL_PATH}; \ - echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \ - rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \ - fi - @if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \ - mkdir -p ${INSTALL_PATH}; \ - echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \ - rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) 
$(INSTALL_PATH)/; \ - fi + $(eval INSTALL_LIST = $(TEST_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_FILES)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE) endef install: all -- GitLab From e2dc64fa1df7aa5912e07157ad6f15734d25666d Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Thu, 30 Jan 2020 19:10:36 -0800 Subject: [PATCH 0376/1278] usb: gadget: composite: Support more than 500mA MaxPower [ Upstream commit a2035411fa1d1206cea7d5dfe833e78481844a76 ] USB 3.x SuperSpeed peripherals can draw up to 900mA of VBUS power when in configured state. However, if a configuration wanting to take advantage of this is added with MaxPower greater than 500 (currently possible if using a ConfigFS gadget) the composite driver fails to accommodate this for a couple reasons: - usb_gadget_vbus_draw() when called from set_config() and composite_resume() will be passed the MaxPower value without regard for the current connection speed, resulting in a violation for USB 2.0 since the max is 500mA. - the bMaxPower of the configuration descriptor would be incorrectly encoded, again if the connection speed is only at USB 2.0 or below, likely wrapping around U8_MAX since the 2mA multiplier corresponds to a maximum of 510mA. Fix these by adding checks against the current gadget->speed when the c->MaxPower value is used (set_config() and composite_resume()) and appropriately limit based on whether it is currently at a low-/full-/high- or super-speed connection. Because 900 is not divisible by 8, with the round-up division currently used in encode_bMaxPower() a MaxPower of 900mA will result in an encoded value of 0x71. 
When a host stack (including Linux and Windows) enumerates this on a single port root hub, it reads this value back and decodes (multiplies by 8) to get 904mA which is strictly greater than 900mA that is typically budgeted for that port, causing it to reject the configuration. Instead, we should be using the round-down behavior of normal integral division so that 900 / 8 -> 0x70 or 896mA to stay within range. And we might as well change it for the high/full/low case as well for consistency. N.B. USB 3.2 Gen N x 2 allows for up to 1500mA but there doesn't seem to be any peripheral controller supported by Linux that does two lane operation, so for now keeping the clamp at 900 should be fine. Signed-off-by: Jack Pham Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/composite.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 6e30b177aa22..5a4cf779b269 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -441,9 +441,13 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, if (!val) return 0; if (speed < USB_SPEED_SUPER) - return DIV_ROUND_UP(val, 2); + return min(val, 500U) / 2; else - return DIV_ROUND_UP(val, 8); + /* + * USB 3.x supports up to 900mA, but since 900 isn't divisible + * by 8 the integral division will effectively cap to 896mA. + */ + return min(val, 900U) / 8; } static int config_buf(struct usb_configuration *config, @@ -841,6 +845,10 @@ static int set_config(struct usb_composite_dev *cdev, /* when we return, be sure our power usage is valid */ power = c->MaxPower ?
c->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW; + if (gadget->speed < USB_SPEED_SUPER) + power = min(power, 500U); + else + power = min(power, 900U); done: usb_gadget_vbus_draw(gadget, power); if (result >= 0 && cdev->delayed_status) @@ -2280,7 +2288,7 @@ void composite_resume(struct usb_gadget *gadget) { struct usb_composite_dev *cdev = get_gadget_data(gadget); struct usb_function *f; - u16 maxpower; + unsigned maxpower; /* REVISIT: should we have config level * suspend/resume callbacks? @@ -2294,10 +2302,14 @@ void composite_resume(struct usb_gadget *gadget) f->resume(f); } - maxpower = cdev->config->MaxPower; + maxpower = cdev->config->MaxPower ? + cdev->config->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW; + if (gadget->speed < USB_SPEED_SUPER) + maxpower = min(maxpower, 500U); + else + maxpower = min(maxpower, 900U); - usb_gadget_vbus_draw(gadget, maxpower ? - maxpower : CONFIG_USB_GADGET_VBUS_DRAW); + usb_gadget_vbus_draw(gadget, maxpower); } cdev->suspended = 0; -- GitLab From e476b55da6d22cb29ac50f5b585a16d37854c312 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 16 Jan 2020 15:29:01 +0200 Subject: [PATCH 0377/1278] usb: gadget: ffs: ffs_aio_cancel(): Save/restore IRQ flags [ Upstream commit 43d565727a3a6fd24e37c7c2116475106af71806 ] ffs_aio_cancel() can be called from both interrupt and thread context. Make sure that the current IRQ state is saved and restored by using spin_{un,}lock_irq{save,restore}(). Otherwise undefined behavior might occur. 
Acked-by: Michal Nazarewicz Signed-off-by: Lars-Peter Clausen Signed-off-by: Alexandru Ardelean Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_fs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index cdffbe999500..282396e8eec6 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1078,18 +1078,19 @@ static int ffs_aio_cancel(struct kiocb *kiocb) { struct ffs_io_data *io_data = kiocb->private; struct ffs_epfile *epfile = kiocb->ki_filp->private_data; + unsigned long flags; int value; ENTER(); - spin_lock_irq(&epfile->ffs->eps_lock); + spin_lock_irqsave(&epfile->ffs->eps_lock, flags); if (likely(io_data && io_data->ep && io_data->req)) value = usb_ep_dequeue(io_data->ep, io_data->req); else value = -EINVAL; - spin_unlock_irq(&epfile->ffs->eps_lock); + spin_unlock_irqrestore(&epfile->ffs->eps_lock, flags); return value; } -- GitLab From 60fe004c8cb3b48b575c807988aacef81aef8867 Mon Sep 17 00:00:00 2001 From: Sergey Organov Date: Wed, 29 Jan 2020 14:21:46 +0300 Subject: [PATCH 0378/1278] usb: gadget: serial: fix Tx stall after buffer overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e4bfded56cf39b8d02733c1e6ef546b97961e18a ] Symptom: application opens /dev/ttyGS0 and starts sending (writing) to it while either USB cable is not connected, or nobody listens on the other side of the cable. If driver circular buffer overflows before connection is established, no data will be written to the USB layer until/unless /dev/ttyGS0 is closed and re-opened again by the application (the latter besides having no means of being notified about the event of establishing of the connection.) Fix: on open and/or connect, kick Tx to flush circular buffer data to USB layer. 
Signed-off-by: Sergey Organov Reviewed-by: Michał Mirosław Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/u_serial.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index 520ace49f91d..942d2977797d 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -715,8 +715,10 @@ static int gs_start_io(struct gs_port *port) port->n_read = 0; started = gs_start_rx(port); - /* unblock any pending writes into our circular buffer */ if (started) { + gs_start_tx(port); + /* Unblock any pending writes into our circular buffer, in case + * we didn't in gs_start_tx() */ tty_wakeup(port->port.tty); } else { gs_free_requests(ep, head, &port->read_allocated); -- GitLab From 3824b96e06cc2b28d708e0b0d51051ee112dd5dc Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Sat, 18 Jan 2020 15:41:20 -0500 Subject: [PATCH 0379/1278] drm/msm/mdp5: rate limit pp done timeout warnings [ Upstream commit ef8c9809acb0805c991bba8bdd4749fc46d44a98 ] Add rate limiting of the 'pp done time out' warnings since these warnings can quickly fill the dmesg buffer. 
Signed-off-by: Brian Masney Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 440977677001..99d356b6e915 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -1004,8 +1004,8 @@ static void mdp5_crtc_wait_for_pp_done(struct drm_crtc *crtc) ret = wait_for_completion_timeout(&mdp5_crtc->pp_completion, msecs_to_jiffies(50)); if (ret == 0) - dev_warn(dev->dev, "pp done time out, lm=%d\n", - mdp5_cstate->pipeline.mixer->lm); + dev_warn_ratelimited(dev->dev, "pp done time out, lm=%d\n", + mdp5_cstate->pipeline.mixer->lm); } static void mdp5_crtc_wait_for_flush_done(struct drm_crtc *crtc) -- GitLab From 892afde0f4a1950a3f561253c2a3caf5a5fd8959 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 29 Jan 2020 20:12:44 +0000 Subject: [PATCH 0380/1278] drm: msm: Fix return type of dsi_mgr_connector_mode_valid for kCFI [ Upstream commit 7fd2dfc3694922eb7ace4801b7208cf9f62ebc7d ] I was hitting kCFI crashes when building with clang, and after some digging finally narrowed it down to the dsi_mgr_connector_mode_valid() function being implemented as returning an int, instead of an enum drm_mode_status. This patch fixes it, and appeases the opaque word of the kCFI gods (seriously, clang inlining everything makes the kCFI backtraces only really rough estimates of where things went wrong). Thanks as always to Sami for his help narrowing this down. 
Cc: Rob Clark Cc: Sean Paul Cc: Sami Tolvanen Cc: Todd Kjos Cc: Alistair Delva Cc: Amit Pundir Cc: Sumit Semwal Cc: freedreno@lists.freedesktop.org Cc: clang-built-linux@googlegroups.com Signed-off-by: John Stultz Reviewed-by: Nick Desaulniers Tested-by: Amit Pundir Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dsi/dsi_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 855248132b2b..7d46399a39b4 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -400,7 +400,7 @@ static int dsi_mgr_connector_get_modes(struct drm_connector *connector) return num; } -static int dsi_mgr_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status dsi_mgr_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { int id = dsi_mgr_connector_get_id(connector); -- GitLab From 89e30bb46074c1a11b0b6e6797b0bcbcd6d83d54 Mon Sep 17 00:00:00 2001 From: Harigovindan P Date: Thu, 6 Feb 2020 14:26:15 +0530 Subject: [PATCH 0381/1278] drm/msm/dsi: save pll state before dsi host is powered off [ Upstream commit a1028dcfd0dd97884072288d0c8ed7f30399b528 ] Save pll state before dsi host is powered off. Without this change some register values get reset.
Signed-off-by: Harigovindan P Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dsi/dsi_manager.c | 5 +++++ drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 7d46399a39b4..9fbfa9f94e6c 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -543,6 +543,7 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) struct msm_dsi *msm_dsi1 = dsi_mgr_get_dsi(DSI_1); struct mipi_dsi_host *host = msm_dsi->host; struct drm_panel *panel = msm_dsi->panel; + struct msm_dsi_pll *src_pll; bool is_dual_dsi = IS_DUAL_DSI(); int ret; @@ -583,6 +584,10 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) id, ret); } + /* Save PLL status if it is a clock source */ + src_pll = msm_dsi_phy_get_pll(msm_dsi->phy); + msm_dsi_pll_save_state(src_pll); + ret = msm_dsi_host_power_off(host); if (ret) pr_err("%s: host %d power off failed,%d\n", __func__, id, ret); diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 7c9bf91bc22b..c0a7fa56d9a7 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -613,10 +613,6 @@ void msm_dsi_phy_disable(struct msm_dsi_phy *phy) if (!phy || !phy->cfg->ops.disable) return; - /* Save PLL status if it is a clock source */ - if (phy->usecase != MSM_DSI_PHY_SLAVE) - msm_dsi_pll_save_state(phy->pll); - phy->cfg->ops.disable(phy); dsi_phy_regulator_disable(phy); -- GitLab From ea29d94b09cb7629a7ddd5e1484c00a56ed20a86 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Feb 2020 17:54:17 +0100 Subject: [PATCH 0382/1278] net: ks8851-ml: Remove 8-bit bus accessors [ Upstream commit 69233bba6543a37755158ca3382765387b8078df ] This driver is mixing 8-bit and 16-bit bus accessors for reasons unknown, however the speculation is that this was some 
sort of attempt to support the 8-bit bus mode. As per the KS8851-16MLL documentation, all two registers accessed via the 8-bit accessors are internally 16-bit registers, so reading them using 16-bit accessors is fine. The KS_CCR read can be converted to 16-bit read outright, as it is already a concatenation of two 8-bit reads of that register. The KS_RXQCR accesses are 8-bit only, however writing the top 8 bits of the register is OK as well, since the driver caches the entire 16-bit register value anyway. Finally, the driver is not used by any hardware in the kernel right now. The only hardware available to me is one with 16-bit bus, so I have no way to test the 8-bit bus mode, however it is unlikely this ever really worked anyway. If the 8-bit bus mode is ever required, it can be easily added by adjusting the 16-bit accessors to do 2 consecutive accesses, which is how this should have been done from the beginning. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8851_mll.c | 45 +++--------------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index adbe0a6fe0db..77d059d7f8c5 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -474,24 +474,6 @@ static int msg_enable; * chip is busy transferring packet data (RX/TX FIFO accesses). 
*/ -/** - * ks_rdreg8 - read 8 bit register from device - * @ks : The chip information - * @offset: The register address - * - * Read a 8bit register from the chip, returning the result - */ -static u8 ks_rdreg8(struct ks_net *ks, int offset) -{ - u16 data; - u8 shift_bit = offset & 0x03; - u8 shift_data = (offset & 1) << 3; - ks->cmd_reg_cache = (u16) offset | (u16)(BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - data = ioread16(ks->hw_addr); - return (u8)(data >> shift_data); -} - /** * ks_rdreg16 - read 16 bit register from device * @ks : The chip information @@ -507,22 +489,6 @@ static u16 ks_rdreg16(struct ks_net *ks, int offset) return ioread16(ks->hw_addr); } -/** - * ks_wrreg8 - write 8bit register value to chip - * @ks: The chip information - * @offset: The register address - * @value: The value to write - * - */ -static void ks_wrreg8(struct ks_net *ks, int offset, u8 value) -{ - u8 shift_bit = (offset & 0x03); - u16 value_write = (u16)(value << ((offset & 1) << 3)); - ks->cmd_reg_cache = (u16)offset | (BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - iowrite16(value_write, ks->hw_addr); -} - /** * ks_wrreg16 - write 16bit register value to chip * @ks: The chip information @@ -642,8 +608,7 @@ static void ks_read_config(struct ks_net *ks) u16 reg_data = 0; /* Regardless of bus width, 8 bit read should always work.*/ - reg_data = ks_rdreg8(ks, KS_CCR) & 0x00FF; - reg_data |= ks_rdreg8(ks, KS_CCR+1) << 8; + reg_data = ks_rdreg16(ks, KS_CCR); /* addr/data bus are multiplexed */ ks->sharedbus = (reg_data & CCR_SHARED) == CCR_SHARED; @@ -747,7 +712,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) /* 1. set sudo DMA mode */ ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI); - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. 
read prepend data */ /** @@ -764,7 +729,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) ks_inblk(ks, buf, ALIGN(len, 4)); /* 4. reset sudo DMA Mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); } /** @@ -997,13 +962,13 @@ static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len) ks->txh.txw[1] = cpu_to_le16(len); /* 1. set sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. write status/lenth info */ ks_outblk(ks, ks->txh.txw, 4); /* 3. write pkt data */ ks_outblk(ks, (u16 *)pdata, ALIGN(len, 4)); /* 4. reset sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); /* 5. Enqueue Tx(move the pkt from TX buffer into TXQ) */ ks_wrreg16(ks, KS_TXQCR, TXQCR_METFE); /* 6. wait until TXQCR_METFE is auto-cleared */ -- GitLab From af490013154af4ebe2c02d9d85be6ed84e480f84 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Feb 2020 17:54:18 +0100 Subject: [PATCH 0383/1278] net: ks8851-ml: Fix 16-bit data access [ Upstream commit edacb098ea9c31589276152f09b4439052c0f2b1 ] The packet data written to and read from Micrel KSZ8851-16MLLI must be byte-swapped in 16-bit mode, add this byte-swapping. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8851_mll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 77d059d7f8c5..fb5f4055e159 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -515,7 +515,7 @@ static inline void ks_inblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - *wptr++ = (u16)ioread16(ks->hw_addr); + *wptr++ = be16_to_cpu(ioread16(ks->hw_addr)); } /** @@ -529,7 +529,7 @@ static inline void ks_outblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - iowrite16(*wptr++, ks->hw_addr); + iowrite16(cpu_to_be16(*wptr++), ks->hw_addr); } static void ks_disable_int(struct ks_net *ks) -- GitLab From 29c27650511ab1da770a3f35556c6f7d4d2dfc03 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Feb 2020 17:54:19 +0100 Subject: [PATCH 0384/1278] net: ks8851-ml: Fix 16-bit IO operation [ Upstream commit 58292104832fef6cb4a89f736012c0e0724c3442 ] The Micrel KSZ8851-16MLLI datasheet DS00002357B page 12 states that BE[3:0] signals are active high. This contradicts the measurements of the behavior of the actual chip, where these signals behave as active low. For example, to read the CIDER register, the bus must expose 0xc0c0 during the address phase, which means BE[3:0]=4'b1100. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8851_mll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index fb5f4055e159..799154d7c047 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -484,7 +484,7 @@ static int msg_enable; static u16 ks_rdreg16(struct ks_net *ks, int offset) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); return ioread16(ks->hw_addr); } @@ -499,7 +499,7 @@ static u16 ks_rdreg16(struct ks_net *ks, int offset) static void ks_wrreg16(struct ks_net *ks, int offset, u16 value) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); iowrite16(value, ks->hw_addr); } -- GitLab From 5f07ae5c8a2506af5ee979d588faa34514e314a7 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Mon, 20 Jan 2020 10:17:29 +0100 Subject: [PATCH 0385/1278] watchdog: da9062: do not ping the hw during stop() [ Upstream commit e9a0e65eda3f78d0b04ec6136c591c000cbc3b76 ] The da9062 hw has a minimum ping cool down phase of at least 200ms. The driver takes that into account by setting the min_hw_heartbeat_ms to 300ms and the core guarantees that the hw limit is observed for the ping() calls. But the core can't guarantee the required minimum ping cool down phase if a stop() command is send immediately after the ping() command. So it is not allowed to ping the watchdog within the stop() command as the driver does. Remove the ping can be done without doubts because the watchdog gets disabled anyway and a (re)start resets the watchdog counter too. 
Signed-off-by: Marco Felsch Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200120091729.16256-1-m.felsch@pengutronix.de [groeck: Updated description] Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/da9062_wdt.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c index 9083d3d922b0..79383ff62019 100644 --- a/drivers/watchdog/da9062_wdt.c +++ b/drivers/watchdog/da9062_wdt.c @@ -126,13 +126,6 @@ static int da9062_wdt_stop(struct watchdog_device *wdd) struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd); int ret; - ret = da9062_reset_watchdog_timer(wdt); - if (ret) { - dev_err(wdt->hw->dev, "Failed to ping the watchdog (err = %d)\n", - ret); - return ret; - } - ret = regmap_update_bits(wdt->hw->regmap, DA9062AA_CONTROL_D, DA9062AA_TWDSCALE_MASK, -- GitLab From 8ea1411740a0b71b06a6eb8cf4aeeadbf540346b Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 24 Jan 2020 08:48:55 +0300 Subject: [PATCH 0386/1278] s390/cio: cio_ignore_proc_seq_next should increase position index [ Upstream commit 8b101a5e14f2161869636ff9cb4907b7749dc0c2 ] if seq_file .next function does not change position index, read after some lseek can generate unexpected output. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206283 Link: https://lore.kernel.org/r/d44c53a7-9bc1-15c7-6d4a-0c10cb9dffce@virtuozzo.com Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Averin Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- drivers/s390/cio/blacklist.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 2a3f874a21d5..9cebff8e8d74 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -303,8 +303,10 @@ static void * cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset) { struct ccwdev_iter *iter; + loff_t p = *offset; - if (*offset >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) + (*offset)++; + if (p >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) return NULL; iter = it; if (iter->devno == __MAX_SUBCHANNEL) { @@ -314,7 +316,6 @@ cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset) return NULL; } else iter->devno++; - (*offset)++; return iter; } -- GitLab From a8dc79753d727b4dbb11bb1190f90f4863046f32 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Thu, 16 Jan 2020 12:46:51 -0800 Subject: [PATCH 0387/1278] x86/boot/compressed: Don't declare __force_order in kaslr_64.c [ Upstream commit df6d4f9db79c1a5d6f48b59db35ccd1e9ff9adfc ] GCC 10 changed the default to -fno-common, which leads to LD arch/x86/boot/compressed/vmlinux ld: arch/x86/boot/compressed/pgtable_64.o:(.bss+0x0): multiple definition of `__force_order'; \ arch/x86/boot/compressed/kaslr_64.o:(.bss+0x0): first defined here make[2]: *** [arch/x86/boot/compressed/Makefile:119: arch/x86/boot/compressed/vmlinux] Error 1 Since __force_order is already provided in pgtable_64.c, there is no need to declare __force_order in kaslr_64.c. Signed-off-by: H.J. 
Lu Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200124181811.4780-1-hjl.tools@gmail.com Signed-off-by: Sasha Levin --- arch/x86/boot/compressed/pagetable.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index e691ff734cb5..46573842d8c3 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -36,9 +36,6 @@ #define __PAGE_OFFSET __PAGE_OFFSET_BASE #include "../../mm/ident_map.c" -/* Used by pgtable.h asm code to force instruction serialization. */ -unsigned long __force_order; - /* Used to track our page table allocation area. */ struct alloc_pgt_data { unsigned char *pgt_buf; -- GitLab From bdb5136a32ee4156ed215fb4dc1ab2b41510aa71 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 20 Feb 2020 00:59:36 +0900 Subject: [PATCH 0388/1278] nvme: Fix uninitialized-variable warning [ Upstream commit 15755854d53b4bbb0bb37a0fce66f0156cfc8a17 ] gcc may detect a false positive on nvme using an uninitialized variable if setting features fails. Since this is not a fast path, explicitly initialize this variable to suppress the warning. 
Reported-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f543b9932c83..a760c449f4a9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -889,8 +889,8 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, void *buffer, size_t buflen, u32 *result) { + union nvme_result res = { 0 }; struct nvme_command c; - union nvme_result res; int ret; memset(&c, 0, sizeof(c)); -- GitLab From e2de3b93e89ffe5ac4efce187790fa9535143e90 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Feb 2020 22:23:18 -0800 Subject: [PATCH 0389/1278] x86/xen: Distribute switch variables for initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9038ec99ceb94fb8d93ade5e236b2928f0792c7c ] Variables declared in a switch statement before any case statements cannot be automatically initialized with compiler instrumentation (as they are not part of any execution flow). With GCC's proposed automatic stack variable initialization feature, this triggers a warning (and they don't get initialized). Clang's automatic stack variable initialization (via CONFIG_INIT_STACK_ALL=y) doesn't throw a warning, but it also doesn't initialize such variables[1]. Note that these warnings (or silent skipping) happen before the dead-store elimination optimization phase, so even when the automatic initializations are later elided in favor of direct initializations, the warnings remain. To avoid these problems, move such variables into the "case" where they're used or lift them up into the main function body. 
arch/x86/xen/enlighten_pv.c: In function ‘xen_write_msr_safe’: arch/x86/xen/enlighten_pv.c:904:12: warning: statement will never be executed [-Wswitch-unreachable] 904 | unsigned which; | ^~~~~ [1] https://bugs.llvm.org/show_bug.cgi?id=44916 Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20200220062318.69299-1-keescook@chromium.org Reviewed-by: Juergen Gross [boris: made @which an 'unsigned int'] Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin --- arch/x86/xen/enlighten_pv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index f79a0cdc6b4e..1f8175bf2a5e 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -909,14 +909,15 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; +#ifdef CONFIG_X86_64 + unsigned int which; + u64 base; +#endif ret = 0; switch (msr) { #ifdef CONFIG_X86_64 - unsigned which; - u64 base; - case MSR_FS_BASE: which = SEGBASE_FS; goto set; case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; -- GitLab From 34bf8ca7879b3c47c74f6f39662812ad9e0a7555 Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Wed, 19 Feb 2020 15:19:36 -0800 Subject: [PATCH 0390/1278] net: thunderx: workaround BGX TX Underflow issue [ Upstream commit 971617c3b761c876d686a2188220a33898c90e99 ] While it is not yet understood why a TX underflow can easily occur for SGMII interfaces resulting in a TX wedge. It has been found that disabling/re-enabling the LMAC resolves the issue. Signed-off-by: Tim Harvey Reviewed-by: Robert Jones Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- .../net/ethernet/cavium/thunder/thunder_bgx.c | 62 ++++++++++++++++++- .../net/ethernet/cavium/thunder/thunder_bgx.h | 9 +++ 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 586e35593310..d678f088925c 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -234,10 +234,19 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) lmac = &bgx->lmac[lmacid]; cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); - if (enable) + if (enable) { cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN; - else + + /* enable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1S, + GMI_TXX_INT_UNDFLW); + } else { cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN); + + /* Disable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1C, + GMI_TXX_INT_UNDFLW); + } bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); if (bgx->is_rgx) @@ -1340,6 +1349,48 @@ static int bgx_init_phy(struct bgx *bgx) return bgx_init_of_phy(bgx); } +static irqreturn_t bgx_intr_handler(int irq, void *data) +{ + struct bgx *bgx = (struct bgx *)data; + u64 status, val; + int lmac; + + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { + status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT); + if (status & GMI_TXX_INT_UNDFLW) { + pci_err(bgx->pdev, "BGX%d lmac%d UNDFLW\n", + bgx->bgx_id, lmac); + val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG); + val &= ~CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + val |= CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + } + /* clear interrupts */ + bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status); + } + + return IRQ_HANDLED; +} + +static void bgx_register_intr(struct pci_dev *pdev) +{ + struct bgx *bgx = pci_get_drvdata(pdev); + int ret; + + ret = pci_alloc_irq_vectors(pdev, 
BGX_LMAC_VEC_OFFSET, + BGX_LMAC_VEC_OFFSET, PCI_IRQ_ALL_TYPES); + if (ret < 0) { + pci_err(pdev, "Req for #%d msix vectors failed\n", + BGX_LMAC_VEC_OFFSET); + return; + } + ret = pci_request_irq(pdev, GMPX_GMI_TX_INT, bgx_intr_handler, NULL, + bgx, "BGX%d", bgx->bgx_id); + if (ret) + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); +} + static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int err; @@ -1355,7 +1406,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, bgx); - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(dev, "Failed to enable PCI device\n"); pci_set_drvdata(pdev, NULL); @@ -1409,6 +1460,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) bgx_init_hw(bgx); + bgx_register_intr(pdev); + /* Enable all LMACs */ for (lmac = 0; lmac < bgx->lmac_count; lmac++) { err = bgx_lmac_enable(bgx, lmac); @@ -1425,6 +1478,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_enable: bgx_vnic[bgx->bgx_id] = NULL; + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); err_release_regions: pci_release_regions(pdev); err_disable_device: @@ -1442,6 +1496,8 @@ static void bgx_remove(struct pci_dev *pdev) for (lmac = 0; lmac < bgx->lmac_count; lmac++) bgx_lmac_disable(bgx, lmac); + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); + bgx_vnic[bgx->bgx_id] = NULL; pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 23acdc5ab896..adaa3bfa5f6c 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -179,6 +179,15 @@ #define BGX_GMP_GMI_TXX_BURST 0x38228 #define BGX_GMP_GMI_TXX_MIN_PKT 0x38240 #define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300 +#define BGX_GMP_GMI_TXX_INT 0x38500 +#define BGX_GMP_GMI_TXX_INT_W1S 0x38508 +#define BGX_GMP_GMI_TXX_INT_ENA_W1C 
0x38510 +#define BGX_GMP_GMI_TXX_INT_ENA_W1S 0x38518 +#define GMI_TXX_INT_PTP_LOST BIT_ULL(4) +#define GMI_TXX_INT_LATE_COL BIT_ULL(3) +#define GMI_TXX_INT_XSDEF BIT_ULL(2) +#define GMI_TXX_INT_XSCOL BIT_ULL(1) +#define GMI_TXX_INT_UNDFLW BIT_ULL(0) #define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */ #define BGX_MSIX_VEC_0_29_CTL 0x400008 -- GitLab From 18a7af5cb6de5c44c3d0f6880b9d6a1d79783ad4 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 19 Feb 2020 06:01:03 +1000 Subject: [PATCH 0391/1278] cifs: don't leak -EAGAIN for stat() during reconnect commit fc513fac56e1b626ae48a74d7551d9c35c50129e upstream. If from cifs_revalidate_dentry_attr() the SMB2/QUERY_INFO call fails with an error, such as STATUS_SESSION_EXPIRED, causing the session to be reconnected it is possible we will leak -EAGAIN back to the application even for system calls such as stat() where this is not a valid error. Fix this by re-trying the operation from within cifs_revalidate_dentry_attr() if cifs_get_inode_info*() returns -EAGAIN. This fixes stat() and possibly also other system calls that uses cifs_revalidate_dentry*(). 
Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Aurelien Aptel CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 3a10d405362e..bdce714e9448 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1998,6 +1998,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) struct inode *inode = d_inode(dentry); struct super_block *sb = dentry->d_sb; char *full_path = NULL; + int count = 0; if (inode == NULL) return -ENOENT; @@ -2019,15 +2020,18 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) full_path, inode, inode->i_count.counter, dentry, cifs_get_time(dentry), jiffies); +again: if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); else rc = cifs_get_inode_info(&inode, full_path, NULL, sb, xid, NULL); - + if (rc == -EAGAIN && count++ < 10) + goto again; out: kfree(full_path); free_xid(xid); + return rc; } -- GitLab From 5c4d9b1e0c68fc969929b6a21b370d4c11542508 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Mon, 2 Mar 2020 22:21:35 +0800 Subject: [PATCH 0392/1278] usb: storage: Add quirk for Samsung Fit flash commit 86d92f5465958752481269348d474414dccb1552 upstream. Current driver has 240 (USB2.0) and 2048 (USB3.0) as max_sectors, e.g., /sys/bus/scsi/devices/0:0:0:0/max_sectors If data access times out, driver error handling will issue a port reset. Sometimes Samsung Fit (090C:1000) flash disk will not respond to later Set Address or Get Descriptor command. Adding this quirk to limit max_sectors to 64 sectors to avoid issue occurring. 
Signed-off-by: Jim Lin Acked-by: Alan Stern Cc: stable Link: https://lore.kernel.org/r/1583158895-31342-1-git-send-email-jilin@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index fb69cb64f7d4..df8ee83c3f1a 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1277,6 +1277,12 @@ UNUSUAL_DEV( 0x090a, 0x1200, 0x0000, 0x9999, USB_SC_RBC, USB_PR_BULK, NULL, 0 ), +UNUSUAL_DEV(0x090c, 0x1000, 0x1100, 0x1100, + "Samsung", + "Flash Drive FIT", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_MAX_SECTORS_64), + /* aeb */ UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff, "Feiya", -- GitLab From 504a02d494b7cda36bb8175704e908c72d247c9e Mon Sep 17 00:00:00 2001 From: Dan Lazewatsky Date: Wed, 26 Feb 2020 14:34:38 +0000 Subject: [PATCH 0393/1278] usb: quirks: add NO_LPM quirk for Logitech Screen Share commit b96ed52d781a2026d0c0daa5787c6f3d45415862 upstream. LPM on the device appears to cause xHCI host controllers to claim that there isn't enough bandwidth to support additional devices. 
Signed-off-by: Dan Lazewatsky Cc: stable Signed-off-by: Gustavo Padovan Link: https://lore.kernel.org/r/20200226143438.1445-1-gustavo.padovan@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index ad8307140df8..64c03e871f2d 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -86,6 +86,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Logitech PTZ Pro Camera */ { USB_DEVICE(0x046d, 0x0853), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech Screen Share */ + { USB_DEVICE(0x046d, 0x086c), .driver_info = USB_QUIRK_NO_LPM }, + /* Logitech Quickcam Fusion */ { USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME }, -- GitLab From bc6da5b19a0f892655783d932f45bc3fd73fd76f Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Wed, 26 Feb 2020 18:50:34 +0100 Subject: [PATCH 0394/1278] usb: core: hub: fix unhandled return by employing a void function commit 63d6d7ed475c53dc1cabdfedf63de1fd8dcd72ee upstream. 
Address below Coverity complaint (Feb 25, 2020, 8:06 AM CET): --- drivers/usb/core/hub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index ff1be6a6841b..b0624940ccb0 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1818,7 +1818,7 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) if (id->driver_info & HUB_QUIRK_DISABLE_AUTOSUSPEND) { hub->quirk_disable_autosuspend = 1; - usb_autopm_get_interface(intf); + usb_autopm_get_interface_no_resume(intf); } if (hub_configure(hub, &desc->endpoint[0].desc) >= 0) -- GitLab From af20d8c06ec400a3ad8384efea369821ce3db1bd Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Wed, 26 Feb 2020 18:50:35 +0100 Subject: [PATCH 0395/1278] usb: core: hub: do error out if usb_autopm_get_interface() fails commit 60e3f6e4ac5b0fda43dad01c32e09409ec710045 upstream. Reviewing a fresh portion of coverity defects in USB core (specifically CID 1458999), Alan Stern noted below in [1]: On Tue, Feb 25, 2020 at 02:39:23PM -0500, Alan Stern wrote: > A revised search finds line 997 in drivers/usb/core/hub.c and lines > 216, 269 in drivers/usb/core/port.c. (I didn't try looking in any > other directories.) AFAICT all three of these should check the > return value, although a error message in the kernel log probably > isn't needed. Factor out the usb_remove_device() change into a standalone patch to allow conflict-free integration on top of the earliest stable branches. 
[1] https://lore.kernel.org/lkml/Pine.LNX.4.44L0.2002251419120.1485-100000@iolanthe.rowland.org Fixes: 253e05724f9230 ("USB: add a "remove hardware" sysfs attribute") Cc: stable@vger.kernel.org # v2.6.33+ Suggested-by: Alan Stern Signed-off-by: Eugeniu Rosca Acked-by: Alan Stern Link: https://lore.kernel.org/r/20200226175036.14946-2-erosca@de.adit-jv.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b0624940ccb0..4391192bdd19 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -958,13 +958,17 @@ int usb_remove_device(struct usb_device *udev) { struct usb_hub *hub; struct usb_interface *intf; + int ret; if (!udev->parent) /* Can't remove a root hub */ return -EINVAL; hub = usb_hub_to_struct_hub(udev->parent); intf = to_usb_interface(hub->intfdev); - usb_autopm_get_interface(intf); + ret = usb_autopm_get_interface(intf); + if (ret < 0) + return ret; + set_bit(udev->portnum, hub->removed_bits); hub_port_logical_disconnect(hub, udev->portnum); usb_autopm_put_interface(intf); -- GitLab From 8389c9d75e0867064eb5699251da3836191d0420 Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Wed, 26 Feb 2020 18:50:36 +0100 Subject: [PATCH 0396/1278] usb: core: port: do error out if usb_autopm_get_interface() fails commit 1f8b39bc99a31759e97a0428a5c3f64802c1e61d upstream. Reviewing a fresh portion of coverity defects in USB core (specifically CID 1458999), Alan Stern noted below in [1]: On Tue, Feb 25, 2020 at 02:39:23PM -0500, Alan Stern wrote: > A revised search finds line 997 in drivers/usb/core/hub.c and lines > 216, 269 in drivers/usb/core/port.c. (I didn't try looking in any > other directories.) AFAICT all three of these should check the > return value, although a error message in the kernel log probably > isn't needed. 
Factor out the usb_port_runtime_{resume,suspend}() changes into a standalone patch to allow conflict-free porting on top of stable v3.9+. [1] https://lore.kernel.org/lkml/Pine.LNX.4.44L0.2002251419120.1485-100000@iolanthe.rowland.org Fixes: 971fcd492cebf5 ("usb: add runtime pm support for usb port device") Cc: stable@vger.kernel.org # v3.9+ Suggested-by: Alan Stern Signed-off-by: Eugeniu Rosca Acked-by: Alan Stern Link: https://lore.kernel.org/r/20200226175036.14946-3-erosca@de.adit-jv.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/port.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 460c855be0d0..53c1f6e604b1 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -179,7 +179,10 @@ static int usb_port_runtime_resume(struct device *dev) if (!port_dev->is_superspeed && peer) pm_runtime_get_sync(&peer->dev); - usb_autopm_get_interface(intf); + retval = usb_autopm_get_interface(intf); + if (retval < 0) + return retval; + retval = usb_hub_set_port_power(hdev, hub, port1, true); msleep(hub_power_on_good_delay(hub)); if (udev && !retval) { @@ -232,7 +235,10 @@ static int usb_port_runtime_suspend(struct device *dev) if (usb_port_block_power_off) return -EBUSY; - usb_autopm_get_interface(intf); + retval = usb_autopm_get_interface(intf); + if (retval < 0) + return retval; + retval = usb_hub_set_port_power(hdev, hub, port1, false); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (!port_dev->is_superspeed) -- GitLab From 1855aaccd74cb9528c24ceb6bc15358a411f65ff Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Wed, 4 Mar 2020 10:24:29 +0800 Subject: [PATCH 0397/1278] vgacon: Fix a UAF in vgacon_invert_region commit 513dc792d6060d5ef572e43852683097a8420f56 upstream. 
When syzkaller tests, there is a UAF: BUG: KASan: use after free in vgacon_invert_region+0x9d/0x110 at addr ffff880000100000 Read of size 2 by task syz-executor.1/16489 page:ffffea0000004000 count:0 mapcount:-127 mapping: (null) index:0x0 page flags: 0xfffff00000000() page dumped because: kasan: bad access detected CPU: 1 PID: 16489 Comm: syz-executor.1 Not tainted Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 Call Trace: [] dump_stack+0x1e/0x20 [] kasan_report+0x577/0x950 [] __asan_load2+0x62/0x80 [] vgacon_invert_region+0x9d/0x110 [] invert_screen+0xe5/0x470 [] set_selection+0x44b/0x12f0 [] tioclinux+0xee/0x490 [] vt_ioctl+0xff4/0x2670 [] tty_ioctl+0x46a/0x1a10 [] do_vfs_ioctl+0x5bd/0xc40 [] SyS_ioctl+0x132/0x170 [] system_call_fastpath+0x22/0x27 Memory state around the buggy address: ffff8800000fff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff8800000fff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff880000100000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff It can be reproduce in the linux mainline by the program: #include #include #include #include #include #include #include #include struct tiocl_selection { unsigned short xs; /* X start */ unsigned short ys; /* Y start */ unsigned short xe; /* X end */ unsigned short ye; /* Y end */ unsigned short sel_mode; /* selection mode */ }; #define TIOCL_SETSEL 2 struct tiocl { unsigned char type; unsigned char pad; struct tiocl_selection sel; }; int main() { int fd = 0; const char *dev = "/dev/char/4:1"; struct vt_consize v = {0}; struct tiocl tioc = {0}; fd = open(dev, O_RDWR, 0); v.v_rows = 3346; ioctl(fd, VT_RESIZEX, &v); tioc.type = TIOCL_SETSEL; ioctl(fd, TIOCLINUX, &tioc); return 0; } When resize the screen, update the 'vc->vc_size_row' to the new_row_size, but when 'set_origin' in 'vgacon_set_origin', vgacon use 'vga_vram_base' for 'vc_origin' and 'vc_visible_origin', not 'vc_screenbuf'. 
It may be smaller than 'vc_screenbuf'. When TIOCLINUX, use the new_row_size to calc the offset, it may be larger than the vga_vram_size in vgacon driver, then bad access. Also, if set a larger screenbuf firstly, then set an even larger screenbuf, when copy old_origin to new_origin, a bad access may happen. So, if the screen size is larger than vga_vram, resizing the screen should fail. This also fixes CVE-2020-8649 and CVE-2020-8647. Linus pointed out that overflow checking seems absent. We're saved by the existing bounds checks in vc_do_resize() with rather strict limits: if (cols > VC_RESIZE_MAXCOL || lines > VC_RESIZE_MAXROW) return -EINVAL; Fixes: 0aec4867dca14 ("[PATCH] SVGATextMode fix") Reference: CVE-2020-8647 and CVE-2020-8649 Reported-by: Hulk Robot Signed-off-by: Zhang Xiaoxu [danvet: augment commit message to point out overflow safety] Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200304022429.37738-1-zhangxiaoxu5@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/video/console/vgacon.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index a17ba1465815..ff6612a3ddc8 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -1309,6 +1309,9 @@ static int vgacon_font_get(struct vc_data *c, struct console_font *font) static int vgacon_resize(struct vc_data *c, unsigned int width, unsigned int height, unsigned int user) { + if ((width << 1) * height > vga_vram_size) + return -EINVAL; + if (width % 2 || width > screen_info.orig_video_cols || height > (screen_info.orig_video_lines * vga_default_font_height)/ c->vc_font.height) -- GitLab From ebf46a94280d3b30dc20a743b4cc0eb328d61723 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 5 Mar 2020 22:28:26 -0800 Subject: [PATCH 0398/1278] mm, numa: fix bad pmd by atomically check for pmd_trans_huge when marking page tables prot_numa commit 
8b272b3cbbb50a6a8e62d8a15affd473a788e184 upstream. : A user reported a bug against a distribution kernel while running a : proprietary workload described as "memory intensive that is not swapping" : that is expected to apply to mainline kernels. The workload is : read/write/modifying ranges of memory and checking the contents. They : reported that within a few hours that a bad PMD would be reported followed : by a memory corruption where expected data was all zeros. A partial : report of the bad PMD looked like : : [ 5195.338482] ../mm/pgtable-generic.c:33: bad pmd ffff8888157ba008(000002e0396009e2) : [ 5195.341184] ------------[ cut here ]------------ : [ 5195.356880] kernel BUG at ../mm/pgtable-generic.c:35! : .... : [ 5195.410033] Call Trace: : [ 5195.410471] [] change_protection_range+0x7dd/0x930 : [ 5195.410716] [] change_prot_numa+0x18/0x30 : [ 5195.410918] [] task_numa_work+0x1fe/0x310 : [ 5195.411200] [] task_work_run+0x72/0x90 : [ 5195.411246] [] exit_to_usermode_loop+0x91/0xc2 : [ 5195.411494] [] prepare_exit_to_usermode+0x31/0x40 : [ 5195.411739] [] retint_user+0x8/0x10 : : Decoding revealed that the PMD was a valid prot_numa PMD and the bad PMD : was a false detection. The bug does not trigger if automatic NUMA : balancing or transparent huge pages is disabled. : : The bug is due to a race in change_pmd_range between a pmd_trans_huge and : pmd_none_or_clear_bad check without any locks held. During the : pmd_trans_huge check, a parallel protection update under lock can have : cleared the PMD and filled it with a prot_numa entry between the transhuge : check and the pmd_none_or_clear_bad check. : : While this could be fixed with heavy locking, it's only necessary to make : a copy of the PMD on the stack during change_pmd_range and avoid races. A : new helper is created for this as the check is quite subtle and the : existing similar helper is not suitable. 
This passed 154 hours of : testing (usually triggers between 20 minutes and 24 hours) without : detecting bad PMDs or corruption. A basic test of an autonuma-intensive : workload showed no significant change in behaviour. Although Mel withdrew the patch on the face of LKML comment https://lkml.org/lkml/2017/4/10/922 the race window aforementioned is still open, and we have reports of Linpack test reporting bad residuals after the bad PMD warning is observed. In addition to that, bad rss-counter and non-zero pgtables assertions are triggered on mm teardown for the task hitting the bad PMD. host kernel: mm/pgtable-generic.c:40: bad pmd 00000000b3152f68(8000000d2d2008e7) .... host kernel: BUG: Bad rss-counter state mm:00000000b583043d idx:1 val:512 host kernel: BUG: non-zero pgtables_bytes on freeing mm: 4096 The issue is observed on a v4.18-based distribution kernel, but the race window is expected to be applicable to mainline kernels, as well. [akpm@linux-foundation.org: fix comment typo, per Rafael] Signed-off-by: Andrew Morton Signed-off-by: Rafael Aquini Signed-off-by: Mel Gorman Cc: Cc: Zi Yan Cc: "Kirill A. Shutemov" Cc: Vlastimil Babka Cc: Michal Hocko Link: http://lkml.kernel.org/r/20200216191800.22423-1-aquini@redhat.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mprotect.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 60864e19421e..18ecbd744978 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -148,6 +148,31 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, return pages; } +/* + * Used when setting automatic NUMA hinting protection where it is + * critical that a numa hinting PMD is not confused with a bad PMD. 
+ */ +static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) +{ + pmd_t pmdval = pmd_read_atomic(pmd); + + /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + barrier(); +#endif + + if (pmd_none(pmdval)) + return 1; + if (pmd_trans_huge(pmdval)) + return 0; + if (unlikely(pmd_bad(pmdval))) { + pmd_clear_bad(pmd); + return 1; + } + + return 0; +} + static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) @@ -164,8 +189,17 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, unsigned long this_pages; next = pmd_addr_end(addr, end); - if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) - && pmd_none_or_clear_bad(pmd)) + + /* + * Automatic NUMA balancing walks the tables with mmap_sem + * held for read. It's possible a parallel update to occur + * between pmd_trans_huge() and a pmd_none_or_clear_bad() + * check leading to a false positive and clearing. + * Hence, it's necessary to atomically read the PMD value + * for all the checks. + */ + if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) && + pmd_none_or_clear_bad_unless_trans_huge(pmd)) goto next; /* invoke the mmu notifier if the pmd is populated */ -- GitLab From 265b81a52542e1a76c53f5aa0f3fd3c576b67be7 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 5 Mar 2020 22:28:36 -0800 Subject: [PATCH 0399/1278] fat: fix uninit-memory access for partial initialized inode commit bc87302a093f0eab45cd4e250c2021299f712ec6 upstream. When get an error in the middle of reading an inode, some fields in the inode might be still not initialized. And then the evict_inode path may access those fields via iput(). To fix, this makes sure that inode fields are initialized. 
Reported-by: syzbot+9d82b8de2992579da5d0@syzkaller.appspotmail.com Signed-off-by: Andrew Morton Signed-off-by: OGAWA Hirofumi Cc: Link: http://lkml.kernel.org/r/871rqnreqx.fsf@mail.parknet.co.jp Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fat/inode.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 3b40937b942a..1df023c4c2cc 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -736,6 +736,13 @@ static struct inode *fat_alloc_inode(struct super_block *sb) return NULL; init_rwsem(&ei->truncate_lock); + /* Zeroing to allow iput() even if partial initialized inode. */ + ei->mmu_private = 0; + ei->i_start = 0; + ei->i_logstart = 0; + ei->i_attrs = 0; + ei->i_pos = 0; + return &ei->vfs_inode; } @@ -1366,16 +1373,6 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, return 0; } -static void fat_dummy_inode_init(struct inode *inode) -{ - /* Initialize this dummy inode to work as no-op. */ - MSDOS_I(inode)->mmu_private = 0; - MSDOS_I(inode)->i_start = 0; - MSDOS_I(inode)->i_logstart = 0; - MSDOS_I(inode)->i_attrs = 0; - MSDOS_I(inode)->i_pos = 0; -} - static int fat_read_root(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); @@ -1820,13 +1817,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_inode = new_inode(sb); if (!fat_inode) goto out_fail; - fat_dummy_inode_init(fat_inode); sbi->fat_inode = fat_inode; fsinfo_inode = new_inode(sb); if (!fsinfo_inode) goto out_fail; - fat_dummy_inode_init(fsinfo_inode); fsinfo_inode->i_ino = MSDOS_FSINFO_INO; sbi->fsinfo_inode = fsinfo_inode; insert_inode_hash(fsinfo_inode); -- GitLab From 414de95cde5efed15bbe1690ed934dbf44d6f70f Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Tue, 28 Jan 2020 19:17:59 +0530 Subject: [PATCH 0400/1278] arm: dts: dra76x: Fix mmc3 max-frequency commit fa63c0039787b8fbacf4d6a51e3ff44288f5b90b upstream. 
dra76x is not affected by i887 which requires mmc3 node to be limited to a max frequency of 64 MHz. Fix this by overwriting the correct value in the dra76 specific dtsi. Fixes: 895bd4b3e5ec ("ARM: dts: Add support for dra76-evm") Cc: stable@vger.kernel.org Signed-off-by: Faiz Abbas Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dra76x.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi index 1c88c581ff18..78d58b8af67e 100644 --- a/arch/arm/boot/dts/dra76x.dtsi +++ b/arch/arm/boot/dts/dra76x.dtsi @@ -17,3 +17,8 @@ &crossbar_mpu { ti,irqs-skip = <10 67 68 133 139 140>; }; + +&mmc3 { + /* dra76x is not affected by i887 */ + max-frequency = <96000000>; +}; -- GitLab From b0960a0ea4b941adb70c443be59e276897553f56 Mon Sep 17 00:00:00 2001 From: tangbin Date: Thu, 5 Mar 2020 09:38:23 +0800 Subject: [PATCH 0401/1278] tty:serial:mvebu-uart:fix a wrong return commit 4a3e208474204e879d22a310b244cb2f39e5b1f8 upstream. In this place, the function should return a negative value, and PTR_ERR() already returns a negative value, so returning -PTR_ERR() is wrong. 
Signed-off-by: tangbin Cc: stable Acked-by: Jiri Slaby Link: https://lore.kernel.org/r/20200305013823.20976-1-tangbin@cmss.chinamobile.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mvebu-uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 401c983ec5f3..a10e4aa9e18e 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -581,7 +581,7 @@ static int mvebu_uart_probe(struct platform_device *pdev) port->membase = devm_ioremap_resource(&pdev->dev, reg); if (IS_ERR(port->membase)) - return -PTR_ERR(port->membase); + return PTR_ERR(port->membase); data = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_uart_data), GFP_KERNEL); -- GitLab From 7336a80aebc633d75c09d8dad09cd467807c787a Mon Sep 17 00:00:00 2001 From: Jay Dolan Date: Thu, 5 Mar 2020 06:05:04 -0800 Subject: [PATCH 0402/1278] serial: 8250_exar: add support for ACCES cards commit 10c5ccc3c6d32f3d7d6c07de1d3f0f4b52f3e3ab upstream. 
Add ACCES VIDs and PIDs that use the Exar chips Signed-off-by: Jay Dolan Cc: stable Link: https://lore.kernel.org/r/20200305140504.22237-1-jay.dolan@accesio.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_exar.c | 33 +++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index 411b4b03457b..899f36b59af7 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -27,6 +27,14 @@ #include "8250.h" +#define PCI_DEVICE_ID_ACCES_COM_2S 0x1052 +#define PCI_DEVICE_ID_ACCES_COM_4S 0x105d +#define PCI_DEVICE_ID_ACCES_COM_8S 0x106c +#define PCI_DEVICE_ID_ACCES_COM232_8 0x10a8 +#define PCI_DEVICE_ID_ACCES_COM_2SM 0x10d2 +#define PCI_DEVICE_ID_ACCES_COM_4SM 0x10db +#define PCI_DEVICE_ID_ACCES_COM_8SM 0x10ea + #define PCI_DEVICE_ID_COMMTECH_4224PCI335 0x0002 #define PCI_DEVICE_ID_COMMTECH_4222PCI335 0x0004 #define PCI_DEVICE_ID_COMMTECH_2324PCI335 0x000a @@ -562,6 +570,22 @@ static int __maybe_unused exar_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(exar_pci_pm, exar_suspend, exar_resume); +static const struct exar8250_board acces_com_2x = { + .num_ports = 2, + .setup = pci_xr17c154_setup, +}; + +static const struct exar8250_board acces_com_4x = { + .num_ports = 4, + .setup = pci_xr17c154_setup, +}; + +static const struct exar8250_board acces_com_8x = { + .num_ports = 8, + .setup = pci_xr17c154_setup, +}; + + static const struct exar8250_board pbn_fastcom335_2 = { .num_ports = 2, .setup = pci_fastcom335_setup, @@ -632,6 +656,15 @@ static const struct exar8250_board pbn_exar_XR17V8358 = { } static const struct pci_device_id exar_pci_tbl[] = { + EXAR_DEVICE(ACCESSIO, ACCES_COM_2S, acces_com_2x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_4S, acces_com_4x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_8S, acces_com_8x), + EXAR_DEVICE(ACCESSIO, ACCES_COM232_8, acces_com_8x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_2SM, acces_com_2x), + 
EXAR_DEVICE(ACCESSIO, ACCES_COM_4SM, acces_com_4x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_8SM, acces_com_8x), + + CONNECT_DEVICE(XR17C152, UART_2_232, pbn_connect), CONNECT_DEVICE(XR17C154, UART_4_232, pbn_connect), CONNECT_DEVICE(XR17C158, UART_8_232, pbn_connect), -- GitLab From 432ef54c0444e7cab85a291347bfc1f69ee6257a Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 10 Feb 2020 09:11:31 +0100 Subject: [PATCH 0403/1278] vt: selection, close sel_buffer race commit 07e6124a1a46b4b5a9b3cacc0c306b50da87abf5 upstream. syzkaller reported this UAF: BUG: KASAN: use-after-free in n_tty_receive_buf_common+0x2481/0x2940 drivers/tty/n_tty.c:1741 Read of size 1 at addr ffff8880089e40e9 by task syz-executor.1/13184 CPU: 0 PID: 13184 Comm: syz-executor.1 Not tainted 5.4.7 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Call Trace: ... kasan_report+0xe/0x20 mm/kasan/common.c:634 n_tty_receive_buf_common+0x2481/0x2940 drivers/tty/n_tty.c:1741 tty_ldisc_receive_buf+0xac/0x190 drivers/tty/tty_buffer.c:461 paste_selection+0x297/0x400 drivers/tty/vt/selection.c:372 tioclinux+0x20d/0x4e0 drivers/tty/vt/vt.c:3044 vt_ioctl+0x1bcf/0x28d0 drivers/tty/vt/vt_ioctl.c:364 tty_ioctl+0x525/0x15a0 drivers/tty/tty_io.c:2657 vfs_ioctl fs/ioctl.c:47 [inline] It is due to a race between parallel paste_selection (TIOCL_PASTESEL) and set_selection_user (TIOCL_SETSEL) invocations. One uses sel_buffer, while the other frees it and reallocates a new one for another selection. Add a mutex to close this race. The mutex takes care properly of sel_buffer and sel_buffer_lth only. The other selection global variables (like sel_start, sel_end, and sel_cons) are protected only in set_selection_user. The other functions need quite some more work to close the races of the variables there. This is going to happen later. This likely fixes (I am unsure as there is no reproducer provided) bug 206361 too. It was marked as CVE-2020-8648. 
Signed-off-by: Jiri Slaby Reported-by: syzbot+59997e8d5cbdc486e6f6@syzkaller.appspotmail.com References: https://bugzilla.kernel.org/show_bug.cgi?id=206361 Cc: stable Link: https://lore.kernel.org/r/20200210081131.23572-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index b157f17d2be2..4b62fb052c82 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,7 @@ static volatile int sel_start = -1; /* cleared by clear_selection */ static int sel_end; static int sel_buffer_lth; static char *sel_buffer; +static DEFINE_MUTEX(sel_lock); /* clear_selection, highlight and highlight_pointer can be called from interrupt (via scrollback/front) */ @@ -162,7 +164,7 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t char *bp, *obp; int i, ps, pe, multiplier; u16 c; - int mode; + int mode, ret = 0; poke_blanked_console(); @@ -202,6 +204,7 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t pe = tmp; } + mutex_lock(&sel_lock); if (sel_cons != vc_cons[fg_console].d) { clear_selection(); sel_cons = vc_cons[fg_console].d; @@ -247,9 +250,10 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t break; case TIOCL_SELPOINTER: highlight_pointer(pe); - return 0; + goto unlock; default: - return -EINVAL; + ret = -EINVAL; + goto unlock; } /* remove the pointer */ @@ -271,7 +275,7 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t else if (new_sel_start == sel_start) { if (new_sel_end == sel_end) /* no action required */ - return 0; + goto unlock; else if (new_sel_end > sel_end) /* extend to right */ highlight(sel_end + 2, new_sel_end); else /* contract from right */ @@ -298,7 +302,8 @@ int 
set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t if (!bp) { printk(KERN_WARNING "selection: kmalloc() failed\n"); clear_selection(); - return -ENOMEM; + ret = -ENOMEM; + goto unlock; } kfree(sel_buffer); sel_buffer = bp; @@ -323,7 +328,9 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t } } sel_buffer_lth = bp - sel_buffer; - return 0; +unlock: + mutex_unlock(&sel_lock); + return ret; } /* Insert the contents of the selection buffer into the @@ -352,6 +359,7 @@ int paste_selection(struct tty_struct *tty) tty_buffer_lock_exclusive(&vc->port); add_wait_queue(&vc->paste_wait, &wait); + mutex_lock(&sel_lock); while (sel_buffer && sel_buffer_lth > pasted) { set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) { @@ -359,7 +367,9 @@ int paste_selection(struct tty_struct *tty) break; } if (tty_throttled(tty)) { + mutex_unlock(&sel_lock); schedule(); + mutex_lock(&sel_lock); continue; } __set_current_state(TASK_RUNNING); @@ -368,6 +378,7 @@ int paste_selection(struct tty_struct *tty) count); pasted += count; } + mutex_unlock(&sel_lock); remove_wait_queue(&vc->paste_wait, &wait); __set_current_state(TASK_RUNNING); -- GitLab From 64489a229bbf902244d8407b02015f30e2cd4651 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 28 Feb 2020 12:54:05 +0100 Subject: [PATCH 0404/1278] vt: selection, push console lock down commit 4b70dd57a15d2f4685ac6e38056bad93e81e982f upstream. We need to nest the console lock in sel_lock, so we have to push it down a bit. Fortunately, the callers of set_selection_* just lock the console lock around the function call. So moving it down is easy. In the next patch, we switch the order. 
Signed-off-by: Jiri Slaby Fixes: 07e6124a1a46 ("vt: selection, close sel_buffer race") Cc: stable Link: https://lore.kernel.org/r/20200228115406.5735-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c | 13 ++++++++++++- drivers/tty/vt/vt.c | 2 -- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 4b62fb052c82..8fe5f46ff7bb 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -157,7 +157,7 @@ static int store_utf8(u16 c, char *p) * The entire selection process is managed under the console_lock. It's * a lot under the lock but its hardly a performance path */ -int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty) +static int __set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty) { struct vc_data *vc = vc_cons[fg_console].d; int sel_mode, new_sel_start, new_sel_end, spc; @@ -333,6 +333,17 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t return ret; } +int set_selection(const struct tiocl_selection __user *v, struct tty_struct *tty) +{ + int ret; + + console_lock(); + ret = __set_selection(v, tty); + console_unlock(); + + return ret; +} + /* Insert the contents of the selection buffer into the * queue of the tty associated with the current console. * Invoked by ioctl(). 
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 06761fcedeff..826433af4bdd 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2688,9 +2688,7 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) switch (type) { case TIOCL_SETSEL: - console_lock(); ret = set_selection((struct tiocl_selection __user *)(p+1), tty); - console_unlock(); break; case TIOCL_PASTESEL: ret = paste_selection(tty); -- GitLab From a4719f6d07b2c63223f7452c435c5f578f105cfe Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 28 Feb 2020 12:54:06 +0100 Subject: [PATCH 0405/1278] vt: selection, push sel_lock up commit e8c75a30a23c6ba63f4ef6895cbf41fd42f21aa2 upstream. sel_lock cannot nest in the console lock. Thanks to syzkaller, the kernel states firmly: > WARNING: possible circular locking dependency detected > 5.6.0-rc3-syzkaller #0 Not tainted > ------------------------------------------------------ > syz-executor.4/20336 is trying to acquire lock: > ffff8880a2e952a0 (&tty->termios_rwsem){++++}, at: tty_unthrottle+0x22/0x100 drivers/tty/tty_ioctl.c:136 > > but task is already holding lock: > ffffffff89462e70 (sel_lock){+.+.}, at: paste_selection+0x118/0x470 drivers/tty/vt/selection.c:374 > > which lock already depends on the new lock. > > the existing dependency chain (in reverse order) is: > > -> #2 (sel_lock){+.+.}: > mutex_lock_nested+0x1b/0x30 kernel/locking/mutex.c:1118 > set_selection_kernel+0x3b8/0x18a0 drivers/tty/vt/selection.c:217 > set_selection_user+0x63/0x80 drivers/tty/vt/selection.c:181 > tioclinux+0x103/0x530 drivers/tty/vt/vt.c:3050 > vt_ioctl+0x3f1/0x3a30 drivers/tty/vt/vt_ioctl.c:364 This is ioctl(TIOCL_SETSEL). 
Locks held on the path: console_lock -> sel_lock > -> #1 (console_lock){+.+.}: > console_lock+0x46/0x70 kernel/printk/printk.c:2289 > con_flush_chars+0x50/0x650 drivers/tty/vt/vt.c:3223 > n_tty_write+0xeae/0x1200 drivers/tty/n_tty.c:2350 > do_tty_write drivers/tty/tty_io.c:962 [inline] > tty_write+0x5a1/0x950 drivers/tty/tty_io.c:1046 This is write(). Locks held on the path: termios_rwsem -> console_lock > -> #0 (&tty->termios_rwsem){++++}: > down_write+0x57/0x140 kernel/locking/rwsem.c:1534 > tty_unthrottle+0x22/0x100 drivers/tty/tty_ioctl.c:136 > mkiss_receive_buf+0x12aa/0x1340 drivers/net/hamradio/mkiss.c:902 > tty_ldisc_receive_buf+0x12f/0x170 drivers/tty/tty_buffer.c:465 > paste_selection+0x346/0x470 drivers/tty/vt/selection.c:389 > tioclinux+0x121/0x530 drivers/tty/vt/vt.c:3055 > vt_ioctl+0x3f1/0x3a30 drivers/tty/vt/vt_ioctl.c:364 This is ioctl(TIOCL_PASTESEL). Locks held on the path: sel_lock -> termios_rwsem > other info that might help us debug this: > > Chain exists of: > &tty->termios_rwsem --> console_lock --> sel_lock Clearly. From the above, we have: console_lock -> sel_lock sel_lock -> termios_rwsem termios_rwsem -> console_lock Fix this by reversing the console_lock -> sel_lock dependency in ioctl(TIOCL_SETSEL). First, lock sel_lock, then console_lock. 
Signed-off-by: Jiri Slaby Reported-by: syzbot+26183d9746e62da329b8@syzkaller.appspotmail.com Fixes: 07e6124a1a46 ("vt: selection, close sel_buffer race") Cc: stable Link: https://lore.kernel.org/r/20200228115406.5735-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 8fe5f46ff7bb..91ffe3f2b8a0 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -204,7 +204,6 @@ static int __set_selection(const struct tiocl_selection __user *sel, struct tty_ pe = tmp; } - mutex_lock(&sel_lock); if (sel_cons != vc_cons[fg_console].d) { clear_selection(); sel_cons = vc_cons[fg_console].d; @@ -250,10 +249,9 @@ static int __set_selection(const struct tiocl_selection __user *sel, struct tty_ break; case TIOCL_SELPOINTER: highlight_pointer(pe); - goto unlock; + return 0; default: - ret = -EINVAL; - goto unlock; + return -EINVAL; } /* remove the pointer */ @@ -275,7 +273,7 @@ static int __set_selection(const struct tiocl_selection __user *sel, struct tty_ else if (new_sel_start == sel_start) { if (new_sel_end == sel_end) /* no action required */ - goto unlock; + return 0; else if (new_sel_end > sel_end) /* extend to right */ highlight(sel_end + 2, new_sel_end); else /* contract from right */ @@ -302,8 +300,7 @@ static int __set_selection(const struct tiocl_selection __user *sel, struct tty_ if (!bp) { printk(KERN_WARNING "selection: kmalloc() failed\n"); clear_selection(); - ret = -ENOMEM; - goto unlock; + return -ENOMEM; } kfree(sel_buffer); sel_buffer = bp; @@ -328,8 +325,7 @@ static int __set_selection(const struct tiocl_selection __user *sel, struct tty_ } } sel_buffer_lth = bp - sel_buffer; -unlock: - mutex_unlock(&sel_lock); + return ret; } @@ -337,9 +333,11 @@ int set_selection(const struct tiocl_selection __user *v, struct tty_struct *tty { int ret; + mutex_lock(&sel_lock); 
console_lock(); ret = __set_selection(v, tty); console_unlock(); + mutex_unlock(&sel_lock); return ret; } -- GitLab From a350eee14eca753114567d66ee1895187f9f40e6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 26 Feb 2020 15:16:15 -0800 Subject: [PATCH 0406/1278] x86/pkeys: Manually set X86_FEATURE_OSPKE to preserve existing changes commit 735a6dd02222d8d070c7bb748f25895239ca8c92 upstream. Explicitly set X86_FEATURE_OSPKE via set_cpu_cap() instead of calling get_cpu_cap() to pull the feature bit from CPUID after enabling CR4.PKE. Invoking get_cpu_cap() effectively wipes out any {set,clear}_cpu_cap() changes that were made between this_cpu->c_init() and setup_pku(), as all non-synthetic feature words are reinitialized from the CPU's CPUID values. Blasting away capability updates is most visible when running on a VMX capable CPU, but with VMX disabled by BIOS. To indicate that VMX is disabled, init_ia32_feat_ctl() clears X86_FEATURE_VMX, using clear_cpu_cap() instead of setup_clear_cpu_cap() so that KVM can report which CPU is misconfigured (KVM needs to probe every CPU anyways). Restoring X86_FEATURE_VMX from CPUID causes KVM to think VMX is enabled, ultimately leading to an unexpected #GP when KVM attempts to do VMXON. Arguably, init_ia32_feat_ctl() should use setup_clear_cpu_cap() and let KVM figure out a different way to report the misconfigured CPU, but VMX is not the only feature bit that is affected, i.e. there is precedent that tweaking feature bits via {set,clear}_cpu_cap() after ->c_init() is expected to work. Most notably, x86_init_rdrand()'s clearing of X86_FEATURE_RDRAND when RDRAND malfunctions is also overwritten. 
Fixes: 0697694564c8 ("x86/mm/pkeys: Actually enable Memory Protection Keys in the CPU") Reported-by: Jacob Keller Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Tested-by: Jacob Keller Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200226231615.13664-1-sean.j.christopherson@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3d805e8b3739..7b4141889919 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -360,7 +360,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) * cpuid bit to be set. We need to ensure that we * update that bit in this CPU's "cpu_info". */ - get_cpu_cap(c); + set_cpu_cap(c, X86_FEATURE_OSPKE); } #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -- GitLab From 04b31630d500a14e64090470b7d5adf58b2be4fd Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 9 Feb 2020 19:33:38 +0300 Subject: [PATCH 0407/1278] dmaengine: tegra-apb: Fix use-after-free commit 94788af4ed039476ff3527b0e6a12c1dc42cb022 upstream. I was doing some experiments with I2C and noticed that Tegra APB DMA driver crashes sometime after I2C DMA transfer termination. The crash happens because tegra_dma_terminate_all() bails out immediately if pending list is empty, and thus, it doesn't release the half-completed descriptors which are getting re-used before ISR tasklet kicks-in. 
tegra-i2c 7000c400.i2c: DMA transfer timeout elants_i2c 0-0010: elants_i2c_irq: failed to read data: -110 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 142 at lib/list_debug.c:45 __list_del_entry_valid+0x45/0xac list_del corruption, ddbaac44->next is LIST_POISON1 (00000100) Modules linked in: CPU: 0 PID: 142 Comm: kworker/0:2 Not tainted 5.5.0-rc2-next-20191220-00175-gc3605715758d-dirty #538 Hardware name: NVIDIA Tegra SoC (Flattened Device Tree) Workqueue: events_freezable_power_ thermal_zone_device_check [] (unwind_backtrace) from [] (show_stack+0x11/0x14) [] (show_stack) from [] (dump_stack+0x85/0x94) [] (dump_stack) from [] (__warn+0xc1/0xc4) [] (__warn) from [] (warn_slowpath_fmt+0x61/0x78) [] (warn_slowpath_fmt) from [] (__list_del_entry_valid+0x45/0xac) [] (__list_del_entry_valid) from [] (tegra_dma_tasklet+0x5b/0x154) [] (tegra_dma_tasklet) from [] (tasklet_action_common.constprop.0+0x41/0x7c) [] (tasklet_action_common.constprop.0) from [] (__do_softirq+0xd3/0x2a8) [] (__do_softirq) from [] (irq_exit+0x7b/0x98) [] (irq_exit) from [] (__handle_domain_irq+0x45/0x80) [] (__handle_domain_irq) from [] (gic_handle_irq+0x45/0x7c) [] (gic_handle_irq) from [] (__irq_svc+0x65/0x94) Exception stack(0xde2ebb90 to 0xde2ebbd8) Signed-off-by: Dmitry Osipenko Acked-by: Jon Hunter Cc: Link: https://lore.kernel.org/r/20200209163356.6439-2-digetx@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/tegra20-apb-dma.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 7db2766b5fe9..ffe8126a9553 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -755,10 +755,6 @@ static int tegra_dma_terminate_all(struct dma_chan *dc) bool was_busy; spin_lock_irqsave(&tdc->lock, flags); - if (list_empty(&tdc->pending_sg_req)) { - spin_unlock_irqrestore(&tdc->lock, flags); - return 0; - } if (!tdc->busy) goto skip_dma_stop; -- GitLab From 
3e4c735e6ba9a5add132c8bcad8700029fbdb609 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 9 Feb 2020 19:33:39 +0300 Subject: [PATCH 0408/1278] dmaengine: tegra-apb: Prevent race conditions of tasklet vs free list commit c33ee1301c393a241d6424e36eff1071811b1064 upstream. The interrupt handler puts a half-completed DMA descriptor on a free list and then schedules tasklet to process bottom half of the descriptor that executes client's callback, this creates possibility to pick up the busy descriptor from the free list. Thus, let's disallow descriptor's re-use until it is fully processed. Signed-off-by: Dmitry Osipenko Acked-by: Jon Hunter Cc: Link: https://lore.kernel.org/r/20200209163356.6439-3-digetx@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/tegra20-apb-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index ffe8126a9553..3402494cadf9 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -288,7 +288,7 @@ static struct tegra_dma_desc *tegra_dma_desc_get( /* Do not allocate if desc are waiting for ack */ list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) { - if (async_tx_test_ack(&dma_desc->txd)) { + if (async_tx_test_ack(&dma_desc->txd) && !dma_desc->cb_count) { list_del(&dma_desc->node); spin_unlock_irqrestore(&tdc->lock, flags); dma_desc->txd.flags = 0; -- GitLab From 165dc070ce2f57e01541409741c721db65eecd39 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 19 Feb 2020 10:25:45 -0500 Subject: [PATCH 0409/1278] dm cache: fix a crash due to incorrect work item cancelling commit 7cdf6a0aae1cccf5167f3f04ecddcf648b78e289 upstream. 
The crash can be reproduced by running the lvm2 testsuite test lvconvert-thin-external-cache.sh for several minutes, e.g.: while :; do make check T=shell/lvconvert-thin-external-cache.sh; done The crash happens in this call chain: do_waker -> policy_tick -> smq_tick -> end_hotspot_period -> clear_bitset -> memset -> __memset -- which accesses an invalid pointer in the vmalloc area. The work entry on the workqueue is executed even after the bitmap was freed. The problem is that cancel_delayed_work doesn't wait for the running work item to finish, so the work item can continue running and re-submitting itself even after cache_postsuspend. In order to make sure that the work item won't be running, we must use cancel_delayed_work_sync. Also, change flush_workqueue to drain_workqueue, so that if some work item submits itself or another work item, we are properly waiting for both of them. Fixes: c6b4fcbad044 ("dm: add cache target") Cc: stable@vger.kernel.org # v3.9 Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index b5f541112fca..69cdb29ef6be 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2971,8 +2971,8 @@ static void cache_postsuspend(struct dm_target *ti) prevent_background_work(cache); BUG_ON(atomic_read(&cache->nr_io_migrations)); - cancel_delayed_work(&cache->waker); - flush_workqueue(cache->wq); + cancel_delayed_work_sync(&cache->waker); + drain_workqueue(cache->wq); WARN_ON(cache->tracker.in_flight); /* -- GitLab From 77ed33b91c868973b08c0b3c8b206a88416327b3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 26 Jan 2020 21:49:50 +0200 Subject: [PATCH 0410/1278] ARM: dts: ls1021a: Restore MDIO compatible to gianfar commit 7155c44624d061692b4c13aa8343f119c67d4fc0 upstream. 
The difference between "fsl,etsec2-mdio" and "gianfar" has to do with the .get_tbipa function, which calculates the address of the TBIPA register automatically, if not explicitly specified. [ see drivers/net/ethernet/freescale/fsl_pq_mdio.c ]. On LS1021A, the TBIPA register is at offset 0x30 within the port register block, which is what the "gianfar" method of calculating addresses actually does. Luckily, the bad "compatible" is inconsequential for ls1021a.dtsi, because the TBIPA register is explicitly specified via the second "reg" (<0x0 0x2d10030 0x0 0x4>), so the "get_tbipa" function is dead code. Nonetheless it's good to restore it to its correct value. Background discussion: https://www.spinics.net/lists/stable/msg361156.html Fixes: c7861adbe37f ("ARM: dts: ls1021: Fix SGMII PCS link remaining down after PHY disconnect") Reported-by: Pavel Machek Signed-off-by: Vladimir Oltean Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/ls1021a.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 1343c86988c5..68f4482c35e2 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -562,7 +562,7 @@ }; mdio0: mdio@2d24000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; @@ -570,7 +570,7 @@ }; mdio1: mdio@2d64000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; -- GitLab From c40c33a8936174dcd78268e619960c2ed421d43b Mon Sep 17 00:00:00 2001 From: Dragos Tarcatu Date: Fri, 7 Feb 2020 20:53:24 +0200 Subject: [PATCH 0411/1278] ASoC: topology: Fix memleak in soc_tplg_link_elems_load() commit 2b2d5c4db732c027a14987cfccf767dac1b45170 upstream. If soc_tplg_link_config() fails, _link needs to be freed in case of topology ABI version mismatch. 
However the current code is returning directly and ends up leaking memory in this case. This patch fixes that. Fixes: 593d9e52f9bb ("ASoC: topology: Add support to configure existing physical DAI links") Signed-off-by: Dragos Tarcatu Link: https://lore.kernel.org/r/20200207185325.22320-2-dragos_tarcatu@mentor.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-topology.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 72301bcad3bd..0f91b4ed6814 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -2177,8 +2177,11 @@ static int soc_tplg_link_elems_load(struct soc_tplg *tplg, } ret = soc_tplg_link_config(tplg, _link); - if (ret < 0) + if (ret < 0) { + if (!abi_match) + kfree(_link); return ret; + } /* offset by version-specific struct size and * real priv data size -- GitLab From 543eafede7b67fec47286f61f7fb370691d415f2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2020 12:17:35 +0100 Subject: [PATCH 0412/1278] ASoC: intel: skl: Fix pin debug prints commit 64bbacc5f08c01954890981c63de744df1f29a30 upstream. skl_print_pins() loops over all given pins but it overwrites the text at the very same position while increasing the returned length. Fix this to show all the pin contents properly. 
Fixes: d14700a01f91 ("ASoC: Intel: Skylake: Debugfs facility to dump module config") Signed-off-by: Takashi Iwai Acked-by: Cezary Rojewski Link: https://lore.kernel.org/r/20200218111737.14193-2-tiwai@suse.de Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/skylake/skl-debug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-debug.c b/sound/soc/intel/skylake/skl-debug.c index 1987f78ea91e..9e38f2afa084 100644 --- a/sound/soc/intel/skylake/skl-debug.c +++ b/sound/soc/intel/skylake/skl-debug.c @@ -42,7 +42,7 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, int i; ssize_t ret = 0; - for (i = 0; i < max_pin; i++) + for (i = 0; i < max_pin; i++) { ret += snprintf(buf + size, MOD_BUF - size, "%s %d\n\tModule %d\n\tInstance %d\n\t" "In-used %s\n\tType %s\n" @@ -53,6 +53,8 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, m_pin[i].in_use ? "Used" : "Unused", m_pin[i].is_dynamic ? "Dynamic" : "Static", m_pin[i].pin_state, i); + size += ret; + } return ret; } -- GitLab From 7c4e080113704692c075a1cd98fe98d9dcf51e9c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2020 12:17:36 +0100 Subject: [PATCH 0413/1278] ASoC: intel: skl: Fix possible buffer overflow in debug outputs commit 549cd0ba04dcfe340c349cd983bd440480fae8ee upstream. The debugfs output of intel skl driver writes strings with multiple snprintf() calls with the fixed size. This was supposed to avoid the buffer overflow but actually it still would, because snprintf() returns the expected size to be output, not the actual output size. Fix it by replacing snprintf() calls with scnprintf(). 
Fixes: d14700a01f91 ("ASoC: Intel: Skylake: Debugfs facility to dump module config") Signed-off-by: Takashi Iwai Acked-by: Cezary Rojewski Link: https://lore.kernel.org/r/20200218111737.14193-3-tiwai@suse.de Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/skylake/skl-debug.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sound/soc/intel/skylake/skl-debug.c b/sound/soc/intel/skylake/skl-debug.c index 9e38f2afa084..71c6bbf37b6c 100644 --- a/sound/soc/intel/skylake/skl-debug.c +++ b/sound/soc/intel/skylake/skl-debug.c @@ -43,7 +43,7 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, ssize_t ret = 0; for (i = 0; i < max_pin; i++) { - ret += snprintf(buf + size, MOD_BUF - size, + ret += scnprintf(buf + size, MOD_BUF - size, "%s %d\n\tModule %d\n\tInstance %d\n\t" "In-used %s\n\tType %s\n" "\tState %d\n\tIndex %d\n", @@ -61,7 +61,7 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, static ssize_t skl_print_fmt(struct skl_module_fmt *fmt, char *buf, ssize_t size, bool direction) { - return snprintf(buf + size, MOD_BUF - size, + return scnprintf(buf + size, MOD_BUF - size, "%s\n\tCh %d\n\tFreq %d\n\tBit depth %d\n\t" "Valid bit depth %d\n\tCh config %#x\n\tInterleaving %d\n\t" "Sample Type %d\n\tCh Map %#x\n", @@ -83,16 +83,16 @@ static ssize_t module_read(struct file *file, char __user *user_buf, if (!buf) return -ENOMEM; - ret = snprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n" + ret = scnprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n" "\tInstance id %d\n\tPvt_id %d\n", mconfig->guid, mconfig->id.module_id, mconfig->id.instance_id, mconfig->id.pvt_id); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Resources:\n\tMCPS %#x\n\tIBS %#x\n\tOBS %#x\t\n", mconfig->mcps, mconfig->ibs, mconfig->obs); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - 
ret, "Module data:\n\tCore %d\n\tIn queue %d\n\t" "Out queue %d\n\tType %s\n", mconfig->core_id, mconfig->max_in_queue, @@ -102,38 +102,38 @@ static ssize_t module_read(struct file *file, char __user *user_buf, ret += skl_print_fmt(mconfig->in_fmt, buf, ret, true); ret += skl_print_fmt(mconfig->out_fmt, buf, ret, false); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Fixup:\n\tParams %#x\n\tConverter %#x\n", mconfig->params_fixup, mconfig->converter); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Module Gateway:\n\tType %#x\n\tVbus %#x\n\tHW conn %#x\n\tSlot %#x\n", mconfig->dev_type, mconfig->vbus_id, mconfig->hw_conn_type, mconfig->time_slot); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Pipeline:\n\tID %d\n\tPriority %d\n\tConn Type %d\n\t" "Pages %#x\n", mconfig->pipe->ppl_id, mconfig->pipe->pipe_priority, mconfig->pipe->conn_type, mconfig->pipe->memory_pages); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tParams:\n\t\tHost DMA %d\n\t\tLink DMA %d\n", mconfig->pipe->p_params->host_dma_id, mconfig->pipe->p_params->link_dma_id); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tPCM params:\n\t\tCh %d\n\t\tFreq %d\n\t\tFormat %d\n", mconfig->pipe->p_params->ch, mconfig->pipe->p_params->s_freq, mconfig->pipe->p_params->s_fmt); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tLink %#x\n\tStream %#x\n", mconfig->pipe->p_params->linktype, mconfig->pipe->p_params->stream); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tState %d\n\tPassthru %s\n", mconfig->pipe->state, mconfig->pipe->passthru ? 
"true" : "false"); @@ -143,7 +143,7 @@ static ssize_t module_read(struct file *file, char __user *user_buf, ret += skl_print_pins(mconfig->m_out_pin, buf, mconfig->max_out_queue, ret, false); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Other:\n\tDomain %d\n\tHomogenous Input %s\n\t" "Homogenous Output %s\n\tIn Queue Mask %d\n\t" "Out Queue Mask %d\n\tDMA ID %d\n\tMem Pages %d\n\t" @@ -201,7 +201,7 @@ static ssize_t fw_softreg_read(struct file *file, char __user *user_buf, __ioread32_copy(d->fw_read_buff, fw_reg_addr, w0_stat_sz >> 2); for (offset = 0; offset < FW_REG_SIZE; offset += 16) { - ret += snprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset); + ret += scnprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset); hex_dump_to_buffer(d->fw_read_buff + offset, 16, 16, 4, tmp + ret, FW_REG_BUF - ret, 0); ret += strlen(tmp + ret); -- GitLab From 6dfcfe0c07fa24f6d601feb3499746c8a4f6102b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2020 12:17:37 +0100 Subject: [PATCH 0414/1278] ASoC: pcm: Fix possible buffer overflow in dpcm state sysfs output commit 6c89ffea60aa3b2a33ae7987de1e84bfb89e4c9e upstream. dpcm_show_state() invokes multiple snprintf() calls to concatenate formatted strings on the fixed size buffer. The usage of snprintf() is supposed for avoiding the buffer overflow, but it doesn't work as expected because snprintf() doesn't return the actual output size but the size to be written. Fix this bug by replacing all snprintf() calls with scnprintf() calls. 
Fixes: f86dcef87b77 ("ASoC: dpcm: Add debugFS support for DPCM") Signed-off-by: Takashi Iwai Acked-by: Cezary Rojewski Link: https://lore.kernel.org/r/20200218111737.14193-4-tiwai@suse.de Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-pcm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 89f772ed4705..e75822dd9930 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2957,16 +2957,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, ssize_t offset = 0; /* FE state */ - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "[%s - %s]\n", fe->dai_link->name, stream ? "Capture" : "Playback"); - offset += snprintf(buf + offset, size - offset, "State: %s\n", + offset += scnprintf(buf + offset, size - offset, "State: %s\n", dpcm_state_string(fe->dpcm[stream].state)); if ((fe->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) && (fe->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP)) - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "Hardware Params: " "Format = %s, Channels = %d, Rate = %d\n", snd_pcm_format_name(params_format(params)), @@ -2974,10 +2974,10 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, params_rate(params)); /* BEs state */ - offset += snprintf(buf + offset, size - offset, "Backends:\n"); + offset += scnprintf(buf + offset, size - offset, "Backends:\n"); if (list_empty(&fe->dpcm[stream].be_clients)) { - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " No active DSP links\n"); goto out; } @@ -2986,16 +2986,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, struct snd_soc_pcm_runtime *be = dpcm->be; params = &dpcm->hw_params; - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "- %s\n", 
be->dai_link->name); - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " State: %s\n", dpcm_state_string(be->dpcm[stream].state)); if ((be->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) && (be->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP)) - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " Hardware Params: " "Format = %s, Channels = %d, Rate = %d\n", snd_pcm_format_name(params_format(params)), -- GitLab From 5c17ed40a9a0fb37b07b461eeb11b4d48ab4206b Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Thu, 20 Feb 2020 21:29:56 +0100 Subject: [PATCH 0415/1278] ASoC: pcm512x: Fix unbalanced regulator enable call in probe error path commit ac0a68997935c4acb92eaae5ad8982e0bb432d56 upstream. When we get a clock error during probe we have to call regulator_bulk_disable before bailing out, otherwise we trigger a warning in regulator_put. Fix this by using "goto err" like in the error cases above. 
Fixes: 5a3af1293194d ("ASoC: pcm512x: Add PCM512x driver") Signed-off-by: Matthias Reichl Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200220202956.29233-1-hias@horus.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/pcm512x.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c index 68feae262476..940bdc30753d 100644 --- a/sound/soc/codecs/pcm512x.c +++ b/sound/soc/codecs/pcm512x.c @@ -1438,13 +1438,15 @@ int pcm512x_probe(struct device *dev, struct regmap *regmap) } pcm512x->sclk = devm_clk_get(dev, NULL); - if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER) - return -EPROBE_DEFER; + if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto err; + } if (!IS_ERR(pcm512x->sclk)) { ret = clk_prepare_enable(pcm512x->sclk); if (ret != 0) { dev_err(dev, "Failed to enable SCLK: %d\n", ret); - return ret; + goto err; } } -- GitLab From 9b2c4c1b2c114c3bbe69351a91213f2aa204a6fc Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 28 Feb 2020 15:31:45 +0000 Subject: [PATCH 0416/1278] ASoC: dapm: Correct DAPM handling of active widgets during shutdown commit 9b3193089e77d3b59b045146ff1c770dd899acb1 upstream. commit c2caa4da46a4 ("ASoC: Fix widget powerdown on shutdown") added a set of the power state during snd_soc_dapm_shutdown to ensure the widgets are powered off. However, when commit 39eb5fd13dff ("ASoC: dapm: Delay w->power update until the changes are written") added the new_power member of the widget structure, to differentiate between the current power state and the target power state, it did not update the shutdown to use the new_power member. As new_power has not been updated, it will be left in the state set by the last DAPM sequence, i.e. 1 for active widgets. So as the DAPM sequence for the shutdown proceeds it will turn the widgets on (despite them already being on) rather than turning them off.
Fixes: 39eb5fd13dff ("ASoC: dapm: Delay w->power update until the changes are written") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20200228153145.21013-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-dapm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 104d5f487c7d..fb2fef166672 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -4481,7 +4481,7 @@ static void soc_dapm_shutdown_dapm(struct snd_soc_dapm_context *dapm) continue; if (w->power) { dapm_seq_insert(w, &down_list, false); - w->power = 0; + w->new_power = 0; powerdown = 1; } } -- GitLab From 8457a77611f784abb4b02d01e0e97a1ad3139c8c Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Mon, 2 Mar 2020 19:16:14 +0100 Subject: [PATCH 0417/1278] RDMA/iwcm: Fix iwcm work deallocation commit 810dbc69087b08fd53e1cdd6c709f385bc2921ad upstream. The dealloc_work_entries() function must update the work_free_list pointer while freeing its entries, since it may be called again on the same list. A second iteration of the work list caused a system crash. This happens if work allocation fails during cma_iw_listen() and free_cm_id() tries to free the list again during cleanup.
Fixes: 922a8e9fb2e0 ("RDMA: iWARP Connection Manager.") Link: https://lore.kernel.org/r/20200302181614.17042-1-bmt@zurich.ibm.com Reported-by: syzbot+cb0c054eabfba4342146@syzkaller.appspotmail.com Signed-off-by: Bernard Metzler Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/iwcm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 30d7277249b8..16b0c10348e8 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -158,8 +158,10 @@ static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct list_head *e, *tmp; - list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) + list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { + list_del(e); kfree(list_entry(e, struct iwcm_work, free_list)); + } } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) -- GitLab From 3c1099c80cd1cefd205c604e22b0e69c6728a7ad Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 21 Feb 2020 15:20:26 +0000 Subject: [PATCH 0418/1278] RMDA/cm: Fix missing ib_cm_destroy_id() in ib_cm_insert_listen() commit c14dfddbd869bf0c2bafb7ef260c41d9cebbcfec upstream. The algorithm pre-allocates a cm_id since allocation cannot be done while holding the cm.lock spinlock, however it doesn't free it on one error path, leading to a memory leak. 
Fixes: 067b171b8679 ("IB/cm: Share listening CM IDs") Link: https://lore.kernel.org/r/20200221152023.GA8680@ziepe.ca Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2af79e4f3235..80a8eb7e5d6e 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1143,6 +1143,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, /* Sharing an ib_cm_id with different handlers is not * supported */ spin_unlock_irqrestore(&cm.lock, flags); + ib_destroy_cm_id(cm_id); return ERR_PTR(-EINVAL); } atomic_inc(&cm_id_priv->refcount); -- GitLab From de374e15e8d82104765cbebfe88bf2bd8b3b8698 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 25 Feb 2020 14:54:45 -0500 Subject: [PATCH 0419/1278] IB/hfi1, qib: Ensure RCU is locked when accessing list commit 817a68a6584aa08e323c64283fec5ded7be84759 upstream. The packet handling function, specifically the iteration of the qp list for mad packet processing misses locking RCU before running through the list. Not only is this incorrect, but the list_for_each_entry_rcu() call can not be called with a conditional check for lock dependency. Remedy this by invoking the rcu lock and unlock around the critical section. This brings MAD packet processing in line with what is done for non-MAD packets. 
Fixes: 7724105686e7 ("IB/hfi1: add driver files") Link: https://lore.kernel.org/r/20200225195445.140896.41873.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/verbs.c | 4 +++- drivers/infiniband/hw/qib/qib_verbs.c | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index ad78b471c112..b962dbcfe9a7 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -593,10 +593,11 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, opa_get_lid(packet->dlid, 9B)); if (!mcast) goto drop; + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; if (hfi1_do_pkey_check(packet)) - goto drop; + goto unlock_drop; spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -605,6 +606,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, ibp->rvp.n_pkt_drops++; spin_unlock_irqrestore(&packet->qp->r_lock, flags); } + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 350bc29a066f..b473df8eea1a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -360,8 +360,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) if (mcast == NULL) goto drop; this_cpu_inc(ibp->pmastats->n_multicast_rcv); + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. 
-- GitLab From 09192ee93dc01a38f479bfa5a0d9a5ddbc8730c7 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Thu, 16 Jan 2020 15:18:49 +0100 Subject: [PATCH 0420/1278] ARM: imx: build v7_cpu_resume() unconditionally commit 512a928affd51c2dc631401e56ad5ee5d5dd68b6 upstream. This function is not only needed by the platform suspend code, but is also reused as the CPU resume function when the ARM cores can be powered down completely in deep idle, which is the case on i.MX6SX and i.MX6UL(L). Providing the static inline stub whenever CONFIG_SUSPEND is disabled means that those platforms will hang on resume from cpuidle if suspend is disabled. So there are two problems: - The static inline stub masks the linker error - The function is not available where needed Fix both by just building the function unconditionally, when CONFIG_SOC_IMX6 is enabled. The actual code is three instructions long, so it's arguably ok to just leave it in for all i.MX6 kernel configurations. Fixes: 05136f0897b5 ("ARM: imx: support arm power off in cpuidle for i.mx6sx") Signed-off-by: Lucas Stach Signed-off-by: Ahmad Fatoum Signed-off-by: Rouven Czerwinski Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-imx/Makefile | 2 ++ arch/arm/mach-imx/common.h | 4 ++-- arch/arm/mach-imx/resume-imx6.S | 24 ++++++++++++++++++++++++ arch/arm/mach-imx/suspend-imx6.S | 14 -------------- 4 files changed, 28 insertions(+), 16 deletions(-) create mode 100644 arch/arm/mach-imx/resume-imx6.S diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 8ff71058207d..8cf1a98785a5 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -87,6 +87,8 @@ AFLAGS_suspend-imx6.o :=-Wa,-march=armv7-a obj-$(CONFIG_SOC_IMX6) += suspend-imx6.o obj-$(CONFIG_SOC_IMX53) += suspend-imx53.o endif +AFLAGS_resume-imx6.o :=-Wa,-march=armv7-a +obj-$(CONFIG_SOC_IMX6) += resume-imx6.o obj-$(CONFIG_SOC_IMX6) += pm-imx6.o obj-$(CONFIG_SOC_IMX1) += mach-imx1.o diff --git 
a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index b09a2ec19267..4b318c864446 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -111,17 +111,17 @@ void imx_cpu_die(unsigned int cpu); int imx_cpu_kill(unsigned int cpu); #ifdef CONFIG_SUSPEND -void v7_cpu_resume(void); void imx53_suspend(void __iomem *ocram_vbase); extern const u32 imx53_suspend_sz; void imx6_suspend(void __iomem *ocram_vbase); #else -static inline void v7_cpu_resume(void) {} static inline void imx53_suspend(void __iomem *ocram_vbase) {} static const u32 imx53_suspend_sz; static inline void imx6_suspend(void __iomem *ocram_vbase) {} #endif +void v7_cpu_resume(void); + void imx6_pm_ccm_init(const char *ccm_compat); void imx6q_pm_init(void); void imx6dl_pm_init(void); diff --git a/arch/arm/mach-imx/resume-imx6.S b/arch/arm/mach-imx/resume-imx6.S new file mode 100644 index 000000000000..5bd1ba7ef15b --- /dev/null +++ b/arch/arm/mach-imx/resume-imx6.S @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2014 Freescale Semiconductor, Inc. + */ + +#include +#include +#include +#include +#include "hardware.h" + +/* + * The following code must assume it is running from physical address + * where absolute virtual addresses to the data section have to be + * turned into relative ones. + */ + +ENTRY(v7_cpu_resume) + bl v7_invalidate_l1 +#ifdef CONFIG_CACHE_L2X0 + bl l2c310_early_resume +#endif + b cpu_resume +ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S index 76ee2ceec8d5..7d84b617af48 100644 --- a/arch/arm/mach-imx/suspend-imx6.S +++ b/arch/arm/mach-imx/suspend-imx6.S @@ -333,17 +333,3 @@ resume: ret lr ENDPROC(imx6_suspend) - -/* - * The following code must assume it is running from physical address - * where absolute virtual addresses to the data section have to be - * turned into relative ones. 
- */ - -ENTRY(v7_cpu_resume) - bl v7_invalidate_l1 -#ifdef CONFIG_CACHE_L2X0 - bl l2c310_early_resume -#endif - b cpu_resume -ENDPROC(v7_cpu_resume) -- GitLab From 5c5cdae8fb84b06e6145cd4f649385db074b56d3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Mar 2020 13:16:08 +0300 Subject: [PATCH 0421/1278] hwmon: (adt7462) Fix an error return in ADT7462_REG_VOLT() commit 44f2f882909fedfc3a56e4b90026910456019743 upstream. This is only called from adt7462_update_device(). The caller expects it to return zero on error. I fixed a similar issue earlier in commit a4bf06d58f21 ("hwmon: (adt7462) ADT7462_REG_VOLT_MAX() should return 0") but I missed this one. Fixes: c0b4e3ab0c76 ("adt7462: new hwmon driver") Signed-off-by: Dan Carpenter Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20200303101608.kqjwfcazu2ylhi2a@kili.mountain Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/adt7462.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c index 19f2a6d48bac..bdd7679fd298 100644 --- a/drivers/hwmon/adt7462.c +++ b/drivers/hwmon/adt7462.c @@ -426,7 +426,7 @@ static int ADT7462_REG_VOLT(struct adt7462_data *data, int which) return 0x95; break; } - return -ENODEV; + return 0; } /* Provide labels for sysfs */ -- GitLab From 956b5b1c038557d2d2168865846462b03087593e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Feb 2020 17:40:50 +0300 Subject: [PATCH 0422/1278] dmaengine: coh901318: Fix a double lock bug in dma_tc_handle() commit 36d5d22090d13fd3a7a8c9663a711cbe6970aac8 upstream. The caller is already holding the lock so this will deadlock. 
Fixes: 0b58828c923e ("DMAENGINE: COH 901 318 remove irq counting") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20200217144050.3i4ymbytogod4ijn@kili.mountain Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/coh901318.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index 6d7d2d54eacf..f0932f25a9b1 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1944,8 +1944,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc) return; } - spin_lock(&cohc->lock); - /* * When we reach this point, at least one queue item * should have been moved over from cohc->queue to @@ -1966,8 +1964,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc) if (coh901318_queue_start(cohc) == NULL) cohc->busy = 0; - spin_unlock(&cohc->lock); - /* * This tasklet will remove items from cohc->active * and thus terminates them. -- GitLab From d78934442b62f345338ab4370d3d875ad1891686 Mon Sep 17 00:00:00 2001 From: "Desnes A. Nunes do Rosario" Date: Thu, 27 Feb 2020 10:47:15 -0300 Subject: [PATCH 0423/1278] powerpc: fix hardware PMU exception bug on PowerVM compatibility mode systems commit fc37a1632d40c80c067eb1bc235139f5867a2667 upstream. PowerVM systems running compatibility mode on a few Power8 revisions are still vulnerable to the hardware defect that loses PMU exceptions arriving prior to a context switch. The software fix for this issue is enabled through the CPU_FTR_PMAO_BUG cpu_feature bit, nevertheless this bit also needs to be set for PowerVM compatibility mode systems. Fixes: 68f2f0d431d9ea4 ("powerpc: Add a cpu feature CPU_FTR_PMAO_BUG") Signed-off-by: Desnes A. 
Nunes do Rosario Reviewed-by: Leonardo Bras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200227134715.9715-1-desnesn@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/cputable.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index da4b0e379238..6ef41e823013 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2232,11 +2232,13 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, * oprofile_cpu_type already has a value, then we are * possibly overriding a real PVR with a logical one, * and, in that case, keep the current value for - * oprofile_cpu_type. + * oprofile_cpu_type. Futhermore, let's ensure that the + * fix for the PMAO bug is enabled on compatibility mode. */ if (old.oprofile_cpu_type != NULL) { t->oprofile_cpu_type = old.oprofile_cpu_type; t->oprofile_type = old.oprofile_type; + t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } } -- GitLab From ec6ddb1aa82fb7fae89ce485ee874eae26de56a8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 17 Feb 2020 07:43:03 -0500 Subject: [PATCH 0424/1278] dm integrity: fix a deadlock due to offloading to an incorrect workqueue commit 53770f0ec5fd417429775ba006bc4abe14002335 upstream. If we need to perform synchronous I/O in dm_integrity_map_continue(), we must make sure that we are not in the map function - in order to avoid the deadlock due to bio queuing in generic_make_request. To avoid the deadlock, we offload the request to metadata_wq. However, metadata_wq also processes metadata updates for write requests. If there are too many requests that get offloaded to metadata_wq at the beginning of dm_integrity_map_continue, the workqueue metadata_wq becomes clogged and the system is incapable of processing any metadata updates. 
This causes a deadlock because all the requests that need to do metadata updates wait for metadata_wq to proceed and metadata_wq waits inside wait_and_add_new_range until some existing request releases its range lock (which doesn't happen because the range lock is released after metadata update). In order to fix the deadlock, we create a new workqueue offload_wq and offload requests to it - so that processing of offload_wq is independent from processing of metadata_wq. Fixes: 7eada909bfd7 ("dm: add integrity target") Cc: stable@vger.kernel.org # v4.12+ Reported-by: Heinz Mauelshagen Tested-by: Heinz Mauelshagen Signed-off-by: Heinz Mauelshagen Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 23f0f4eaaa2e..b6ca5b1100db 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -187,6 +187,7 @@ struct dm_integrity_c { struct rb_root in_progress; wait_queue_head_t endio_wait; struct workqueue_struct *wait_wq; + struct workqueue_struct *offload_wq; unsigned char commit_seq; commit_id_t commit_ids[N_COMMIT_IDS]; @@ -1157,7 +1158,7 @@ static void dec_in_flight(struct dm_integrity_io *dio) dio->range.logical_sector += dio->range.n_sectors; bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT); INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->wait_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); return; } do_endio_flush(ic, dio); @@ -1577,7 +1578,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map if (need_sync_io && from_map) { INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->metadata_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); return; } @@ -3005,6 +3006,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } + 
ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM, + METADATA_WORKQUEUE_MAX_ACTIVE); + if (!ic->offload_wq) { + ti->error = "Cannot allocate workqueue"; + r = -ENOMEM; + goto bad; + } + ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1); if (!ic->commit_wq) { ti->error = "Cannot allocate workqueue"; @@ -3189,6 +3198,8 @@ static void dm_integrity_dtr(struct dm_target *ti) destroy_workqueue(ic->metadata_wq); if (ic->wait_wq) destroy_workqueue(ic->wait_wq); + if (ic->offload_wq) + destroy_workqueue(ic->offload_wq); if (ic->commit_wq) destroy_workqueue(ic->commit_wq); if (ic->writer_wq) -- GitLab From df4bf4dceb3a9fd91dc85b3d7f6ca1a0afdf8a75 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 9 Nov 2018 17:21:18 +0200 Subject: [PATCH 0425/1278] xhci: handle port status events for removed USB3 hcd commit 1245374e9b8340fc255fd51b2015173a83050d03 upstream. At xhci removal the USB3 hcd (shared_hcd) is removed before the primary USB2 hcd. Interrupts for port status changes may still occur for USB3 ports after the shared_hcd is freed, causing NULL pointer dereference. 
Check if xhci->shared_hcd is still valid before handling USB3 port events. Cc: Reported-by: Peter Chen Tested-by: Jack Pham Signed-off-by: Mathias Nyman Cc: Macpaul Lin [redone for 4.14.y based on Mathias's comments] Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 868878f5b72b..97cf8e1fc07c 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1640,6 +1640,12 @@ static void handle_port_status(struct xhci_hcd *xhci, if ((major_revision == 0x03) != (hcd->speed >= HCD_USB3)) hcd = xhci->shared_hcd; + if (!hcd) { + xhci_dbg(xhci, "No hcd found for port %u event\n", port_id); + bogus_port_status = true; + goto cleanup; + } + if (major_revision == 0) { xhci_warn(xhci, "Event for port %u not in " "Extended Capabilities, ignoring.\n", -- GitLab From 30238068123e304f02f594726451e3e93a06ce42 Mon Sep 17 00:00:00 2001 From: Dragos Tarcatu Date: Fri, 7 Feb 2020 20:53:25 +0200 Subject: [PATCH 0426/1278] ASoC: topology: Fix memleak in soc_tplg_manifest_load() commit 242c46c023610dbc0213fc8fb6b71eb836bc5d95 upstream. In case of ABI version mismatch, _manifest needs to be freed as it is just a copy of the original topology manifest. However, if a driver manifest handler is defined, that would get executed and the cleanup is never reached. Fix that by getting the return status of manifest() instead of returning directly.
Fixes: 583958fa2e52 ("ASoC: topology: Make manifest backward compatible from ABI v4") Signed-off-by: Dragos Tarcatu Link: https://lore.kernel.org/r/20200207185325.22320-3-dragos_tarcatu@mentor.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-topology.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 0f91b4ed6814..1a912f72bddd 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -2333,7 +2333,7 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg, { struct snd_soc_tplg_manifest *manifest, *_manifest; bool abi_match; - int err; + int ret = 0; if (tplg->pass != SOC_TPLG_PASS_MANIFEST) return 0; @@ -2346,19 +2346,19 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg, _manifest = manifest; } else { abi_match = false; - err = manifest_new_ver(tplg, manifest, &_manifest); - if (err < 0) - return err; + ret = manifest_new_ver(tplg, manifest, &_manifest); + if (ret < 0) + return ret; } /* pass control to component driver for optional further init */ if (tplg->comp && tplg->ops && tplg->ops->manifest) - return tplg->ops->manifest(tplg->comp, _manifest); + ret = tplg->ops->manifest(tplg->comp, _manifest); if (!abi_match) /* free the duplicated one */ kfree(_manifest); - return 0; + return ret; } /* validate header magic, size and type */ -- GitLab From 12cd844a39ed16aa183a820a54fe6f9a0bb4cd14 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Mar 2020 18:03:09 +0100 Subject: [PATCH 0427/1278] Linux 4.14.173 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6d3cecad7f1e..9a524b5c1d55 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 172 +SUBLEVEL = 173 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 53f2446e648cc6fb8650d6c68e01fa0a2a7f015c Mon Sep 17 00:00:00 2001 From: Eric 
Biggers Date: Thu, 5 Mar 2020 00:41:38 -0800 Subject: [PATCH 0428/1278] fscrypt: don't evict dirty inodes after removing key After FS_IOC_REMOVE_ENCRYPTION_KEY removes a key, it syncs the filesystem and tries to get and put all inodes that were unlocked by the key so that unused inodes get evicted via fscrypt_drop_inode(). Normally, the inodes are all clean due to the sync. However, after the filesystem is sync'ed, userspace can modify and close one of the files. (Userspace is *supposed* to close the files before removing the key. But it doesn't always happen, and the kernel can't assume it.) This causes the inode to be dirtied and have i_count == 0. Then, fscrypt_drop_inode() failed to consider this case and indicated that the inode can be dropped, causing the write to be lost. On f2fs, other problems such as a filesystem freeze could occur due to the inode being freed while still on f2fs's dirty inode list. Fix this bug by making fscrypt_drop_inode() only drop clean inodes. I've written an xfstest which detects this bug on ext4, f2fs, and ubifs. Fixes: b1c0ec3599f4 ("fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY ioctl") Cc: # v5.4+ Link: https://lore.kernel.org/r/20200305084138.653498-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keysetup.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index f9ab21c778e1..84039a163585 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -538,6 +538,15 @@ int fscrypt_drop_inode(struct inode *inode) return 0; mk = ci->ci_master_key->payload.data[0]; + /* + * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes + * protected by the key were cleaned by sync_filesystem(). But if + * userspace is still using the files, inodes can be dirtied between + * then and now. We mustn't lose any writes, so skip dirty inodes here. 
+ */ + if (inode->i_state & I_DIRTY_ALL) + return 0; + /* * Note: since we aren't holding ->mk_secret_sem, the result here can * immediately become outdated. But there's no correctness problem with -- GitLab From 9e469e717b409592a9ca42a1203a267e56491446 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 5 Mar 2020 00:41:38 -0800 Subject: [PATCH 0429/1278] UPSTREAM: fscrypt: don't evict dirty inodes after removing key After FS_IOC_REMOVE_ENCRYPTION_KEY removes a key, it syncs the filesystem and tries to get and put all inodes that were unlocked by the key so that unused inodes get evicted via fscrypt_drop_inode(). Normally, the inodes are all clean due to the sync. However, after the filesystem is sync'ed, userspace can modify and close one of the files. (Userspace is *supposed* to close the files before removing the key. But it doesn't always happen, and the kernel can't assume it.) This causes the inode to be dirtied and have i_count == 0. Then, fscrypt_drop_inode() failed to consider this case and indicated that the inode can be dropped, causing the write to be lost. On f2fs, other problems such as a filesystem freeze could occur due to the inode being freed while still on f2fs's dirty inode list. Fix this bug by making fscrypt_drop_inode() only drop clean inodes. I've written an xfstest which detects this bug on ext4, f2fs, and ubifs. 
Fixes: b1c0ec3599f4 ("fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY ioctl") Cc: # v5.4+ Link: https://lore.kernel.org/r/20200305084138.653498-1-ebiggers@kernel.org Signed-off-by: Eric Biggers (cherry picked from commit 2b4eae95c7361e0a147b838715c8baa1380a428f) Bug: 150589360 Test: kvm-xfstests -c ext4,f2fs -g encrypt Change-Id: Ia32db980c2fffb68caeaf9f38e5cfbe781b45011 Signed-off-by: Eric Biggers --- fs/crypto/keysetup.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index c289f4e32186..f4c6d8cb4587 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -607,6 +607,15 @@ int fscrypt_drop_inode(struct inode *inode) return 0; mk = ci->ci_master_key->payload.data[0]; + /* + * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes + * protected by the key were cleaned by sync_filesystem(). But if + * userspace is still using the files, inodes can be dirtied between + * then and now. We mustn't lose any writes, so skip dirty inodes here. + */ + if (inode->i_state & I_DIRTY_ALL) + return 0; + /* * Note: since we aren't holding ->mk_secret_sem, the result here can * immediately become outdated. 
But there's no correctness problem with -- GitLab From 1540e7955b923e0dddf33343031b951c922acd9d Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Wed, 4 Mar 2020 12:03:23 -0800 Subject: [PATCH 0430/1278] ANDROID: serdev: restrict claim of platform devices Make the fallback path for claiming platform devices trigger only if a new module parameter is specified: serdev_ttyport.pdev_tty_port=ttyS2 Bug: 146517987 Change-Id: Ibf331ad6e6d8712a405921530f217f7122428b13 Signed-off-by: Alistair Delva --- drivers/tty/serdev/core.c | 15 +++++++++------ drivers/tty/serdev/serdev-ttyport.c | 26 +++++++++++++++++++++++++- include/linux/serdev.h | 14 +++++++++++++- 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index f513107b9ea0..e6c9ff65402a 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -452,16 +452,18 @@ static int platform_serdev_register_devices(struct serdev_controller *ctrl) return err; } + /** - * serdev_controller_add() - Add an serdev controller + * serdev_controller_add_platform() - Add an serdev controller * @ctrl: controller to be registered. + * @platform: whether to permit fallthrough to platform device probe * * Register a controller previously allocated via serdev_controller_alloc() with - * the serdev core. + * the serdev core. Optionally permit probing via a platform device fallback. 
*/ -int serdev_controller_add(struct serdev_controller *ctrl) +int serdev_controller_add_platform(struct serdev_controller *ctrl, bool platform) { - int ret_of, ret_platform, ret; + int ret, ret_of, ret_platform = -ENODEV; /* Can't register until after driver model init */ if (WARN_ON(!is_registered)) @@ -471,8 +473,9 @@ int serdev_controller_add(struct serdev_controller *ctrl) if (ret) return ret; - ret_platform = platform_serdev_register_devices(ctrl); ret_of = of_serdev_register_devices(ctrl); + if (platform) + ret_platform = platform_serdev_register_devices(ctrl); if (ret_of && ret_platform) { dev_dbg(&ctrl->dev, "no devices registered: of:%d " "platform:%d\n", @@ -489,7 +492,7 @@ int serdev_controller_add(struct serdev_controller *ctrl) device_del(&ctrl->dev); return ret; }; -EXPORT_SYMBOL_GPL(serdev_controller_add); +EXPORT_SYMBOL_GPL(serdev_controller_add_platform); /* Remove a device associated with a controller */ static int serdev_remove_device(struct device *dev, void *data) diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index 88cf520da739..0669e18ff879 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ -15,9 +15,15 @@ #include #include #include +#include +#include #define SERPORT_ACTIVE 1 +static char *pdev_tty_port; +module_param(pdev_tty_port, charp, 0644); +MODULE_PARM_DESC(pdev_tty_port, "platform device tty port to claim"); + struct serport { struct tty_port *port; struct tty_struct *tty; @@ -240,6 +246,7 @@ struct device *serdev_tty_port_register(struct tty_port *port, { struct serdev_controller *ctrl; struct serport *serport; + bool platform = false; int ret; if (!port || !drv || !parent) @@ -259,7 +266,24 @@ struct device *serdev_tty_port_register(struct tty_port *port, port->client_ops = &client_ops; port->client_data = ctrl; - ret = serdev_controller_add(ctrl); + /* There is not always a way to bind specific platform devices because + * they may be defined on 
platforms without DT or ACPI. When dealing + * with a platform devices, do not allow direct binding unless it is + * whitelisted by module parameter. If a platform device is otherwise + * described by DT or ACPI it will still be bound and this check will + * be ignored. + */ + if (parent->bus == &platform_bus_type) { + char tty_port_name[7]; + + sprintf(tty_port_name, "%s%d", drv->name, idx); + if (pdev_tty_port && + !strcmp(pdev_tty_port, tty_port_name)) { + platform = true; + } + } + + ret = serdev_controller_add_platform(ctrl, platform); if (ret) goto err_reset_data; diff --git a/include/linux/serdev.h b/include/linux/serdev.h index d609e6dc5bad..49f6e382c94e 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -164,9 +164,21 @@ int serdev_device_add(struct serdev_device *); void serdev_device_remove(struct serdev_device *); struct serdev_controller *serdev_controller_alloc(struct device *, size_t); -int serdev_controller_add(struct serdev_controller *); +int serdev_controller_add_platform(struct serdev_controller *, bool); void serdev_controller_remove(struct serdev_controller *); +/** + * serdev_controller_add() - Add an serdev controller + * @ctrl: controller to be registered. + * + * Register a controller previously allocated via serdev_controller_alloc() with + * the serdev core. + */ +static inline int serdev_controller_add(struct serdev_controller *ctrl) +{ + return serdev_controller_add_platform(ctrl, false); +} + static inline void serdev_controller_write_wakeup(struct serdev_controller *ctrl) { struct serdev_device *serdev = ctrl->serdev; -- GitLab From 5198da7465585d01d68785d8a43bd8b57edb91fd Mon Sep 17 00:00:00 2001 From: "A. Cody Schuffelen" Date: Tue, 11 Feb 2020 18:01:10 -0800 Subject: [PATCH 0431/1278] ANDROID: Add TPM support and the vTPM proxy to Cuttlefish. This module allows presenting the kernel TPM interface while proxying the TPM commands into a file descriptor. 
The module was originally implemented to support running a TPM simulator on the same host system and exposing a kernel TPM interface to a Linux container, but it is also a convenient incremental step while we figure out our long-term strategy with crosvm, which does not have TPM support following the same standards as qemu. Implicitly enables SECURITYFS via 'selects' from these new drivers. Bug: 148102533 Test: Build and run locally with cuttlefish, check for /dev/vtpmx Change-Id: I568a50c2ecb7899aae70e7a20efaedc84443511d Signed-off-by: A. Cody Schuffelen --- arch/arm64/configs/cuttlefish_defconfig | 2 ++ arch/x86/configs/x86_64_cuttlefish_defconfig | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index bd0df26e4416..83f6f54d009a 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -306,6 +306,8 @@ CONFIG_SERIAL_DEV_BUS=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_HW_RANDOM_CAVIUM is not set +CONFIG_TCG_TPM=y +CONFIG_TCG_VTPM_PROXY=y # CONFIG_DEVPORT is not set # CONFIG_I2C_COMPAT is not set # CONFIG_I2C_HELPER_AUTO is not set diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index ad10c87900ca..9d2d0f051085 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -327,6 +327,8 @@ CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y CONFIG_HPET=y # CONFIG_HPET_MMAP_DEFAULT is not set +CONFIG_TCG_TPM=y +CONFIG_TCG_VTPM_PROXY=y # CONFIG_DEVPORT is not set # CONFIG_ACPI_I2C_OPREGION is not set # CONFIG_I2C_COMPAT is not set -- GitLab From 3419228ff5f9a17b7c7342221e92fd8d671d99d2 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 19 Feb 2020 12:31:56 +0000 Subject: [PATCH 0432/1278] UPSTREAM: mm: Avoid creating virtual address aliases in brk()/mmap()/mremap() (Upstream commit dcde237319e626d1ec3c9d8b7613032f0fd4663a.) 
Currently the arm64 kernel ignores the top address byte passed to brk(), mmap() and mremap(). When the user is not aware of the 56-bit address limit or relies on the kernel to return an error, untagging such pointers has the potential to create address aliases in user-space. Passing a tagged address to munmap(), madvise() is permitted since the tagged pointer is expected to be inside an existing mapping. The current behaviour breaks the existing glibc malloc() implementation which relies on brk() with an address beyond 56-bit to be rejected by the kernel. Remove untagging in the above functions by partially reverting commit ce18d171cb73 ("mm: untag user pointers in mmap/munmap/mremap/brk"). In addition, update the arm64 tagged-address-abi.rst document accordingly. Link: https://bugzilla.redhat.com/1797052 Fixes: ce18d171cb73 ("mm: untag user pointers in mmap/munmap/mremap/brk") Cc: # 5.4.x- Cc: Florian Weimer Reviewed-by: Andrew Morton Reported-by: Victor Stinner Acked-by: Will Deacon Acked-by: Andrey Konovalov Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Bug: 135692346 Change-Id: Iadeceb2d5d5fb576ab1bb5ae1a67f4971bbbf88e Signed-off-by: Andrey Konovalov --- Documentation/arm64/tagged-address-abi.rst | 11 +++++++++-- mm/mmap.c | 4 ---- mm/mremap.c | 1 - 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst index d4a85d535bf9..4a9d9c794ee5 100644 --- a/Documentation/arm64/tagged-address-abi.rst +++ b/Documentation/arm64/tagged-address-abi.rst @@ -44,8 +44,15 @@ The AArch64 Tagged Address ABI has two stages of relaxation depending how the user addresses are used by the kernel: 1. User addresses not accessed by the kernel but used for address space - management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use - of valid tagged pointers in this context is always allowed. + management (e.g. ``mprotect()``, ``madvise()``). 
The use of valid + tagged pointers in this context is allowed with the exception of + ``brk()``, ``mmap()`` and the ``new_address`` argument to + ``mremap()`` as these have the potential to alias with existing + user addresses. + + NOTE: This behaviour changed in v5.6 and so some earlier kernels may + incorrectly accept valid tagged pointers for the ``brk()``, + ``mmap()`` and ``mremap()`` system calls. 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI relaxation is disabled by default and the application thread needs to diff --git a/mm/mmap.c b/mm/mmap.c index e19a8fa081f6..6afba0872180 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -184,8 +184,6 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) bool populate; LIST_HEAD(uf); - brk = untagged_addr(brk); - if (down_write_killable(&mm->mmap_sem)) return -EINTR; @@ -1522,8 +1520,6 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, struct file *file = NULL; unsigned long retval; - addr = untagged_addr(addr); - if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); file = fget(fd); diff --git a/mm/mremap.c b/mm/mremap.c index 6cd85324de7f..e9990c1afd60 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -530,7 +530,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, LIST_HEAD(uf_unmap); addr = untagged_addr(addr); - new_addr = untagged_addr(new_addr); if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) return ret; -- GitLab From df4f18795afa02f8f55720a191e9e156f9655d32 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 19 Feb 2020 10:19:13 +0000 Subject: [PATCH 0433/1278] UPSTREAM: arm64: memory: Add missing brackets to untagged_addr() macro (Upstream commit d0022c0ef29b78bcbe8a5c5894bd2307143afce1.) Add brackets around the evaluation of the 'addr' parameter to the untagged_addr() macro so that the cast to 'u64' applies to the result of the expression. 
Cc: Fixes: 597399d0cb91 ("arm64: tags: Preserve tags for addresses translated via TTBR1") Reported-by: Linus Torvalds Signed-off-by: Will Deacon Bug: 135692346 Change-Id: I1bce8f6a185258a365aaa292483fabc02519301f Signed-off-by: Andrey Konovalov --- arch/arm64/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 8f7f30fec6be..d4f49599c5db 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -219,7 +219,7 @@ static inline unsigned long kaslr_offset(void) ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55)) #define untagged_addr(addr) ({ \ - u64 __addr = (__force u64)addr; \ + u64 __addr = (__force u64)(addr); \ __addr &= __untagged_addr(__addr); \ (__force __typeof__(addr))__addr; \ }) -- GitLab From 67ea9300c746ed5a29c19ff30dc51aa9b849e517 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Fri, 24 Jan 2020 15:07:27 -0800 Subject: [PATCH 0434/1278] UPSTREAM: cgroup: Iterate tasks that did not finish do_exit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PF_EXITING is set earlier than actual removal from css_set when a task is exiting. This can confuse cgroup.procs readers who see no PF_EXITING tasks, however, rmdir is checking against css_set membership so it can transitionally fail with EBUSY. Fix this by listing tasks that weren't unlinked from css_set active lists. It may happen that other users of the task iterator (without CSS_TASK_ITER_PROCS) spot a PF_EXITING task before cgroup_exit(). This is equal to the state before commit c03cd7738a83 ("cgroup: Include dying leaders with live threads in PROCS iterations") but it may be reviewed later.
Reported-by: Suren Baghdasaryan Fixes: c03cd7738a83 ("cgroup: Include dying leaders with live threads in PROCS iterations") Signed-off-by: Michal Koutný (cherry picked from commit 9c974c77246460fa6a92c18554c3311c8c83c160) Bug: 141213848 Bug: 146758430 Test: test_cgcore_destroy from linux-kselftest Signed-off-by: Suren Baghdasaryan Change-Id: Iac57661b931129ed1e44b89675f8115bb89084ff (cherry picked from commit 21ee296526c70d6dc3c64639406f156f39b80fd0) --- include/linux/cgroup.h | 1 + kernel/cgroup/cgroup.c | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index de0f5fe28490..a22949de5b40 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -61,6 +61,7 @@ struct css_task_iter { struct list_head *mg_tasks_head; struct list_head *dying_tasks_head; + struct list_head *cur_tasks_head; struct css_set *cur_cset; struct css_set *cur_dcset; struct task_struct *cur_task; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index ea287f53c3a3..3ef1680d9c1c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4141,12 +4141,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) } } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks)); - if (!list_empty(&cset->tasks)) + if (!list_empty(&cset->tasks)) { it->task_pos = cset->tasks.next; - else if (!list_empty(&cset->mg_tasks)) + it->cur_tasks_head = &cset->tasks; + } else if (!list_empty(&cset->mg_tasks)) { it->task_pos = cset->mg_tasks.next; - else + it->cur_tasks_head = &cset->mg_tasks; + } else { it->task_pos = cset->dying_tasks.next; + it->cur_tasks_head = &cset->dying_tasks; + } it->tasks_head = &cset->tasks; it->mg_tasks_head = &cset->mg_tasks; @@ -4204,10 +4208,14 @@ static void css_task_iter_advance(struct css_task_iter *it) else it->task_pos = it->task_pos->next; - if (it->task_pos == it->tasks_head) + if (it->task_pos == it->tasks_head) { it->task_pos = 
it->mg_tasks_head->next; - if (it->task_pos == it->mg_tasks_head) + it->cur_tasks_head = it->mg_tasks_head; + } + if (it->task_pos == it->mg_tasks_head) { it->task_pos = it->dying_tasks_head->next; + it->cur_tasks_head = it->dying_tasks_head; + } if (it->task_pos == it->dying_tasks_head) css_task_iter_advance_css_set(it); } else { @@ -4226,11 +4234,12 @@ static void css_task_iter_advance(struct css_task_iter *it) goto repeat; /* and dying leaders w/o live member threads */ - if (!atomic_read(&task->signal->live)) + if (it->cur_tasks_head == it->dying_tasks_head && + !atomic_read(&task->signal->live)) goto repeat; } else { /* skip all dying ones */ - if (task->flags & PF_EXITING) + if (it->cur_tasks_head == it->dying_tasks_head) goto repeat; } } -- GitLab From 89e090543660aef3da90faa341be0932a66ac4a4 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Mon, 16 Mar 2020 13:47:17 -0700 Subject: [PATCH 0435/1278] ANDROID: Incremental fs: Remove all access_ok checks They provide no value and simply duplicate a check in copy_from/to_user Test: incfs_test passes Bug: 138149732 Signed-off-by: Paul Lawrence Change-Id: Icc6054a2d6a495c9a03cd1507dda1ab8ca0b0dc4 --- fs/incfs/vfs.c | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 22edaeeaa613..46e2617e4c25 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -465,9 +465,6 @@ static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, ssize_t result = 0; int i = 0; - if (!access_ok(VERIFY_WRITE, buf, len)) - return -EFAULT; - if (!incfs_fresh_pending_reads_exist(mi, last_known_read_sn)) return 0; @@ -859,9 +856,6 @@ static struct signature_info *incfs_copy_signature_info_from_user( if (!original) return NULL; - if (!access_ok(VERIFY_READ, original, sizeof(usr_si))) - return ERR_PTR(-EFAULT); - if (copy_from_user(&usr_si, original, sizeof(usr_si)) > 0) return ERR_PTR(-EFAULT); @@ -1191,10 +1185,7 @@ static long 
ioctl_create_file(struct mount_info *mi, error = -EFAULT; goto out; } - if (!access_ok(VERIFY_READ, usr_args, sizeof(args))) { - error = -EFAULT; - goto out; - } + if (copy_from_user(&args, usr_args, sizeof(args)) > 0) { error = -EFAULT; goto out; @@ -1320,12 +1311,6 @@ static long ioctl_create_file(struct mount_info *mi, goto delete_index_file; } - if (!access_ok(VERIFY_READ, u64_to_user_ptr(args.file_attr), - args.file_attr_len)) { - error = -EFAULT; - goto delete_index_file; - } - if (copy_from_user(attr_value, u64_to_user_ptr(args.file_attr), args.file_attr_len) > 0) { @@ -1387,15 +1372,9 @@ static long ioctl_read_file_signature(struct file *f, void __user *arg) if (!df) return -EINVAL; - if (!access_ok(VERIFY_READ, args_usr_ptr, sizeof(args))) - return -EFAULT; if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) return -EINVAL; - if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(args.file_signature), - args.file_signature_buf_size)) - return -EFAULT; - sig_buf_size = args.file_signature_buf_size; if (sig_buf_size > INCFS_MAX_SIGNATURE_SIZE) return -E2BIG; @@ -1917,9 +1896,6 @@ static ssize_t file_write(struct file *f, const char __user *buf, if (!df) return -EBADF; - if (!access_ok(VERIFY_READ, usr_blocks, size)) - return -EFAULT; - data_buf = (u8 *)__get_free_pages(GFP_NOFS, get_order(data_buf_size)); if (!data_buf) return -ENOMEM; @@ -1936,11 +1912,7 @@ static ssize_t file_write(struct file *f, const char __user *buf, error = -E2BIG; break; } - if (!access_ok(VERIFY_READ, u64_to_user_ptr(block.data), - block.data_len)) { - error = -EFAULT; - break; - } + if (copy_from_user(data_buf, u64_to_user_ptr(block.data), block.data_len) > 0) { error = -EFAULT; -- GitLab From dd3909c4a7fc7288f24af4bcb251836d135608cc Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 10 Mar 2020 13:03:38 -0700 Subject: [PATCH 0436/1278] ANDROID: Incremental fs: Make fill block an ioctl Filling blocks is not equivalent to writing a file, since they are constrained by the root 
hash. selinux policy may wish to treat them differently, for instance. Test: incfs_test passes Bug: 138149732 Signed-off-by: Paul Lawrence Change-Id: Ic369b84b92547b1cfefe422bd881c4e466090aed --- fs/incfs/data_mgmt.c | 4 +- fs/incfs/data_mgmt.h | 5 +- fs/incfs/vfs.c | 138 +++++++++--------- include/uapi/linux/incrementalfs.h | 28 +++- .../selftests/filesystems/incfs/incfs_test.c | 56 +++---- 5 files changed, 118 insertions(+), 113 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 90bf9e37d236..eb4e32040f4e 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -699,7 +699,7 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df, } int incfs_process_new_data_block(struct data_file *df, - struct incfs_new_data_block *block, u8 *data) + struct incfs_fill_block *block, u8 *data) { struct mount_info *mi = NULL; struct backing_file_context *bfc = NULL; @@ -781,7 +781,7 @@ int incfs_read_file_signature(struct data_file *df, struct mem_range dst) } int incfs_process_new_hash_block(struct data_file *df, - struct incfs_new_data_block *block, u8 *data) + struct incfs_fill_block *block, u8 *data) { struct backing_file_context *bfc = NULL; struct mount_info *mi = NULL; diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 8b62b0348d51..5ce1966b4fc5 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -262,11 +262,10 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df, int incfs_read_file_signature(struct data_file *df, struct mem_range dst); int incfs_process_new_data_block(struct data_file *df, - struct incfs_new_data_block *block, u8 *data); + struct incfs_fill_block *block, u8 *data); int incfs_process_new_hash_block(struct data_file *df, - struct incfs_new_data_block *block, u8 *data); - + struct incfs_fill_block *block, u8 *data); bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number); diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 
46e2617e4c25..04f292e3377b 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -52,8 +52,6 @@ static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, static int file_open(struct inode *inode, struct file *file); static int file_release(struct inode *inode, struct file *file); -static ssize_t file_write(struct file *f, const char __user *buf, - size_t size, loff_t *offset); static int read_single_page(struct file *f, struct page *page); static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg); @@ -129,7 +127,6 @@ static const struct address_space_operations incfs_address_space_ops = { static const struct file_operations incfs_file_ops = { .open = file_open, .release = file_release, - .write = file_write, .read_iter = generic_file_read_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, @@ -797,9 +794,6 @@ static int read_single_page(struct file *f, struct page *page) size = df->df_size; timeout_ms = df->df_mount_info->mi_options.read_timeout_ms; - pr_debug("incfs: %s %s %lld\n", __func__, - f->f_path.dentry->d_name.name, offset); - if (offset < size) { struct mem_range tmp = { .len = 2 * INCFS_DATA_FILE_BLOCK_SIZE @@ -1359,6 +1353,72 @@ static long ioctl_create_file(struct mount_info *mi, return error; } +static long ioctl_fill_blocks(struct file *f, void __user *arg) +{ + struct incfs_fill_blocks __user *usr_fill_blocks = arg; + struct incfs_fill_blocks fill_blocks; + struct incfs_fill_block *usr_fill_block_array; + struct data_file *df = get_incfs_data_file(f); + const ssize_t data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE; + u8 *data_buf = NULL; + ssize_t error = 0; + int i = 0; + + if (!df) + return -EBADF; + + if (copy_from_user(&fill_blocks, usr_fill_blocks, sizeof(fill_blocks))) + return -EFAULT; + + usr_fill_block_array = u64_to_user_ptr(fill_blocks.fill_blocks); + data_buf = (u8 *)__get_free_pages(GFP_NOFS, get_order(data_buf_size)); + if (!data_buf) + return -ENOMEM; + + for (i = 0; i < 
fill_blocks.count; i++) { + struct incfs_fill_block fill_block = {}; + + if (copy_from_user(&fill_block, &usr_fill_block_array[i], + sizeof(fill_block)) > 0) { + error = -EFAULT; + break; + } + + if (fill_block.data_len > data_buf_size) { + error = -E2BIG; + break; + } + + if (copy_from_user(data_buf, u64_to_user_ptr(fill_block.data), + fill_block.data_len) > 0) { + error = -EFAULT; + break; + } + fill_block.data = 0; /* To make sure nobody uses it. */ + if (fill_block.flags & INCFS_BLOCK_FLAGS_HASH) { + error = incfs_process_new_hash_block(df, &fill_block, + data_buf); + } else { + error = incfs_process_new_data_block(df, &fill_block, + data_buf); + } + if (error) + break; + } + + if (data_buf) + free_pages((unsigned long)data_buf, get_order(data_buf_size)); + + /* + * Only report the error if no records were processed, otherwise + * just return how many were processed successfully. + */ + if (i == 0) + return error; + + return i; +} + static long ioctl_read_file_signature(struct file *f, void __user *arg) { struct incfs_get_file_sig_args __user *args_usr_ptr = arg; @@ -1414,6 +1474,8 @@ static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) switch (req) { case INCFS_IOC_CREATE_FILE: return ioctl_create_file(mi, (void __user *)arg); + case INCFS_IOC_FILL_BLOCKS: + return ioctl_fill_blocks(f, (void __user *)arg); case INCFS_IOC_READ_FILE_SIGNATURE: return ioctl_read_file_signature(f, (void __user *)arg); default: @@ -1881,70 +1943,6 @@ static int file_release(struct inode *inode, struct file *file) return 0; } -static ssize_t file_write(struct file *f, const char __user *buf, - size_t size, loff_t *offset) -{ - struct data_file *df = get_incfs_data_file(f); - const ssize_t data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE; - size_t block_count = size / sizeof(struct incfs_new_data_block); - struct incfs_new_data_block __user *usr_blocks = - (struct incfs_new_data_block __user *)buf; - u8 *data_buf = NULL; - ssize_t error = 0; - int i = 0; - - 
if (!df) - return -EBADF; - - data_buf = (u8 *)__get_free_pages(GFP_NOFS, get_order(data_buf_size)); - if (!data_buf) - return -ENOMEM; - - for (i = 0; i < block_count; i++) { - struct incfs_new_data_block block = {}; - - if (copy_from_user(&block, &usr_blocks[i], sizeof(block)) > 0) { - error = -EFAULT; - break; - } - - if (block.data_len > data_buf_size) { - error = -E2BIG; - break; - } - - if (copy_from_user(data_buf, u64_to_user_ptr(block.data), - block.data_len) > 0) { - error = -EFAULT; - break; - } - block.data = 0; /* To make sure nobody uses it. */ - if (block.flags & INCFS_BLOCK_FLAGS_HASH) { - error = incfs_process_new_hash_block(df, &block, - data_buf); - } else { - error = incfs_process_new_data_block(df, &block, - data_buf); - } - if (error) - break; - } - - if (data_buf) - free_pages((unsigned long)data_buf, get_order(data_buf_size)); - *offset = 0; - - /* - * Only report the error if no records were processed, otherwise - * just return how many were processed successfully. 
- */ - if (i == 0) - return error; - - return i * sizeof(struct incfs_new_data_block); -} - - static int dentry_revalidate(struct dentry *d, unsigned int flags) { struct path backing_path = {}; diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 787049031cca..81947ded482e 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -46,7 +46,15 @@ /* Read file signature */ #define INCFS_IOC_READ_FILE_SIGNATURE \ - _IOWR(INCFS_IOCTL_BASE_CODE, 31, struct incfs_get_file_sig_args) + _IOR(INCFS_IOCTL_BASE_CODE, 31, struct incfs_get_file_sig_args) + +/* + * Fill in one or more data block + * + * Returns number of blocks filled in, or error if none were + */ +#define INCFS_IOC_FILL_BLOCKS \ + _IOR(INCFS_IOCTL_BASE_CODE, 32, struct incfs_fill_blocks) enum incfs_compression_alg { COMPRESSION_NONE = 0, @@ -81,10 +89,9 @@ struct incfs_pending_read_info { }; /* - * A struct to be written into a control file to load a data or hash - * block to a data file. + * Description of a data or hash block to add to a data file. */ -struct incfs_new_data_block { +struct incfs_fill_block { /* Index of a data block. 
*/ __u32 block_index; @@ -117,6 +124,19 @@ struct incfs_new_data_block { __aligned_u64 reserved3; }; +/* + * Description of a number of blocks to add to a data file + * + * Argument for INCFS_IOC_FILL_BLOCKS + */ +struct incfs_fill_blocks { + /* Number of blocks */ + __u64 count; + + /* A pointer to an array of incfs_fill_block structs */ + __aligned_u64 fill_blocks; +}; + enum incfs_hash_tree_algorithm { INCFS_HASH_TREE_NONE = 0, INCFS_HASH_TREE_SHA256 = 1 diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index dd70e019dc4c..5d0012b3972b 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -343,8 +343,12 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, uint8_t *data_buf = malloc(data_buf_size); uint8_t *current_data = data_buf; uint8_t *data_end = data_buf + data_buf_size; - struct incfs_new_data_block *block_buf = - calloc(block_count, sizeof(*block_buf)); + struct incfs_fill_block *block_buf = + calloc(block_count, sizeof(struct incfs_fill_block)); + struct incfs_fill_blocks fill_blocks = { + .count = block_count, + .fill_blocks = ptr_to_u64(block_buf), + }; ssize_t write_res = 0; int fd; int error = 0; @@ -404,17 +408,15 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, block_buf[i].block_index = block_index; block_buf[i].data_len = block_size; block_buf[i].data = ptr_to_u64(current_data); - block_buf[i].compression = - compress ? 
COMPRESSION_LZ4 : COMPRESSION_NONE; current_data += block_size; } if (!error) { - write_res = write(fd, block_buf, sizeof(*block_buf) * i); + write_res = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); if (write_res < 0) error = -errno; else - blocks_written = write_res / sizeof(*block_buf); + blocks_written = write_res; } if (error) { ksft_print_msg( @@ -813,21 +815,22 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) int err; int i; int fd; - - size_t blocks_size = - file->mtree_block_count * sizeof(struct incfs_new_data_block); - struct incfs_new_data_block *blocks = NULL; char *file_path; + struct incfs_fill_blocks fill_blocks = { + .count = file->mtree_block_count, + }; + struct incfs_fill_block *fill_block_array = + calloc(fill_blocks.count, sizeof(struct incfs_fill_block)); - if (blocks_size == 0) + if (fill_blocks.count == 0) return 0; - blocks = malloc(blocks_size); - if (!blocks) + if (!fill_block_array) return -ENOMEM; + fill_blocks.fill_blocks = ptr_to_u64(fill_block_array); - for (i = 0; i < file->mtree_block_count; i++) { - blocks[i] = (struct incfs_new_data_block){ + for (i = 0; i < fill_blocks.count; i++) { + fill_block_array[i] = (struct incfs_fill_block){ .block_index = i, .data_len = INCFS_DATA_FILE_BLOCK_SIZE, .data = ptr_to_u64(file->mtree[i].data), @@ -843,10 +846,10 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) goto failure; } - err = write(fd, blocks, blocks_size); + err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); close(fd); - if (err < blocks_size) + if (err < fill_blocks.count) err = errno; else { err = 0; @@ -854,7 +857,7 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) } failure: - free(blocks); + free(fill_block_array); return err; } @@ -1274,13 +1277,6 @@ static int dynamic_files_and_data_test(char *mount_dir) if (i == missing_file_idx) continue; - res = load_hash_tree(mount_dir, file); - if (res) { - ksft_print_msg("Can't load hashes for %s. 
error: %s\n", - file->name, strerror(-res)); - goto failure; - } - res = emit_test_file_data(mount_dir, file); if (res) { ksft_print_msg("Error %s emiting data for %s.\n", @@ -1479,7 +1475,6 @@ static int work_after_remount_test(char *mount_dir) /* Write first half of the data into the command file. (stage 1) */ for (i = 0; i < file_num_stage1; i++) { struct test_file *file = &test.files[i]; - int res; build_mtree(file); if (emit_file(cmd_fd, NULL, file->name, &file->id, @@ -1488,14 +1483,7 @@ static int work_after_remount_test(char *mount_dir) if (emit_test_file_data(mount_dir, file)) goto failure; - - res = load_hash_tree(mount_dir, file); - if (res) { - ksft_print_msg("Can't load hashes for %s. error: %s\n", - file->name, strerror(-res)); - goto failure; - } -} + } /* Unmount and mount again, to see that data is persistent. */ close(cmd_fd); -- GitLab From 8118f34d9664148a08f77f40a47b39f819e941ee Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 19 Feb 2020 10:07:25 -0800 Subject: [PATCH 0437/1278] ANDROID: Incremental fs: Pad hash blocks Test: incfs_test passes Bug: 133435829 Signed-off-by: Paul Lawrence Change-Id: I4e6fbd0938f00e7e6883ce1a26cbfd38fdcaa9a5 --- fs/incfs/integrity.c | 14 ++++++++++++++ .../selftests/filesystems/incfs/incfs_test.c | 9 ++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/fs/incfs/integrity.c b/fs/incfs/integrity.c index feb212c38945..1d00dda109e3 100644 --- a/fs/incfs/integrity.c +++ b/fs/incfs/integrity.c @@ -198,6 +198,20 @@ int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, return -EINVAL; desc->tfm = alg->shash; + + if (data.len < INCFS_DATA_FILE_BLOCK_SIZE) { + int err; + void *buf = kzalloc(INCFS_DATA_FILE_BLOCK_SIZE, GFP_NOFS); + + if (!buf) + return -ENOMEM; + + memcpy(buf, data.data, data.len); + err = crypto_shash_digest(desc, buf, INCFS_DATA_FILE_BLOCK_SIZE, + digest.data); + kfree(buf); + return err; + } return crypto_shash_digest(desc, data.data, data.len, digest.data); } 
diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 5d0012b3972b..22f6c7fca7de 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -750,8 +750,9 @@ static int build_mtree(struct test_file *file) if (block_count == 1) { int seed = get_file_block_seed(file->index, 0); + memset(data, 0, INCFS_DATA_FILE_BLOCK_SIZE); rnd_buf((uint8_t *)data, file->size, seed); - sha256(data, file->size, file->root_hash); + sha256(data, INCFS_DATA_FILE_BLOCK_SIZE, file->root_hash); return 0; } @@ -766,11 +767,13 @@ static int build_mtree(struct test_file *file) int seed = get_file_block_seed(file->index, i); char *hash_ptr = file->mtree[block_index].data + block_off; - if (file->size - offset < block_size) + if (file->size - offset < block_size) { block_size = file->size - offset; + memset(data, 0, INCFS_DATA_FILE_BLOCK_SIZE); + } rnd_buf((uint8_t *)data, block_size, seed); - sha256(data, block_size, hash_ptr); + sha256(data, INCFS_DATA_FILE_BLOCK_SIZE, hash_ptr); } /* Build higher levels of hash tree. 
*/ -- GitLab From 758073bec37716c23f4711fbed1bd4b9a21cbb96 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Fri, 13 Mar 2020 12:38:35 -0700 Subject: [PATCH 0438/1278] ANDROID: Incremental fs: Remove signature checks from kernel Test: selftests pass Bug: 133435829 Signed-off-by: Paul Lawrence Change-Id: Ia7e69b1b0176202da4b418ea815b370cbdacd5c2 --- fs/incfs/data_mgmt.c | 154 +++------- fs/incfs/data_mgmt.h | 5 +- fs/incfs/format.c | 22 +- fs/incfs/format.h | 45 +-- fs/incfs/integrity.c | 169 ++++++----- fs/incfs/integrity.h | 20 +- fs/incfs/vfs.c | 155 +++------- include/uapi/linux/incrementalfs.h | 69 ++--- .../selftests/filesystems/incfs/incfs_test.c | 275 +----------------- .../selftests/filesystems/incfs/utils.c | 229 +++++---------- .../selftests/filesystems/incfs/utils.h | 34 +-- 11 files changed, 305 insertions(+), 872 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index eb4e32040f4e..afdb3dfd3355 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -251,7 +251,7 @@ static int validate_hash_tree(struct file *bf, struct data_file *df, { u8 digest[INCFS_MAX_HASH_SIZE] = {}; struct mtree *tree = NULL; - struct ondisk_signature *sig = NULL; + struct incfs_df_signature *sig = NULL; struct mem_range calc_digest_rng; struct mem_range saved_digest_rng; struct mem_range root_hash_rng; @@ -274,8 +274,8 @@ static int validate_hash_tree(struct file *bf, struct data_file *df, return res; for (lvl = 0; lvl < tree->depth; lvl++) { - loff_t lvl_off = tree->hash_level_suboffset[lvl] + - sig->mtree_offset; + loff_t lvl_off = + tree->hash_level_suboffset[lvl] + sig->hash_offset; loff_t hash_block_off = lvl_off + round_down(hash_block_index * digest_size, INCFS_DATA_FILE_BLOCK_SIZE); @@ -323,72 +323,6 @@ static int validate_hash_tree(struct file *bf, struct data_file *df, return 0; } -static int revalidate_signature(struct file *bf, struct data_file *df) -{ - struct ondisk_signature *sig = df->df_signature; - struct mem_range root_hash = {}; - 
int result = 0; - u8 *sig_buf = NULL; - u8 *add_data_buf = NULL; - ssize_t read_res; - - /* File has no signature. */ - if (!sig || !df->df_hash_tree || sig->sig_size == 0) - return 0; - - /* Signature has already been validated. */ - if (df->df_signature_validated) - return 0; - - add_data_buf = kzalloc(sig->add_data_size, GFP_NOFS); - if (!add_data_buf) { - result = -ENOMEM; - goto out; - } - - read_res = incfs_kread(bf, add_data_buf, sig->add_data_size, - sig->add_data_offset); - if (read_res < 0) { - result = read_res; - goto out; - } - if (read_res != sig->add_data_size) { - result = -EIO; - goto out; - } - - sig_buf = kzalloc(sig->sig_size, GFP_NOFS); - if (!sig_buf) { - result = -ENOMEM; - goto out; - } - - read_res = incfs_kread(bf, sig_buf, sig->sig_size, sig->sig_offset); - if (read_res < 0) { - result = read_res; - goto out; - } - if (read_res != sig->sig_size) { - result = -EIO; - goto out; - } - - root_hash = range(df->df_hash_tree->root_hash, - df->df_hash_tree->alg->digest_size); - - result = incfs_validate_pkcs7_signature( - range(sig_buf, sig->sig_size), - root_hash, - range(add_data_buf, sig->add_data_size)); - - if (result == 0) - df->df_signature_validated = true; -out: - kfree(sig_buf); - kfree(add_data_buf); - return result; -} - static struct data_file_segment *get_file_segment(struct data_file *df, int block_index) { @@ -684,13 +618,6 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df, result = err; } - if (result > 0) { - int err = revalidate_signature(bf, df); - - if (err < 0) - result = err; - } - if (result >= 0) log_block_read(mi, &df->df_id, index, false /*timed out*/); @@ -756,7 +683,7 @@ int incfs_process_new_data_block(struct data_file *df, int incfs_read_file_signature(struct data_file *df, struct mem_range dst) { struct file *bf = df->df_backing_file_context->bc_file; - struct ondisk_signature *sig; + struct incfs_df_signature *sig; int read_res = 0; if (!dst.data) @@ -786,7 +713,7 @@ int 
incfs_process_new_hash_block(struct data_file *df, struct backing_file_context *bfc = NULL; struct mount_info *mi = NULL; struct mtree *hash_tree = NULL; - struct ondisk_signature *sig = NULL; + struct incfs_df_signature *sig = NULL; loff_t hash_area_base = 0; loff_t hash_area_size = 0; int error = 0; @@ -805,11 +732,11 @@ int incfs_process_new_hash_block(struct data_file *df, hash_tree = df->df_hash_tree; sig = df->df_signature; - if (!hash_tree || !sig || sig->mtree_offset == 0) + if (!hash_tree || !sig || sig->hash_offset == 0) return -ENOTSUPP; - hash_area_base = sig->mtree_offset; - hash_area_size = sig->mtree_size; + hash_area_base = sig->hash_offset; + hash_area_size = sig->hash_size; if (hash_area_size < block->block_index * INCFS_DATA_FILE_BLOCK_SIZE + block->data_len) { /* Hash block goes beyond dedicated hash area of this file. */ @@ -867,58 +794,69 @@ static int process_file_signature_md(struct incfs_file_signature *sg, { struct data_file *df = handler->context; struct mtree *hash_tree = NULL; - struct ondisk_signature *signature = NULL; int error = 0; - loff_t base_tree_off = le64_to_cpu(sg->sg_hash_tree_offset); - u32 tree_size = le32_to_cpu(sg->sg_hash_tree_size); - loff_t sig_off = le64_to_cpu(sg->sg_sig_offset); - u32 sig_size = le32_to_cpu(sg->sg_sig_size); - loff_t add_data_off = le64_to_cpu(sg->sg_add_data_offset); - u32 add_data_size = le32_to_cpu(sg->sg_add_data_size); + struct incfs_df_signature *signature = + kzalloc(sizeof(*signature), GFP_NOFS); + void *buf = 0; + ssize_t read; + + if (!df || !df->df_backing_file_context || + !df->df_backing_file_context->bc_file) { + error = -ENOENT; + goto out; + } - if (!df) - return -ENOENT; + signature->hash_offset = le64_to_cpu(sg->sg_hash_tree_offset); + signature->hash_size = le32_to_cpu(sg->sg_hash_tree_size); + signature->sig_offset = le64_to_cpu(sg->sg_sig_offset); + signature->sig_size = le32_to_cpu(sg->sg_sig_size); - signature = kzalloc(sizeof(*signature), GFP_NOFS); - if (!signature) { + buf 
= kzalloc(signature->sig_size, GFP_NOFS); + if (!buf) { error = -ENOMEM; goto out; } - signature->add_data_offset = add_data_off; - signature->add_data_size = add_data_size; - signature->sig_offset = sig_off; - signature->sig_size = sig_size; - signature->mtree_offset = base_tree_off; - signature->mtree_size = tree_size; + read = incfs_kread(df->df_backing_file_context->bc_file, buf, + signature->sig_size, signature->sig_offset); + if (read < 0) { + error = read; + goto out; + } - hash_tree = incfs_alloc_mtree(sg->sg_hash_alg, df->df_block_count, - range(sg->sg_root_hash, sizeof(sg->sg_root_hash))); + if (read != signature->sig_size) { + error = -EINVAL; + goto out; + } + + hash_tree = incfs_alloc_mtree(range(buf, signature->sig_size), + df->df_block_count); if (IS_ERR(hash_tree)) { error = PTR_ERR(hash_tree); hash_tree = NULL; goto out; } - if (hash_tree->hash_tree_area_size != tree_size) { + if (hash_tree->hash_tree_area_size != signature->hash_size) { error = -EINVAL; goto out; } - if (tree_size > 0 && handler->md_record_offset <= base_tree_off) { + if (signature->hash_size > 0 && + handler->md_record_offset <= signature->hash_offset) { error = -EINVAL; goto out; } - if (handler->md_record_offset <= signature->add_data_offset || - handler->md_record_offset <= signature->sig_offset) { + if (handler->md_record_offset <= signature->sig_offset) { error = -EINVAL; goto out; } df->df_hash_tree = hash_tree; + hash_tree = NULL; df->df_signature = signature; + signature = NULL; out: - if (error) { - incfs_free_mtree(hash_tree); - kfree(signature); - } + incfs_free_mtree(hash_tree); + kfree(signature); + kfree(buf); return error; } diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 5ce1966b4fc5..01045403026a 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -215,10 +215,7 @@ struct data_file { struct mtree *df_hash_tree; - struct ondisk_signature *df_signature; - - /* True, if file signature has already been validated. 
*/ - bool df_signature_validated; + struct incfs_df_signature *df_signature; }; struct dir_file { diff --git a/fs/incfs/format.c b/fs/incfs/format.c index db71f527cf36..8c8213ee325d 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -299,9 +299,7 @@ int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, } int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, - u8 hash_alg, u32 tree_size, - struct mem_range root_hash, struct mem_range add_data, - struct mem_range sig) + struct mem_range sig, u32 tree_size) { struct incfs_file_signature sg = {}; int result = 0; @@ -311,8 +309,6 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, if (!bfc) return -EFAULT; - if (root_hash.len > sizeof(sg.sg_root_hash)) - return -E2BIG; LOCK_REQUIRED(bfc->bc_mutex); @@ -321,7 +317,6 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, sg.sg_header.h_md_entry_type = INCFS_MD_SIGNATURE; sg.sg_header.h_record_size = cpu_to_le16(sizeof(sg)); sg.sg_header.h_next_md_offset = cpu_to_le64(0); - sg.sg_hash_alg = hash_alg; if (sig.data != NULL && sig.len > 0) { loff_t pos = incfs_get_end_offset(bfc->bc_file); @@ -333,20 +328,8 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, goto err; } - if (add_data.len > 0) { - loff_t pos = incfs_get_end_offset(bfc->bc_file); - - sg.sg_add_data_size = cpu_to_le32(add_data.len); - sg.sg_add_data_offset = cpu_to_le64(pos); - - result = write_to_bf(bfc, add_data.data, - add_data.len, pos, false); - if (result) - goto err; - } - tree_area_pos = incfs_get_end_offset(bfc->bc_file); - if (hash_alg && tree_size > 0) { + if (tree_size > 0) { if (tree_size > 5 * INCFS_DATA_FILE_BLOCK_SIZE) { /* * If hash tree is big enough, it makes sense to @@ -369,7 +352,6 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, sg.sg_hash_tree_size = cpu_to_le32(tree_size); sg.sg_hash_tree_offset = cpu_to_le64(tree_area_pos); } - 
memcpy(sg.sg_root_hash, root_hash.data, root_hash.len); /* Write a hash tree metadata record pointing to the hash tree above. */ result = append_md_to_backing_file(bfc, &sg.sg_header); diff --git a/fs/incfs/format.h b/fs/incfs/format.h index a86881482e19..55e6938b30d6 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -217,27 +217,27 @@ struct incfs_file_attr { __le32 fa_crc; } __packed; -/* Metadata record for file attribute. Type = INCFS_MD_SIGNATURE */ +/* Metadata record for file signature. Type = INCFS_MD_SIGNATURE */ struct incfs_file_signature { struct incfs_md_header sg_header; - __u8 sg_hash_alg; /* Value from incfs_hash_tree_algorithm */ + __le32 sg_sig_size; /* The size of the signature. */ + + __le64 sg_sig_offset; /* Signature's offset in the backing file */ __le32 sg_hash_tree_size; /* The size of the hash tree. */ __le64 sg_hash_tree_offset; /* Hash tree offset in the backing file */ - - __u8 sg_root_hash[INCFS_MAX_HASH_SIZE]; - - __le32 sg_sig_size; /* The size of the pkcs7 signature. */ - - __le64 sg_sig_offset; /* pkcs7 signature's offset in the backing file */ - - __le32 sg_add_data_size; /* The size of the additional data. */ - - __le64 sg_add_data_offset; /* Additional data's offset */ } __packed; +/* In memory version of above */ +struct incfs_df_signature { + u32 sig_size; + u64 sig_offset; + u32 hash_size; + u64 hash_offset; +}; + /* State of the backing file. */ struct backing_file_context { /* Protects writes to bc_file */ @@ -253,23 +253,6 @@ struct backing_file_context { loff_t bc_last_md_record_offset; }; - -/* Backing file locations of things required for signature validation. */ -struct ondisk_signature { - - loff_t add_data_offset; /* Additional data's offset */ - - loff_t sig_offset; /* pkcs7 signature's offset in the backing file */ - - loff_t mtree_offset; /* Backing file offset of the hash tree. */ - - u32 add_data_size; /* The size of the additional data. */ - - u32 sig_size; /* The size of the pkcs7 signature. 
*/ - - u32 mtree_size; /* The size of the hash tree. */ -}; - struct metadata_handler { loff_t md_record_offset; loff_t md_prev_record_offset; @@ -319,9 +302,7 @@ int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, struct mem_range value, struct incfs_file_attr *attr); int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, - u8 hash_alg, u32 tree_size, - struct mem_range root_hash, struct mem_range add_data, - struct mem_range sig); + struct mem_range sig, u32 tree_size); int incfs_make_empty_backing_file(struct backing_file_context *bfc, incfs_uuid_t *uuid, u64 file_size); diff --git a/fs/incfs/integrity.c b/fs/incfs/integrity.c index 1d00dda109e3..f8af9a83ea8a 100644 --- a/fs/incfs/integrity.c +++ b/fs/incfs/integrity.c @@ -10,70 +10,6 @@ #include "integrity.h" -int incfs_validate_pkcs7_signature(struct mem_range pkcs7_blob, - struct mem_range root_hash, struct mem_range add_data) -{ - struct pkcs7_message *pkcs7 = NULL; - const void *data = NULL; - size_t data_len = 0; - const char *p; - int err; - - pkcs7 = pkcs7_parse_message(pkcs7_blob.data, pkcs7_blob.len); - if (IS_ERR(pkcs7)) { - pr_debug("PKCS#7 parsing error. 
ptr=%p size=%ld err=%ld\n", - pkcs7_blob.data, pkcs7_blob.len, -PTR_ERR(pkcs7)); - return PTR_ERR(pkcs7); - } - - err = pkcs7_get_content_data(pkcs7, &data, &data_len, NULL); - if (err || data_len == 0 || data == NULL) { - pr_debug("PKCS#7 message does not contain data\n"); - err = -EBADMSG; - goto out; - } - - if (root_hash.len == 0) { - pr_debug("Root hash is empty.\n"); - err = -EBADMSG; - goto out; - } - - if (data_len != root_hash.len + add_data.len) { - pr_debug("PKCS#7 data size doesn't match arguments.\n"); - err = -EKEYREJECTED; - goto out; - } - - p = data; - if (memcmp(p, root_hash.data, root_hash.len) != 0) { - pr_debug("Root hash mismatch.\n"); - err = -EKEYREJECTED; - goto out; - } - p += root_hash.len; - if (memcmp(p, add_data.data, add_data.len) != 0) { - pr_debug("Additional data mismatch.\n"); - err = -EKEYREJECTED; - goto out; - } - - err = pkcs7_verify(pkcs7, VERIFYING_UNSPECIFIED_SIGNATURE); - if (err) - pr_debug("PKCS#7 signature verification error: %d\n", -err); - - /* - * RSA signature verification sometimes returns unexpected error codes - * when signature doesn't match. 
- */ - if (err == -ERANGE || err == -EINVAL) - err = -EBADMSG; - -out: - pkcs7_free_message(pkcs7); - return err; -} - struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id) { static struct incfs_hash_alg sha256 = { @@ -113,11 +49,90 @@ struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id) return result; } +struct signature_info { + u32 version; + enum incfs_hash_tree_algorithm hash_algorithm; + u8 log2_blocksize; + struct mem_range salt; + struct mem_range root_hash; +}; -struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id, - int data_block_count, - struct mem_range root_hash) +static u32 read_u32(u8 **p, u8 *top, u32 *result) { + if (*p + sizeof(u32) > top) + return false; + + *result = le32_to_cpu(*(u32 *)*p); + *p += sizeof(u32); + return true; +} + +static bool read_u8(u8 **p, u8 *top, u8 *result) +{ + if (*p + sizeof(u8) > top) + return false; + + *result = *(u8 *)*p; + *p += sizeof(u8); + return true; +} + +static bool read_mem_range(u8 **p, u8 *top, struct mem_range *range) +{ + u32 len; + + if (!read_u32(p, top, &len) || *p + len > top) + return false; + + range->len = len; + range->data = *p; + *p += len; + return true; +} + +static int incfs_parse_signature(struct mem_range signature, + struct signature_info *si) +{ + u8 *p = signature.data; + u8 *top = signature.data + signature.len; + u32 hash_section_size; + + if (signature.len > INCFS_MAX_SIGNATURE_SIZE) + return -EINVAL; + + if (!read_u32(&p, top, &si->version) || + si->version != INCFS_SIGNATURE_VERSION) + return -EINVAL; + + if (!read_u32(&p, top, &hash_section_size) || + p + hash_section_size > top) + return -EINVAL; + top = p + hash_section_size; + + if (!read_u32(&p, top, &si->hash_algorithm) || + si->hash_algorithm != INCFS_HASH_TREE_SHA256) + return -EINVAL; + + if (!read_u8(&p, top, &si->log2_blocksize) || si->log2_blocksize != 12) + return -EINVAL; + + if (!read_mem_range(&p, top, &si->salt)) + return -EINVAL; + + if 
(!read_mem_range(&p, top, &si->root_hash)) + return -EINVAL; + + if (p != top) + return -EINVAL; + + return 0; +} + +struct mtree *incfs_alloc_mtree(struct mem_range signature, + int data_block_count) +{ + int error; + struct signature_info si; struct mtree *result = NULL; struct incfs_hash_alg *hash_alg = NULL; int hash_per_block; @@ -129,11 +144,15 @@ struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id, if (data_block_count <= 0) return ERR_PTR(-EINVAL); - hash_alg = incfs_get_hash_alg(id); + error = incfs_parse_signature(signature, &si); + if (error) + return ERR_PTR(error); + + hash_alg = incfs_get_hash_alg(si.hash_algorithm); if (IS_ERR(hash_alg)) return ERR_PTR(PTR_ERR(hash_alg)); - if (root_hash.len < hash_alg->digest_size) + if (si.root_hash.len < hash_alg->digest_size) return ERR_PTR(-EINVAL); result = kzalloc(sizeof(*result), GFP_NOFS); @@ -173,7 +192,7 @@ struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id, } /* Root hash is stored separately from the rest of the tree. 
*/ - memcpy(result->root_hash, root_hash.data, hash_alg->digest_size); + memcpy(result->root_hash, si.root_hash.data, hash_alg->digest_size); return result; err: @@ -215,13 +234,3 @@ int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, return crypto_shash_digest(desc, data.data, data.len, digest.data); } -void incfs_free_signature_info(struct signature_info *si) -{ - if (!si) - return; - kfree(si->root_hash.data); - kfree(si->additional_data.data); - kfree(si->signature.data); - kfree(si); -} - diff --git a/fs/incfs/integrity.h b/fs/incfs/integrity.h index da1c38486b2f..cf79b64da736 100644 --- a/fs/incfs/integrity.h +++ b/fs/incfs/integrity.h @@ -38,21 +38,10 @@ struct mtree { int depth; }; -struct signature_info { - struct mem_range root_hash; - - struct mem_range additional_data; - - struct mem_range signature; - - enum incfs_hash_tree_algorithm hash_alg; -}; - struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id); -struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id, - int data_block_count, - struct mem_range root_hash); +struct mtree *incfs_alloc_mtree(struct mem_range signature, + int data_block_count); void incfs_free_mtree(struct mtree *tree); @@ -64,9 +53,4 @@ size_t incfs_get_mtree_hash_count(enum incfs_hash_tree_algorithm alg, int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, struct mem_range digest); -int incfs_validate_pkcs7_signature(struct mem_range pkcs7_blob, - struct mem_range root_hash, struct mem_range add_data); - -void incfs_free_signature_info(struct signature_info *si); - #endif /* _INCFS_INTEGRITY_H */ diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 04f292e3377b..aae918963a76 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -840,104 +840,39 @@ static char *file_id_to_str(incfs_uuid_t id) return result; } -static struct signature_info *incfs_copy_signature_info_from_user( - struct incfs_file_signature_info __user *original) +static struct mem_range 
incfs_copy_signature_info_from_user(u8 __user *original, + u64 size) { - struct incfs_file_signature_info usr_si; - struct signature_info *result; - int error; + u8 *result; if (!original) - return NULL; + return range(NULL, 0); - if (copy_from_user(&usr_si, original, sizeof(usr_si)) > 0) - return ERR_PTR(-EFAULT); + if (size > INCFS_MAX_SIGNATURE_SIZE) + return range(ERR_PTR(-EFAULT), 0); - result = kzalloc(sizeof(*result), GFP_NOFS); + result = kzalloc(size, GFP_NOFS); if (!result) - return ERR_PTR(-ENOMEM); - - result->hash_alg = usr_si.hash_tree_alg; - - if (result->hash_alg) { - void *p = kzalloc(INCFS_MAX_HASH_SIZE, GFP_NOFS); - - if (!p) { - error = -ENOMEM; - goto err; - } - - /* TODO this sets the root_hash length to MAX_HASH_SIZE not - * the actual size. Fix, then set INCFS_MAX_HASH_SIZE back - * to 64 - */ - result->root_hash = range(p, INCFS_MAX_HASH_SIZE); - if (copy_from_user(p, u64_to_user_ptr(usr_si.root_hash), - result->root_hash.len) > 0) { - error = -EFAULT; - goto err; - } - } - - if (usr_si.additional_data_size > INCFS_MAX_FILE_ATTR_SIZE) { - error = -E2BIG; - goto err; - } - - if (usr_si.additional_data && usr_si.additional_data_size) { - void *p = kzalloc(usr_si.additional_data_size, GFP_NOFS); + return range(ERR_PTR(-ENOMEM), 0); - if (!p) { - error = -ENOMEM; - goto err; - } - result->additional_data = range(p, - usr_si.additional_data_size); - if (copy_from_user(p, u64_to_user_ptr(usr_si.additional_data), - result->additional_data.len) > 0) { - error = -EFAULT; - goto err; - } + if (copy_from_user(result, original, size)) { + kfree(result); + return range(ERR_PTR(-EFAULT), 0); } - if (usr_si.signature_size > INCFS_MAX_SIGNATURE_SIZE) { - error = -E2BIG; - goto err; - } - - if (usr_si.signature && usr_si.signature_size) { - void *p = kzalloc(usr_si.signature_size, GFP_NOFS); - - if (!p) { - error = -ENOMEM; - goto err; - } - result->signature = range(p, usr_si.signature_size); - if (copy_from_user(p, u64_to_user_ptr(usr_si.signature), - 
result->signature.len) > 0) { - error = -EFAULT; - goto err; - } - } - - return result; - -err: - incfs_free_signature_info(result); - return ERR_PTR(-error); + return range(result, size); } static int init_new_file(struct mount_info *mi, struct dentry *dentry, - incfs_uuid_t *uuid, u64 size, struct mem_range attr, - struct incfs_file_signature_info __user *fsi) + incfs_uuid_t *uuid, u64 size, struct mem_range attr, + u8 __user *user_signature_info, u64 signature_size) { struct path path = {}; struct file *new_file; int error = 0; struct backing_file_context *bfc = NULL; u32 block_count; - struct mem_range mem_range = {NULL}; - struct signature_info *si = NULL; + struct mem_range raw_signature = { NULL }; struct mtree *hash_tree = NULL; if (!mi || !dentry || !uuid) @@ -987,44 +922,27 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, goto out; } - if (fsi) { - si = incfs_copy_signature_info_from_user(fsi); + if (user_signature_info) { + raw_signature = incfs_copy_signature_info_from_user( + user_signature_info, signature_size); - if (IS_ERR(si)) { - error = PTR_ERR(si); - si = NULL; + if (IS_ERR(raw_signature.data)) { + error = PTR_ERR(raw_signature.data); + raw_signature.data = NULL; goto out; } - if (si->hash_alg) { - hash_tree = incfs_alloc_mtree(si->hash_alg, block_count, - si->root_hash); - if (IS_ERR(hash_tree)) { - error = PTR_ERR(hash_tree); - hash_tree = NULL; - goto out; - } - - /* TODO This code seems wrong when len is zero - we - * should error out?? 
- */ - if (si->signature.len > 0) - error = incfs_validate_pkcs7_signature( - si->signature, - si->root_hash, - si->additional_data); - if (error) - goto out; - - error = incfs_write_signature_to_backing_file(bfc, - si->hash_alg, - hash_tree->hash_tree_area_size, - si->root_hash, si->additional_data, - si->signature); - - if (error) - goto out; + hash_tree = incfs_alloc_mtree(raw_signature, block_count); + if (IS_ERR(hash_tree)) { + error = PTR_ERR(hash_tree); + hash_tree = NULL; + goto out; } + + error = incfs_write_signature_to_backing_file( + bfc, raw_signature, hash_tree->hash_tree_area_size); + if (error) + goto out; } out: @@ -1033,8 +951,7 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, incfs_free_bfc(bfc); } incfs_free_mtree(hash_tree); - incfs_free_signature_info(si); - kfree(mem_range.data); + kfree(raw_signature.data); if (error) pr_debug("incfs: %s error: %d\n", __func__, error); @@ -1292,7 +1209,7 @@ static long ioctl_create_file(struct mount_info *mi, goto delete_index_file; } - /* Save the file's attrubute as an xattr */ + /* Save the file's attribute as an xattr */ if (args.file_attr_len && args.file_attr) { if (args.file_attr_len > INCFS_MAX_FILE_ATTR_SIZE) { error = -E2BIG; @@ -1323,9 +1240,9 @@ static long ioctl_create_file(struct mount_info *mi, /* Initializing a newly created file. 
*/ error = init_new_file(mi, index_file_dentry, &args.file_id, args.size, - range(attr_value, args.file_attr_len), - (struct incfs_file_signature_info __user *) - args.signature_info); + range(attr_value, args.file_attr_len), + (u8 __user *)args.signature_info, + args.signature_size); if (error) goto delete_index_file; diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 81947ded482e..2efc53f591ef 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -35,6 +35,8 @@ #define INCFS_XATTR_METADATA_NAME (XATTR_USER_PREFIX "incfs.metadata") #define INCFS_MAX_SIGNATURE_SIZE 8096 +#define INCFS_SIGNATURE_VERSION 2 +#define INCFS_SIGNATURE_SECTIONS 2 #define INCFS_IOCTL_BASE_CODE 'g' @@ -142,48 +144,6 @@ enum incfs_hash_tree_algorithm { INCFS_HASH_TREE_SHA256 = 1 }; -struct incfs_file_signature_info { - /* - * A pointer to file's root hash (if determined != 0) - * Actual hash size determined by hash_tree_alg. - * Size of the buffer should be at least INCFS_MAX_HASH_SIZE - * - * Equivalent to: u8 *root_hash; - */ - __aligned_u64 root_hash; - - /* - * A pointer to additional data that was attached to the root hash - * before signing. - * - * Equivalent to: u8 *additional_data; - */ - __aligned_u64 additional_data; - - /* Size of additional data. */ - __u32 additional_data_size; - - /* Reserved - must be 0 */ - __u32 reserved1; - - /* - * A pointer to pkcs7 signature DER blob. - * - * Equivalent to: u8 *signature; - */ - __aligned_u64 signature; - - - /* Size of pkcs7 signature DER blob */ - __u32 signature_size; - - /* Reserved - must be 0 */ - __u32 reserved2; - - /* Value from incfs_hash_tree_algorithm */ - __u8 hash_tree_alg; -}; - /* * Create a new file or directory. 
*/ @@ -240,11 +200,30 @@ struct incfs_new_file_args { /* Reserved - must be 0 */ __u32 reserved4; - /* struct incfs_file_signature_info *signature_info; */ + /* + * Points to an APK V4 Signature data blob + * Signature must have two sections + * Format is: + * u32 version + * u32 size_of_hash_info_section + * u8 hash_info_section[] + * u32 size_of_signing_info_section + * u8 signing_info_section[] + * + * Note that incfs does not care about what is in signing_info_section + * + * hash_info_section has following format: + * u32 hash_algorithm; // Must be SHA256 == 1 + * u8 log2_blocksize; // Must be 12 for 4096 byte blocks + * u32 salt_size; + * u8 salt[]; + * u32 hash_size; + * u8 root_hash[]; + */ __aligned_u64 signature_info; - /* Reserved - must be 0 */ - __aligned_u64 reserved5; + /* Size of signature_info */ + __aligned_u64 signature_size; /* Reserved - must be 0 */ __aligned_u64 reserved6; diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 22f6c7fca7de..1cd1226f4e44 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -25,8 +25,6 @@ #include "lz4.h" #include "utils.h" -#define __packed __attribute__((__packed__)) - #define TEST_FAILURE 1 #define TEST_SUCCESS 0 #define INCFS_MAX_MTREE_LEVELS 8 @@ -69,101 +67,6 @@ struct linux_dirent64 { char d_name[0]; } __packed; -/* - * The certificate below and the private key were created by calling: - * openssl req -x509 -newkey rsa:4096 -keyout private.key -out cert.crt - * -days 1000 -sha256 -nodes -outform PEM -subj - * "/C=US/ST=WA/L=Kirkland/O=Example/OU=Org/CN=www.example.com" - */ -char x509_cert[] = -"-----BEGIN CERTIFICATE-----\n" -"MIIFvzCCA6egAwIBAgIUXpwqelEljm6BBllRQGHLrls2MYgwDQYJKoZIhvcNAQEL\n" -"BQAwbzELMAkGA1UEBhMCVVMxEzARBgNVBAgMCldhc2hpbmd0b24xETAPBgNVBAcM\n" -"CEtpcmtsYW5kMRAwDgYDVQQKDAdFeGFtcGxlMQwwCgYDVQQLDANPcmcxGDAWBgNV\n" 
-"BAMMD3d3dy5leGFtcGxlLmNvbTAeFw0xOTA4MDgyMzA3MDZaFw0yMjA1MDQyMzA3\n" -"MDZaMG8xCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApXYXNoaW5ndG9uMREwDwYDVQQH\n" -"DAhLaXJrbGFuZDEQMA4GA1UECgwHRXhhbXBsZTEMMAoGA1UECwwDT3JnMRgwFgYD\n" -"VQQDDA93d3cuZXhhbXBsZS5jb20wggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK\n" -"AoICAQC1LuFW/lDV/GflqFMz7RDvFFgWld982ZuDJRaK55JNj+MI4RZNL61PDw43\n" -"NeeJtqUoVxSLS9wHURjSjD/CV5GudUOnzGfbwFlLko+jhYRT4HNFS+5ys1FEJLtA\n" -"uYcY4P9GHQEXYUX+ue82A2kJ91oY6G3vCQYJFiGteb6TRDICmug31x4pBfB8rOdt\n" -"4/NXS/Dn+S0/mJlxw34IKfqrlFjzUziRZtAWWqDcfxFDUizSggkdXIUq4GY38RAD\n" -"qGewNNCab3ClJDP7/M32BhSNgsIKhgtSTM2+ocfvBhwup+BjV6UbL21DPAshlolV\n" -"gSL1HM2jin5bi4bpFMreY0LXwFih87/6AVSfQHY9TZrombVZnMxvB7NG1NCSwDBT\n" -"qjjFb3oiSMugJzY+MhISM754m46fwUyHZ1ylWCLJEU8kQ5A1q9vvqMcaDa4uTGP3\n" -"UgC6SyVmZxG2o+AO6m8TRTCtqHN41mPTM9HK4T1UyuzVpykSc2LlYkKE517SyEiV\n" -"XDmotNb2myXNYHHTjRYNxkq75Lbii2I4Q4z8XtDngaIrhZqACKSqIt2CocGjx61S\n" -"oxKWi+LGa7B4NaCMjz1LnaOIsXn1rJDRnUWL49T42g4kOi/5QaC2JDygfefw1hAb\n" -"uxkq9EYUDg+w9broltiBf4rKAnw8JMySARnyPZbj0lhZK3va5wIDAQABo1MwUTAd\n" -"BgNVHQ4EFgQUo6JN3gY2yGbzOTNj8Al7hNB3rw0wHwYDVR0jBBgwFoAUo6JN3gY2\n" -"yGbzOTNj8Al7hNB3rw0wDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOC\n" -"AgEAQb3pJqOzM4whfNVdpEOswd1EApcWNM1ps9iTlEEjDoRv9F7F1PW0uXCIpk3B\n" -"j5JgCmIxAcPnzj42rduRSx421hHMZhbAIWI/JL4ZSF64qlG0YrmJDXlJgSMoyst5\n" -"biUqeWgO7Js5udPt3zhkeA62z3hGM6dE5B3k7gHTaKKtK17+UeR9imZKsOK8GBnM\n" -"rxMPI6XghxxAK2OQ/r09DHDiyf/GxgOE46oknfXfMPx3HaSvDKrZUTZ+UvVbM5c2\n" -"5eXOgH5UO/e4llLknJK7CoP/R6G7pV44iT4t4t9FMnvCYvavAHwfR+6z5vTF3o8a\n" -"wd80fC8z1vfLsIPLROdzBl9rGCvv536fPiEA677CM1AZkjfT0a9DVzrE1NDvuCUF\n" -"0KgEdiNwux+hO6dbTyiS38yPT6TbpoWJptJmFhFkC4hGvUgoX/TI0covSyf74VRH\n" -"k3BHojOBMYiX1K66xoN7fhlGK8cith3L0XXPB8CgSEUPWURvm8RCaGuX2T3FZomF\n" -"BCnNpN+WNnN3Yf4OkjtuvtxxktUU7pfVLsUxrdpo/ph4rWm6U83VT/Zlq92aF4vW\n" -"QJ+7uraQFip7e+Gy9g3UJINm3B7b1C4ch/Z/upCZESOI/23sVGzkfTgOrS+23i6/\n" -"Vi9YW75zySC2FCa1AWMS1NmS5qfDSycJUgD6YvOUg0C54ZI=\n" -"-----END CERTIFICATE-----"; - -char private_key[] = 
-"-----BEGIN PRIVATE KEY-----\n" -"MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQC1LuFW/lDV/Gfl\n" -"qFMz7RDvFFgWld982ZuDJRaK55JNj+MI4RZNL61PDw43NeeJtqUoVxSLS9wHURjS\n" -"jD/CV5GudUOnzGfbwFlLko+jhYRT4HNFS+5ys1FEJLtAuYcY4P9GHQEXYUX+ue82\n" -"A2kJ91oY6G3vCQYJFiGteb6TRDICmug31x4pBfB8rOdt4/NXS/Dn+S0/mJlxw34I\n" -"KfqrlFjzUziRZtAWWqDcfxFDUizSggkdXIUq4GY38RADqGewNNCab3ClJDP7/M32\n" -"BhSNgsIKhgtSTM2+ocfvBhwup+BjV6UbL21DPAshlolVgSL1HM2jin5bi4bpFMre\n" -"Y0LXwFih87/6AVSfQHY9TZrombVZnMxvB7NG1NCSwDBTqjjFb3oiSMugJzY+MhIS\n" -"M754m46fwUyHZ1ylWCLJEU8kQ5A1q9vvqMcaDa4uTGP3UgC6SyVmZxG2o+AO6m8T\n" -"RTCtqHN41mPTM9HK4T1UyuzVpykSc2LlYkKE517SyEiVXDmotNb2myXNYHHTjRYN\n" -"xkq75Lbii2I4Q4z8XtDngaIrhZqACKSqIt2CocGjx61SoxKWi+LGa7B4NaCMjz1L\n" -"naOIsXn1rJDRnUWL49T42g4kOi/5QaC2JDygfefw1hAbuxkq9EYUDg+w9broltiB\n" -"f4rKAnw8JMySARnyPZbj0lhZK3va5wIDAQABAoICAQCMKul/0J2e/ncub6t2t4dr\n" -"PnTrfCT6xKqPqciny4Ee6hr9So1jR2gvink380bd/mQFMmEdZqGhM3cdpAzLf82f\n" -"hu7BSNxsYIF0er0PB4MZFMJ4sMaXC+zp5/TJnP5MG/zBND0c5k8tQpEyWy8O28Jj\n" -"FKW/0F5P90Q0ncP20EJUS50tXgniOMsU2Prtw/UE6yZDgD0mPxsurMu66ycXSFwM\n" -"WqyfqEeBk7lw/AjR6Sft71W31lTbl+DclG0MN2OIKUPcxiwCRmDFKI36MDgERk1x\n" -"sMPfdrWRLj2ryDFTUuLAWBTOVEGWS0RdRsWWVaJCuHbKd6FLl0TW2xQbOfWDTjYC\n" -"Ps31ejh163qdbk7OGOZIbd83fP3jsyL+4eNzhUpeXMKhfG58mFIv4yhdZIUOpuL6\n" -"aqnoU9z9wEsJKj/SrKr3nw6tuTnmbXgNjun9LfTFmqqDRBYd0Okiprw6jHNM1jgA\n" -"GG0kC/K7r89jKymVDABwGMFCS33ynR1Tb6zG+cqgNMPw19Fy3uQuW21CjqSzCOyP\n" -"aEVCEUZeP+ofql5+7ZKi6Dj+EdTfeKt2ihgheHZZoaYSINb8tsnKbdJhwBfW9PFT\n" -"aT/hu3bnO2FPC8H2NGOqxOEeel9ALU4SFu1pOknEhiL3/mNfOQ+KgrSRDtNRlcL0\n" -"cto05J90u0cmqwWKlshfaQKCAQEA5dcklxs4ezyzt28NcsiyS02oZ+9TkQp6pCXV\n" -"kx7AwhivAmVTlJ+c6BegA5EPd7A1gknM3+EKzGpoBOqmlF45G57phVIAphAp4oCH\n" -"UOVtIQgM8p4EU2gtX+uNOopdYlpBQnWimXaHA2sOD9/yTbZ03j/McRH6D15+iCld\n" -"3880GHdZaYYbQmHoSDg39LRRO1bdS3WC0oKBD2gPi3K0b9RaZSwKzuVrmlvrLURj\n" -"WMZfmkGl4BsITfuoTxbWFVncG3Kb9eYkYUFZy4M2G/s849PS/HjrN7BvgpanjtVp\n" -"1/39APQfAYfUuBPbKYnb6F8dE0pb5cVd4uMZklAeTb3bXjOO9QKCAQEAyc4CxWXr\n" 
-"bG6Do5dGpWudQ7ucq00MR0T3MHQIu5XTn6BsPHAJ9ZgrQw9C24PXm2VEjjsrMs5T\n" -"rHNF9oeO39s25Za1iyJ+893icqA3h3ivCUOOoVE54BkuJK6REhkXPD5G1ubmxeBz\n" -"MKNehlpd/eSbJJArkzKFZ8sBtLt8i9VFhRnXSpDAbiMpCbjW+bem9MWdLmkenSnu\n" -"OUbnqYcJhFBCvOT7ZCHFCDNUNPfHcaReSY2EYjw0ZqtqAZD0Q+DL+RkLz7l1+/bF\n" -"eEwNjmjFTcwRyawqf38D4miU0H6ca16FkeSlbmM5p3HdwZK2HVYYz3FSwhox6Ebd\n" -"n6in42qfL4Ug6wKCAQAh9IDRWhIkErmyNdPUy1WbzmM8x5ye5t9rdLNywq5TfnYM\n" -"co/AezwhBax8GmgglIWzM9fykzqXLHklkMz/SlRBgl6ZdZ3m6qhlb/uNtfdDU/8l\n" -"sLaO4+sgKpp4tYxKRW8ytFJLPbmAhcZUDg+r73KgiuhXJAK/VoR29TWLJP9bRfaN\n" -"omRQkEpSsQuDOUhu7cxPo5KqKuGKNyNkxJNnmgWowLLwEfCtozrBO0M6EER7c4tf\n" -"6l51tuIMnSEPknD0FSB5WYCyZYcwi7fotlsuhVK8PdjyJzyyHDOw5FJ4uGsyQt55\n" -"yWlhsH1GS7mTQMn42Zlt/pR6OnbCqNdxQMUxy4gpAoIBAFvMbs5E0pb8nr0n72cI\n" -"UP2itl3mKpOw95D+94n9WcrfOt0zShSCKAvVQWCB1O5HXqwklj4CRWXI+iZu+7sx\n" -"CQPfTq3//ygH4x6paxkg+N6J8LPJMz6Rtb/R+QP2je9FlQvk9U1GEKArcLBFI0R/\n" -"XWOAgZHwBWd1nU0NjFY/qeQmIR02Q5LWQ7C8eG4X8MafriSShO6RSGCdtHwVhWq+\n" -"59ztfL3L7skQMFn37K3xS0LCMVpOcLfTeeFEgxjthVvG3OydPOJlGubiEbiaSEZf\n" -"cif/PUXKDYZMdIVzUsw0ryXykJ5qXKuizHFlv5oQtDCJKFBLgjBbLC2YluaIdekz\n" -"8gkCggEBAJWxS7EuB/qL7fOz0o3HRy0plR3qbwZ0pLoCz0Ii7WxraBS1yQwmxif1\n" -"Rgv89GyFqg1yQl3CSrMiw7oC9WxxxuiEZDO18c4KO3NTv9K4itN9OPQVBTHmEhod\n" -"KWcyP4/W/Sfuae77PyclSqUsAARRrKYn2fpLTS5ibaU0QZgHmdPgYDUrPr+6PHKK\n" -"ZfQKU2uBfuo6zoMbMmFi3UYG49j9rv4d6v+44vS1MPHV9JK/LD8YfBhgx8Pg/u6D\n" -"nUgipS48pkGjJr2u2Vu7Mx70vqz0Yf2neyyDbdLtkYauC4w7YKPTD0yzDJyGuAeB\n" -"GyPbW1yZa5vE302a1Cr0Cd7RC4AFAAw=\n" -"-----END PRIVATE KEY-----"; - struct test_files_set get_test_files_set(void) { static struct test_file files[] = { @@ -290,7 +193,7 @@ char *bin2hex(char *dst, const void *src, size_t count) return dst; } -static char *get_index_filename(char *mnt_dir, incfs_uuid_t id) +static char *get_index_filename(const char *mnt_dir, incfs_uuid_t id) { char path[FILENAME_MAX]; char str_id[1 + 2 * sizeof(id)]; @@ -722,8 +625,6 @@ static int build_mtree(struct test_file *file) int 
tree_lvl_index[INCFS_MAX_MTREE_LEVELS] = {}; int tree_lvl_count[INCFS_MAX_MTREE_LEVELS] = {}; int levels_count = 0; - char data_to_sign[256] = {}; - int sig_data_size; int i, level; if (file->size == 0) @@ -797,19 +698,6 @@ static int build_mtree(struct test_file *file) sha256(file->mtree[0].data, INCFS_DATA_FILE_BLOCK_SIZE, file->root_hash); - /* Calculating digital signature */ - snprintf(file->sig.add_data, sizeof(file->sig.add_data), "%ld", - file->size); - memcpy(data_to_sign, file->root_hash, SHA256_DIGEST_SIZE); - memcpy(data_to_sign + SHA256_DIGEST_SIZE, file->sig.add_data, - strlen(file->sig.add_data)); - sig_data_size = SHA256_DIGEST_SIZE + strlen(file->sig.add_data); - if (!sign_pkcs7(data_to_sign, sig_data_size, private_key, x509_cert, - &file->sig.data, &file->sig.size)) { - ksft_print_msg("Signing failed.\n"); - return -EINVAL; - } - return 0; } @@ -1873,162 +1761,6 @@ static int multiple_providers_test(char *mount_dir) return TEST_FAILURE; } -static int signature_test(char *mount_dir) -{ - struct test_files_set test = get_test_files_set(); - const int file_num = test.files_count; - int i = 0; - unsigned char sig_buf[INCFS_MAX_SIGNATURE_SIZE]; - char *backing_dir; - int cmd_fd = -1; - - backing_dir = create_backing_dir(mount_dir); - if (!backing_dir) - goto failure; - - /* Mount FS and release the backing file. (10s wait time) */ - if (mount_fs(mount_dir, backing_dir, 10000) != 0) - goto failure; - - cmd_fd = open_commands_file(mount_dir); - if (cmd_fd < 0) - goto failure; - - /* Write hashes and data. */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - int res; - - build_mtree(file); - - res = crypto_emit_file(cmd_fd, NULL, file->name, &file->id, - file->size, file->root_hash, - file->sig.data, file->sig.size, file->sig.add_data); - - if (res) { - ksft_print_msg("Emit failed for %s. 
error: %s\n", - file->name, strerror(-res)); - goto failure; - } - - if (emit_test_file_data(mount_dir, file)) - goto failure; - - res = load_hash_tree(mount_dir, file); - if (res) { - ksft_print_msg("Can't load hashes for %s. error: %s\n", - file->name, strerror(-res)); - goto failure; - } - } - - /* Validate data */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - int sig_len; - char *path; - int fd; - - if (validate_test_file_content(mount_dir, file) < 0) - goto failure; - - path = concat_file_name(mount_dir, file->name); - fd = open(path, O_RDWR); - free(path); - if (fd < 0) { - print_error("Can't open file"); - goto failure; - } - - sig_len = get_file_signature(fd, sig_buf, ARRAY_SIZE(sig_buf)); - - if (close(fd)) { - print_error("Can't close file"); - goto failure; - } - - if (sig_len < 0) { - ksft_print_msg("Can't load signature %s. error: %s\n", - file->name, strerror(-sig_len)); - goto failure; - } - - if (sig_len != file->sig.size || - memcmp(sig_buf, file->sig.data, sig_len)) { - ksft_print_msg("Signature mismatch %s.\n", - file->name); - goto failure; - } - } - - /* Unmount and mount again, to make sure the signature is persistent. 
*/ - close(cmd_fd); - cmd_fd = -1; - if (umount(mount_dir) != 0) { - print_error("Can't unmout FS"); - goto failure; - } - if (mount_fs(mount_dir, backing_dir, 50) != 0) - goto failure; - - cmd_fd = open_commands_file(mount_dir); - if (cmd_fd < 0) - goto failure; - - /* Validate data again */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - int sig_len; - char *path; - int fd; - - if (validate_test_file_content(mount_dir, file) < 0) - goto failure; - - path = concat_file_name(mount_dir, file->name); - fd = open(path, O_RDWR); - free(path); - if (fd < 0) { - print_error("Can't open file"); - goto failure; - } - - sig_len = get_file_signature(fd, sig_buf, ARRAY_SIZE(sig_buf)); - - if (close(fd)) { - print_error("Can't close file"); - goto failure; - } - - if (sig_len < 0) { - ksft_print_msg("Can't load signature %s. error: %s\n", - file->name, strerror(-sig_len)); - goto failure; - } - if (sig_len != file->sig.size || - memcmp(sig_buf, file->sig.data, sig_len)) { - ksft_print_msg("Signature mismatch %s.\n", - file->name); - goto failure; - } - } - - /* Final unmount */ - close(cmd_fd); - cmd_fd = -1; - if (umount(mount_dir) != 0) { - print_error("Can't unmout FS"); - goto failure; - } - return TEST_SUCCESS; - -failure: - close(cmd_fd); - free(backing_dir); - umount(mount_dir); - return TEST_FAILURE; -} - static int hash_tree_test(char *mount_dir) { char *backing_dir; @@ -2057,8 +1789,8 @@ static int hash_tree_test(char *mount_dir) build_mtree(file); res = crypto_emit_file(cmd_fd, NULL, file->name, &file->id, - file->size, file->root_hash, - file->sig.data, file->sig.size, file->sig.add_data); + file->size, file->root_hash, + file->sig.add_data); if (i == corrupted_file_idx) { /* Corrupt third blocks hash */ @@ -2383,7 +2115,6 @@ int main(int argc, char *argv[]) MAKE_TEST(work_after_remount_test), MAKE_TEST(child_procs_waiting_for_data_test), MAKE_TEST(multiple_providers_test), - MAKE_TEST(signature_test), MAKE_TEST(hash_tree_test), 
MAKE_TEST(read_log_test), }; diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c index 08b8452ad0bc..3a72fa5d5e9a 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.c +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -23,7 +23,8 @@ #include "utils.h" -int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms) +int mount_fs(const char *mount_dir, const char *backing_dir, + int read_timeout_ms) { static const char fs_name[] = INCFS_NAME; char mount_options[512]; @@ -39,7 +40,8 @@ int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms) return result; } -int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt) +int mount_fs_opt(const char *mount_dir, const char *backing_dir, + const char *opt) { static const char fs_name[] = INCFS_NAME; int result; @@ -50,179 +52,94 @@ int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt) return result; } -int unlink_node(int fd, int parent_ino, char *filename) +struct hash_section { + uint32_t algorithm; + uint8_t log2_blocksize; + uint32_t salt_size; + /* no salt */ + uint32_t hash_size; + uint8_t hash[SHA256_DIGEST_SIZE]; +} __packed; + +struct signature_blob { + uint32_t version; + uint32_t hash_section_size; + struct hash_section hash_section; + uint32_t signing_section_size; + uint8_t signing_section[]; +} __packed; + +size_t format_signature(void **buf, const char *root_hash, const char *add_data) { - return 0; -} - - -static EVP_PKEY *deserialize_private_key(const char *pem_key) -{ - BIO *bio = NULL; - EVP_PKEY *pkey = NULL; - int len = strlen(pem_key); - - bio = BIO_new_mem_buf(pem_key, len); - if (!bio) - return NULL; - - pkey = PEM_read_bio_PrivateKey(bio, NULL, NULL, NULL); - BIO_free(bio); - return pkey; -} - -static X509 *deserialize_cert(const char *pem_cert) -{ - BIO *bio = NULL; - X509 *cert = NULL; - int len = strlen(pem_cert); - - bio = BIO_new_mem_buf(pem_cert, len); - if (!bio) - return NULL; 
- - cert = PEM_read_bio_X509(bio, NULL, NULL, NULL); - BIO_free(bio); - return cert; -} - -bool sign_pkcs7(const void *data_to_sign, size_t data_size, - char *pkey_pem, char *cert_pem, - void **sig_ret, size_t *sig_size_ret) -{ - /* - * PKCS#7 signing flags: - * - * - PKCS7_BINARY signing binary data, so skip MIME translation - * - * - PKCS7_NOATTR omit extra authenticated attributes, such as - * SMIMECapabilities - * - * - PKCS7_PARTIAL PKCS7_sign() creates a handle only, then - * PKCS7_sign_add_signer() can add a signer later. - * This is necessary to change the message digest - * algorithm from the default of SHA-1. Requires - * OpenSSL 1.0.0 or later. - */ - int pkcs7_flags = PKCS7_BINARY | PKCS7_NOATTR | PKCS7_PARTIAL; - void *sig; - size_t sig_size; - BIO *bio = NULL; - PKCS7 *p7 = NULL; - EVP_PKEY *pkey = NULL; - X509 *cert = NULL; - bool ok = false; - - const EVP_MD *md = EVP_sha256(); - - pkey = deserialize_private_key(pkey_pem); - if (!pkey) { - printf("deserialize_private_key failed\n"); - goto out; - } - - cert = deserialize_cert(cert_pem); - if (!cert) { - printf("deserialize_cert failed\n"); - goto out; - } - - bio = BIO_new_mem_buf(data_to_sign, data_size); - if (!bio) - goto out; - - p7 = PKCS7_sign(NULL, NULL, NULL, bio, pkcs7_flags); - if (!p7) { - printf("failed to initialize PKCS#7 signature object\n"); - goto out; - } - - if (!PKCS7_sign_add_signer(p7, cert, pkey, md, pkcs7_flags)) { - printf("failed to add signer to PKCS#7 signature object\n"); - goto out; - } - - if (PKCS7_final(p7, bio, pkcs7_flags) != 1) { - printf("failed to finalize PKCS#7 signature\n"); - goto out; - } - - BIO_free(bio); - bio = BIO_new(BIO_s_mem()); - if (!bio) { - printf("out of memory\n"); - goto out; - } - - if (i2d_PKCS7_bio(bio, p7) != 1) { - printf("failed to DER-encode PKCS#7 signature object\n"); - goto out; - } + size_t size = sizeof(struct signature_blob) + strlen(add_data) + 1; + struct signature_blob *sb = malloc(size); + + *sb = (struct signature_blob){ + 
.version = INCFS_SIGNATURE_VERSION, + .hash_section_size = sizeof(struct hash_section), + .hash_section = + (struct hash_section){ + .algorithm = INCFS_HASH_TREE_SHA256, + .log2_blocksize = 12, + .salt_size = 0, + .hash_size = SHA256_DIGEST_SIZE, + }, + .signing_section_size = sizeof(uint32_t) + strlen(add_data) + 1, + }; - sig_size = BIO_get_mem_data(bio, &sig); - *sig_ret = malloc(sig_size); - memcpy(*sig_ret, sig, sig_size); - *sig_size_ret = sig_size; - ok = true; -out: - PKCS7_free(p7); - BIO_free(bio); - return ok; + memcpy(sb->hash_section.hash, root_hash, SHA256_DIGEST_SIZE); + memcpy((char *)sb->signing_section, add_data, strlen(add_data) + 1); + *buf = sb; + return size; } -int crypto_emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, - size_t size, const char *root_hash, char *sig, size_t sig_size, - char *add_data) +int crypto_emit_file(int fd, const char *dir, const char *filename, + incfs_uuid_t *id_out, size_t size, const char *root_hash, + const char *add_data) { int mode = __S_IFREG | 0555; - struct incfs_file_signature_info sig_info = { - .hash_tree_alg = root_hash - ? 
INCFS_HASH_TREE_SHA256 - : 0, - .root_hash = ptr_to_u64(root_hash), - .additional_data = ptr_to_u64(add_data), - .additional_data_size = strlen(add_data), - .signature = ptr_to_u64(sig), - .signature_size = sig_size, - }; + void *signature; + int error = 0; struct incfs_new_file_args args = { .size = size, .mode = mode, .file_name = ptr_to_u64(filename), .directory_path = ptr_to_u64(dir), - .signature_info = ptr_to_u64(&sig_info), .file_attr = 0, .file_attr_len = 0 }; + args.signature_size = format_signature(&signature, root_hash, add_data); + args.signature_info = ptr_to_u64(signature); + md5(filename, strlen(filename), (char *)args.file_id.bytes); - if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) != 0) - return -errno; + if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) != 0) { + error = -errno; + goto out; + } *id_out = args.file_id; - return 0; -} +out: + free(signature); + return error; +} -int emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, - size_t size, char *attr) +int emit_file(int fd, const char *dir, const char *filename, + incfs_uuid_t *id_out, size_t size, const char *attr) { int mode = __S_IFREG | 0555; - struct incfs_file_signature_info sig_info = { - .hash_tree_alg = 0, - .root_hash = ptr_to_u64(NULL) - }; - struct incfs_new_file_args args = { - .size = size, - .mode = mode, - .file_name = ptr_to_u64(filename), - .directory_path = ptr_to_u64(dir), - .signature_info = ptr_to_u64(&sig_info), - .file_attr = ptr_to_u64(attr), - .file_attr_len = attr ? strlen(attr) : 0 - }; + struct incfs_new_file_args args = { .size = size, + .mode = mode, + .file_name = ptr_to_u64(filename), + .directory_path = ptr_to_u64(dir), + .signature_info = ptr_to_u64(NULL), + .signature_size = 0, + .file_attr = ptr_to_u64(attr), + .file_attr_len = + attr ? 
strlen(attr) : 0 }; md5(filename, strlen(filename), (char *)args.file_id.bytes); @@ -250,7 +167,7 @@ int get_file_signature(int fd, unsigned char *buf, int buf_size) return -errno; } -loff_t get_file_size(char *name) +loff_t get_file_size(const char *name) { struct stat st; @@ -259,7 +176,7 @@ loff_t get_file_size(char *name) return -ENOENT; } -int open_commands_file(char *mount_dir) +int open_commands_file(const char *mount_dir) { char cmd_file[255]; int cmd_fd; @@ -273,7 +190,7 @@ int open_commands_file(char *mount_dir) return cmd_fd; } -int open_log_file(char *mount_dir) +int open_log_file(const char *mount_dir) { char cmd_file[255]; int cmd_fd; @@ -358,7 +275,7 @@ int delete_dir_tree(const char *dir_path) return result; } -void sha256(char *data, size_t dsize, char *hash) +void sha256(const char *data, size_t dsize, char *hash) { SHA256_CTX ctx; @@ -367,7 +284,7 @@ void sha256(char *data, size_t dsize, char *hash) SHA256_Final((unsigned char *)hash, &ctx); } -void md5(char *data, size_t dsize, char *hash) +void md5(const char *data, size_t dsize, char *hash) { MD5_CTX ctx; diff --git a/tools/testing/selftests/filesystems/incfs/utils.h b/tools/testing/selftests/filesystems/incfs/utils.h index 9c9ba3c5f70a..23c8a099662a 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.h +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -9,6 +9,8 @@ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) +#define __packed __attribute__((__packed__)) + #ifdef __LP64__ #define ptr_to_u64(p) ((__u64)p) #else @@ -17,9 +19,11 @@ #define SHA256_DIGEST_SIZE 32 -int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms); +int mount_fs(const char *mount_dir, const char *backing_dir, + int read_timeout_ms); -int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt); +int mount_fs_opt(const char *mount_dir, const char *backing_dir, + const char *opt); int get_file_bmap(int cmd_fd, int ino, unsigned char *buf, int buf_size); @@ -28,32 +32,26 @@ int 
get_file_signature(int fd, unsigned char *buf, int buf_size); int emit_node(int fd, char *filename, int *ino_out, int parent_ino, size_t size, mode_t mode, char *attr); -int emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, - size_t size, char *attr); - -int crypto_emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, - size_t size, const char *root_hash, char *sig, size_t sig_size, - char *add_data); +int emit_file(int fd, const char *dir, const char *filename, + incfs_uuid_t *id_out, size_t size, const char *attr); -int unlink_node(int fd, int parent_ino, char *filename); +int crypto_emit_file(int fd, const char *dir, const char *filename, + incfs_uuid_t *id_out, size_t size, const char *root_hash, + const char *add_data); -loff_t get_file_size(char *name); +loff_t get_file_size(const char *name); -int open_commands_file(char *mount_dir); +int open_commands_file(const char *mount_dir); -int open_log_file(char *mount_dir); +int open_log_file(const char *mount_dir); int wait_for_pending_reads(int fd, int timeout_ms, struct incfs_pending_read_info *prs, int prs_count); char *concat_file_name(const char *dir, char *file); -void sha256(char *data, size_t dsize, char *hash); - -void md5(char *data, size_t dsize, char *hash); +void sha256(const char *data, size_t dsize, char *hash); -bool sign_pkcs7(const void *data_to_sign, size_t data_size, - char *pkey_pem, char *cert_pem, - void **sig_ret, size_t *sig_size_ret); +void md5(const char *data, size_t dsize, char *hash); int delete_dir_tree(const char *path); -- GitLab From cb94ec7a4ddaddd0df315a33f621f9101e7a2e6f Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 11 Mar 2020 15:21:20 -0700 Subject: [PATCH 0439/1278] ANDROID: Incremental fs: Add INCFS_IOC_PERMIT_FILL Provide a securable way to open a file for filling Test: incfs_test passes Bug: 138149732 Signed-off-by: Paul Lawrence Change-Id: Ib4b6fd839ad30ce08e31121d19e2c0d7066d302f --- fs/incfs/vfs.c | 62 +++++++++++++++- 
include/uapi/linux/incrementalfs.h | 23 +++++- .../selftests/filesystems/incfs/incfs_test.c | 73 ++++++++++++++++--- 3 files changed, 143 insertions(+), 15 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index aae918963a76..f911c5eb9290 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -135,6 +135,11 @@ static const struct file_operations incfs_file_ops = { .compat_ioctl = dispatch_ioctl }; +enum FILL_PERMISSION { + CANT_FILL = 0, + CAN_FILL = 1, +}; + static const struct file_operations incfs_pending_read_file_ops = { .read = pending_reads_read, .poll = pending_reads_poll, @@ -1284,6 +1289,9 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) if (!df) return -EBADF; + if ((uintptr_t)f->private_data != CAN_FILL) + return -EPERM; + if (copy_from_user(&fill_blocks, usr_fill_blocks, sizeof(fill_blocks))) return -EFAULT; @@ -1336,6 +1344,53 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) return i; } +static long ioctl_permit_fill(struct file *f, void __user *arg) +{ + struct incfs_permit_fill __user *usr_permit_fill = arg; + struct incfs_permit_fill permit_fill; + long error = 0; + struct file *file = 0; + + if (f->f_op != &incfs_pending_read_file_ops) + return -EPERM; + + if (copy_from_user(&permit_fill, usr_permit_fill, sizeof(permit_fill))) + return -EFAULT; + + file = fget(permit_fill.file_descriptor); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (file->f_op != &incfs_file_ops) { + error = -EPERM; + goto out; + } + + if (file->f_inode->i_sb != f->f_inode->i_sb) { + error = -EPERM; + goto out; + } + + switch ((uintptr_t)file->private_data) { + case CANT_FILL: + file->private_data = (void *)CAN_FILL; + break; + + case CAN_FILL: + pr_debug("CAN_FILL already set"); + break; + + default: + pr_warn("Invalid file private data"); + error = -EFAULT; + goto out; + } + +out: + fput(file); + return error; +} + static long ioctl_read_file_signature(struct file *f, void __user *arg) { struct incfs_get_file_sig_args __user 
*args_usr_ptr = arg; @@ -1393,6 +1448,8 @@ static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) return ioctl_create_file(mi, (void __user *)arg); case INCFS_IOC_FILL_BLOCKS: return ioctl_fill_blocks(f, (void __user *)arg); + case INCFS_IOC_PERMIT_FILL: + return ioctl_permit_fill(f, (void __user *)arg); case INCFS_IOC_READ_FILE_SIGNATURE: return ioctl_read_file_signature(f, (void __user *)arg); default: @@ -1823,9 +1880,10 @@ static int file_open(struct inode *inode, struct file *file) goto out; } - if (S_ISREG(inode->i_mode)) + if (S_ISREG(inode->i_mode)) { err = make_inode_ready_for_data_ops(mi, inode, backing_file); - else if (S_ISDIR(inode->i_mode)) { + file->private_data = (void *)CANT_FILL; + } else if (S_ISDIR(inode->i_mode)) { struct dir_file *dir = NULL; dir = incfs_open_dir_file(mi, backing_file); diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 2efc53f591ef..fd65f575cdf0 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -51,13 +51,23 @@ _IOR(INCFS_IOCTL_BASE_CODE, 31, struct incfs_get_file_sig_args) /* - * Fill in one or more data block + * Fill in one or more data block. 
This may only be called on a handle + * passed as a parameter to INCFS_IOC_PERMIT_FILL * * Returns number of blocks filled in, or error if none were */ #define INCFS_IOC_FILL_BLOCKS \ _IOR(INCFS_IOCTL_BASE_CODE, 32, struct incfs_fill_blocks) +/* + * Permit INCFS_IOC_FILL_BLOCKS on the given file descriptor + * May only be called on .pending_reads file + * + * Returns 0 on success or error + */ +#define INCFS_IOC_PERMIT_FILL \ + _IOW(INCFS_IOCTL_BASE_CODE, 33, struct incfs_permit_fill) + enum incfs_compression_alg { COMPRESSION_NONE = 0, COMPRESSION_LZ4 = 1 @@ -139,6 +149,17 @@ struct incfs_fill_blocks { __aligned_u64 fill_blocks; }; +/* + * Permit INCFS_IOC_FILL_BLOCKS on the given file descriptor + * May only be called on .pending_reads file + * + * Argument for INCFS_IOC_PERMIT_FILL + */ +struct incfs_permit_fill { + /* File to permit fills on */ + __u32 file_descriptor; +}; + enum incfs_hash_tree_algorithm { INCFS_HASH_TREE_NONE = 0, INCFS_HASH_TREE_SHA256 = 1 diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 1cd1226f4e44..7031561c0173 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -204,15 +204,43 @@ static char *get_index_filename(const char *mnt_dir, incfs_uuid_t id) return strdup(path); } -int open_file_by_id(char *mnt_dir, incfs_uuid_t id) +int open_file_by_id(const char *mnt_dir, incfs_uuid_t id, bool use_ioctl) { char *path = get_index_filename(mnt_dir, id); + int cmd_fd = open_commands_file(mnt_dir); int fd = open(path, O_RDWR); + struct incfs_permit_fill permit_fill = { + .file_descriptor = fd, + }; + int error = 0; - free(path); if (fd < 0) { print_error("Can't open file by id."); + error = -errno; + goto out; + } + + if (use_ioctl && ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { + print_error("Failed to call PERMIT_FILL"); + error = -errno; + goto out; + } + + if (ioctl(fd,
INCFS_IOC_PERMIT_FILL, &permit_fill) != -1 || + errno != EPERM) { + print_error( + "Successfully called PERMIT_FILL on non pending_read file"); return -errno; + goto out; + } + +out: + free(path); + close(cmd_fd); + + if (error) { + close(fd); + return error; } return fd; @@ -258,12 +286,6 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, int i = 0; int blocks_written = 0; - fd = open_file_by_id(mnt_dir, file->id); - if (fd <= 0) { - error = -errno; - goto out; - } - for (i = 0; i < block_count; i++) { int block_index = blocks[i]; bool compress = (file->index + block_index) % 2 == 0; @@ -315,6 +337,24 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, } if (!error) { + fd = open_file_by_id(mnt_dir, file->id, false); + if (fd < 0) { + error = -errno; + goto out; + } + write_res = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + if (write_res >= 0) { + ksft_print_msg("Wrote to file via normal fd error\n"); + error = -EPERM; + goto out; + } + + close(fd); + fd = open_file_by_id(mnt_dir, file->id, true); + if (fd < 0) { + error = -errno; + goto out; + } write_res = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); if (write_res < 0) error = -errno; @@ -706,7 +746,6 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) int err; int i; int fd; - char *file_path; struct incfs_fill_blocks fill_blocks = { .count = file->mtree_block_count, }; @@ -729,9 +768,7 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) }; } - file_path = concat_file_name(mount_dir, file->name); - fd = open(file_path, O_RDWR); - free(file_path); + fd = open_file_by_id(mount_dir, file->id, false); if (fd < 0) { err = errno; goto failure; @@ -739,7 +776,19 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); close(fd); + if (err >= 0) { + err = -EPERM; + goto failure; + } + + fd = open_file_by_id(mount_dir, file->id, true); + if (fd < 0) { + err 
= errno; + goto failure; + } + err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + close(fd); if (err < fill_blocks.count) err = errno; else { -- GitLab From ffee24cf3053e942fc694ed52d79dbda6e6c01c8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 4 Dec 2018 08:47:44 -0800 Subject: [PATCH 0440/1278] phy: Revert toggling reset changes. commit 7b566f70e1bf65b189b66eb3de6f431c30f7dff2 upstream. This reverts: ef1b5bf506b1 ("net: phy: Fix not to call phy_resume() if PHY is not attached") 8c85f4b81296 ("net: phy: micrel: add toggling phy reset if PHY is not attached") Andrew Lunn informs me that there are alternative efforts underway to fix this more properly. Signed-off-by: David S. Miller [just take the ef1b5bf506b1 revert - gregkh] Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 99dae55cd334..a98c227a4c2e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -76,7 +76,7 @@ static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); #ifdef CONFIG_PM -static bool mdio_bus_phy_may_suspend(struct phy_device *phydev, bool suspend) +static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { struct device_driver *drv = phydev->mdio.dev.driver; struct phy_driver *phydrv = to_phy_driver(drv); @@ -88,11 +88,10 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev, bool suspend) /* PHY not attached? May suspend if the PHY has not already been * suspended as part of a prior call to phy_disconnect() -> * phy_detach() -> phy_suspend() because the parent netdev might be the - * MDIO bus driver and clock gated at this point. Also may resume if - * PHY is not attached. + * MDIO bus driver and clock gated at this point. */ if (!netdev) - return suspend ? 
!phydev->suspended : phydev->suspended; + return !phydev->suspended; /* Don't suspend PHY if the attached netdev parent may wakeup. * The parent may point to a PCI device, as in tg3 driver. @@ -122,7 +121,7 @@ static int mdio_bus_phy_suspend(struct device *dev) if (phydev->attached_dev && phydev->adjust_link) phy_stop_machine(phydev); - if (!mdio_bus_phy_may_suspend(phydev, true)) + if (!mdio_bus_phy_may_suspend(phydev)) return 0; return phy_suspend(phydev); @@ -133,7 +132,7 @@ static int mdio_bus_phy_resume(struct device *dev) struct phy_device *phydev = to_phy_device(dev); int ret; - if (!mdio_bus_phy_may_suspend(phydev, false)) + if (!mdio_bus_phy_may_suspend(phydev)) goto no_resume; ret = phy_resume(phydev); -- GitLab From 139fe35030c33abc54e0cee38a0c50388a38cdd9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 20 Feb 2020 15:34:53 -0800 Subject: [PATCH 0441/1278] net: phy: Avoid multiple suspends commit 503ba7c6961034ff0047707685644cad9287c226 upstream. It is currently possible for a PHY device to be suspended as part of a network device driver's suspend call while it is still being attached to that net_device, either via phy_suspend() or implicitly via phy_stop(). Later on, when the MDIO bus controller get suspended, we would attempt to suspend again the PHY because it is still attached to a network device. This is both a waste of time and creates an opportunity for improper clock/power management bugs to creep in. Fixes: 803dd9c77ac3 ("net: phy: avoid suspending twice a PHY") Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index a98c227a4c2e..31ef3e47edf6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -91,7 +91,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) * MDIO bus driver and clock gated at this point. */ if (!netdev) - return !phydev->suspended; + goto out; /* Don't suspend PHY if the attached netdev parent may wakeup. * The parent may point to a PCI device, as in tg3 driver. @@ -106,7 +106,8 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (device_may_wakeup(&netdev->dev)) return false; - return true; +out: + return !phydev->suspended; } static int mdio_bus_phy_suspend(struct device *dev) -- GitLab From 81ab17d70bfbcebc328556ab9896b3015af76c60 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Thu, 5 Mar 2020 17:45:57 +0300 Subject: [PATCH 0442/1278] cgroup, netclassid: periodically release file_lock on classid updating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 018d26fcd12a75fb9b5fe233762aa3f2f0854b88 ] In our production environment we have faced a problem where updating classid in a cgroup with heavy tasks causes a long freeze of the file tables in these tasks. By heavy tasks we understand tasks with many threads and opened sockets (e.g. balancers). This freeze leads to an increased number of client timeouts. This patch implements the following logic to fix this issue: after iterating 1000 file descriptors file table lock will be released thus providing a time gap for socket creation/deletion. Now update is non atomic and socket may be skipped using calls: dup2(oldfd, newfd); close(oldfd); But this case is not typical. Moreover before this patch skip is possible too by hiding socket fd in unix socket buffer.
New sockets will be allocated with updated classid because cgroup state is updated before start of the file descriptors iteration. So in common cases this patch has no side effects. Signed-off-by: Dmitry Yakunin Reviewed-by: Konstantin Khlebnikov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/netclassid_cgroup.c | 47 ++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 7bf833598615..67feeb207dad 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -57,30 +57,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css) kfree(css_cls_state(css)); } +/* + * To avoid freezing of sockets creation for tasks with big number of threads + * and opened sockets lets release file_lock every 1000 iterated descriptors. + * New sockets will already have been created with new classid. + */ + +struct update_classid_context { + u32 classid; + unsigned int batch; +}; + +#define UPDATE_CLASSID_BATCH 1000 + static int update_classid_sock(const void *v, struct file *file, unsigned n) { int err; + struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file, &err); if (sock) { spin_lock(&cgroup_sk_update_lock); - sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, - (unsigned long)v); + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); spin_unlock(&cgroup_sk_update_lock); } + if (--ctx->batch == 0) { + ctx->batch = UPDATE_CLASSID_BATCH; + return n + 1; + } return 0; } +static void update_classid_task(struct task_struct *p, u32 classid) +{ + struct update_classid_context ctx = { + .classid = classid, + .batch = UPDATE_CLASSID_BATCH + }; + unsigned int fd = 0; + + do { + task_lock(p); + fd = iterate_fd(p->files, fd, update_classid_sock, &ctx); + task_unlock(p); + cond_resched(); + } while (fd); +} + static void cgrp_attach(struct cgroup_taskset *tset) { struct 
cgroup_subsys_state *css; struct task_struct *p; cgroup_taskset_for_each(p, css, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)css_cls_state(css)->classid); - task_unlock(p); + update_classid_task(p, css_cls_state(css)->classid); } } @@ -102,10 +132,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, css_task_iter_start(css, 0, &it); while ((p = css_task_iter_next(&it))) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)cs->classid); - task_unlock(p); + update_classid_task(p, cs->classid); cond_resched(); } css_task_iter_end(&it); -- GitLab From 31b4b975e3ab80af34d9dba33664e7cd01274e94 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Mar 2020 22:05:14 -0800 Subject: [PATCH 0443/1278] gre: fix uninit-value in __iptunnel_pull_header [ Upstream commit 17c25cafd4d3e74c83dce56b158843b19c40b414 ] syzbot found an interesting case of the kernel reading an uninit-value [1] Problem is in the handling of ETH_P_WCCP in gre_parse_header() We look at the byte following GRE options to eventually decide if the options are four bytes longer. Use skb_header_pointer() to not pull bytes if we found that no more bytes were needed. All callers of gre_parse_header() are properly using pskb_may_pull() anyway before proceeding to next header. 
[1] BUG: KMSAN: uninit-value in pskb_may_pull include/linux/skbuff.h:2303 [inline] BUG: KMSAN: uninit-value in __iptunnel_pull_header+0x30c/0xbd0 net/ipv4/ip_tunnel_core.c:94 CPU: 1 PID: 11784 Comm: syz-executor940 Not tainted 5.6.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 pskb_may_pull include/linux/skbuff.h:2303 [inline] __iptunnel_pull_header+0x30c/0xbd0 net/ipv4/ip_tunnel_core.c:94 iptunnel_pull_header include/net/ip_tunnels.h:411 [inline] gre_rcv+0x15e/0x19c0 net/ipv6/ip6_gre.c:606 ip6_protocol_deliver_rcu+0x181b/0x22c0 net/ipv6/ip6_input.c:432 ip6_input_finish net/ipv6/ip6_input.c:473 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ip6_input net/ipv6/ip6_input.c:482 [inline] ip6_mc_input+0xdf2/0x1460 net/ipv6/ip6_input.c:576 dst_input include/net/dst.h:442 [inline] ip6_rcv_finish net/ipv6/ip6_input.c:76 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ipv6_rcv+0x683/0x710 net/ipv6/ip6_input.c:306 __netif_receive_skb_one_core net/core/dev.c:5198 [inline] __netif_receive_skb net/core/dev.c:5312 [inline] netif_receive_skb_internal net/core/dev.c:5402 [inline] netif_receive_skb+0x66b/0xf20 net/core/dev.c:5461 tun_rx_batched include/linux/skbuff.h:4321 [inline] tun_get_user+0x6aef/0x6f60 drivers/net/tun.c:1997 tun_chr_write_iter+0x1f2/0x360 drivers/net/tun.c:2026 call_write_iter include/linux/fs.h:1901 [inline] new_sync_write fs/read_write.c:483 [inline] __vfs_write+0xa5a/0xca0 fs/read_write.c:496 vfs_write+0x44a/0x8f0 fs/read_write.c:558 ksys_write+0x267/0x450 fs/read_write.c:611 __do_sys_write fs/read_write.c:623 [inline] __se_sys_write fs/read_write.c:620 [inline] __ia32_sys_write+0xdb/0x120 fs/read_write.c:620 do_syscall_32_irqs_on arch/x86/entry/common.c:339 [inline] 
do_fast_syscall_32+0x3c7/0x6e0 arch/x86/entry/common.c:410 entry_SYSENTER_compat+0x68/0x77 arch/x86/entry/entry_64_compat.S:139 RIP: 0023:0xf7f62d99 Code: 90 e8 0b 00 00 00 f3 90 0f ae e8 eb f9 8d 74 26 00 89 3c 24 c3 90 90 90 90 90 90 90 90 90 90 90 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 eb 0d 90 90 90 90 90 90 90 90 90 90 90 90 RSP: 002b:00000000fffedb2c EFLAGS: 00000217 ORIG_RAX: 0000000000000004 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000020002580 RDX: 0000000000000fca RSI: 0000000000000036 RDI: 0000000000000004 RBP: 0000000000008914 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:82 slab_alloc_node mm/slub.c:2793 [inline] __kmalloc_node_track_caller+0xb40/0x1200 mm/slub.c:4401 __kmalloc_reserve net/core/skbuff.c:142 [inline] __alloc_skb+0x2fd/0xac0 net/core/skbuff.c:210 alloc_skb include/linux/skbuff.h:1051 [inline] alloc_skb_with_frags+0x18c/0xa70 net/core/skbuff.c:5766 sock_alloc_send_pskb+0xada/0xc60 net/core/sock.c:2242 tun_alloc_skb drivers/net/tun.c:1529 [inline] tun_get_user+0x10ae/0x6f60 drivers/net/tun.c:1843 tun_chr_write_iter+0x1f2/0x360 drivers/net/tun.c:2026 call_write_iter include/linux/fs.h:1901 [inline] new_sync_write fs/read_write.c:483 [inline] __vfs_write+0xa5a/0xca0 fs/read_write.c:496 vfs_write+0x44a/0x8f0 fs/read_write.c:558 ksys_write+0x267/0x450 fs/read_write.c:611 __do_sys_write fs/read_write.c:623 [inline] __se_sys_write fs/read_write.c:620 [inline] __ia32_sys_write+0xdb/0x120 fs/read_write.c:620 do_syscall_32_irqs_on arch/x86/entry/common.c:339 [inline] do_fast_syscall_32+0x3c7/0x6e0 arch/x86/entry/common.c:410 entry_SYSENTER_compat+0x68/0x77 arch/x86/entry/entry_64_compat.S:139 Fixes: 
95f5c64c3c13 ("gre: Move utility functions to common headers") Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/gre_demux.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 7efe740c06eb..4a5e55e94a9e 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -60,7 +60,9 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); -/* Fills in tpi and returns header length to be pulled. */ +/* Fills in tpi and returns header length to be pulled. + * Note that caller must use pskb_may_pull() before pulling GRE header. + */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err, __be16 proto, int nhs) { @@ -114,8 +116,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + u8 _val, *val; + + val = skb_header_pointer(skb, nhs + hdr_len, + sizeof(_val), &_val); + if (!val) + return -EINVAL; tpi->proto = proto; - if ((*(u8 *)options & 0xF0) != 0x40) + if ((*val & 0xF0) != 0x40) hdr_len += 4; } tpi->hdr_len = hdr_len; -- GitLab From 4dcb3398fbb14f613eda33b770f641b273ef8f69 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 10 Mar 2020 15:27:37 +0800 Subject: [PATCH 0444/1278] ipv6/addrconf: call ipv6_mc_up() for non-Ethernet interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 60380488e4e0b95e9e82aa68aa9705baa86de84c ] Rafał found an issue that for non-Ethernet interface, if we down and up frequently, the memory will be consumed slowly. The reason is we add allnodes/allrouters addressed in multicast list in ipv6_add_dev(). 
When link down, we call ipv6_mc_down(), store all multicast addresses via mld_add_delrec(). But when link up, we don't call ipv6_mc_up() for non-Ethernet interface to remove the addresses. This makes idev->mc_tomb get bigger and bigger. The call stack looks like: addrconf_notify(NETDEV_REGISTER) ipv6_add_dev ipv6_dev_mc_inc(ff01::1) ipv6_dev_mc_inc(ff02::1) ipv6_dev_mc_inc(ff02::2) addrconf_notify(NETDEV_UP) addrconf_dev_config /* Alas, we support only Ethernet autoconfiguration. */ return; addrconf_notify(NETDEV_DOWN) addrconf_ifdown ipv6_mc_down igmp6_group_dropped(ff02::2) mld_add_delrec(ff02::2) igmp6_group_dropped(ff02::1) igmp6_group_dropped(ff01::1) After investigating, I couldn't find a rule to disable multicast on non-Ethernet interface. In RFC2460, the link could be Ethernet, PPP, ATM, tunnels, etc. In IPv4, it doesn't check the dev type when calling ip_mc_up() in inetdev_event(). Even for IPv6, we don't check the dev type and call ipv6_add_dev(), ipv6_dev_mc_inc() after register device. So I think it's OK to fix this memory consumer by calling ipv6_mc_up() for non-Ethernet interface. v2: Also check IFF_MULTICAST flag to make sure the interface supports multicast Reported-by: Rafał Miłecki Tested-by: Rafał Miłecki Fixes: 74235a25c673 ("[IPV6] addrconf: Fix IPv6 on tuntap tunnels") Fixes: 1666d49e1d41 ("mld: do not remove mld souce list info when set link down") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a81201dd3a1a..092e72d6a61d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3223,6 +3223,10 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_TUNNEL) && (dev->type != ARPHRD_NONE)) { /* Alas, we support only Ethernet autoconfiguration.
*/ + idev = __in6_dev_get(dev); + if (!IS_ERR_OR_NULL(idev) && dev->flags & IFF_UP && + dev->flags & IFF_MULTICAST) + ipv6_mc_up(idev); return; } -- GitLab From 6c5251993d38c59521b645a29eb2479957ba512f Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 9 Mar 2020 15:57:02 -0700 Subject: [PATCH 0445/1278] ipvlan: add cond_resched_rcu() while processing muticast backlog MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e18b353f102e371580f3f01dd47567a25acc3c1d ] If there are substantial number of slaves created as simulated by Syzbot, the backlog processing could take much longer and result into the issue found in the Syzbot report. INFO: rcu_sched detected stalls on CPUs/tasks: (detected by 1, t=10502 jiffies, g=5049, c=5048, q=752) All QSes seen, last rcu_sched kthread activity 10502 (4294965563-4294955061), jiffies_till_next_fqs=1, root ->qsmask 0x0 syz-executor.1 R running task on cpu 1 10984 11210 3866 0x30020008 179034491270 Call Trace: [] _sched_show_task kernel/sched/core.c:8063 [inline] [] _sched_show_task.cold+0x2fd/0x392 kernel/sched/core.c:8030 [] sched_show_task+0xb/0x10 kernel/sched/core.c:8073 [] print_other_cpu_stall kernel/rcu/tree.c:1577 [inline] [] check_cpu_stall kernel/rcu/tree.c:1695 [inline] [] __rcu_pending kernel/rcu/tree.c:3478 [inline] [] rcu_pending kernel/rcu/tree.c:3540 [inline] [] rcu_check_callbacks.cold+0xbb4/0xc29 kernel/rcu/tree.c:2876 [] update_process_times+0x32/0x80 kernel/time/timer.c:1635 [] tick_sched_handle+0xa0/0x180 kernel/time/tick-sched.c:161 [] tick_sched_timer+0x44/0x130 kernel/time/tick-sched.c:1193 [] __run_hrtimer kernel/time/hrtimer.c:1393 [inline] [] __hrtimer_run_queues+0x307/0xd90 kernel/time/hrtimer.c:1455 [] hrtimer_interrupt+0x2ea/0x730 kernel/time/hrtimer.c:1513 [] local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1031 [inline] [] smp_apic_timer_interrupt+0x144/0x5e0 arch/x86/kernel/apic/apic.c:1056 [] apic_timer_interrupt+0x8e/0xa0 
arch/x86/entry/entry_64.S:778 RIP: 0010:do_raw_read_lock+0x22/0x80 kernel/locking/spinlock_debug.c:153 RSP: 0018:ffff8801dad07ab8 EFLAGS: 00000a02 ORIG_RAX: ffffffffffffff12 RAX: 0000000000000000 RBX: ffff8801c4135680 RCX: 0000000000000000 RDX: 1ffff10038826afe RSI: ffff88019d816bb8 RDI: ffff8801c41357f0 RBP: ffff8801dad07ac0 R08: 0000000000004b15 R09: 0000000000310273 R10: ffff88019d816bb8 R11: 0000000000000001 R12: ffff8801c41357e8 R13: 0000000000000000 R14: ffff8801cfb19850 R15: ffff8801cfb198b0 [] __raw_read_lock_bh include/linux/rwlock_api_smp.h:177 [inline] [] _raw_read_lock_bh+0x3e/0x50 kernel/locking/spinlock.c:240 [] ipv6_chk_mcast_addr+0x11a/0x6f0 net/ipv6/mcast.c:1006 [] ip6_mc_input+0x319/0x8e0 net/ipv6/ip6_input.c:482 [] dst_input include/net/dst.h:449 [inline] [] ip6_rcv_finish+0x408/0x610 net/ipv6/ip6_input.c:78 [] NF_HOOK include/linux/netfilter.h:292 [inline] [] NF_HOOK include/linux/netfilter.h:286 [inline] [] ipv6_rcv+0x10e/0x420 net/ipv6/ip6_input.c:278 [] __netif_receive_skb_one_core+0x12a/0x1f0 net/core/dev.c:5303 [] __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:5417 [] process_backlog+0x216/0x6c0 net/core/dev.c:6243 [] napi_poll net/core/dev.c:6680 [inline] [] net_rx_action+0x47b/0xfb0 net/core/dev.c:6748 [] __do_softirq+0x2c8/0x99a kernel/softirq.c:317 [] invoke_softirq kernel/softirq.c:399 [inline] [] irq_exit+0x16a/0x1a0 kernel/softirq.c:439 [] exiting_irq arch/x86/include/asm/apic.h:561 [inline] [] smp_apic_timer_interrupt+0x165/0x5e0 arch/x86/kernel/apic/apic.c:1058 [] apic_timer_interrupt+0x8e/0xa0 arch/x86/entry/entry_64.S:778 RIP: 0010:__sanitizer_cov_trace_pc+0x26/0x50 kernel/kcov.c:102 RSP: 0018:ffff880196033bd8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff12 RAX: ffff88019d8161c0 RBX: 00000000ffffffff RCX: ffffc90003501000 RDX: 0000000000000002 RSI: ffffffff816236d1 RDI: 0000000000000005 RBP: ffff880196033bd8 R08: ffff88019d8161c0 R09: 0000000000000000 R10: 1ffff10032c067f0 R11: 0000000000000000 R12: 0000000000000000 R13: 
0000000000000080 R14: 0000000000000000 R15: 0000000000000000 [] do_futex+0x151/0x1d50 kernel/futex.c:3548 [] C_SYSC_futex kernel/futex_compat.c:201 [inline] [] compat_SyS_futex+0x270/0x3b0 kernel/futex_compat.c:175 [] do_syscall_32_irqs_on arch/x86/entry/common.c:353 [inline] [] do_fast_syscall_32+0x357/0xe1c arch/x86/entry/common.c:415 [] entry_SYSENTER_compat+0x8b/0x9d arch/x86/entry/entry_64_compat.S:139 RIP: 0023:0xf7f23c69 RSP: 002b:00000000f5d1f12c EFLAGS: 00000282 ORIG_RAX: 00000000000000f0 RAX: ffffffffffffffda RBX: 000000000816af88 RCX: 0000000000000080 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000816af8c RBP: 00000000f5d1f228 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 rcu_sched kthread starved for 10502 jiffies! g5049 c5048 f0x2 RCU_GP_WAIT_FQS(3) ->state=0x0 ->cpu=1 rcu_sched R running task on cpu 1 13048 8 2 0x90000000 179099587640 Call Trace: [] context_switch+0x60f/0xa60 kernel/sched/core.c:3209 [] __schedule+0x5aa/0x1da0 kernel/sched/core.c:3934 [] schedule+0x8f/0x1b0 kernel/sched/core.c:4011 [] schedule_timeout+0x50d/0xee0 kernel/time/timer.c:1803 [] rcu_gp_kthread+0xda1/0x3b50 kernel/rcu/tree.c:2327 [] kthread+0x348/0x420 kernel/kthread.c:246 [] ret_from_fork+0x56/0x70 arch/x86/entry/entry_64.S:393 Fixes: ba35f8588f47 (“ipvlan: Defer multicast / broadcast processing to a work-queue”) Signed-off-by: Mahesh Bandewar Reported-by: syzbot Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 71ff6bd4be9f..91886b5323df 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -240,6 +240,7 @@ void ipvlan_process_multicast(struct work_struct *work) } ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); local_bh_enable(); + cond_resched_rcu(); } rcu_read_unlock(); -- GitLab From 7c315855c6f490d0bb70bc38a85b536011b9bd82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Mar 2020 18:22:58 -0700 Subject: [PATCH 0446/1278] ipvlan: do not use cond_resched_rcu() in ipvlan_process_multicast() [ Upstream commit afe207d80a61e4d6e7cfa0611a4af46d0ba95628 ] Commit e18b353f102e ("ipvlan: add cond_resched_rcu() while processing muticast backlog") added a cond_resched_rcu() in a loop using rcu protection to iterate over slaves. This is breaking rcu rules, so lets instead use cond_resched() at a point we can reschedule Fixes: e18b353f102e ("ipvlan: add cond_resched_rcu() while processing muticast backlog") Signed-off-by: Eric Dumazet Cc: Mahesh Bandewar Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 91886b5323df..1d97d6958e4b 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -240,7 +240,6 @@ void ipvlan_process_multicast(struct work_struct *work) } ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); local_bh_enable(); - cond_resched_rcu(); } rcu_read_unlock(); @@ -257,6 +256,7 @@ void ipvlan_process_multicast(struct work_struct *work) } if (dev) dev_put(dev); + cond_resched(); } } -- GitLab From d5f90b1703867035e4b80014d3341a65cf6d8f31 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 26 Feb 2020 19:47:34 +0100 Subject: [PATCH 0447/1278] netlink: Use netlink header as base to calculate bad attribute offset [ Upstream commit 84b3268027641401bb8ad4427a90a3cce2eb86f5 ] Userspace might send a batch that is composed of several netlink messages. The netlink_ack() function must use the pointer to the netlink header as base to calculate the bad attribute offset. Fixes: 2d4bc93368f5 ("netlink: extended ACK reporting") Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 07924559cb10..3e4e07559272 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2389,7 +2389,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, in_skb->len)) WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, (u8 *)extack->bad_attr - - in_skb->data)); + (u8 *)nlh)); } else { if (extack->cookie_len) WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, -- GitLab From 3f9e0b25fc13589071f93642b6de033e4792ddf0 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Tue, 10 Mar 2020 18:22:24 +0300 Subject: [PATCH 0448/1278] net: macsec: update SCI upon MAC address change. [ Upstream commit 6fc498bc82929ee23aa2f35a828c6178dfd3f823 ] SCI should be updated, because it contains MAC in its first 6 octets. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Dmitry Bogdanov Signed-off-by: Mark Starovoytov Signed-off-by: Igor Russkikh Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macsec.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 9bb65e0af7dd..ed2cb3ac578a 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2871,6 +2871,11 @@ static void macsec_dev_set_rx_mode(struct net_device *dev) dev_uc_sync(real_dev, dev); } +static sci_t dev_to_sci(struct net_device *dev, __be16 port) +{ + return make_sci(dev->dev_addr, port); +} + static int macsec_set_mac_address(struct net_device *dev, void *p) { struct macsec_dev *macsec = macsec_priv(dev); @@ -2892,6 +2897,7 @@ static int macsec_set_mac_address(struct net_device *dev, void *p) out: ether_addr_copy(dev->dev_addr, addr->sa_data); + macsec->secy.sci = dev_to_sci(dev, MACSEC_PORT_ES); return 0; } @@ -3159,11 +3165,6 @@ static bool sci_exists(struct net_device *dev, sci_t sci) return false; } -static sci_t dev_to_sci(struct net_device *dev, __be16 port) -{ - return make_sci(dev->dev_addr, port); -} - static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) { struct macsec_dev *macsec = macsec_priv(dev); -- GitLab From ac07a9a4de593a9a3d94aa92f98e09f979c9eb03 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Mar 2020 17:24:31 +0300 Subject: [PATCH 0449/1278] net: nfc: fix bounds checking bugs on "pipe" [ Upstream commit a3aefbfe45751bf7b338c181b97608e276b5bb73 ] This is similar to commit 674d9de02aa7 ("NFC: Fix possible memory corruption when handling SHDLC I-Frame commands") and commit d7ee81ad09f0 ("NFC: nci: Add some bounds checking in nci_hci_cmd_received()") which added range checks on "pipe". The "pipe" variable comes skb->data[0] in nfc_hci_msg_rx_work(). It's in the 0-255 range. We're using it as the array index into the hdev->pipes[] array which has NFC_HCI_MAX_PIPES (128) members. 
Fixes: 118278f20aa8 ("NFC: hci: Add pipes table to reference them with a tuple {gate, host}") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/hci/core.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 6bf14f4f4b42..ae315dbd3732 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -193,13 +193,20 @@ void nfc_hci_resp_received(struct nfc_hci_dev *hdev, u8 result, void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, struct sk_buff *skb) { - u8 gate = hdev->pipes[pipe].gate; u8 status = NFC_HCI_ANY_OK; struct hci_create_pipe_resp *create_info; struct hci_delete_pipe_noti *delete_info; struct hci_all_pipe_cleared_noti *cleared_info; + u8 gate; - pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); + pr_debug("from pipe %x cmd %x\n", pipe, cmd); + + if (pipe >= NFC_HCI_MAX_PIPES) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + + gate = hdev->pipes[pipe].gate; switch (cmd) { case NFC_HCI_ADM_NOTIFY_PIPE_CREATED: @@ -387,8 +394,14 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb) { int r = 0; - u8 gate = hdev->pipes[pipe].gate; + u8 gate; + + if (pipe >= NFC_HCI_MAX_PIPES) { + pr_err("Discarded event %x to invalid pipe %x\n", event, pipe); + goto exit; + } + gate = hdev->pipes[pipe].gate; if (gate == NFC_HCI_INVALID_GATE) { pr_err("Discarded event %x to unopened pipe %x\n", event, pipe); goto exit; -- GitLab From db159fd8e2c71257e988612ab18e367a672f0243 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 9 Mar 2020 11:34:35 -0400 Subject: [PATCH 0450/1278] net/packet: tpacket_rcv: do not increment ring index on drop [ Upstream commit 46e4c421a053c36bf7a33dda2272481bcaf3eed3 ] In one error case, tpacket_rcv drops packets after incrementing the ring producer index. 
If this happens, it does not update tp_status to TP_STATUS_USER and thus the reader is stalled for an iteration of the ring, causing out of order arrival. The only such error path is when virtio_net_hdr_from_skb fails due to encountering an unknown GSO type. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 46b7fac82775..387589a4a340 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2299,6 +2299,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) goto drop_n_account; + + if (do_vnet && + virtio_net_hdr_from_skb(skb, h.raw + macoff - + sizeof(struct virtio_net_hdr), + vio_le(), true, 0)) + goto drop_n_account; + if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); /* @@ -2311,12 +2318,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, status |= TP_STATUS_LOSING; } - if (do_vnet && - virtio_net_hdr_from_skb(skb, h.raw + macoff - - sizeof(struct virtio_net_hdr), - vio_le(), true, 0)) - goto drop_n_account; - po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; -- GitLab From 9c7cef12efca42f66c494d8e2c63dc7a76cb46ae Mon Sep 17 00:00:00 2001 From: You-Sheng Yang Date: Wed, 26 Feb 2020 23:37:10 +0800 Subject: [PATCH 0451/1278] r8152: check disconnect status after long sleep [ Upstream commit d64c7a08034b32c285e576208ae44fc3ba3fa7df ] Dell USB Type C docking WD19/WD19DC attaches additional peripherals as: /: Bus 02.Port 1: Dev 1, Class=root_hub, Driver=xhci_hcd/6p, 5000M |__ Port 1: Dev 11, If 0, Class=Hub, Driver=hub/4p, 5000M |__ Port 3: Dev 12, If 0, Class=Hub, Driver=hub/4p, 5000M |__ Port 4: Dev 13, If 0, Class=Vendor Specific Class, Driver=r8152, 5000M where usb 2-1-3 is a hub connecting all USB Type-A/C 
ports on the dock. When hotplugging such dock with additional usb devices already attached on it, the probing process may reset usb 2.1 port, therefore r8152 ethernet device is also reset. However, during r8152 device init there are several for-loops that, when it's unable to retrieve hardware registers due to being disconnected from USB, may take up to 14 seconds each in practice, and that has to be completed before USB may re-enumerate devices on the bus. As a result, devices attached to the dock will only be available after nearly 1 minute after the dock was plugged in: [ 216.388290] [250] r8152 2-1.4:1.0: usb_probe_interface [ 216.388292] [250] r8152 2-1.4:1.0: usb_probe_interface - got id [ 258.830410] r8152 2-1.4:1.0 (unnamed net_device) (uninitialized): PHY not ready [ 258.830460] r8152 2-1.4:1.0 (unnamed net_device) (uninitialized): Invalid header when reading pass-thru MAC addr [ 258.830464] r8152 2-1.4:1.0 (unnamed net_device) (uninitialized): Get ether addr fail This happens in, for example, r8153_init: static int generic_ocp_read(struct r8152 *tp, u16 index, u16 size, void *data, u16 type) { if (test_bit(RTL8152_UNPLUG, &tp->flags)) return -ENODEV; ... } static u16 ocp_read_word(struct r8152 *tp, u16 type, u16 index) { u32 data; ... generic_ocp_read(tp, index, sizeof(tmp), &tmp, type | byen); data = __le32_to_cpu(tmp); ... return (u16)data; } static void r8153_init(struct r8152 *tp) { ... if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; for (i = 0; i < 500; i++) { if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; msleep(20); } ... } Since ocp_read_word() doesn't check the return status of generic_ocp_read(), and the only exit condition for the loop is to have a match in the returned value, such loops will only ends after exceeding its maximum runs when the device has been marked as disconnected, which takes 500 * 20ms = 10 seconds in theory, 14 in practice. 
To solve this long latency another test to RTL8152_UNPLUG flag should be added after those 20ms sleep to skip unnecessary loops, so that the device probe can complete early and proceed to parent port reset/reprobe process. This can be reproduced on all kernel versions up to latest v5.6-rc2, but after v5.5-rc7 the reproduce rate is dramatically lowered to 1/30 or less while it was around 1/2. Signed-off-by: You-Sheng Yang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/r8152.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index a7f9c1886bd4..cadf5ded45a9 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2696,6 +2696,8 @@ static u16 r8153_phy_status(struct r8152 *tp, u16 desired) } msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } return data; @@ -4055,7 +4057,10 @@ static void r8153_init(struct r8152 *tp) if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; + msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } data = r8153_phy_status(tp, 0); @@ -4170,7 +4175,10 @@ static void r8153b_init(struct r8152 *tp) if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; + msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } data = r8153_phy_status(tp, 0); -- GitLab From e27f53b37d55635cd8e4eb9bd31d52998ed016d0 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Mon, 9 Mar 2020 18:16:24 +0000 Subject: [PATCH 0452/1278] sfc: detach from cb_page in efx_copy_channel() [ Upstream commit 4b1bd9db078f7d5332c8601a2f5bd43cf0458fd4 ] It's a resource, not a parameter, so we can't copy it into the new channel's TX queues, otherwise aliasing will lead to resource- management bugs if the channel is subsequently torn down without being initialised. Before the Fixes:-tagged commit there was a similar bug with tsoh_page, but I'm not sure it's worth doing another fix for such old kernels. 
Fixes: e9117e5099ea ("sfc: Firmware-Assisted TSO version 2") Suggested-by: Derek Shute Signed-off-by: Edward Cree Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/efx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index b9cb697b2818..e0d4c1e850cf 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -505,6 +505,7 @@ efx_copy_channel(const struct efx_channel *old_channel) if (tx_queue->channel) tx_queue->channel = channel; tx_queue->buffer = NULL; + tx_queue->cb_page = NULL; memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); } -- GitLab From e69c7ad48309cecc7f9d708853e5e48b90be1683 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sun, 1 Mar 2020 22:07:17 -0500 Subject: [PATCH 0453/1278] bnxt_en: reinitialize IRQs when MTU is modified [ Upstream commit a9b952d267e59a3b405e644930f46d252cea7122 ] MTU changes may affect the number of IRQs so we must call bnxt_close_nic()/bnxt_open_nic() with the irq_re_init parameter set to true. The reason is that a larger MTU may require aggregation rings not needed with smaller MTU. We may not be able to allocate the required number of aggregation rings and so we reduce the number of channels which will change the number of IRQs. Without this patch, it may crash eventually in pci_disable_msix() when the IRQs are not properly unwound. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 41bc7820d2dd..5163da01e54f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7310,13 +7310,13 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) struct bnxt *bp = netdev_priv(dev); if (netif_running(dev)) - bnxt_close_nic(bp, false, false); + bnxt_close_nic(bp, true, false); dev->mtu = new_mtu; bnxt_set_ring_params(bp); if (netif_running(dev)) - return bnxt_open_nic(bp, false, false); + return bnxt_open_nic(bp, true, false); return 0; } -- GitLab From 944f7205341501a8135daee53b4b959af132de0a Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 9 Mar 2020 22:16:05 -0700 Subject: [PATCH 0454/1278] cgroup: memcg: net: do not associate sock with unrelated cgroup [ Upstream commit e876ecc67db80dfdb8e237f71e5b43bb88ae549c ] We are testing network memory accounting in our setup and noticed inconsistent network memory usage, and often unrelated cgroups' network usage correlates with the testing workload. On further inspection, it seems like mem_cgroup_sk_alloc() and cgroup_sk_alloc() are broken in irq context, especially for cgroup v1. mem_cgroup_sk_alloc() and cgroup_sk_alloc() can be called in irq context and kind of assume that this can only happen from sk_clone_lock() and that the source sock object already has an associated cgroup. However in cgroup v1, where network memory accounting is opt-in, the source sock can be unassociated with any cgroup and the new cloned sock can get associated with an unrelated interrupted cgroup. Cgroup v2 can also suffer if the source sock object was created by a process in the root cgroup or if sk_alloc() is called in irq context. The fix is to just do nothing in interrupt.
WARNING: Please note that about half of the TCP sockets are allocated from the IRQ context, so, memory used by such sockets will not be accounted by the memcg. The stack trace of mem_cgroup_sk_alloc() from IRQ-context: CPU: 70 PID: 12720 Comm: ssh Tainted: 5.6.0-smp-DEV #1 Hardware name: ... Call Trace: dump_stack+0x57/0x75 mem_cgroup_sk_alloc+0xe9/0xf0 sk_clone_lock+0x2a7/0x420 inet_csk_clone_lock+0x1b/0x110 tcp_create_openreq_child+0x23/0x3b0 tcp_v6_syn_recv_sock+0x88/0x730 tcp_check_req+0x429/0x560 tcp_v6_rcv+0x72d/0xa40 ip6_protocol_deliver_rcu+0xc9/0x400 ip6_input+0x44/0xd0 ? ip6_protocol_deliver_rcu+0x400/0x400 ip6_rcv_finish+0x71/0x80 ipv6_rcv+0x5b/0xe0 ? ip6_sublist_rcv+0x2e0/0x2e0 process_backlog+0x108/0x1e0 net_rx_action+0x26b/0x460 __do_softirq+0x104/0x2a6 do_softirq_own_stack+0x2a/0x40 do_softirq.part.19+0x40/0x50 __local_bh_enable_ip+0x51/0x60 ip6_finish_output2+0x23d/0x520 ? ip6table_mangle_hook+0x55/0x160 __ip6_finish_output+0xa1/0x100 ip6_finish_output+0x30/0xd0 ip6_output+0x73/0x120 ? __ip6_finish_output+0x100/0x100 ip6_xmit+0x2e3/0x600 ? ipv6_anycast_cleanup+0x50/0x50 ? inet6_csk_route_socket+0x136/0x1e0 ? skb_free_head+0x1e/0x30 inet6_csk_xmit+0x95/0xf0 __tcp_transmit_skb+0x5b4/0xb20 __tcp_send_ack.part.60+0xa3/0x110 tcp_send_ack+0x1d/0x20 tcp_rcv_state_process+0xe64/0xe80 ? tcp_v6_connect+0x5d1/0x5f0 tcp_v6_do_rcv+0x1b1/0x3f0 ? tcp_v6_do_rcv+0x1b1/0x3f0 __release_sock+0x7f/0xd0 release_sock+0x30/0xa0 __inet_stream_connect+0x1c3/0x3b0 ? prepare_to_wait+0xb0/0xb0 inet_stream_connect+0x3b/0x60 __sys_connect+0x101/0x120 ? __sys_getsockopt+0x11b/0x140 __x64_sys_connect+0x1a/0x20 do_syscall_64+0x51/0x200 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The stack trace of mem_cgroup_sk_alloc() from IRQ-context: Fixes: 2d7580738345 ("mm: memcontrol: consolidate cgroup socket tracking") Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets") Signed-off-by: Shakeel Butt Reviewed-by: Roman Gushchin Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/cgroup.c | 4 ++++ mm/memcontrol.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 829943aad7be..1e727fbaa0e4 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5799,6 +5799,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) return; } + /* Don't associate the sock with unrelated interrupted task's cgroup. */ + if (in_interrupt()) + return; + rcu_read_lock(); while (true) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 326525a97c47..81400be03dcb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5881,6 +5881,10 @@ void mem_cgroup_sk_alloc(struct sock *sk) return; } + /* Do not associate the sock with unrelated interrupted task's memcg. */ + if (in_interrupt()) + return; + rcu_read_lock(); memcg = mem_cgroup_from_task(current); if (memcg == root_mem_cgroup) -- GitLab From 357ac1da6e4d1dd02c1437b56ca4c8e21f652b5d Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 9 Mar 2020 22:16:06 -0700 Subject: [PATCH 0455/1278] net: memcg: late association of sock to memcg [ Upstream commit d752a4986532cb6305dfd5290a614cde8072769d ] If a TCP socket is allocated in IRQ context or cloned from unassociated (i.e. not associated to a memcg) in IRQ context then it will remain unassociated for its whole life. Almost half of the TCPs created on the system are created in IRQ context, so, memory used by such sockets will not be accounted by the memcg. This issue is more widespread in cgroup v1 where network memory accounting is opt-in but it can happen in cgroup v2 if the source socket for the cloning was created in root memcg. To fix the issue, just do the association of the sockets at the accept() time in the process context and then force charge the memory buffer already used and reserved by the socket. Signed-off-by: Shakeel Butt Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 14 -------------- net/core/sock.c | 5 ++++- net/ipv4/inet_connection_sock.c | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 81400be03dcb..5d6ebd1449f0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5867,20 +5867,6 @@ void mem_cgroup_sk_alloc(struct sock *sk) if (!mem_cgroup_sockets_enabled) return; - /* - * Socket cloning can throw us here with sk_memcg already - * filled. It won't however, necessarily happen from - * process context. So the test for root memcg given - * the current task's memcg won't help us in this case. - * - * Respecting the original socket's memcg is a better - * decision in this case. - */ - if (sk->sk_memcg) { - css_get(&sk->sk_memcg->css); - return; - } - /* Do not associate the sock with unrelated interrupted task's memcg. */ if (in_interrupt()) return; diff --git a/net/core/sock.c b/net/core/sock.c index 03ca2f638eb4..d2cb2051d045 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1684,7 +1684,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); - mem_cgroup_sk_alloc(newsk); + + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + cgroup_sk_alloc(&newsk->sk_cgrp_data); rcu_read_lock(); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index da55ce62fe50..c786f81952f1 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -475,6 +475,26 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) } spin_unlock_bh(&queue->fastopenq.lock); } + + if (mem_cgroup_sockets_enabled) { + int amt; + + /* atomically get the memory usage, set and charge the + * sk->sk_memcg. + */ + lock_sock(newsk); + + /* The sk has not been accepted yet, no need to look at + * sk->sk_wmem_queued. 
+ */ + amt = sk_mem_pages(newsk->sk_forward_alloc + + atomic_read(&sk->sk_rmem_alloc)); + mem_cgroup_sk_alloc(newsk); + if (newsk->sk_memcg && amt) + mem_cgroup_charge_skmem(newsk->sk_memcg, amt); + + release_sock(newsk); + } out: release_sock(sk); if (req) -- GitLab From 69b1fc5ce9dd58e890a6549197d6cbadcfa257cc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Mar 2020 11:44:26 -0700 Subject: [PATCH 0456/1278] net: memcg: fix lockdep splat in inet_csk_accept() commit 06669ea346e476a5339033d77ef175566a40efbb upstream. Locking newsk while still holding the listener lock triggered a lockdep splat [1] We can simply move the memcg code after we release the listener lock, as this can also help if multiple threads are sharing a common listener. Also fix a typo while reading socket sk_rmem_alloc. [1] WARNING: possible recursive locking detected 5.6.0-rc3-syzkaller #0 Not tainted -------------------------------------------- syz-executor598/9524 is trying to acquire lock: ffff88808b5b8b90 (sk_lock-AF_INET6){+.+.}, at: lock_sock include/net/sock.h:1541 [inline] ffff88808b5b8b90 (sk_lock-AF_INET6){+.+.}, at: inet_csk_accept+0x69f/0xd30 net/ipv4/inet_connection_sock.c:492 but task is already holding lock: ffff88808b5b9590 (sk_lock-AF_INET6){+.+.}, at: lock_sock include/net/sock.h:1541 [inline] ffff88808b5b9590 (sk_lock-AF_INET6){+.+.}, at: inet_csk_accept+0x8d/0xd30 net/ipv4/inet_connection_sock.c:445 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(sk_lock-AF_INET6); lock(sk_lock-AF_INET6); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by syz-executor598/9524: #0: ffff88808b5b9590 (sk_lock-AF_INET6){+.+.}, at: lock_sock include/net/sock.h:1541 [inline] #0: ffff88808b5b9590 (sk_lock-AF_INET6){+.+.}, at: inet_csk_accept+0x8d/0xd30 net/ipv4/inet_connection_sock.c:445 stack backtrace: CPU: 0 PID: 9524 Comm: syz-executor598 Not tainted 5.6.0-rc3-syzkaller #0 Hardware name: Google Google Compute 
Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x188/0x20d lib/dump_stack.c:118 print_deadlock_bug kernel/locking/lockdep.c:2370 [inline] check_deadlock kernel/locking/lockdep.c:2411 [inline] validate_chain kernel/locking/lockdep.c:2954 [inline] __lock_acquire.cold+0x114/0x288 kernel/locking/lockdep.c:3954 lock_acquire+0x197/0x420 kernel/locking/lockdep.c:4484 lock_sock_nested+0xc5/0x110 net/core/sock.c:2947 lock_sock include/net/sock.h:1541 [inline] inet_csk_accept+0x69f/0xd30 net/ipv4/inet_connection_sock.c:492 inet_accept+0xe9/0x7c0 net/ipv4/af_inet.c:734 __sys_accept4_file+0x3ac/0x5b0 net/socket.c:1758 __sys_accept4+0x53/0x90 net/socket.c:1809 __do_sys_accept4 net/socket.c:1821 [inline] __se_sys_accept4 net/socket.c:1818 [inline] __x64_sys_accept4+0x93/0xf0 net/socket.c:1818 do_syscall_64+0xf6/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4445c9 Code: e8 0c 0d 03 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffc35b37608 EFLAGS: 00000246 ORIG_RAX: 0000000000000120 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00000000004445c9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000306777 R09: 0000000000306777 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00000000004053d0 R14: 0000000000000000 R15: 0000000000000000 Fixes: d752a4986532 ("net: memcg: late association of sock to memcg") Signed-off-by: Eric Dumazet Cc: Shakeel Butt Reported-by: syzbot Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_connection_sock.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c786f81952f1..7826fba34b14 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -476,27 +476,27 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) spin_unlock_bh(&queue->fastopenq.lock); } - if (mem_cgroup_sockets_enabled) { +out: + release_sock(sk); + if (newsk && mem_cgroup_sockets_enabled) { int amt; /* atomically get the memory usage, set and charge the - * sk->sk_memcg. + * newsk->sk_memcg. */ lock_sock(newsk); - /* The sk has not been accepted yet, no need to look at - * sk->sk_wmem_queued. + /* The socket has not been accepted yet, no need to look at + * newsk->sk_wmem_queued. */ amt = sk_mem_pages(newsk->sk_forward_alloc + - atomic_read(&sk->sk_rmem_alloc)); + atomic_read(&newsk->sk_rmem_alloc)); mem_cgroup_sk_alloc(newsk); if (newsk->sk_memcg && amt) mem_cgroup_charge_skmem(newsk->sk_memcg, amt); release_sock(newsk); } -out: - release_sock(sk); if (req) reqsk_put(req); return newsk; -- GitLab From 47186107f7f2aa381aa9d890d5d8a81f6aee430a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:13 -0800 Subject: [PATCH 0457/1278] fib: add missing attribute validation for tun_id [ Upstream commit 4c16d64ea04056f1b1b324ab6916019f6a064114 ] Add missing netlink policy entry for FRA_TUN_ID. Fixes: e7030878fc84 ("fib: Add fib rule match on tunnel id") Signed-off-by: Jakub Kicinski Reviewed-by: David Ahern Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/fib_rules.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 648caf90ec07..b8fd023ba625 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -102,6 +102,7 @@ struct fib_rule_notifier_info { [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_PRIORITY] = { .type = NLA_U32 }, \ [FRA_FWMARK] = { .type = NLA_U32 }, \ + [FRA_TUN_ID] = { .type = NLA_U64 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ -- GitLab From 40ab0c53bc77f49f3da9050760066e14e37a6e03 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:14 -0800 Subject: [PATCH 0458/1278] nl802154: add missing attribute validation [ Upstream commit 9322cd7c4af2ccc7fe7c5f01adb53f4f77949e92 ] Add missing attribute validation for several u8 types. Fixes: 2c21d11518b6 ("net: add NL802154 interface for configuration of 802.15.4 devices") Signed-off-by: Jakub Kicinski Acked-by: Stefan Schmidt Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/nl_policy.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 35c432668454..7e7428afc357 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -30,6 +30,11 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, }, [IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, }, + [IEEE802154_ATTR_BCN_ORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_SF_ORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_PAN_COORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_BAT_EXT] = { .type = NLA_U8, }, + [IEEE802154_ATTR_COORD_REALIGN] = { .type = NLA_U8, }, [IEEE802154_ATTR_PAGE] = { .type = NLA_U8, }, [IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, }, [IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, }, -- GitLab From 6fbf53205cd58683d8bd89e4df8a8bf2edb89f63 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:15 -0800 Subject: [PATCH 0459/1278] nl802154: add missing attribute validation for dev_type [ Upstream commit b60673c4c418bef7550d02faf53c34fbfeb366bf ] Add missing attribute type validation for IEEE802154_ATTR_DEV_TYPE to the netlink policy. Fixes: 90c049b2c6ae ("ieee802154: interface type to be added") Signed-off-by: Jakub Kicinski Acked-by: Stefan Schmidt Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/nl_policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 7e7428afc357..040983fc15da 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -36,6 +36,7 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_BAT_EXT] = { .type = NLA_U8, }, [IEEE802154_ATTR_COORD_REALIGN] = { .type = NLA_U8, }, [IEEE802154_ATTR_PAGE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_DEV_TYPE] = { .type = NLA_U8, }, [IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, }, [IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, }, -- GitLab From 13e91bc63dcac99b9d96a8459e309c27009c1eb3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:16 -0800 Subject: [PATCH 0460/1278] can: add missing attribute validation for termination [ Upstream commit ab02ad660586b94f5d08912a3952b939cf4c4430 ] Add missing attribute validation for IFLA_CAN_TERMINATION to the netlink policy. Fixes: 12a6075cabc0 ("can: dev: add CAN interface termination API") Signed-off-by: Jakub Kicinski Acked-by: Oliver Hartkopp Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index d92113db4fb9..05ad5ed145a3 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -867,6 +867,7 @@ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { = { .len = sizeof(struct can_bittiming) }, [IFLA_CAN_DATA_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, + [IFLA_CAN_TERMINATION] = { .type = NLA_U16 }, }; static int can_validate(struct nlattr *tb[], struct nlattr *data[], -- GitLab From f0247be8076aa43a1b795530ceded43e7773a4a9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:17 -0800 Subject: [PATCH 0461/1278] macsec: add missing attribute validation for port [ Upstream commit 31d9a1c524964bac77b7f9d0a1ac140dc6b57461 ] Add missing attribute validation for IFLA_MACSEC_PORT to the netlink policy. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index ed2cb3ac578a..c2c3ce5653db 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2980,6 +2980,7 @@ static const struct device_type macsec_type = { static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { [IFLA_MACSEC_SCI] = { .type = NLA_U64 }, + [IFLA_MACSEC_PORT] = { .type = NLA_U16 }, [IFLA_MACSEC_ICV_LEN] = { .type = NLA_U8 }, [IFLA_MACSEC_CIPHER_SUITE] = { .type = NLA_U64 }, [IFLA_MACSEC_WINDOW] = { .type = NLA_U32 }, -- GitLab From 2165d304e82cd5a5c36d4c7a7c8579b4d907b8a9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:19 -0800 Subject: [PATCH 0462/1278] net: fq: add missing attribute validation for orphan mask [ Upstream commit 7e6dc03eeb023e18427a373522f1d247b916a641 ] Add missing attribute validation for TCA_FQ_ORPHAN_MASK to the netlink policy. Fixes: 06eb395fa985 ("pkt_sched: fq: better control of DDOS traffic") Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_fq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 7a944f508cae..66f1d40b910a 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -695,6 +695,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, + [TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 }, [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, }; -- GitLab From 994674f0ef734197359ac8e5876bb91103f526ad Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:21 -0800 Subject: [PATCH 0463/1278] team: add missing attribute validation for port ifindex [ Upstream commit dd25cb272ccce4db67dc8509278229099e4f5e99 ] Add missing attribute validation for TEAM_ATTR_OPTION_PORT_IFINDEX to the netlink policy. Fixes: 80f7c6683fe0 ("team: add support for per-port options") Signed-off-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index f1aabf8a16c2..01c51a1526ef 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2207,6 +2207,7 @@ team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = { [TEAM_ATTR_OPTION_CHANGED] = { .type = NLA_FLAG }, [TEAM_ATTR_OPTION_TYPE] = { .type = NLA_U8 }, [TEAM_ATTR_OPTION_DATA] = { .type = NLA_BINARY }, + [TEAM_ATTR_OPTION_PORT_IFINDEX] = { .type = NLA_U32 }, }; static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) -- GitLab From 41d830acc66107415ea37d92b325e21be7ea801c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:22 -0800 Subject: [PATCH 0464/1278] team: add missing attribute validation for array index [ Upstream commit 669fcd7795900cd1880237cbbb57a7db66cb9ac8 ] Add missing attribute validation for TEAM_ATTR_OPTION_ARRAY_INDEX to the netlink policy. Fixes: b13033262d24 ("team: introduce array options") Signed-off-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 01c51a1526ef..3dba58fa3433 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2208,6 +2208,7 @@ team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = { [TEAM_ATTR_OPTION_TYPE] = { .type = NLA_U8 }, [TEAM_ATTR_OPTION_DATA] = { .type = NLA_BINARY }, [TEAM_ATTR_OPTION_PORT_IFINDEX] = { .type = NLA_U32 }, + [TEAM_ATTR_OPTION_ARRAY_INDEX] = { .type = NLA_U32 }, }; static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) -- GitLab From 69f30950da6d4e954bc865a8c2632392d3dc3ed5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:24 -0800 Subject: [PATCH 0465/1278] nfc: add missing attribute validation for SE API [ Upstream commit 361d23e41ca6e504033f7e66a03b95788377caae ] Add missing attribute validation for NFC_ATTR_SE_INDEX to the netlink policy. Fixes: 5ce3f32b5264 ("NFC: netlink: SE API implementation") Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 7b8d4d235a3a..587aea4ae8c0 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -55,6 +55,7 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, [NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING, .len = NFC_FIRMWARE_NAME_MAXSIZE }, + [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_SE_APDU] = { .type = NLA_BINARY }, [NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, -- GitLab From 402f86abf2c3dae913b166c217147f2e4c05eca6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:26 -0800 Subject: [PATCH 0466/1278] nfc: add missing attribute validation for vendor subcommand [ Upstream commit 6ba3da446551f2150fadbf8c7788edcb977683d3 ] Add missing attribute validation for vendor subcommand attributes to the netlink policy. Fixes: 9e58095f9660 ("NFC: netlink: Implement vendor command support") Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 587aea4ae8c0..6199f4334fbd 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -57,6 +57,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { .len = NFC_FIRMWARE_NAME_MAXSIZE }, [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_SE_APDU] = { .type = NLA_BINARY }, + [NFC_ATTR_VENDOR_ID] = { .type = NLA_U32 }, + [NFC_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 }, [NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, }; -- GitLab From ee8cb6a025b7e4cb0b8c2dec545e0b14579a8b99 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 12 Mar 2020 22:25:20 +0100 Subject: [PATCH 0467/1278] net: phy: fix MDIO bus PM PHY resuming [ Upstream commit 611d779af7cad2b87487ff58e4931a90c20b113c ] So far we have the unfortunate situation that mdio_bus_phy_may_suspend() is called in suspend AND resume path, assuming that function result is the same. After the original change this is no longer the case, resulting in broken resume as reported by Geert. To fix this call mdio_bus_phy_may_suspend() in the suspend path only, and let the phy_device store the info whether it was suspended by MDIO bus PM. Fixes: 503ba7c69610 ("net: phy: Avoid multiple suspends") Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 6 +++++- include/linux/phy.h | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 31ef3e47edf6..27f1f0b5b8f6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -125,6 +125,8 @@ static int mdio_bus_phy_suspend(struct device *dev) if (!mdio_bus_phy_may_suspend(phydev)) return 0; + phydev->suspended_by_mdio_bus = true; + return phy_suspend(phydev); } @@ -133,9 +135,11 @@ static int mdio_bus_phy_resume(struct device *dev) struct phy_device *phydev = to_phy_device(dev); int ret; - if (!mdio_bus_phy_may_suspend(phydev)) + if (!phydev->suspended_by_mdio_bus) goto no_resume; + phydev->suspended_by_mdio_bus = false; + ret = phy_resume(phydev); if (ret < 0) return ret; diff --git a/include/linux/phy.h b/include/linux/phy.h index efc04c2d92c9..8b6850707e62 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -372,6 +372,7 @@ struct phy_c45_device_ids { * is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc. * has_fixups: Set to true if this phy has fixups/quirks. * suspended: Set to true if this phy has been suspended successfully. + * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus. * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. * loopback_enabled: Set true if this phy has been loopbacked successfully. * state: state of the PHY for management purposes @@ -410,6 +411,7 @@ struct phy_device { bool is_pseudo_fixed_link; bool has_fixups; bool suspended; + bool suspended_by_mdio_bus; bool sysfs_links; bool loopback_enabled; -- GitLab From c21c708ca257772691b99544937d3726c17d0ef9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Mar 2020 09:32:16 -0800 Subject: [PATCH 0468/1278] bonding/alb: make sure arp header is pulled before accessing it commit b7469e83d2add567e4e0b063963db185f3167cea upstream. 
Similar to commit 38f88c454042 ("bonding/alb: properly access headers in bond_alb_xmit()"), we need to make sure arp header was pulled in skb->head before blindly accessing it in rlb_arp_xmit(). Remove arp_pkt() private helper, since it is more readable/obvious to have the following construct back to back : if (!pskb_network_may_pull(skb, sizeof(*arp))) return NULL; arp = (struct arp_pkt *)skb_network_header(skb); syzbot reported : BUG: KMSAN: uninit-value in bond_slave_has_mac_rx include/net/bonding.h:704 [inline] BUG: KMSAN: uninit-value in rlb_arp_xmit drivers/net/bonding/bond_alb.c:662 [inline] BUG: KMSAN: uninit-value in bond_alb_xmit+0x575/0x25e0 drivers/net/bonding/bond_alb.c:1477 CPU: 0 PID: 12743 Comm: syz-executor.4 Not tainted 5.6.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 bond_slave_has_mac_rx include/net/bonding.h:704 [inline] rlb_arp_xmit drivers/net/bonding/bond_alb.c:662 [inline] bond_alb_xmit+0x575/0x25e0 drivers/net/bonding/bond_alb.c:1477 __bond_start_xmit drivers/net/bonding/bond_main.c:4257 [inline] bond_start_xmit+0x85d/0x2f70 drivers/net/bonding/bond_main.c:4282 __netdev_start_xmit include/linux/netdevice.h:4524 [inline] netdev_start_xmit include/linux/netdevice.h:4538 [inline] xmit_one net/core/dev.c:3470 [inline] dev_hard_start_xmit+0x531/0xab0 net/core/dev.c:3486 __dev_queue_xmit+0x37de/0x4220 net/core/dev.c:4063 dev_queue_xmit+0x4b/0x60 net/core/dev.c:4096 packet_snd net/packet/af_packet.c:2967 [inline] packet_sendmsg+0x8347/0x93b0 net/packet/af_packet.c:2992 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg net/socket.c:672 [inline] __sys_sendto+0xc1b/0xc50 net/socket.c:1998 __do_sys_sendto net/socket.c:2010 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:2006 
__x64_sys_sendto+0x6e/0x90 net/socket.c:2006 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45c479 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fc77ffbbc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007fc77ffbc6d4 RCX: 000000000045c479 RDX: 000000000000000e RSI: 00000000200004c0 RDI: 0000000000000003 RBP: 000000000076bf20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000000a04 R14: 00000000004cc7b0 R15: 000000000076bf2c Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:82 slab_alloc_node mm/slub.c:2793 [inline] __kmalloc_node_track_caller+0xb40/0x1200 mm/slub.c:4401 __kmalloc_reserve net/core/skbuff.c:142 [inline] __alloc_skb+0x2fd/0xac0 net/core/skbuff.c:210 alloc_skb include/linux/skbuff.h:1051 [inline] alloc_skb_with_frags+0x18c/0xa70 net/core/skbuff.c:5766 sock_alloc_send_pskb+0xada/0xc60 net/core/sock.c:2242 packet_alloc_skb net/packet/af_packet.c:2815 [inline] packet_snd net/packet/af_packet.c:2910 [inline] packet_sendmsg+0x66a0/0x93b0 net/packet/af_packet.c:2992 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg net/socket.c:672 [inline] __sys_sendto+0xc1b/0xc50 net/socket.c:1998 __do_sys_sendto net/socket.c:2010 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:2006 __x64_sys_sendto+0x6e/0x90 net/socket.c:2006 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_alb.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 755d588bbcb1..0b79ddec15b7 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -71,11 +71,6 @@ struct arp_pkt { }; #pragma pack() -static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) -{ - return (struct arp_pkt *)skb_network_header(skb); -} - /* Forward declaration */ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], bool strict_match); @@ -574,10 +569,11 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) spin_unlock(&bond->mode_lock); } -static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) +static struct slave *rlb_choose_channel(struct sk_buff *skb, + struct bonding *bond, + const struct arp_pkt *arp) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct arp_pkt *arp = arp_pkt(skb); struct slave *assigned_slave, *curr_active_slave; struct rlb_client_info *client_info; u32 hash_index = 0; @@ -674,8 +670,12 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon */ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) { - struct arp_pkt *arp = arp_pkt(skb); struct slave *tx_slave = NULL; + struct arp_pkt *arp; + + if (!pskb_network_may_pull(skb, sizeof(*arp))) + return NULL; + arp = (struct arp_pkt *)skb_network_header(skb); /* Don't modify or load balance ARPs that do not originate locally * (e.g.,arrive via a bridge). 
@@ -685,7 +685,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) if (arp->op_code == htons(ARPOP_REPLY)) { /* the arp must be sent on the selected rx channel */ - tx_slave = rlb_choose_channel(skb, bond); + tx_slave = rlb_choose_channel(skb, bond, arp); if (tx_slave) bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr, tx_slave->dev->addr_len); @@ -696,7 +696,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) * When the arp reply is received the entry will be updated * with the correct unicast address of the client. */ - rlb_choose_channel(skb, bond); + rlb_choose_channel(skb, bond, arp); /* The ARP reply packets must be delayed so that * they can cancel out the influence of the ARP request. -- GitLab From e3849fc5a293d721b1ac385c89aceba62760794c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Mar 2020 15:51:43 -0800 Subject: [PATCH 0469/1278] slip: make slhc_compress() more robust against malicious packets [ Upstream commit 110a40dfb708fe940a3f3704d470e431c368d256 ] Before accessing various fields in IPV4 network header and TCP header, make sure the packet : - Has IP version 4 (ip->version == 4) - Has not a silly network length (ip->ihl >= 5) - Is big enough to hold network and transport headers - Has not a silly TCP header size (th->doff >= sizeof(struct tcphdr) / 4) syzbot reported : BUG: KMSAN: uninit-value in slhc_compress+0x5b9/0x2e60 drivers/net/slip/slhc.c:270 CPU: 0 PID: 11728 Comm: syz-executor231 Not tainted 5.6.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 slhc_compress+0x5b9/0x2e60 drivers/net/slip/slhc.c:270 ppp_send_frame drivers/net/ppp/ppp_generic.c:1637 [inline] __ppp_xmit_process+0x1902/0x2970 
drivers/net/ppp/ppp_generic.c:1495 ppp_xmit_process+0x147/0x2f0 drivers/net/ppp/ppp_generic.c:1516 ppp_write+0x6bb/0x790 drivers/net/ppp/ppp_generic.c:512 do_loop_readv_writev fs/read_write.c:717 [inline] do_iter_write+0x812/0xdc0 fs/read_write.c:1000 compat_writev+0x2df/0x5a0 fs/read_write.c:1351 do_compat_pwritev64 fs/read_write.c:1400 [inline] __do_compat_sys_pwritev fs/read_write.c:1420 [inline] __se_compat_sys_pwritev fs/read_write.c:1414 [inline] __ia32_compat_sys_pwritev+0x349/0x3f0 fs/read_write.c:1414 do_syscall_32_irqs_on arch/x86/entry/common.c:339 [inline] do_fast_syscall_32+0x3c7/0x6e0 arch/x86/entry/common.c:410 entry_SYSENTER_compat+0x68/0x77 arch/x86/entry/entry_64_compat.S:139 RIP: 0023:0xf7f7cd99 Code: 90 e8 0b 00 00 00 f3 90 0f ae e8 eb f9 8d 74 26 00 89 3c 24 c3 90 90 90 90 90 90 90 90 90 90 90 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 eb 0d 90 90 90 90 90 90 90 90 90 90 90 90 RSP: 002b:00000000ffdb84ac EFLAGS: 00000217 ORIG_RAX: 000000000000014e RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00000000200001c0 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 0000000040047459 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:82 slab_alloc_node mm/slub.c:2793 [inline] __kmalloc_node_track_caller+0xb40/0x1200 mm/slub.c:4401 __kmalloc_reserve net/core/skbuff.c:142 [inline] __alloc_skb+0x2fd/0xac0 net/core/skbuff.c:210 alloc_skb include/linux/skbuff.h:1051 [inline] ppp_write+0x115/0x790 drivers/net/ppp/ppp_generic.c:500 do_loop_readv_writev fs/read_write.c:717 [inline] do_iter_write+0x812/0xdc0 fs/read_write.c:1000 compat_writev+0x2df/0x5a0 fs/read_write.c:1351 do_compat_pwritev64 fs/read_write.c:1400 
[inline] __do_compat_sys_pwritev fs/read_write.c:1420 [inline] __se_compat_sys_pwritev fs/read_write.c:1414 [inline] __ia32_compat_sys_pwritev+0x349/0x3f0 fs/read_write.c:1414 do_syscall_32_irqs_on arch/x86/entry/common.c:339 [inline] do_fast_syscall_32+0x3c7/0x6e0 arch/x86/entry/common.c:410 entry_SYSENTER_compat+0x68/0x77 arch/x86/entry/entry_64_compat.S:139 Fixes: b5451d783ade ("slip: Move the SLIP drivers") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/slip/slhc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index ea90db3c7705..01334aeac577 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -232,7 +232,7 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, register struct cstate *cs = lcs->next; register unsigned long deltaS, deltaA; register short changes = 0; - int hlen; + int nlen, hlen; unsigned char new_seq[16]; register unsigned char *cp = new_seq; struct iphdr *ip; @@ -248,6 +248,8 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, return isize; ip = (struct iphdr *) icp; + if (ip->version != 4 || ip->ihl < 5) + return isize; /* Bail if this packet isn't TCP, or is an IP fragment */ if (ip->protocol != IPPROTO_TCP || (ntohs(ip->frag_off) & 0x3fff)) { @@ -258,10 +260,14 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, comp->sls_o_tcp++; return isize; } - /* Extract TCP header */ + nlen = ip->ihl * 4; + if (isize < nlen + sizeof(*th)) + return isize; - th = (struct tcphdr *)(((unsigned char *)ip) + ip->ihl*4); - hlen = ip->ihl*4 + th->doff*4; + th = (struct tcphdr *)(icp + nlen); + if (th->doff < sizeof(struct tcphdr) / 4) + return isize; + hlen = nlen + th->doff * 4; /* Bail if the TCP packet isn't `compressible' (i.e., ACK isn't set or * some other control bit is set). 
Also uncompressible if -- GitLab From daf9c84758b8d2a8e3423fc98d6815f052e34a69 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Mar 2020 20:36:16 -0700 Subject: [PATCH 0470/1278] net: fec: validate the new settings in fec_enet_set_coalesce() [ Upstream commit ab14961d10d02d20767612c78ce148f6eb85bd58 ] fec_enet_set_coalesce() validates the previously set params and if they are within range proceeds to apply the new ones. The new ones, however, are not validated. This seems backwards, probably a copy-paste error? Compile tested only. Fixes: d851b47b22fc ("net: fec: add interrupt coalescence feature support") Signed-off-by: Jakub Kicinski Acked-by: Fugang Duan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/fec_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 62bc19bedb06..8ba915cc4c2e 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2478,15 +2478,15 @@ fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec) return -EINVAL; } - cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr); + cycle = fec_enet_us_to_itr_clock(ndev, ec->rx_coalesce_usecs); if (cycle > 0xFFFF) { pr_err("Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } - cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr); + cycle = fec_enet_us_to_itr_clock(ndev, ec->tx_coalesce_usecs); if (cycle > 0xFFFF) { - pr_err("Rx coalesced usec exceed hardware limitation\n"); + pr_err("Tx coalesced usec exceed hardware limitation\n"); return -EINVAL; } -- GitLab From d0a5359e5e2462ff529d1ae3dbbf177a6873824c Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 9 Mar 2020 15:57:07 -0700 Subject: [PATCH 0471/1278] macvlan: add cond_resched() during multicast processing [ Upstream commit ce9a4186f9ac475c415ffd20348176a4ea366670 ] The Rx 
bound multicast packets are deferred to a workqueue and macvlan can also suffer from the same attack that was discovered by Syzbot for IPvlan. This solution is not as effective as in IPvlan. IPvlan defers all (Tx and Rx) multicast packet processing to a workqueue while macvlan does this way only for the Rx. This fix should address the Rx condition to a certain extent. Tx is still susceptible. Tx multicast processing happens when .ndo_start_xmit is called, hence we cannot add cond_resched(). However, it's not that severe since the user which is generating / flooding will be affected the most. Fixes: 412ca1550cbe ("macvlan: Move broadcasts into a work queue") Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index ab539136d5bf..6989e84670e5 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -338,6 +338,8 @@ static void macvlan_process_broadcast(struct work_struct *w) if (src) dev_put(src->dev); kfree_skb(skb); + + cond_resched(); } } -- GitLab From e6b382e861bcf841154e391f65c0e37e0441771c Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Thu, 5 Mar 2020 15:33:12 +0300 Subject: [PATCH 0472/1278] inet_diag: return classid for all socket types [ Upstream commit 83f73c5bb7b9a9135173f0ba2b1aa00c06664ff9 ] In commit 1ec17dbd90f8 ("inet_diag: fix reporting cgroup classid and fallback to priority") cgroup classid reporting was fixed. But this works only for TCP sockets because for other socket types icsk parameter can be NULL and classid code path is skipped. This change moves classid handling to inet_diag_msg_attrs_fill() function. Also inet_diag_msg_attrs_size() helper was added and addends in nlmsg_new() were reordered to save order from inet_sk_diag_fill().
Fixes: 1ec17dbd90f8 ("inet_diag: fix reporting cgroup classid and fallback to priority") Signed-off-by: Dmitry Yakunin Reviewed-by: Konstantin Khlebnikov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/inet_diag.h | 18 ++++++++++------ net/ipv4/inet_diag.c | 44 ++++++++++++++++++--------------------- net/ipv4/raw_diag.c | 5 +++-- net/ipv4/udp_diag.c | 5 +++-- net/sctp/sctp_diag.c | 8 ++----- 5 files changed, 40 insertions(+), 40 deletions(-) diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 39faaaf843e1..c91cf2dee12a 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -2,15 +2,10 @@ #ifndef _INET_DIAG_H_ #define _INET_DIAG_H_ 1 +#include #include -struct net; -struct sock; struct inet_hashinfo; -struct nlattr; -struct nlmsghdr; -struct sk_buff; -struct netlink_callback; struct inet_diag_handler { void (*dump)(struct sk_buff *skb, @@ -62,6 +57,17 @@ int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); +static inline size_t inet_diag_msg_attrs_size(void) +{ + return nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ +#if IS_ENABLED(CONFIG_IPV6) + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(1) /* INET_DIAG_SKV6ONLY */ +#endif + + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4); /* INET_DIAG_CLASS_ID */ +} int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, struct user_namespace *user_ns, bool net_admin); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index eb158badebc4..7ba013d6c00a 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -105,13 +105,9 @@ static size_t inet_sk_attr_size(struct sock *sk, aux = handler->idiag_get_aux_size(sk, net_admin); return nla_total_size(sizeof(struct tcp_info)) - + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ - + nla_total_size(1) /* 
INET_DIAG_TOS */ - + nla_total_size(1) /* INET_DIAG_TCLASS */ - + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4) /* INET_DIAG_CLASS_ID */ - + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + nla_total_size(TCP_CA_NAME_MAX) + nla_total_size(sizeof(struct tcpvegas_info)) @@ -152,6 +148,24 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark)) goto errout; + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { + u32 classid = 0; + +#ifdef CONFIG_SOCK_CGROUP_DATA + classid = sock_cgroup_classid(&sk->sk_cgrp_data); +#endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = sk->sk_priority; + + if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) + goto errout; + } + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); @@ -289,24 +303,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } - if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || - ext & (1 << (INET_DIAG_TCLASS - 1))) { - u32 classid = 0; - -#ifdef CONFIG_SOCK_CGROUP_DATA - classid = sock_cgroup_classid(&sk->sk_cgrp_data); -#endif - /* Fallback to socket priority if class id isn't set. - * Classful qdiscs use it as direct reference to class. - * For cgroup2 classid is always zero. 
- */ - if (!classid) - classid = sk->sk_priority; - - if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) - goto errout; - } - out: nlmsg_end(skb, nlh); return 0; diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index 6367ecdf76c4..1d84b02ec765 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -99,8 +99,9 @@ static int raw_diag_dump_one(struct sk_buff *in_skb, if (IS_ERR(sk)) return PTR_ERR(sk); - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) { sock_put(sk); diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index d9ad986c7b2c..cc3f6da306c6 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -67,8 +67,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out; err = -ENOMEM; - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) goto out; diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 75274a60b77a..6a5a3dfa6c8d 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -221,15 +221,11 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) addrcnt++; return nla_total_size(sizeof(struct sctp_info)) - + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ - + nla_total_size(1) /* INET_DIAG_TOS */ - + nla_total_size(1) /* INET_DIAG_TCLASS */ - + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) - + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + 
nla_total_size(sizeof(struct inet_diag_meminfo)) + 64; } -- GitLab From dd18c7005ec437c6d7de538a6571abde9b66c92d Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Sat, 7 Mar 2020 13:31:57 +0100 Subject: [PATCH 0473/1278] ipvlan: do not add hardware address of master to its unicast filter list [ Upstream commit 63aae7b17344d4b08a7d05cb07044de4c0f9dcc6 ] There is a problem when ipvlan slaves are created on a master device that is a vmxnet3 device (ipvlan in VMware guests). The vmxnet3 driver does not support unicast address filtering. When an ipvlan device is brought up in ipvlan_open(), the ipvlan driver calls dev_uc_add() to add the hardware address of the vmxnet3 master device to the unicast address list of the master device, phy_dev->uc. This inevitably leads to the vmxnet3 master device being forced into promiscuous mode by __dev_set_rx_mode(). Promiscuous mode is switched on the master despite the fact that there is still only one hardware address that the master device should use for filtering in order for the ipvlan device to be able to receive packets. The comment above struct net_device describes the uc_promisc member as a "counter, that indicates, that promiscuous mode has been enabled due to the need to listen to additional unicast addresses in a device that does not implement ndo_set_rx_mode()". Moreover, the design of ipvlan guarantees that only the hardware address of a master device, phy_dev->dev_addr, will be used to transmit and receive all packets from its ipvlan slaves. Thus, the unicast address list of the master device should not be modified by ipvlan_open() and ipvlan_stop() in order to make ipvlan a workable option on masters that do not support unicast address filtering. Fixes: 2ad7bf3638411 ("ipvlan: Initial check-in of the IPVLAN driver") Reported-by: Per Sundstrom Signed-off-by: Jiri Wiesner Reviewed-by: Eric Dumazet Acked-by: Mahesh Bandewar Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 09f6795cce53..cd32d6623f6a 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -236,7 +236,6 @@ static void ipvlan_uninit(struct net_device *dev) static int ipvlan_open(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); - struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_addr *addr; if (ipvlan->port->mode == IPVLAN_MODE_L3 || @@ -248,7 +247,7 @@ static int ipvlan_open(struct net_device *dev) list_for_each_entry(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_add(ipvlan, addr); - return dev_uc_add(phy_dev, phy_dev->dev_addr); + return 0; } static int ipvlan_stop(struct net_device *dev) @@ -260,8 +259,6 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_unsync(phy_dev, dev); dev_mc_unsync(phy_dev, dev); - dev_uc_del(phy_dev, phy_dev->dev_addr); - list_for_each_entry(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_del(addr); -- GitLab From 72c457e5a3fef36c2979d40fcdf32c5c5ab0d957 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 28 Feb 2018 11:43:27 +0100 Subject: [PATCH 0474/1278] ipvlan: egress mcast packets are not exceptional commit cccc200fcaf04cff4342036a72e51d6adf6c98c1 upstream. Currently, if IPv6 is enabled on top of an ipvlan device in l3 mode, the following warning message: Dropped {multi|broad}cast of type= [86dd] is emitted every time that an RS is generated and dmesg is soon filled with irrelevant messages. Replace pr_warn with pr_debug, to preserve debuggability, without scaring the sysadmin. Signed-off-by: Paolo Abeni Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 1d97d6958e4b..cc0bd2ce4cc3 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -451,8 +451,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb) /* In this mode we dont care about multicast and broadcast traffic */ if (is_multicast_ether_addr(ethh->h_dest)) { - pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n", - ntohs(skb->protocol)); + pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n", + ntohs(skb->protocol)); kfree_skb(skb); goto out; } -- GitLab From c07b71b6f377c65942c35daf7005e8be548b756c Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 9 Mar 2020 15:56:56 -0700 Subject: [PATCH 0475/1278] ipvlan: don't deref eth hdr before checking it's set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ad8192767c9f9cf97da57b9ffcea70fb100febef ] IPvlan in L3 mode discards outbound multicast packets but performs the check before ensuring the ether-header is set or not. This is an error that Eric found through code browsing. Fixes: 2ad7bf363841 (“ipvlan: Initial check-in of the IPVLAN driver.”) Signed-off-by: Mahesh Bandewar Reported-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_core.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index cc0bd2ce4cc3..baf8aab59f82 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -449,19 +449,21 @@ static int ipvlan_process_outbound(struct sk_buff *skb) struct ethhdr *ethh = eth_hdr(skb); int ret = NET_XMIT_DROP; - /* In this mode we dont care about multicast and broadcast traffic */ - if (is_multicast_ether_addr(ethh->h_dest)) { - pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n", - ntohs(skb->protocol)); - kfree_skb(skb); - goto out; - } - /* The ipvlan is a pseudo-L2 device, so the packets that we receive * will have L2; which need to discarded and processed further * in the net-ns of the main-device. */ if (skb_mac_header_was_set(skb)) { + /* In this mode we dont care about + * multicast and broadcast traffic */ + if (is_multicast_ether_addr(ethh->h_dest)) { + pr_debug_ratelimited( + "Dropped {multi|broad}cast of type=[%x]\n", + ntohs(skb->protocol)); + kfree_skb(skb); + goto out; + } + skb_pull(skb, sizeof(*ethh)); skb->mac_header = (typeof(skb->mac_header))~0U; skb_reset_network_header(skb); -- GitLab From b58120a61b256e3c24b957fe36617bdc738efc9c Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 30 Jan 2020 13:34:59 +0300 Subject: [PATCH 0476/1278] cgroup: cgroup_procs_next should increase position index commit 2d4ecb030dcc90fb725ecbfc82ce5d6c37906e0e upstream. If seq_file .next function does not change position index, read after some lseek can generate unexpected output: 1) dd bs=1 skip output of each 2nd elements $ dd if=/sys/fs/cgroup/cgroup.procs bs=8 count=1 2 3 4 5 1+0 records in 1+0 records out 8 bytes copied, 0,000267297 s, 29,9 kB/s [test@localhost ~]$ dd if=/sys/fs/cgroup/cgroup.procs bs=1 count=8 2 4 <<< NB! 3 was skipped 6 <<< ... and 5 too 8 <<< ...
and 7 8+0 records in 8+0 records out 8 bytes copied, 5,2123e-05 s, 153 kB/s This happens because __cgroup_procs_start() makes an extra cgroup_procs_next() call 2) read after lseek beyond end of file generates whole last line. 3) read after lseek into middle of last line generates expected rest of last line and unexpected whole line once again. Additionally patch removes an extra position index change in __cgroup_procs_start() Cc: stable@vger.kernel.org https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/cgroup.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 1e727fbaa0e4..4dba8069f036 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4249,6 +4249,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) struct kernfs_open_file *of = s->private; struct css_task_iter *it = of->priv; + if (pos) + (*pos)++; + return css_task_iter_next(it); } @@ -4264,7 +4267,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, * from position 0, so we can simply keep iterating on !0 *pos.
*/ if (!it) { - if (WARN_ON_ONCE((*pos)++)) + if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); it = kzalloc(sizeof(*it), GFP_KERNEL); @@ -4272,10 +4275,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, return ERR_PTR(-ENOMEM); of->priv = it; css_task_iter_start(&cgrp->self, iter_flags, it); - } else if (!(*pos)++) { + } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); - } + } else + return it->cur_task; return cgroup_procs_next(s, NULL, NULL); } -- GitLab From 713f26696c8c8f7121cabe1a5a44353ffccda06e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Fri, 24 Jan 2020 12:40:15 +0100 Subject: [PATCH 0477/1278] cgroup: Iterate tasks that did not finish do_exit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9c974c77246460fa6a92c18554c3311c8c83c160 upstream. PF_EXITING is set earlier than actual removal from css_set when a task is exiting. This can confuse cgroup.procs readers who see no PF_EXITING tasks, however, rmdir is checking against css_set membership so it can transitionally fail with EBUSY. Fix this by listing tasks that weren't unlinked from css_set active lists. It may happen that other users of the task iterator (without CSS_TASK_ITER_PROCS) spot a PF_EXITING task before cgroup_exit(). This is equal to the state before commit c03cd7738a83 ("cgroup: Include dying leaders with live threads in PROCS iterations") but it may be reviewed later.
Reported-by: Suren Baghdasaryan Fixes: c03cd7738a83 ("cgroup: Include dying leaders with live threads in PROCS iterations") Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/cgroup.h | 1 + kernel/cgroup/cgroup.c | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0e21619f1c03..61ab21c34866 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -61,6 +61,7 @@ struct css_task_iter { struct list_head *mg_tasks_head; struct list_head *dying_tasks_head; + struct list_head *cur_tasks_head; struct css_set *cur_cset; struct css_set *cur_dcset; struct task_struct *cur_task; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 4dba8069f036..2b3f2ea6a8a3 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4051,12 +4051,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) } } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks)); - if (!list_empty(&cset->tasks)) + if (!list_empty(&cset->tasks)) { it->task_pos = cset->tasks.next; - else if (!list_empty(&cset->mg_tasks)) + it->cur_tasks_head = &cset->tasks; + } else if (!list_empty(&cset->mg_tasks)) { it->task_pos = cset->mg_tasks.next; - else + it->cur_tasks_head = &cset->mg_tasks; + } else { it->task_pos = cset->dying_tasks.next; + it->cur_tasks_head = &cset->dying_tasks; + } it->tasks_head = &cset->tasks; it->mg_tasks_head = &cset->mg_tasks; @@ -4114,10 +4118,14 @@ static void css_task_iter_advance(struct css_task_iter *it) else it->task_pos = it->task_pos->next; - if (it->task_pos == it->tasks_head) + if (it->task_pos == it->tasks_head) { it->task_pos = it->mg_tasks_head->next; - if (it->task_pos == it->mg_tasks_head) + it->cur_tasks_head = it->mg_tasks_head; + } + if (it->task_pos == it->mg_tasks_head) { it->task_pos = it->dying_tasks_head->next; + it->cur_tasks_head = it->dying_tasks_head; + } if 
(it->task_pos == it->dying_tasks_head) css_task_iter_advance_css_set(it); } else { @@ -4136,11 +4144,12 @@ static void css_task_iter_advance(struct css_task_iter *it) goto repeat; /* and dying leaders w/o live member threads */ - if (!atomic_read(&task->signal->live)) + if (it->cur_tasks_head == it->dying_tasks_head && + !atomic_read(&task->signal->live)) goto repeat; } else { /* skip all dying ones */ - if (task->flags & PF_EXITING) + if (it->cur_tasks_head == it->dying_tasks_head) goto repeat; } } -- GitLab From 6e9c7d95ee119911feef7ef8426d177cf6949f53 Mon Sep 17 00:00:00 2001 From: Dan Moulding Date: Tue, 28 Jan 2020 02:31:07 -0700 Subject: [PATCH 0478/1278] iwlwifi: mvm: Do not require PHY_SKU NVM section for 3168 devices commit a9149d243f259ad8f02b1e23dfe8ba06128f15e1 upstream. The logic for checking required NVM sections was recently fixed in commit b3f20e098293 ("iwlwifi: mvm: fix NVM check for 3168 devices"). However, with that fixed the else is now taken for 3168 devices and within the else clause there is a mandatory check for the PHY_SKU section. This causes the parsing to fail for 3168 devices. The PHY_SKU section is really only mandatory for the IWL_NVM_EXT layout (the phy_sku parameter of iwl_parse_nvm_data is only used when the NVM type is IWL_NVM_EXT). So this changes the PHY_SKU section check so that it's only mandatory for IWL_NVM_EXT. 
Fixes: b3f20e098293 ("iwlwifi: mvm: fix NVM check for 3168 devices") Signed-off-by: Dan Moulding Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 8f3032b7174d..b2e393c4fab5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -326,7 +326,8 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) } /* PHY_SKU section is mandatory in B0 */ - if (!mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) { + if (mvm->trans->cfg->nvm_type == IWL_NVM_EXT && + !mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) { IWL_ERR(mvm, "Can't parse phy_sku in B0, empty sections\n"); return NULL; -- GitLab From 81b00ac7528c3b146b737eb6d4d8cbe107f1d8e5 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Thu, 13 Feb 2020 13:37:27 +0100 Subject: [PATCH 0479/1278] virtio-blk: fix hw_queue stopped on arbitrary error commit f5f6b95c72f7f8bb46eace8c5306c752d0133daa upstream. Since nobody else is going to restart our hw_queue for us, the blk_mq_start_stopped_hw_queues() call in virtblk_done() is not necessarily sufficient to ensure that the queue will get started again. In case of global resource outage (-ENOMEM because mapping failure, because of swiotlb full) our virtqueue may be empty and we can get stuck with a stopped hw_queue. Let us not stop the queue on arbitrary errors, but only on -ENOSPC which indicates a full virtqueue, where the hw_queue is guaranteed to get started by virtblk_done() before when it makes sense to carry on submitting requests. Let us also remove a stale comment. Signed-off-by: Halil Pasic Cc: Jens Axboe Fixes: f7728002c1c7 ("virtio_ring: fix return code on DMA mapping fails") Link: https://lore.kernel.org/r/20200213123728.61216-2-pasic@linux.ibm.com Signed-off-by: Michael S.
Tsirkin Reviewed-by: Stefan Hajnoczi Signed-off-by: Greg Kroah-Hartman --- drivers/block/virtio_blk.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 8767401f75e0..19d226ff15ef 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -271,10 +271,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); if (err) { virtqueue_kick(vblk->vqs[qid].vq); - blk_mq_stop_hw_queue(hctx); + /* Don't stop the queue if -ENOMEM: we may have failed to + * bounce the buffer due to global resource outage. + */ + if (err == -ENOSPC) + blk_mq_stop_hw_queue(hctx); spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); - /* Out of mem doesn't actually happen, since we fall back - * to direct descriptors */ if (err == -ENOMEM || err == -ENOSPC) return BLK_STS_RESOURCE; return BLK_STS_IOERR; -- GitLab From 209ac82ca0d70738764870af33bbbcb35dc4d2d0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 9 Mar 2020 19:25:10 +0100 Subject: [PATCH 0480/1278] iommu/vt-d: quirk_ioat_snb_local_iommu: replace WARN_TAINT with pr_warn + add_taint commit 81ee85d0462410de8eeeec1b9761941fd6ed8c7b upstream. Quoting from the comment describing the WARN functions in include/asm-generic/bug.h: * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report * significant kernel issues that need prompt attention if they should ever * appear at runtime. * * Do not use these macros when checking for invalid external inputs The (buggy) firmware tables which the dmar code was calling WARN_TAINT for really are invalid external inputs. They are not under the kernel's control and the issues in them cannot be fixed by a kernel update. So logging a backtrace, which invites bug reports to be filed about this, is not helpful. 
Fixes: 556ab45f9a77 ("ioat2: catch and recover from broken vtd configurations v6") Signed-off-by: Hans de Goede Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20200309182510.373875-1-hdegoede@redhat.com BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=701847 Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index b48666849dbe..b8aa5e60e4c3 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3984,10 +3984,11 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) /* we know that the this iommu should be at offset 0xa000 from vtbar */ drhd = dmar_find_matched_drhd_unit(pdev); - if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000, - TAINT_FIRMWARE_WORKAROUND, - "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n")) + if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) { + pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; + } } DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); -- GitLab From 48c336253b74bba9bd9d8d1c0d27dc7ead2de9af Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Fri, 24 Jan 2020 20:14:45 -0500 Subject: [PATCH 0481/1278] workqueue: don't use wq_select_unbound_cpu() for bound works commit aa202f1f56960c60e7befaa0f49c72b8fa11b0a8 upstream. wq_select_unbound_cpu() is designed for unbound workqueues only, but it's wrongly called when using a bound workqueue too. Fixing this ensures work queued to a bound workqueue with cpu=WORK_CPU_UNBOUND always runs on the local CPU. 
Before, that would happen only if wq_unbound_cpumask happened to include it (likely almost always the case), or was empty, or we got lucky with forced round-robin placement. So restricting /sys/devices/virtual/workqueue/cpumask to a small subset of a machine's CPUs would cause some bound work items to run unexpectedly there. Fixes: ef557180447f ("workqueue: schedule WORK_CPU_UNBOUND work on wq_unbound_cpumask CPUs") Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Hillf Danton [dj: massage changelog] Signed-off-by: Daniel Jordan Cc: Tejun Heo Cc: Lai Jiangshan Cc: linux-kernel@vger.kernel.org Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a37f5dc7cb39..18fae55713b0 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1386,14 +1386,16 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, WARN_ON_ONCE(!is_chained_work(wq))) return; retry: - if (req_cpu == WORK_CPU_UNBOUND) - cpu = wq_select_unbound_cpu(raw_smp_processor_id()); - /* pwq which will be used unless @work is executing elsewhere */ - if (!(wq->flags & WQ_UNBOUND)) - pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); - else + if (wq->flags & WQ_UNBOUND) { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = wq_select_unbound_cpu(raw_smp_processor_id()); pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); + } else { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = raw_smp_processor_id(); + pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); + } /* * If @work was previously on a different pool, it might still be -- GitLab From 9797798d7d384bc9ca58b91c577be1aa42eab806 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Nov 2019 14:45:27 +0000 Subject: [PATCH 0482/1278] drm/amd/display: remove duplicated assignment to grph_obj_type commit d785476c608c621b345dd9396e8b21e90375cb0e upstream. 
Variable grph_obj_type is being assigned twice, one of these is redundant so remove it. Addresses-Coverity: ("Evaluation order violation") Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 4779740421a8..2153f19e59cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -363,8 +363,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * router.ddc_valid = false; router.cd_valid = false; for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) { - uint8_t grph_obj_type= - grph_obj_type = + uint8_t grph_obj_type = (le16_to_cpu(path->usGraphicObjIds[j]) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; -- GitLab From 9cc22f086221a6eb6f2f6dd76cf16eab9d599580 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 9 Mar 2020 16:00:11 -0400 Subject: [PATCH 0483/1278] ktest: Add timeout for ssh sync testing commit 4d00fc477a2ce8b6d2b09fb34ef9fe9918e7d434 upstream. Before rebooting the box, a "ssh sync" is called to the test machine to see if it is alive or not. But if the test machine is in a partial state, that ssh may never actually finish, and the ktest test hangs. Add a 10 second timeout to the sync test, which will fail after 10 seconds and then cause the test to reboot the test machine. 
Cc: stable@vger.kernel.org Fixes: 6474ace999edd ("ktest.pl: Powercycle the box on reboot if no connection can be made") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- tools/testing/ktest/ktest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 0c8b61f8398e..3bdd6a463819 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1345,7 +1345,7 @@ sub reboot { } else { # Make sure everything has been written to disk - run_ssh("sync"); + run_ssh("sync", 10); if (defined($time)) { start_monitor; -- GitLab From b4ef7d85fe8677effad26e1c869ab47414314a4f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Mar 2020 18:25:20 -0400 Subject: [PATCH 0484/1278] cifs_atomic_open(): fix double-put on late allocation failure commit d9a9f4849fe0c9d560851ab22a85a666cddfdd24 upstream. several iterations of ->atomic_open() calling conventions ago, we used to need fput() if ->atomic_open() failed at some point after successful finish_open(). Now (since 2016) it's not needed - struct file carries enough state to make fput() work regardless of the point in struct file lifecycle and discarding it on failure exits in open() got unified. Unfortunately, I'd missed the fact that we had an instance of ->atomic_open() (cifs one) that used to need that fput(), as well as the stale comment in finish_open() demanding such late failure handling. Trivially fixed... 
Fixes: fe9ec8291fca "do_last(): take fput() on error after opening to out:" Cc: stable@kernel.org # v4.7+ Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/porting | 7 +++++++ fs/cifs/dir.c | 1 - fs/open.c | 3 --- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 93e0a2404532..c757c1c3cb81 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -606,3 +606,10 @@ in your dentry operations instead. dentry separately, and it now has request_mask and query_flags arguments to specify the fields and sync type requested by statx. Filesystems not supporting any statx-specific features may ignore the new arguments. +-- +[mandatory] + + [should've been added in 2016] stale comment in finish_open() + nonwithstanding, failure exits in ->atomic_open() instances should + *NOT* fput() the file, no matter what. Everything is handled by the + caller. diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f00a7ce3eb6e..03293e543c07 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -562,7 +562,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (server->ops->close) server->ops->close(xid, tcon, &fid); cifs_del_pending_open(&open); - fput(file); rc = -ENOMEM; } diff --git a/fs/open.c b/fs/open.c index 29a2cdcbcb17..49fd070be0ec 100644 --- a/fs/open.c +++ b/fs/open.c @@ -824,9 +824,6 @@ static int do_dentry_open(struct file *f, * the return value of d_splice_alias(), then the caller needs to perform dput() * on it after finish_open(). * - * On successful return @file is a fully instantiated open file. After this, if - * an error occurs in ->atomic_open(), it needs to clean up with fput(). - * * Returns zero on success or -errno if the open failed. 
*/ int finish_open(struct file *file, struct dentry *dentry, -- GitLab From 365851b725f87527023a2448213e2f3c76002cd0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Mar 2020 09:31:41 -0400 Subject: [PATCH 0485/1278] gfs2_atomic_open(): fix O_EXCL|O_CREAT handling on cold dcache commit 21039132650281de06a169cbe8a0f7e5c578fd8b upstream. with the way fs/namei.c:do_last() had been done, ->atomic_open() instances needed to recognize the case when existing file got found with O_EXCL|O_CREAT, either by falling back to finish_no_open() or failing themselves. gfs2 one didn't. Fixes: 6d4ade986f9c (GFS2: Add atomic_open support) Cc: stable@kernel.org # v3.11 Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c850579ae5a4..6c6401084d3d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1255,7 +1255,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, if (!(*opened & FILE_OPENED)) return finish_no_open(file, d); dput(d); - return 0; + return excl && (flags & O_CREAT) ? -EEXIST : 0; } BUG_ON(d != NULL); -- GitLab From 02cba24f984cc796d6d62839b877ae553fff0d5c Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 3 Mar 2020 15:33:15 +0100 Subject: [PATCH 0486/1278] KVM: x86: clear stale x86_emulate_ctxt->intercept value commit 342993f96ab24d5864ab1216f46c0b199c2baf8e upstream. After commit 07721feee46b ("KVM: nVMX: Don't emulate instructions in guest mode") Hyper-V guests on KVM stopped booting with: kvm_nested_vmexit: rip fffff802987d6169 reason EPT_VIOLATION info1 181 info2 0 int_info 0 int_info_err 0 kvm_page_fault: address febd0000 error_code 181 kvm_emulate_insn: 0:fffff802987d6169: f3 a5 kvm_emulate_insn: 0:fffff802987d6169: f3 a5 FAIL kvm_inj_exception: #UD (0x0) "f3 a5" is a "rep movsw" instruction, which should not be intercepted at all. 
Commit c44b4c6ab80e ("KVM: emulate: clean up initializations in init_decode_cache") reduced the number of fields cleared by init_decode_cache() claiming that they are being cleared elsewhere, 'intercept', however, is left uncleared if the instruction does not have any of the "slow path" flags (NotImpl, Stack, Op3264, Sse, Mmx, CheckPerm, NearBranch, No16 and of course Intercept itself). Fixes: c44b4c6ab80e ("KVM: emulate: clean up initializations in init_decode_cache") Fixes: 07721feee46b ("KVM: nVMX: Don't emulate instructions in guest mode") Cc: stable@vger.kernel.org Suggested-by: Paolo Bonzini Signed-off-by: Vitaly Kuznetsov Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 70f3636aff11..4cc8a4a6f1d0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5062,6 +5062,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->fetch.ptr = ctxt->fetch.data; ctxt->fetch.end = ctxt->fetch.data + insn_len; ctxt->opcode_len = 1; + ctxt->intercept = x86_intercept_none; if (insn_len > 0) memcpy(ctxt->fetch.data, insn, insn_len); else { -- GitLab From b08e88aca8ded59b810f8cb96d51541d60c3157c Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 11 Mar 2020 19:26:43 +0300 Subject: [PATCH 0487/1278] ARC: define __ALIGN_STR and __ALIGN symbols for ARC commit 8d92e992a785f35d23f845206cf8c6cafbc264e0 upstream. The default definitions use fill pattern 0x90 for padding which for ARC generates unintended "ldh_s r12,[r0,0x20]" corresponding to opcode 0x9090 So use ".align 4" which inserts a "nop_s" instruction instead.
Cc: stable@vger.kernel.org Acked-by: Vineet Gupta Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/linkage.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index b29f1a9fd6f7..07c8e1a6c56e 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -14,6 +14,8 @@ #ifdef __ASSEMBLY__ #define ASM_NL ` /* use '`' to mark new line in macro */ +#define __ALIGN .align 4 +#define __ALIGN_STR __stringify(__ALIGN) /* annotation for data we want in DCCM - if enabled in .config */ .macro ARCFP_DATA nm -- GitLab From 5c3d354b51be3546dacbe55bde67eccdd108d007 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Sun, 8 Mar 2020 09:08:54 +0100 Subject: [PATCH 0488/1278] efi: Fix a race and a buffer overflow while reading efivars via sysfs commit 286d3250c9d6437340203fb64938bea344729a0e upstream. There is a race and a buffer overflow corrupting a kernel memory while reading an EFI variable with a size more than 1024 bytes via the older sysfs method. This happens because accessing struct efi_variable in efivar_{attr,size,data}_read() and friends is not protected from a concurrent access leading to a kernel memory corruption and, at best, to a crash. The race scenario is the following: CPU0: CPU1: efivar_attr_read() var->DataSize = 1024; efivar_entry_get(... &var->DataSize) down_interruptible(&efivars_lock) efivar_attr_read() // same EFI var var->DataSize = 1024; efivar_entry_get(... 
&var->DataSize) down_interruptible(&efivars_lock) virt_efi_get_variable() // returns EFI_BUFFER_TOO_SMALL but // var->DataSize is set to a real // var size more than 1024 bytes up(&efivars_lock) virt_efi_get_variable() // called with var->DataSize set // to a real var size, returns // successfully and overwrites // a 1024-bytes kernel buffer up(&efivars_lock) This can be reproduced by concurrent reading of an EFI variable which size is more than 1024 bytes: ts# for cpu in $(seq 0 $(nproc --ignore=1)); do ( taskset -c $cpu \ cat /sys/firmware/efi/vars/KEKDefault*/size & ) ; done Fix this by using a local variable for a var's data buffer size so it does not get overwritten. Fixes: e14ab23dde12b80d ("efivars: efivar_entry API") Reported-by: Bob Sanders and the LTP testsuite Signed-off-by: Vladis Dronov Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Link: https://lore.kernel.org/r/20200305084041.24053-2-vdronov@redhat.com Link: https://lore.kernel.org/r/20200308080859.21568-24-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efivars.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 3e626fd9bd4e..c8688490f148 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -139,13 +139,16 @@ static ssize_t efivar_attr_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); char *str = buf; + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; if (var->Attributes & EFI_VARIABLE_NON_VOLATILE) @@ -172,13 +175,16 @@ static ssize_t efivar_size_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = 
&entry->var; + unsigned long size = sizeof(var->Data); char *str = buf; + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; str += sprintf(str, "0x%lx\n", var->DataSize); @@ -189,12 +195,15 @@ static ssize_t efivar_data_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; memcpy(buf, var->Data, var->DataSize); @@ -314,14 +323,16 @@ efivar_show_raw(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; struct compat_efi_variable *compat; + unsigned long datasize = sizeof(var->Data); size_t size; + int ret; if (!entry || !buf) return 0; - var->DataSize = 1024; - if (efivar_entry_get(entry, &entry->var.Attributes, - &entry->var.DataSize, entry->var.Data)) + ret = efivar_entry_get(entry, &var->Attributes, &datasize, var->Data); + var->DataSize = datasize; + if (ret) return -EIO; if (is_compat()) { -- GitLab From ef0d4fec18b8cb0901058b46da75e3fcf7265f55 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 25 Feb 2020 17:17:37 -0800 Subject: [PATCH 0489/1278] x86/mce: Fix logic and comments around MSR_PPIN_CTL commit 59b5809655bdafb0767d3fd00a3e41711aab07e6 upstream. There are two implemented bits in the PPIN_CTL MSR: Bit 0: LockOut (R/WO) Set 1 to prevent further writes to MSR_PPIN_CTL. Bit 1: Enable_PPIN (R/W) If 1, enables MSR_PPIN to be accessible using RDMSR. If 0, an attempt to read MSR_PPIN will cause #GP. 
So there are four defined values: 0: PPIN is disabled, PPIN_CTL may be updated 1: PPIN is disabled. PPIN_CTL is locked against updates 2: PPIN is enabled. PPIN_CTL may be updated 3: PPIN is enabled. PPIN_CTL is locked against updates Code would only enable the X86_FEATURE_INTEL_PPIN feature for case "2". When it should have done so for both case "2" and case "3". Fix the final test to just check for the enable bit. Also fix some of the other comments in this function. Fixes: 3f5a7896a509 ("x86/mce: Include the PPIN in MCE records when available") Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20200226011737.9958-1-tony.luck@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_intel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index d05be307d081..1d87b85150db 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -489,17 +489,18 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) return; if ((val & 3UL) == 1UL) { - /* PPIN available but disabled: */ + /* PPIN locked in disabled mode */ return; } - /* If PPIN is disabled, but not locked, try to enable: */ - if (!(val & 3UL)) { + /* If PPIN is disabled, try to enable */ + if (!(val & 2UL)) { wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); rdmsrl_safe(MSR_PPIN_CTL, &val); } - if ((val & 3UL) == 2UL) + /* Is the enable bit set? */ + if (val & 2UL) set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); } } -- GitLab From f6efa6116f332a78791ae5b0b6076525184c09ad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 11:11:17 +0000 Subject: [PATCH 0490/1278] iommu/dma: Fix MSI reservation allocation commit 65ac74f1de3334852fb7d9b1b430fa5a06524276 upstream. The way cookie_init_hw_msi_region() allocates the iommu_dma_msi_page structures doesn't match the way iommu_put_dma_cookie() frees them. 
The former performs a single allocation of all the required structures, while the latter tries to free them one at a time. It doesn't quite work for the main use case (the GICv3 ITS where the range is 64kB) when the base granule size is 4kB. This leads to a nice slab corruption on teardown, which is easily observable by simply creating a VF on a SRIOV-capable device, and tearing it down immediately (no need to even make use of it). Fortunately, this only affects systems where the ITS isn't translated by the SMMU, which are both rare and non-standard. Fix it by allocating iommu_dma_msi_page structures one at a time. Fixes: 7c1b058c8b5a3 ("iommu/dma: Handle IOMMU API reserved regions") Signed-off-by: Marc Zyngier Reviewed-by: Eric Auger Cc: Robin Murphy Cc: Joerg Roedel Cc: Will Deacon Cc: stable@vger.kernel.org Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/dma-iommu.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index c87764a4e212..8000b798e6e6 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -208,15 +208,15 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, start -= iova_offset(iovad, start); num_pages = iova_align(iovad, end - start) >> iova_shift(iovad); - msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL); - if (!msi_page) - return -ENOMEM; - for (i = 0; i < num_pages; i++) { - msi_page[i].phys = start; - msi_page[i].iova = start; - INIT_LIST_HEAD(&msi_page[i].list); - list_add(&msi_page[i].list, &cookie->msi_page_list); + msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL); + if (!msi_page) + return -ENOMEM; + + msi_page->phys = start; + msi_page->iova = start; + INIT_LIST_HEAD(&msi_page->list); + list_add(&msi_page->list, &cookie->msi_page_list); start += iovad->granule; } -- GitLab From f680da6339f54ba3c522fc4ffa835513531eb5aa Mon Sep 17 00:00:00 2001 
From: Hans de Goede Date: Mon, 9 Mar 2020 15:01:37 +0100 Subject: [PATCH 0491/1278] iommu/vt-d: dmar: replace WARN_TAINT with pr_warn + add_taint commit 59833696442c674acbbd297772ba89e7ad8c753d upstream. Quoting from the comment describing the WARN functions in include/asm-generic/bug.h: * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report * significant kernel issues that need prompt attention if they should ever * appear at runtime. * * Do not use these macros when checking for invalid external inputs The (buggy) firmware tables which the dmar code was calling WARN_TAINT for really are invalid external inputs. They are not under the kernel's control and the issues in them cannot be fixed by a kernel update. So logging a backtrace, which invites bug reports to be filed about this, is not helpful. Some distros, e.g. Fedora, have tools watching for the kernel backtraces logged by the WARN macros and offer the user an option to file a bug for this when these are encountered. The WARN_TAINT in warn_invalid_dmar() + another iommu WARN_TAINT, addressed in another patch, have led to over 100 bugs being filed this way. This commit replaces the WARN_TAINT("...") calls, with pr_warn(FW_BUG "...") + add_taint(TAINT_FIRMWARE_WORKAROUND, ...) calls avoiding the backtrace and thus also avoiding bug-reports being filed about this against the kernel.
Fixes: fd0c8894893c ("intel-iommu: Set a more specific taint flag for invalid BIOS DMAR tables") Fixes: e625b4a95d50 ("iommu/vt-d: Parse ANDD records") Signed-off-by: Hans de Goede Signed-off-by: Joerg Roedel Acked-by: Lu Baolu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200309140138.3753-2-hdegoede@redhat.com BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1564895 Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/dmar.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 38d0128b8135..f04a4edc5cfc 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -451,12 +451,13 @@ static int __init dmar_parse_one_andd(struct acpi_dmar_header *header, /* Check for NUL termination within the designated length */ if (strnlen(andd->device_name, header->length - 8) == header->length - 8) { - WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, + pr_warn(FW_BUG "Your BIOS is broken; ANDD object name is not NUL-terminated\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); return -EINVAL; } pr_info("ANDD device: %x name: %s\n", andd->device_number, @@ -482,14 +483,14 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) return 0; } } - WARN_TAINT( - 1, TAINT_FIRMWARE_WORKAROUND, + pr_warn(FW_BUG "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", drhd->reg_base_addr, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); return 0; } @@ -835,14 +836,14 @@ int __init dmar_table_init(void) static void warn_invalid_dmar(u64 addr, const char *message) { - WARN_TAINT_ONCE( - 1, 
TAINT_FIRMWARE_WORKAROUND, + pr_warn_once(FW_BUG "Your BIOS is broken; DMAR reported at address %llx%s!\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", addr, message, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); } static int __ref -- GitLab From a691dd3d0563b495aa7d816119a51c508976d791 Mon Sep 17 00:00:00 2001 From: Yonghyun Hwang Date: Wed, 26 Feb 2020 12:30:06 -0800 Subject: [PATCH 0492/1278] iommu/vt-d: Fix a bug in intel_iommu_iova_to_phys() for huge page commit 77a1bce84bba01f3f143d77127b72e872b573795 upstream. intel_iommu_iova_to_phys() has a bug when it translates an IOVA for a huge page onto its corresponding physical address. This commit fixes the bug by accommodating the level of page entry for the IOVA and adds IOVA's lower address to the physical address. Cc: Acked-by: Lu Baolu Reviewed-by: Moritz Fischer Signed-off-by: Yonghyun Hwang Fixes: 3871794642579 ("VT-d: Changes to support KVM") Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index b8aa5e60e4c3..db1b546134f5 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5124,8 +5124,10 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, u64 phys = 0; pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); - if (pte) - phys = dma_pte_addr(pte); + if (pte && dma_pte_present(pte)) + phys = dma_pte_addr(pte) + + (iova & (BIT_MASK(level_to_offset_bits(level) + + VTD_PAGE_SHIFT) - 1)); return phys; } -- GitLab From 217a7c2200944aec137d83838cbdb29d24d105da Mon Sep 17 00:00:00 2001 From: Nicolas Belin Date: Thu, 20 Feb 2020 14:15:12 +0100 Subject: [PATCH 0493/1278] pinctrl: meson-gxl: fix GPIOX sdio pins commit
dc7a06b0dbbafac8623c2b7657e61362f2f479a7 upstream. In the gxl driver, the sdio cmd and clk pins are inverted. It has not caused any issue so far because devices using these pins always take both pins so the resulting configuration is OK. Fixes: 0f15f500ff2c ("pinctrl: meson: Add GXL pinctrl definitions") Reviewed-by: Jerome Brunet Signed-off-by: Nicolas Belin Link: https://lore.kernel.org/r/1582204512-7582-1-git-send-email-nbelin@baylibre.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/meson/pinctrl-meson-gxl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxl.c b/drivers/pinctrl/meson/pinctrl-meson-gxl.c index 36c14b85fc7c..8db182067ecb 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxl.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxl.c @@ -158,8 +158,8 @@ static const unsigned int sdio_d0_pins[] = { PIN(GPIOX_0, EE_OFF) }; static const unsigned int sdio_d1_pins[] = { PIN(GPIOX_1, EE_OFF) }; static const unsigned int sdio_d2_pins[] = { PIN(GPIOX_2, EE_OFF) }; static const unsigned int sdio_d3_pins[] = { PIN(GPIOX_3, EE_OFF) }; -static const unsigned int sdio_cmd_pins[] = { PIN(GPIOX_4, EE_OFF) }; -static const unsigned int sdio_clk_pins[] = { PIN(GPIOX_5, EE_OFF) }; +static const unsigned int sdio_clk_pins[] = { PIN(GPIOX_4, EE_OFF) }; +static const unsigned int sdio_cmd_pins[] = { PIN(GPIOX_5, EE_OFF) }; static const unsigned int sdio_irq_pins[] = { PIN(GPIOX_7, EE_OFF) }; static const unsigned int nand_ce0_pins[] = { PIN(BOOT_8, EE_OFF) }; -- GitLab From 6f3817634e1d5b9006270fd7381ac7225551eabf Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 28 Feb 2020 15:41:42 +0000 Subject: [PATCH 0494/1278] pinctrl: core: Remove extra kref_get which blocks hogs being freed commit aafd56fc79041bf36f97712d4b35208cbe07db90 upstream. kref_init starts with the reference count at 1, which will be balanced by the pinctrl_put in pinctrl_unregister. 
The additional kref_get in pinctrl_claim_hogs will increase this count to 2 and cause the hogs to not get freed when pinctrl_unregister is called. Fixes: 6118714275f0 ("pinctrl: core: Fix pinctrl_register_and_init() with pinctrl_enable()") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20200228154142.13860-1-ckeepax@opensource.cirrus.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index c55517312485..08ea74177de2 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -2031,7 +2031,6 @@ static int pinctrl_claim_hogs(struct pinctrl_dev *pctldev) return PTR_ERR(pctldev->p); } - kref_get(&pctldev->p->users); pctldev->hog_default = pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT); if (IS_ERR(pctldev->hog_default)) { -- GitLab From 1cb937c39c8c6b4fc5933ca3c50f5426f8d3357c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:10:56 -0800 Subject: [PATCH 0495/1278] nl80211: add missing attribute validation for critical protocol indication commit 0e1a1d853ecedc99da9d27f9f5c376935547a0e2 upstream. Add missing attribute validation for critical protocol fields to the netlink policy. 
Fixes: 5de17984898c ("cfg80211: introduce critical protocol indication from user-space") Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20200303051058.4089398-2-kuba@kernel.org Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b248578aeb7b..8165e0c147a2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -395,6 +395,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MDID] = { .type = NLA_U16 }, [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_CRIT_PROT_ID] = { .type = NLA_U16 }, + [NL80211_ATTR_MAX_CRIT_PROT_DURATION] = { .type = NLA_U16 }, [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 }, [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 }, [NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG }, -- GitLab From 214022646ecf8750cfbc97366832d9aafae1b555 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:10:57 -0800 Subject: [PATCH 0496/1278] nl80211: add missing attribute validation for beacon report scanning commit 056e9375e1f3c4bf2fd49b70258c7daf788ecd9d upstream. Add missing attribute validation for beacon report scanning to the netlink policy. 
Fixes: 1d76250bd34a ("nl80211: support beacon report scanning") Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20200303051058.4089398-3-kuba@kernel.org Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8165e0c147a2..96e527fc131e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -347,6 +347,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_PLINK_STATE] = { .type = NLA_U8 }, + [NL80211_ATTR_MEASUREMENT_DURATION] = { .type = NLA_U16 }, + [NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY] = { .type = NLA_FLAG }, [NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 }, [NL80211_ATTR_REKEY_DATA] = { .type = NLA_NESTED }, [NL80211_ATTR_SCAN_SUPP_RATES] = { .type = NLA_NESTED }, -- GitLab From 66be2d1ab3c6ad4c98014fbf508882b30a50a78f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:10:58 -0800 Subject: [PATCH 0497/1278] nl80211: add missing attribute validation for channel switch commit 5cde05c61cbe13cbb3fa66d52b9ae84f7975e5e6 upstream. Add missing attribute validation for NL80211_ATTR_OPER_CLASS to the netlink policy. 
Fixes: 1057d35ede5d ("cfg80211: introduce TDLS channel switch commands") Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20200303051058.4089398-4-kuba@kernel.org Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 96e527fc131e..d0b75781e6f7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -424,6 +424,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 }, [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, + [NL80211_ATTR_OPER_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN }, [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, -- GitLab From 94d289a9e813b6b6cdb9c0255cb686d1b09a2284 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:08:31 -0800 Subject: [PATCH 0498/1278] netfilter: cthelper: add missing attribute validation for cthelper commit c049b3450072b8e3998053490e025839fecfef31 upstream. Add missing attribute validation for cthelper to the netlink policy. 
Fixes: 12f7a505331e ("netfilter: add user-space connection tracking helper infrastructure") Signed-off-by: Jakub Kicinski Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink_cthelper.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index d33ce6d5ebce..dd1030f5dd5e 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -733,6 +733,8 @@ static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { [NFCTH_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN-1 }, [NFCTH_QUEUE_NUM] = { .type = NLA_U32, }, + [NFCTH_PRIV_DATA_LEN] = { .type = NLA_U32, }, + [NFCTH_STATUS] = { .type = NLA_U32, }, }; static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = { -- GitLab From a9755e81b69b83ddba3206cf51207fabdca4f538 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:08:32 -0800 Subject: [PATCH 0499/1278] netfilter: nft_payload: add missing attribute validation for payload csum flags commit 9d6effb2f1523eb84516e44213c00f2fd9e6afff upstream. Add missing attribute validation for NFTA_PAYLOAD_CSUM_FLAGS to the netlink policy. 
Fixes: 1814096980bb ("netfilter: nft_payload: layer 4 checksum adjustment for pseudoheader fields") Signed-off-by: Jakub Kicinski Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_payload.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index e110b0ebbf58..19446a89a2a8 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -121,6 +121,7 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 }, }; static int nft_payload_init(const struct nft_ctx *ctx, -- GitLab From 3628a53e6cde8a644cd12c1bb00eee7eb392d60f Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Mar 2020 14:09:54 +0800 Subject: [PATCH 0500/1278] iommu/vt-d: Fix the wrong printing in RHSA parsing commit b0bb0c22c4db623f2e7b1a471596fbf1c22c6dc5 upstream. When base address in RHSA structure doesn't match base address in each DRHD structure, the base address in last DRHD is printed out. This doesn't make sense when there are multiple DRHD units, fix it by printing the buggy RHSA's base address. 
Signed-off-by: Lu Baolu Signed-off-by: Zhenzhong Duan Fixes: fd0c8894893cb ("intel-iommu: Set a more specific taint flag for invalid BIOS DMAR tables") Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index f04a4edc5cfc..a7cf733bcd33 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -486,7 +486,7 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) pr_warn(FW_BUG "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - drhd->reg_base_addr, + rhsa->base_address, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); -- GitLab From 24de9d8fdd877d132dc552d63121f0d1a1cf3f4d Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 12 Mar 2020 14:09:55 +0800 Subject: [PATCH 0501/1278] iommu/vt-d: Ignore devices with out-of-spec domain number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit da72a379b2ec0bad3eb265787f7008bead0b040c upstream. VMD subdevices are created with a PCI domain ID of 0x10000 or higher. These subdevices are also handled like all other PCI devices by dmar_pci_bus_notifier(). However, when dmar_alloc_pci_notify_info() take records of such devices, it will truncate the domain ID to a u16 value (in info->seg). The device at (e.g.) 10000:00:02.0 is then treated by the DMAR code as if it is 0000:00:02.0. In the unlucky event that a real device also exists at 0000:00:02.0 and also has a device-specific entry in the DMAR table, dmar_insert_dev_scope() will crash on:   BUG_ON(i >= devices_cnt); That's basically a sanity check that only one PCI device matches a single DMAR entry; in this case we seem to have two matching devices. 
Fix this by ignoring devices that have a domain number higher than what can be looked up in the DMAR table. This problem was carefully diagnosed by Jian-Hong Pan. Signed-off-by: Lu Baolu Signed-off-by: Daniel Drake Fixes: 59ce0515cdaf3 ("iommu/vt-d: Update DRHD/RMRR/ATSR device scope caches when PCI hotplug happens") Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/dmar.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index a7cf733bcd33..1f527ca60955 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -139,6 +140,13 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) BUG_ON(dev->is_virtfn); + /* + * Ignore devices that have a domain number higher than what can + * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000 + */ + if (pci_domain_nr(dev->bus) > U16_MAX) + return NULL; + /* Only generate path[] for device addition event */ if (event == BUS_NOTIFY_ADD_DEVICE) for (tmp = dev; tmp; tmp = tmp->bus->self) -- GitLab From 5916adba73830d6b8f7f4305e4151ab4952b7ede Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 12 Mar 2020 14:32:44 +0100 Subject: [PATCH 0502/1278] i2c: acpi: put device when verifying client fails commit 8daee952b4389729358665fb91949460641659d4 upstream. i2c_verify_client() can fail, so we need to put the device when that happens. 
Fixes: 525e6fabeae2 ("i2c / ACPI: add support for ACPI reconfigure notifications") Reported-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Reviewed-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-core-acpi.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index df9800aaeac7..0d4d5dcf94f3 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -352,10 +352,18 @@ static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev) { struct device *dev; + struct i2c_client *client; dev = bus_find_device(&i2c_bus_type, NULL, adev, i2c_acpi_find_match_device); - return dev ? i2c_verify_client(dev) : NULL; + if (!dev) + return NULL; + + client = i2c_verify_client(dev); + if (!client) + put_device(dev); + + return client; } static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value, -- GitLab From e929f447bea484a36515bdaf1a73dfaa8afac605 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Feb 2020 11:52:29 -0800 Subject: [PATCH 0503/1278] ipv6: restrict IPV6_ADDRFORM operation commit b6f6118901d1e867ac9177bbff3b00b185bd4fdc upstream. IPV6_ADDRFORM is able to transform IPv6 socket to IPv4 one. While this operation sounds illogical, we have to support it. One of the things it does for TCP socket is to switch sk->sk_prot to tcp_prot. We now have other layers playing with sk->sk_prot, so we should make sure to not interfere with them. This patch makes sure sk_prot is the default pointer for TCP IPv6 socket. 
syzbot reported : BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD a0113067 P4D a0113067 PUD a8771067 PMD 0 Oops: 0010 [#1] PREEMPT SMP KASAN CPU: 0 PID: 10686 Comm: syz-executor.0 Not tainted 5.6.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:0x0 Code: Bad RIP value. RSP: 0018:ffffc9000281fce0 EFLAGS: 00010246 RAX: 1ffffffff15f48ac RBX: ffffffff8afa4560 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8880a69a8f40 RBP: ffffc9000281fd10 R08: ffffffff86ed9b0c R09: ffffed1014d351f5 R10: ffffed1014d351f5 R11: 0000000000000000 R12: ffff8880920d3098 R13: 1ffff1101241a613 R14: ffff8880a69a8f40 R15: 0000000000000000 FS: 00007f2ae75db700(0000) GS:ffff8880aea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000000a3b85000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: inet_release+0x165/0x1c0 net/ipv4/af_inet.c:427 __sock_release net/socket.c:605 [inline] sock_close+0xe1/0x260 net/socket.c:1283 __fput+0x2e4/0x740 fs/file_table.c:280 ____fput+0x15/0x20 fs/file_table.c:313 task_work_run+0x176/0x1b0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_usermode_loop arch/x86/entry/common.c:164 [inline] prepare_exit_to_usermode+0x480/0x5b0 arch/x86/entry/common.c:195 syscall_return_slowpath+0x113/0x4a0 arch/x86/entry/common.c:278 do_syscall_64+0x11f/0x1c0 arch/x86/entry/common.c:304 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x45c429 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f2ae75dac78 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: 0000000000000000 RBX: 
00007f2ae75db6d4 RCX: 000000000045c429 RDX: 0000000000000001 RSI: 000000000000011a RDI: 0000000000000004 RBP: 000000000076bf20 R08: 0000000000000038 R09: 0000000000000000 R10: 0000000020000180 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000000a9d R14: 00000000004ccfb4 R15: 000000000076bf2c Modules linked in: CR2: 0000000000000000 ---[ end trace 82567b5207e87bae ]--- RIP: 0010:0x0 Code: Bad RIP value. RSP: 0018:ffffc9000281fce0 EFLAGS: 00010246 RAX: 1ffffffff15f48ac RBX: ffffffff8afa4560 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8880a69a8f40 RBP: ffffc9000281fd10 R08: ffffffff86ed9b0c R09: ffffed1014d351f5 R10: ffffed1014d351f5 R11: 0000000000000000 R12: ffff8880920d3098 R13: 1ffff1101241a613 R14: ffff8880a69a8f40 R15: 0000000000000000 FS: 00007f2ae75db700(0000) GS:ffff8880aea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000000a3b85000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Eric Dumazet Reported-by: syzbot+1938db17e275e85dc328@syzkaller.appspotmail.com Cc: Daniel Borkmann Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ipv6_sockglue.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 5c91b05c8d8f..8c492471b0da 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -185,9 +185,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = -EBUSY; break; } - } else if (sk->sk_protocol != IPPROTO_TCP) + } else if (sk->sk_protocol == IPPROTO_TCP) { + if (sk->sk_prot != &tcpv6_prot) { + retv = -EBUSY; + break; + } break; - + } else { + break; + } if (sk->sk_state != TCP_ESTABLISHED) { retv = -ENOTCONN; break; -- GitLab From 5a09fc5060754baf59b28cf92719a776d1c3bfbd Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 26 Feb 2020 17:52:46 +0100 Subject: [PATCH 0504/1278] net/smc: check for valid ib_client_data commit a2f2ef4a54c0d97aa6a8386f4ff23f36ebb488cf upstream. In smc_ib_remove_dev() check if the provided ib device was actually initialized for SMC before. Reported-by: syzbot+84484ccebdd4e5451d91@syzkaller.appspotmail.com Fixes: a4cf0443c414 ("smc: introduce SMC as an IB-client") Signed-off-by: Karsten Graul Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/smc/smc_ib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 4410d0071515..7d89b0584944 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -513,6 +513,8 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) struct smc_ib_device *smcibdev; smcibdev = ib_get_client_data(ibdev, &smc_ib_client); + if (!smcibdev || smcibdev->ibdev != ibdev) + return; ib_set_client_data(ibdev, &smc_ib_client, NULL); spin_lock(&smc_ib_devices.lock); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ -- GitLab From 02f13e4e682390d10d7ece6260d9ee4059c8f450 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Sun, 8 Mar 2020 09:08:55 +0100 Subject: [PATCH 0505/1278] efi: Add a sanity check to efivar_store_raw() commit d6c066fda90d578aacdf19771a027ed484a79825 upstream. Add a sanity check to efivar_store_raw() the same way efivar_{attr,size,data}_read() and efivar_show_raw() have it. 
Signed-off-by: Vladis Dronov Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Link: https://lore.kernel.org/r/20200305084041.24053-3-vdronov@redhat.com Link: https://lore.kernel.org/r/20200308080859.21568-25-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efivars.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index c8688490f148..1c65f5ac4368 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -272,6 +272,9 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) u8 *data; int err; + if (!entry || !buf) + return -EINVAL; + if (is_compat()) { struct compat_efi_variable *compat; -- GitLab From c408b35da8ef4378b858d3f9f33f23a16989676b Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:18 +0100 Subject: [PATCH 0506/1278] batman-adv: Avoid spurious warnings from bat_v neigh_cmp implementation commit 6a4bc44b012cbc29c9d824be2c7ab9eac8ee6b6f upstream. The neighbor compare API implementation for B.A.T.M.A.N. V checks whether the neigh_ifinfo for this neighbor on a specific interface exists. A warning is printed when it isn't found. But it is not called inside a lock which would prevent that this information is lost right before batadv_neigh_ifinfo_get. It must therefore be expected that batadv_v_neigh_(cmp|is_sob) might not be able to get the requested neigh_ifinfo. A WARN_ON for such a situation seems not to be appropriate because this will only flood the kernel logs. The warnings must therefore be removed. 
Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_v.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 371a1f1651b4..f81e67fbb352 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -19,7 +19,6 @@ #include "main.h" #include -#include #include #include #include @@ -623,11 +622,11 @@ static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, int ret = 0; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - if (WARN_ON(!ifinfo1)) + if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - if (WARN_ON(!ifinfo2)) + if (!ifinfo2) goto err_ifinfo2; ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput; @@ -649,11 +648,11 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, bool ret = false; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - if (WARN_ON(!ifinfo1)) + if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - if (WARN_ON(!ifinfo2)) + if (!ifinfo2) goto err_ifinfo2; threshold = ifinfo1->bat_v.throughput / 4; -- GitLab From 1c2139faa923dcdd75f58e68959422c8475fd58f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:19 +0100 Subject: [PATCH 0507/1278] batman-adv: Always initialize fragment header priority commit fe77d8257c4d838c5976557ddb87bd789f312412 upstream. The batman-adv unicast fragment header contains 3 bits for the priority of the packet. These bits will be initialized when the skb->priority contains a value between 256 and 263. But otherwise, the uninitialized bits from the stack will be used.
Fixes: c0f25c802b33 ("batman-adv: Include frame priority in fragment header") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/fragmentation.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index c6d37d22bd12..788d62073964 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -500,6 +500,8 @@ int batadv_frag_send_packet(struct sk_buff *skb, */ if (skb->priority >= 256 && skb->priority <= 263) frag_header.priority = skb->priority - 256; + else + frag_header.priority = 0; ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr); ether_addr_copy(frag_header.dest, orig_node->orig); -- GitLab From f0455763b0f4140f18d7f83d88b048552bbc57c8 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:20 +0100 Subject: [PATCH 0508/1278] batman-adv: Fix check of retrieved orig_gw in batadv_v_gw_is_eligible commit 198a62ddffa4a4ffaeb741f642b7b52f2d91ae9b upstream. The batadv_v_gw_is_eligible function already assumes that orig_node is not NULL. But batadv_gw_node_get may have failed to find the originator. It must therefore be checked whether the batadv_gw_node_get failed and not whether orig_node is NULL to detect this error. Fixes: 50164d8f500f ("batman-adv: B.A.T.M.A.N. 
V - implement GW selection logic") Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_v.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index f81e67fbb352..eb8cec14b854 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -814,7 +814,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv, } orig_gw = batadv_gw_node_get(bat_priv, orig_node); - if (!orig_node) + if (!orig_gw) goto out; if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0) -- GitLab From 16e33df6dfa83a3f7078cdc01797f502fc9a6bf4 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:21 +0100 Subject: [PATCH 0509/1278] batman-adv: Fix lock for ogm cnt access in batadv_iv_ogm_calc_tq commit 5ba7dcfe77037b67016263ea597a8b431692ecab upstream. The originator node object orig_neigh_node is used when accessing the bcast_own(_sum) and real_packet_count information. The access to them has to be protected with the spinlock in orig_neigh_node. But the function uses the lock in orig_node instead. This is incorrect because they could be two different originator node objects.
Fixes: 0ede9f41b217 ("batman-adv: protect bit operations to count OGMs with spinlock") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_iv_ogm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 8b3f9441b3a0..1dda8949734e 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1220,7 +1220,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, orig_node->last_seen = jiffies; /* find packet count of corresponding one hop neighbor */ - spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); + spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); if_num = if_incoming->if_num; orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num]; neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); @@ -1230,7 +1230,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, } else { neigh_rq_count = 0; } - spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); + spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); /* pay attention to not get a value bigger than 100 % */ if (orig_eq_count > neigh_rq_count) -- GitLab From 78855971878cc5f6542bef42d4ef2eb403498fcd Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:22 +0100 Subject: [PATCH 0510/1278] batman-adv: Fix internal interface indices types commit f22e08932c2960f29b5e828e745c9f3fb7c1bb86 upstream. batman-adv uses internal indices for each enabled and active interface. It is currently used by the B.A.T.M.A.N. IV algorithm to identify the correct position in the ogm_cnt bitmaps. The type for the number of enabled interfaces (which defines the next interface index) was set to char. This type can be (depending on the architecture) either signed (limiting batman-adv to 127 active slave interfaces) or unsigned (limiting batman-adv to 255 active slave interfaces).
This limit was not correctly checked when an interface was enabled and thus an overflow happened. This was only caught on systems with the signed char type when the B.A.T.M.A.N. IV code tried to resize its counter arrays with a negative size. The if_num interface index was only a s16 and therefore significantly smaller than the ifindex (int) used by the net core code. Both &batadv_hard_iface->if_num and &batadv_priv->num_ifaces must be (unsigned) int to support the same number of slave interfaces as the net core code. And the interface activation code must check the number of active slave interfaces to avoid integer overflows. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_iv_ogm.c | 24 ++++++++++++++---------- net/batman-adv/hard-interface.c | 9 +++++++-- net/batman-adv/originator.c | 4 ++-- net/batman-adv/originator.h | 4 ++-- net/batman-adv/types.h | 11 ++++++----- 5 files changed, 31 insertions(+), 21 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 1dda8949734e..0b2f69924444 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -149,7 +149,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node) * Return: 0 on success, a negative error code otherwise.
*/ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, - int max_if_num) + unsigned int max_if_num) { void *data_ptr; size_t old_size; @@ -193,7 +193,8 @@ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, */ static void batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { size_t chunk_size; size_t if_offset; @@ -231,7 +232,8 @@ batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node, */ static void batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { size_t if_offset; void *data_ptr; @@ -268,7 +270,8 @@ batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node, * Return: 0 on success, a negative error code otherwise. */ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); @@ -302,7 +305,8 @@ static struct batadv_orig_node * batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr) { struct batadv_orig_node *orig_node; - int size, hash_added; + int hash_added; + size_t size; orig_node = batadv_orig_hash_find(bat_priv, addr); if (orig_node) @@ -890,7 +894,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) u32 i; size_t word_index; u8 *w; - int if_num; + unsigned int if_num; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1020,7 +1024,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; - int if_num; + unsigned int if_num; u8 sum_orig, sum_neigh; u8 *neigh_addr; u8 tq_avg; @@ -1179,7 +1183,7 @@ static bool batadv_iv_ogm_calc_tq(struct 
batadv_orig_node *orig_node, u8 total_count; u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; - int if_num; + unsigned int if_num; unsigned int tq_asym_penalty, inv_asym_penalty; unsigned int combined_tq; unsigned int tq_iface_penalty; @@ -1698,9 +1702,9 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, if (is_my_orig) { unsigned long *word; - int offset; + size_t offset; s32 bit_pos; - s16 if_num; + unsigned int if_num; u8 *weight; orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 2e1a084b0bd2..4b67731677af 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -738,6 +738,11 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); + if (bat_priv->num_ifaces >= UINT_MAX) { + ret = -ENOSPC; + goto err_dev; + } + ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface, NULL, NULL); if (ret) @@ -845,7 +850,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface); /* nobody uses this interface anymore */ - if (!bat_priv->num_ifaces) { + if (bat_priv->num_ifaces == 0) { batadv_gw_check_client_stop(bat_priv); if (autodel == BATADV_IF_CLEANUP_AUTO) @@ -881,7 +886,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) if (ret) goto free_if; - hard_iface->if_num = -1; + hard_iface->if_num = 0; hard_iface->net_dev = net_dev; hard_iface->soft_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8e2a4b205257..653eaadcfefb 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1500,7 +1500,7 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) } int 
batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, - int max_if_num) + unsigned int max_if_num) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_algo_ops *bao = bat_priv->algo_ops; @@ -1535,7 +1535,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, } int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, - int max_if_num) + unsigned int max_if_num) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index d94220a6d21a..d6ca52220ec0 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -78,9 +78,9 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, - int max_if_num); + unsigned int max_if_num); int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, - int max_if_num); + unsigned int max_if_num); struct batadv_orig_node_vlan * batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, unsigned short vid); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index d5e3968619b8..dbeaa015edc9 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -155,7 +155,7 @@ enum batadv_hard_iface_wifi_flags { */ struct batadv_hard_iface { struct list_head list; - s16 if_num; + unsigned int if_num; char if_status; u8 num_bcasts; u32 wifi_flags; @@ -1081,7 +1081,7 @@ struct batadv_priv { atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; - char num_ifaces; + unsigned int num_ifaces; struct kobject *mesh_obj; struct dentry *debug_dir; struct hlist_head forw_bat_list; @@ -1479,9 +1479,10 @@ struct batadv_algo_neigh_ops { */ struct batadv_algo_orig_ops { void 
(*free)(struct batadv_orig_node *orig_node); - int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num); - int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num, - int del_if_num); + int (*add_if)(struct batadv_orig_node *orig_node, + unsigned int max_if_num); + int (*del_if)(struct batadv_orig_node *orig_node, + unsigned int max_if_num, unsigned int del_if_num); #ifdef CONFIG_BATMAN_ADV_DEBUGFS void (*print)(struct batadv_priv *priv, struct seq_file *seq, struct batadv_hard_iface *hard_iface); -- GitLab From 416cada5adabd95ddd63776e367eb6dd134dff89 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 16 Mar 2020 23:30:23 +0100 Subject: [PATCH 0511/1278] batman-adv: update data pointers after skb_cow() commit bc44b78157f621ff2a2618fe287a827bcb094ac4 upstream. batadv_check_unicast_ttvn() calls skb_cow(), so pointers into the SKB data must be (re)set after calling it. The ethhdr variable is dropped altogether. Fixes: 7cdcf6dddc42 ("batman-adv: add UNICAST_4ADDR packet type") Signed-off-by: Matthias Schiffer Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/routing.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index cd82cff716c7..f59aac06733e 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -950,14 +950,10 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL; int check, hdr_size = sizeof(*unicast_packet); enum batadv_subtype subtype; - struct ethhdr *ethhdr; int ret = NET_RX_DROP; bool is4addr, is_gw; unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - ethhdr = eth_hdr(skb); - is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR; /* the caller function should have already pulled 2 bytes */ if (is4addr) @@ -977,12 
+973,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size)) goto free_skb; + unicast_packet = (struct batadv_unicast_packet *)skb->data; + /* packet for me */ if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { /* If this is a unicast packet from another backgone gw, * drop it. */ - orig_addr_gw = ethhdr->h_source; + orig_addr_gw = eth_hdr(skb)->h_source; orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw); if (orig_node_gw) { is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw, @@ -997,6 +995,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, } if (is4addr) { + unicast_4addr_packet = + (struct batadv_unicast_4addr_packet *)skb->data; subtype = unicast_4addr_packet->subtype; batadv_dat_inc_counter(bat_priv, subtype); -- GitLab From 6620d5e5d1764d89046a24be01de9977a7236ad2 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:24 +0100 Subject: [PATCH 0512/1278] batman-adv: Avoid race in TT TVLV allocator helper commit 8ba0f9bd3bdea1058c2b2676bec7905724418e40 upstream. The functions batadv_tt_prepare_tvlv_local_data and batadv_tt_prepare_tvlv_global_data are responsible for preparing a buffer which can be used to store the TVLV container for TT and add the VLAN information to it. This will be done in three phases: 1. count the number of VLANs and their entries 2. allocate the buffer using the counters from the previous step and limits from the caller (parameter tt_len) 3. insert the VLAN information to the buffer The step 1 and 3 operate on a list which contains the VLANs. The access to these lists must be protected with an appropriate lock or otherwise they might operate on different entries. This could for example happen when another context is adding VLAN entries to this list.
This could lead to a buffer overflow in these functions when enough entries were added between steps 1 and 3 to the VLAN lists that the buffer room for the entries (*tt_change) is smaller than the now required extra buffer for new VLAN entries. Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific") Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 2c2670b85fa9..adc686087a26 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -872,7 +872,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, struct batadv_orig_node_vlan *vlan; u8 *tt_change_ptr; - rcu_read_lock(); + spin_lock_bh(&orig_node->vlan_list_lock); hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { num_vlan++; num_entries += atomic_read(&vlan->tt.num_entries); @@ -910,7 +910,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - rcu_read_unlock(); + spin_unlock_bh(&orig_node->vlan_list_lock); return tvlv_len; } @@ -946,7 +946,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, u8 *tt_change_ptr; int change_offset; - rcu_read_lock(); + spin_lock_bh(&bat_priv->softif_vlan_list_lock); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { num_vlan++; num_entries += atomic_read(&vlan->tt.num_entries); @@ -984,7 +984,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - rcu_read_unlock(); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return tvlv_len; } -- GitLab From 98a21317d0336cb203e352e2161bcef0c6c76beb Mon Sep 17 00:00:00 2001 From:
=?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 16 Mar 2020 23:30:25 +0100 Subject: [PATCH 0513/1278] batman-adv: Fix TT sync flags for intermediate TT responses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7072337e52b3e9d5460500d8dc9cbc1ba2db084c upstream. The previous TT sync fix so far only fixed TT responses issued by the target node directly. So far, TT responses issued by intermediate nodes still lead to the wrong flags being added, leading to CRC mismatches. This behaviour was observed at Freifunk Hannover in a 800 nodes setup where a considerable amount of nodes were still infected with 'WI' TT flags even with (most) nodes having the previous TT sync fix applied. I was able to reproduce the issue with intermediate TT responses in a four node test setup and this patch fixes this issue by ensuring to use the per originator instead of the summarized, OR'd ones. Fixes: e9c00136a475 ("batman-adv: fix tt_global_entries flags update") Reported-by: Leonardo Mörlein Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 61 +++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index adc686087a26..c37611bea429 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1544,6 +1544,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, * by a given originator * @entry: the TT global entry to check * @orig_node: the originator to search in the list + * @flags: a pointer to store TT flags for the given @entry received + * from @orig_node * * find out if an orig_node is already in the list of a tt_global_entry. 
* @@ -1551,7 +1553,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, */ static bool batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, - const struct batadv_orig_node *orig_node) + const struct batadv_orig_node *orig_node, + u8 *flags) { struct batadv_tt_orig_list_entry *orig_entry; bool found = false; @@ -1559,6 +1562,10 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node); if (orig_entry) { found = true; + + if (flags) + *flags = orig_entry->flags; + batadv_tt_orig_list_entry_put(orig_entry); } @@ -1741,7 +1748,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, if (!(common->flags & BATADV_TT_CLIENT_TEMP)) goto out; if (batadv_tt_global_entry_has_orig(tt_global_entry, - orig_node)) + orig_node, NULL)) goto out_remove; batadv_tt_global_del_orig_list(tt_global_entry); goto add_orig_entry; @@ -2884,23 +2891,46 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv, } /** - * batadv_tt_local_valid - verify that given tt entry is a valid one + * batadv_tt_local_valid() - verify local tt entry and get flags * @entry_ptr: to be checked local tt entry * @data_ptr: not used but definition required to satisfy the callback prototype + * @flags: a pointer to store TT flags for this client to + * + * Checks the validity of the given local TT entry. If it is, then the provided + * flags pointer is updated. * * Return: true if the entry is a valid, false otherwise. 
*/ -static bool batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) +static bool batadv_tt_local_valid(const void *entry_ptr, + const void *data_ptr, + u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW) return false; + + if (flags) + *flags = tt_common_entry->flags; + return true; } +/** + * batadv_tt_global_valid() - verify global tt entry and get flags + * @entry_ptr: to be checked global tt entry + * @data_ptr: an orig_node object (may be NULL) + * @flags: a pointer to store TT flags for this client to + * + * Checks the validity of the given global TT entry. If it is, then the provided + * flags pointer is updated either with the common (summed) TT flags if data_ptr + * is NULL or the specific, per originator TT flags otherwise. + * + * Return: true if the entry is a valid, false otherwise. + */ static bool batadv_tt_global_valid(const void *entry_ptr, - const void *data_ptr) + const void *data_ptr, + u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; const struct batadv_tt_global_entry *tt_global_entry; @@ -2914,7 +2944,8 @@ static bool batadv_tt_global_valid(const void *entry_ptr, struct batadv_tt_global_entry, common); - return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node); + return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node, + flags); } /** @@ -2924,25 +2955,34 @@ static bool batadv_tt_global_valid(const void *entry_ptr, * @hash: hash table containing the tt entries * @tt_len: expected tvlv tt data buffer length in number of bytes * @tvlv_buff: pointer to the buffer to fill with the TT data - * @valid_cb: function to filter tt change entries + * @valid_cb: function to filter tt change entries and to return TT flags * @cb_data: data passed to the filter function as argument + * + * Fills the tvlv buff with the tt entries from the specified hash. If valid_cb + * is not provided then this becomes a no-op. 
*/ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, struct batadv_hashtable *hash, void *tvlv_buff, u16 tt_len, bool (*valid_cb)(const void *, - const void *), + const void *, + u8 *flags), void *cb_data) { struct batadv_tt_common_entry *tt_common_entry; struct batadv_tvlv_tt_change *tt_change; struct hlist_head *head; u16 tt_tot, tt_num_entries = 0; + u8 flags; + bool ret; u32 i; tt_tot = batadv_tt_entries(tt_len); tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff; + if (!valid_cb) + return; + rcu_read_lock(); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -2952,11 +2992,12 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, if (tt_tot == tt_num_entries) break; - if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data))) + ret = valid_cb(tt_common_entry, cb_data, &flags); + if (!ret) continue; ether_addr_copy(tt_change->addr, tt_common_entry->addr); - tt_change->flags = tt_common_entry->flags; + tt_change->flags = flags; tt_change->vid = htons(tt_common_entry->vid); memset(tt_change->reserved, 0, sizeof(tt_change->reserved)); -- GitLab From 90ae6475b1753f0c1a4c66034b5666de3189fac8 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Mon, 16 Mar 2020 23:30:26 +0100 Subject: [PATCH 0514/1278] batman-adv: prevent TT request storms by not sending inconsistent TT TLVLs commit 16116dac23396e73c01eeee97b102e4833a4b205 upstream. A translation table TVLV changset sent with an OGM consists of a number of headers (one per VLAN) plus the changeset itself (addition and/or deletion of entries). The per-VLAN headers are used by OGM recipients for consistency checks. Said consistency check might determine that a full translation table request is needed to restore consistency. If the TT sender adds per-VLAN headers of empty VLANs into the OGM, recipients are led to believe to have reached an inconsistent state and thus request a full table update. 
The full table does not contain empty VLANs (due to missing entries) the cycle restarts when the next OGM is issued. Consequently, when the translation table TVLV headers are composed, empty VLANs are to be excluded. Fixes: 21a57f6e7a3b ("batman-adv: make the TT CRC logic VLAN specific") Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index c37611bea429..dbc516824175 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -941,15 +941,20 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_softif_vlan *vlan; u16 num_vlan = 0; - u16 num_entries = 0; + u16 vlan_entries = 0; + u16 total_entries = 0; u16 tvlv_len; u8 *tt_change_ptr; int change_offset; spin_lock_bh(&bat_priv->softif_vlan_list_lock); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + num_vlan++; - num_entries += atomic_read(&vlan->tt.num_entries); + total_entries += vlan_entries; } change_offset = sizeof(**tt_data); @@ -957,7 +962,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) - *tt_len = batadv_tt_len(num_entries); + *tt_len = batadv_tt_len(total_entries); tvlv_len = *tt_len; tvlv_len += change_offset; @@ -974,6 +979,10 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + tt_vlan->vid = 
htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); -- GitLab From 33dfa3bf0ec31a1c64425b61bae2206b7bcebf13 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:27 +0100 Subject: [PATCH 0515/1278] batman-adv: Fix debugfs path for renamed hardif commit 36dc621ceca1be3ec885aeade5fdafbbcc452a6d upstream. batman-adv is creating special debugfs directories in the init net_namespace for each valid hard-interface (net_device). But it is possible to rename a net_device to a completely different name than the original one. It can therefore happen that a user registers a new net_device which gets the name "wlan0" assigned by default. batman-adv is also adding a new directory under $debugfs/batman-adv/ with the name "wlan0". The user then decides to rename this device to "wl_pri" and registers a different device. The kernel may now decide to use the name "wlan0" again for this new device. batman-adv will detect it as a valid net_device and try to create a directory with the name "wlan0" under $debugfs/batman-adv/. But there already exists one with this name under this path and thus this fails. batman-adv will detect a problem and roll back the registering of this device. batman-adv must therefore take care of renaming the debugfs directories for hard-interfaces whenever it detects such a net_device rename.
Fixes: 5bc7c1eb44f2 ("batman-adv: add debugfs structure for information per interface") Reported-by: John Soros Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/debugfs.c | 22 +++++++++++++++++++++- net/batman-adv/debugfs.h | 6 ++++++ net/batman-adv/hard-interface.c | 3 +++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index e32ad47c6efd..7ee828cd9778 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -18,6 +18,7 @@ #include "debugfs.h" #include "main.h" +#include #include #include #include @@ -338,7 +339,26 @@ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) } /** - * batadv_debugfs_del_hardif - delete the base directory for a hard interface + * batadv_debugfs_rename_hardif() - Fix debugfs path for renamed hardif + * @hard_iface: hard interface which was renamed + */ +void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface) +{ + const char *name = hard_iface->net_dev->name; + struct dentry *dir; + struct dentry *d; + + dir = hard_iface->debug_dir; + if (!dir) + return; + + d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name); + if (!d) + pr_err("Can't rename debugfs dir to %s\n", name); +} + +/** + * batadv_debugfs_del_hardif() - delete the base directory for a hard interface * in debugfs. * @hard_iface: hard interface which is deleted. 
*/ diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 9c5d4a65b98c..295e11146818 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -31,6 +31,7 @@ void batadv_debugfs_destroy(void); int batadv_debugfs_add_meshif(struct net_device *dev); void batadv_debugfs_del_meshif(struct net_device *dev); int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface); +void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface); void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface); #else @@ -58,6 +59,11 @@ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) return 0; } +static inline +void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface) +{ +} + static inline void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) { diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 4b67731677af..e72e95208339 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1017,6 +1017,9 @@ static int batadv_hard_if_event(struct notifier_block *this, if (batadv_is_wifi_hardif(hard_iface)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; break; + case NETDEV_CHANGENAME: + batadv_debugfs_rename_hardif(hard_iface); + break; default: break; } -- GitLab From da2c2e3c63c6de983accd410de8c125481ee2eb3 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:28 +0100 Subject: [PATCH 0516/1278] batman-adv: Fix debugfs path for renamed softif commit 6da7be7d24b2921f8215473ba7552796dff05fe1 upstream. batman-adv is creating special debugfs directories in the init net_namespace for each created soft-interface (batadv net_device). But it is possible to rename a net_device to a completely different name than the original one. It can therefore happen that a user registers a new batadv net_device with the name "bat0". batman-adv is then also adding a new directory under $debugfs/batman-adv/ with the name "bat0".
The user then decides to rename this device to "bat1" and registers a different batadv device with the name "bat0". batman-adv will then try to create a directory with the name "bat0" under $debugfs/batman-adv/ again. But there already exists one with this name under this path and thus this fails. batman-adv will detect a problem and rollback the registering of this device. batman-adv must therefore take care of renaming the debugfs directories for soft-interfaces whenever it detects such a net_device rename. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/debugfs.c | 24 +++++++++++++++++++++++ net/batman-adv/debugfs.h | 5 +++++ net/batman-adv/hard-interface.c | 34 +++++++++++++++++++++++++++------ 3 files changed, 57 insertions(+), 6 deletions(-) diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 7ee828cd9778..4957d4824437 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -421,6 +421,30 @@ int batadv_debugfs_add_meshif(struct net_device *dev) return -ENOMEM; } +/** + * batadv_debugfs_rename_meshif() - Fix debugfs path for renamed softif + * @dev: net_device which was renamed + */ +void batadv_debugfs_rename_meshif(struct net_device *dev) +{ + struct batadv_priv *bat_priv = netdev_priv(dev); + const char *name = dev->name; + struct dentry *dir; + struct dentry *d; + + dir = bat_priv->debug_dir; + if (!dir) + return; + + d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name); + if (!d) + pr_err("Can't rename debugfs dir to %s\n", name); +} + +/** + * batadv_debugfs_del_meshif() - Remove interface dependent debugfs entries + * @dev: netdev struct of the soft interface + */ void batadv_debugfs_del_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 295e11146818..901bbc357bf4 100644 --- 
a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -29,6 +29,7 @@ struct net_device; void batadv_debugfs_init(void); void batadv_debugfs_destroy(void); int batadv_debugfs_add_meshif(struct net_device *dev); +void batadv_debugfs_rename_meshif(struct net_device *dev); void batadv_debugfs_del_meshif(struct net_device *dev); int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface); void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface); @@ -49,6 +50,10 @@ static inline int batadv_debugfs_add_meshif(struct net_device *dev) return 0; } +static inline void batadv_debugfs_rename_meshif(struct net_device *dev) +{ +} + static inline void batadv_debugfs_del_meshif(struct net_device *dev) { } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e72e95208339..c43887fa29a9 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -955,6 +955,32 @@ void batadv_hardif_remove_interfaces(void) rtnl_unlock(); } +/** + * batadv_hard_if_event_softif() - Handle events for soft interfaces + * @event: NETDEV_* event to handle + * @net_dev: net_device which generated an event + * + * Return: NOTIFY_* result + */ +static int batadv_hard_if_event_softif(unsigned long event, + struct net_device *net_dev) +{ + struct batadv_priv *bat_priv; + + switch (event) { + case NETDEV_REGISTER: + batadv_sysfs_add_meshif(net_dev); + bat_priv = netdev_priv(net_dev); + batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS); + break; + case NETDEV_CHANGENAME: + batadv_debugfs_rename_meshif(net_dev); + break; + } + + return NOTIFY_DONE; +} + static int batadv_hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -963,12 +989,8 @@ static int batadv_hard_if_event(struct notifier_block *this, struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; - if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) { - batadv_sysfs_add_meshif(net_dev); - bat_priv = 
netdev_priv(net_dev); - batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS); - return NOTIFY_DONE; - } + if (batadv_softif_is_valid(net_dev)) + return batadv_hard_if_event_softif(event, net_dev); hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface && (event == NETDEV_REGISTER || -- GitLab From 43340e23e9f621a49dfafc9b862d612f24af3b4a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:29 +0100 Subject: [PATCH 0517/1278] batman-adv: Fix duplicated OGMs on NETDEV_UP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9e6b5648bbc4cd48fab62cecbb81e9cc3c6e7e88 upstream. The state of slave interfaces is handled differently depending on whether the interface is up or not. All active interfaces (IFF_UP) will transmit OGMs. But for B.A.T.M.A.N. IV, also non-active interfaces are scheduling (low TTL) OGMs on active interfaces. The code which sets up and schedules the OGMs must therefore already be called when the interface gets added as slave interface and the transmit function must then check whether it has to send out the OGM or not on the specific slave interface. But the commit f0d97253fb5f ("batman-adv: remove ogm_emit and ogm_schedule API calls") moved the setup code from the enable function to the activate function. The latter is called either when the added slave was already up when batadv_hardif_enable_interface processed the new interface or when a NETDEV_UP event was received for this slave interface. As a result, each NETDEV_UP would schedule a new OGM worker for the interface and thus OGMs would be sent a lot more than expected.
Fixes: f0d97253fb5f ("batman-adv: remove ogm_emit and ogm_schedule API calls") Reported-by: Linus Lüssing Tested-by: Linus Lüssing Acked-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_iv_ogm.c | 4 ++-- net/batman-adv/hard-interface.c | 3 +++ net/batman-adv/types.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 0b2f69924444..0ed33a9a41b7 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -2481,7 +2481,7 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, return ret; } -static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) +static void batadv_iv_iface_enabled(struct batadv_hard_iface *hard_iface) { /* begin scheduling originator messages on that interface */ batadv_iv_ogm_schedule(hard_iface); @@ -2821,8 +2821,8 @@ static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .name = "BATMAN_IV", .iface = { - .activate = batadv_iv_iface_activate, .enable = batadv_iv_ogm_iface_enable, + .enabled = batadv_iv_iface_enabled, .disable = batadv_iv_ogm_iface_disable, .update_mac = batadv_iv_ogm_iface_update_mac, .primary_set = batadv_iv_ogm_primary_iface_set, diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index c43887fa29a9..63760967712e 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -795,6 +795,9 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_recalc_extra_skbroom(soft_iface); + if (bat_priv->algo_ops->iface.enabled) + bat_priv->algo_ops->iface.enabled(hard_iface); + out: return 0; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index dbeaa015edc9..7ecf268e6626 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1424,6 
+1424,7 @@ struct batadv_forw_packet { * @activate: start routing mechanisms when hard-interface is brought up * (optional) * @enable: init routing info when hard-interface is enabled + * @enabled: notification when hard-interface was enabled (optional) * @disable: de-init routing info when hard-interface is disabled * @update_mac: (re-)init mac addresses of the protocol information * belonging to this hard-interface @@ -1432,6 +1433,7 @@ struct batadv_forw_packet { struct batadv_algo_iface_ops { void (*activate)(struct batadv_hard_iface *hard_iface); int (*enable)(struct batadv_hard_iface *hard_iface); + void (*enabled)(struct batadv_hard_iface *hard_iface); void (*disable)(struct batadv_hard_iface *hard_iface); void (*update_mac)(struct batadv_hard_iface *hard_iface); void (*primary_set)(struct batadv_hard_iface *hard_iface); -- GitLab From 5427da3add52932440df60972cedab088769bb23 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:30 +0100 Subject: [PATCH 0518/1278] batman-adv: Avoid free/alloc race when handling OGM2 buffer commit a8d23cbbf6c9f515ed678204ad2962be7c336344 upstream. A B.A.T.M.A.N. V virtual interface has an OGM2 packet buffer which is initialized using data from the netdevice notifier and other rtnetlink related hooks. It is sent regularly via various slave interfaces of the batadv virtual interface and in this process also modified (realloced) to integrate additional state information via TVLV containers. It must be avoided that the worker item is executed without a common lock with the netdevice notifier/rtnetlink helpers. Otherwise it can either happen that half modified data is sent out or the functions modifying the OGM2 buffer try to access already freed memory regions. 
Fixes: 0da0035942d4 ("batman-adv: OGMv2 - add basic infrastructure") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_v_ogm.c | 42 ++++++++++++++++++++++++++++++-------- net/batman-adv/types.h | 3 +++ 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index e07f636160b6..cec31769bb3f 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -127,14 +129,12 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb, } /** - * batadv_v_ogm_send - periodic worker broadcasting the own OGM - * @work: work queue item + * batadv_v_ogm_send_softif() - periodic worker broadcasting the own OGM + * @bat_priv: the bat priv with all the soft interface information */ -static void batadv_v_ogm_send(struct work_struct *work) +static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv) { struct batadv_hard_iface *hard_iface; - struct batadv_priv_bat_v *bat_v; - struct batadv_priv *bat_priv; struct batadv_ogm2_packet *ogm_packet; struct sk_buff *skb, *skb_tmp; unsigned char *ogm_buff; @@ -142,8 +142,7 @@ static void batadv_v_ogm_send(struct work_struct *work) u16 tvlv_len = 0; int ret; - bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); - bat_priv = container_of(bat_v, struct batadv_priv, bat_v); + lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; @@ -234,6 +233,23 @@ static void batadv_v_ogm_send(struct work_struct *work) return; } +/** + * batadv_v_ogm_send() - periodic worker broadcasting the own OGM + * @work: work queue item + */ +static void batadv_v_ogm_send(struct work_struct *work) +{ + struct batadv_priv_bat_v *bat_v; + struct batadv_priv *bat_priv; + + bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); + 
bat_priv = container_of(bat_v, struct batadv_priv, bat_v); + + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); + batadv_v_ogm_send_softif(bat_priv); + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); +} + /** * batadv_v_ogm_iface_enable - prepare an interface for B.A.T.M.A.N. V * @hard_iface: the interface to prepare @@ -260,11 +276,15 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface) struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface); struct batadv_ogm2_packet *ogm_packet; + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); if (!bat_priv->bat_v.ogm_buff) - return; + goto unlock; ogm_packet = (struct batadv_ogm2_packet *)bat_priv->bat_v.ogm_buff; ether_addr_copy(ogm_packet->orig, primary_iface->net_dev->dev_addr); + +unlock: + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } /** @@ -886,6 +906,8 @@ int batadv_v_ogm_init(struct batadv_priv *bat_priv) atomic_set(&bat_priv->bat_v.ogm_seqno, random_seqno); INIT_DELAYED_WORK(&bat_priv->bat_v.ogm_wq, batadv_v_ogm_send); + mutex_init(&bat_priv->bat_v.ogm_buff_mutex); + return 0; } @@ -897,7 +919,11 @@ void batadv_v_ogm_free(struct batadv_priv *bat_priv) { cancel_delayed_work_sync(&bat_priv->bat_v.ogm_wq); + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); + kfree(bat_priv->bat_v.ogm_buff); bat_priv->bat_v.ogm_buff = NULL; bat_priv->bat_v.ogm_buff_len = 0; + + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 7ecf268e6626..21642fbe95c3 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include /* for linux/wait.h */ @@ -989,12 +990,14 @@ struct batadv_softif_vlan { * @ogm_buff: buffer holding the OGM packet * @ogm_buff_len: length of the OGM packet buffer * @ogm_seqno: OGM sequence number - used to identify each OGM + * @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len * @ogm_wq: workqueue used to schedule OGM transmissions */ 
struct batadv_priv_bat_v { unsigned char *ogm_buff; int ogm_buff_len; atomic_t ogm_seqno; + struct mutex ogm_buff_mutex; struct delayed_work ogm_wq; }; -- GitLab From ca6579b18e7983b26438c62b3291bd43fb34017a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:31 +0100 Subject: [PATCH 0519/1278] batman-adv: Avoid free/alloc race when handling OGM buffer commit 40e220b4218bb3d278e5e8cc04ccdfd1c7ff8307 upstream. Each slave interface of an B.A.T.M.A.N. IV virtual interface has an OGM packet buffer which is initialized using data from netdevice notifier and other rtnetlink related hooks. It is sent regularly via various slave interfaces of the batadv virtual interface and in this process also modified (realloced) to integrate additional state information via TVLV containers. It must be avoided that the worker item is executed without a common lock with the netdevice notifier/rtnetlink helpers. Otherwise it can either happen that half modified/freed data is sent out or functions modifying the OGM buffer try to access already freed memory regions. 
Reported-by: syzbot+0cc629f19ccb8534935b@syzkaller.appspotmail.com Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_iv_ogm.c | 60 ++++++++++++++++++++++++++++----- net/batman-adv/hard-interface.c | 2 ++ net/batman-adv/types.h | 2 ++ 3 files changed, 55 insertions(+), 9 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 0ed33a9a41b7..30e774354d4e 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -370,14 +371,18 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) unsigned char *ogm_buff; u32 random_seqno; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno); hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN; ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC); - if (!ogm_buff) + if (!ogm_buff) { + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); return -ENOMEM; + } hard_iface->bat_iv.ogm_buff = ogm_buff; @@ -389,35 +394,59 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) batadv_ogm_packet->reserved = 0; batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE; + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); + return 0; } static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface) { + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + kfree(hard_iface->bat_iv.ogm_buff); hard_iface->bat_iv.ogm_buff = NULL; + + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; + void *ogm_buff; - 
batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + + ogm_buff = hard_iface->bat_iv.ogm_buff; + if (!ogm_buff) + goto unlock; + + batadv_ogm_packet = ogm_buff; ether_addr_copy(batadv_ogm_packet->orig, hard_iface->net_dev->dev_addr); ether_addr_copy(batadv_ogm_packet->prev_sender, hard_iface->net_dev->dev_addr); + +unlock: + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; + void *ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + + ogm_buff = hard_iface->bat_iv.ogm_buff; + if (!ogm_buff) + goto unlock; + + batadv_ogm_packet = ogm_buff; batadv_ogm_packet->ttl = BATADV_TTL; + +unlock: + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } /* when do we schedule our own ogm to be sent */ @@ -915,7 +944,11 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) } } -static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) +/** + * batadv_iv_ogm_schedule_buff() - schedule submission of hardif ogm buffer + * @hard_iface: interface whose ogm buffer should be transmitted + */ +static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff; @@ -926,9 +959,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) u16 tvlv_len = 0; unsigned long send_time; - if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || - (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) - return; + lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); /* the interface gets activated here to avoid race conditions between * the moment of activating the interface in @@ -996,6 +1027,17 @@ 
static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) batadv_hardif_put(primary_if); } +static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) +{ + if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || + hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) + return; + + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + batadv_iv_ogm_schedule_buff(hard_iface); + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); +} + /** * batadv_iv_ogm_orig_update - use OGM to update corresponding data in an * originator diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 63760967712e..9fdfa9984f02 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -901,6 +902,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); + mutex_init(&hard_iface->bat_iv.ogm_buff_mutex); spin_lock_init(&hard_iface->neigh_list_lock); kref_init(&hard_iface->refcount); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 21642fbe95c3..540a9c5c2270 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -82,11 +82,13 @@ enum batadv_dhcp_recipient { * @ogm_buff: buffer holding the OGM packet * @ogm_buff_len: length of the OGM packet buffer * @ogm_seqno: OGM sequence number - used to identify each OGM + * @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */ struct batadv_hard_iface_bat_iv { unsigned char *ogm_buff; int ogm_buff_len; atomic_t ogm_seqno; + struct mutex ogm_buff_mutex; }; /** -- GitLab From e181bb93c904708962b1e92e0db830ccb82be51a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:32 +0100 Subject: [PATCH 0520/1278] batman-adv: Don't schedule OGM for disabled interface commit 8e8ce08198de193e3d21d42e96945216e3d9ac7f upstream. 
A transmission scheduling for an interface which is currently dropped by batadv_iv_ogm_iface_disable could still be in progress. The B.A.T.M.A.N. V is simply cancelling the workqueue item in a synchronous way but this is not possible with B.A.T.M.A.N. IV because the OGM submissions are intertwined. Instead it has to stop submitting the OGM when it detects that the buffer pointer is set to NULL. Reported-by: syzbot+a98f2016f40b9cd3818a@syzkaller.appspotmail.com Reported-by: syzbot+ac36b6a33c28a491e929@syzkaller.appspotmail.com Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Cc: Hillf Danton Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_iv_ogm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 30e774354d4e..7a723e124dbb 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -961,6 +961,10 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); + /* interface already disabled by batadv_iv_ogm_iface_disable */ + if (!*ogm_buff) + return; + /* the interface gets activated here to avoid race conditions between * the moment of activating the interface in * hardif_activate_interface() where the originator mac is set and -- GitLab From ba362da15ee2c915041b9d7582ff8ca017c615f6 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 11 Mar 2020 14:13:21 -0500 Subject: [PATCH 0521/1278] perf/amd/uncore: Replace manual sampling check with CAP_NO_INTERRUPT flag [ Upstream commit f967140dfb7442e2db0868b03b961f9c59418a1b ] Enable the sampling check in kernel/events/core.c::perf_event_open(), which returns the more appropriate -EOPNOTSUPP.
BEFORE: $ sudo perf record -a -e instructions,l3_request_g1.caching_l3_cache_accesses true Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (l3_request_g1.caching_l3_cache_accesses). /bin/dmesg | grep -i perf may provide additional information. With nothing relevant in dmesg. AFTER: $ sudo perf record -a -e instructions,l3_request_g1.caching_l3_cache_accesses true Error: l3_request_g1.caching_l3_cache_accesses: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' Fixes: c43ca5091a37 ("perf/x86/amd: Add support for AMD NB and L2I "uncore" counters") Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200311191323.13124-1-kim.phillips@amd.com Signed-off-by: Sasha Levin --- arch/x86/events/amd/uncore.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index baa7e36073f9..604a8558752d 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -193,20 +193,18 @@ static int amd_uncore_event_init(struct perf_event *event) /* * NB and Last level cache counters (MSRs) are shared across all cores - * that share the same NB / Last level cache. Interrupts can be directed - * to a single target core, however, event counts generated by processes - * running on other cores cannot be masked out. So we do not support - * sampling and per-thread events. + * that share the same NB / Last level cache. On family 16h and below, + * Interrupts can be directed to a single target core, however, event + * counts generated by processes running on other cores cannot be masked + * out. 
So we do not support sampling and per-thread events via + * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: */ - if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) - return -EINVAL; /* NB and Last level cache counters do not have usr/os/guest/host bits */ if (event->attr.exclude_user || event->attr.exclude_kernel || event->attr.exclude_host || event->attr.exclude_guest) return -EINVAL; - /* and we do not enable counter overflow interrupts */ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; @@ -314,6 +312,7 @@ static struct pmu amd_nb_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, }; static struct pmu amd_llc_pmu = { @@ -324,6 +323,7 @@ static struct pmu amd_llc_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, }; static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) -- GitLab From 140421867f98b8d944562a01cff0a04298dc3ad2 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 6 Feb 2020 16:58:45 +0100 Subject: [PATCH 0522/1278] ACPI: watchdog: Allow disabling WDAT at boot [ Upstream commit 3f9e12e0df012c4a9a7fd7eb0d3ae69b459d6b2c ] In case the WDAT interface is broken, give the user an option to ignore it to let a native driver bind to the watchdog device instead. Signed-off-by: Jean Delvare Acked-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Sasha Levin --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ drivers/acpi/acpi_watchdog.c | 12 +++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7e0a4be3503d..ae51b1b7b67f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -137,6 +137,10 @@ dynamic table installation which will install SSDT tables to /sys/firmware/acpi/tables/dynamic. + acpi_no_watchdog [HW,ACPI,WDT] + Ignore the ACPI-based watchdog interface (WDAT) and let + a native driver control the watchdog device instead. + acpi_rsdp= [ACPI,EFI,KEXEC] Pass the RSDP address to the kernel, mostly used on machines running EFI runtime service to boot the diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index 23cde3d8e8fb..0bd1899a287f 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -58,12 +58,14 @@ static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat) } #endif +static bool acpi_no_watchdog; + static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) { const struct acpi_table_wdat *wdat = NULL; acpi_status status; - if (acpi_disabled) + if (acpi_disabled || acpi_no_watchdog) return NULL; status = acpi_get_table(ACPI_SIG_WDAT, 0, @@ -91,6 +93,14 @@ bool acpi_has_watchdog(void) } EXPORT_SYMBOL_GPL(acpi_has_watchdog); +/* ACPI watchdog can be disabled on boot command line */ +static int __init disable_acpi_watchdog(char *str) +{ + acpi_no_watchdog = true; + return 1; +} +__setup("acpi_no_watchdog", disable_acpi_watchdog); + void __init acpi_watchdog_init(void) { const struct acpi_wdat_entry *entries; -- GitLab From f098e1a042b7f997d4348178df99a0f4821b86cf Mon Sep 17 00:00:00 2001 From: Mansour Behabadi Date: Wed, 29 Jan 2020 17:26:31 +1100 Subject: [PATCH 0523/1278] HID: apple: Add support for recent firmware 
on Magic Keyboards [ Upstream commit e433be929e63265b7412478eb7ff271467aee2d7 ] Magic Keyboards with more recent firmware (0x0100) report Fn key differently. Without this patch, Fn key may not behave as expected and may not be configurable via hid_apple fnmode module parameter. Signed-off-by: Mansour Behabadi Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-apple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index d0a81a03ddbd..8ab8f2350bbc 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -343,7 +343,8 @@ static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi, unsigned long **bit, int *max) { if (usage->hid == (HID_UP_CUSTOM | 0x0003) || - usage->hid == (HID_UP_MSVENDOR | 0x0003)) { + usage->hid == (HID_UP_MSVENDOR | 0x0003) || + usage->hid == (HID_UP_HPVENDOR2 | 0x0003)) { /* The fn key on Apple USB keyboards */ set_bit(EV_REP, hi->input->evbit); hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN); -- GitLab From 1d3e7e1be06f08c380a28cf1ed8578b7b87cbfea Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 14 Feb 2020 14:53:07 +0800 Subject: [PATCH 0524/1278] HID: i2c-hid: add Trekstor Surfbook E11B to descriptor override [ Upstream commit be0aba826c4a6ba5929def1962a90d6127871969 ] The Surfbook E11B uses the SIPODEV SP1064 touchpad, which does not supply descriptors, so it has to be added to the override list. 
BugLink: https://bugs.launchpad.net/bugs/1858299 Signed-off-by: Kai-Heng Feng Reviewed-by: Hans de Goede Signed-off-by: Benjamin Tissoires Signed-off-by: Sasha Levin --- drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c index 10af8585c820..95052373a828 100644 --- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -341,6 +341,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + .ident = "Trekstor SURFBOOK E11B", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SURFBOOK E11B"), + }, + .driver_data = (void *)&sipodev_desc + }, { .ident = "Direkt-Tek DTLAPY116-2", .matches = { -- GitLab From 8da8673cbe9525fd4439dbbd189766ba993e554d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Feb 2020 10:44:50 +0100 Subject: [PATCH 0525/1278] cfg80211: check reg_rule for NULL in handle_channel_custom() [ Upstream commit a7ee7d44b57c9ae174088e53a668852b7f4f452d ] We may end up with a NULL reg_rule after the loop in handle_channel_custom() if the bandwidth didn't fit, check if this is the case and bail out if so. 
Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200221104449.3b558a50201c.I4ad3725c4dacaefd2d18d3cc65ba6d18acd5dbfe@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index a520f433d476..b95d1c2bdef7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1733,7 +1733,7 @@ static void handle_channel_custom(struct wiphy *wiphy, break; } - if (IS_ERR(reg_rule)) { + if (IS_ERR_OR_NULL(reg_rule)) { pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n", chan->center_freq); if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) { -- GitLab From 358e3a57a2558069863404249d8af3bdca7e1563 Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Tue, 14 Jan 2020 14:43:19 +0000 Subject: [PATCH 0526/1278] scsi: libfc: free response frame from GPN_ID [ Upstream commit ff6993bb79b9f99bdac0b5378169052931b65432 ] fc_disc_gpn_id_resp() should be the last function using it so free it here to avoid memory leak. Link: https://lore.kernel.org/r/1579013000-14570-2-git-send-email-igor.druzhinin@citrix.com Reviewed-by: Hannes Reinecke Signed-off-by: Igor Druzhinin Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libfc/fc_disc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index bb9c1c016643..28b50ab2fbb0 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -652,6 +652,8 @@ static void fc_disc_gpn_id_resp(struct fc_seq *sp, struct fc_frame *fp, } out: kref_put(&rdata->kref, fc_rport_destroy); + if (!IS_ERR(fp)) + fc_frame_free(fp); } /** -- GitLab From 74f38a3033be28d4587f959e610419ac52295638 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 21 Feb 2020 14:17:05 +0100 Subject: [PATCH 0527/1278] net: usb: qmi_wwan: restore mtu min/max values after raw_ip switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit eae7172f8141eb98e64e6e81acc9e9d5b2add127 ] usbnet creates network interfaces with min_mtu = 0 and max_mtu = ETH_MAX_MTU. These values are not modified by qmi_wwan when the network interface is created initially, allowing, for example, to set mtu greater than 1500. When a raw_ip switch is done (raw_ip set to 'Y', then set to 'N') the mtu values for the network interface are set through ether_setup, with min_mtu = ETH_MIN_MTU and max_mtu = ETH_DATA_LEN, not allowing anymore to set mtu greater than 1500 (error: mtu greater than device maximum). The patch restores the original min/max mtu values set by usbnet after a raw_ip switch. Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 189715438328..a8d5561afc7d 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -274,6 +274,9 @@ static void qmi_wwan_netdev_setup(struct net_device *net) netdev_dbg(net, "mode: raw IP\n"); } else if (!net->header_ops) { /* don't bother if already set */ ether_setup(net); + /* Restoring min/max mtu values set originally by usbnet */ + net->min_mtu = 0; + net->max_mtu = ETH_MAX_MTU; clear_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: Ethernet\n"); } -- GitLab From 51425a0d1e47af13c45016c5437abb0a0b2f2ebc Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 23 Feb 2020 14:38:40 +0100 Subject: [PATCH 0528/1278] net: ks8851-ml: Fix IRQ handling and locking [ Upstream commit 44343418d0f2f623cb9da6f5000df793131cbe3b ] The KS8851 requires that packet RX and TX are mutually exclusive. Currently, the driver hopes to achieve this by disabling interrupt from the card by writing the card registers and by disabling the interrupt on the interrupt controller. This however is racy on SMP. Replace this approach by expanding the spinlock used around the ks_start_xmit() TX path to ks_irq() RX path to assure true mutual exclusion and remove the interrupt enabling/disabling, which is now not needed anymore. Furthermore, disable interrupts also in ks_net_stop(), which was missing before. Note that a massive improvement here would be to re-use the KS8851 driver approach, which is to move the TX path into a worker thread, interrupt handling to threaded interrupt, and synchronize everything with mutexes, but that would be a much bigger rework, for a separate patch. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8851_mll.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 799154d7c047..c699a779757e 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -831,14 +831,17 @@ static irqreturn_t ks_irq(int irq, void *pw) { struct net_device *netdev = pw; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; u16 status; + spin_lock_irqsave(&ks->statelock, flags); /*this should be the first in IRQ handler */ ks_save_cmd_reg(ks); status = ks_rdreg16(ks, KS_ISR); if (unlikely(!status)) { ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_NONE; } @@ -864,6 +867,7 @@ static irqreturn_t ks_irq(int irq, void *pw) ks->netdev->stats.rx_over_errors++; /* this should be the last in IRQ handler*/ ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_HANDLED; } @@ -933,6 +937,7 @@ static int ks_net_stop(struct net_device *netdev) /* shutdown RX/TX QMU */ ks_disable_qmu(ks); + ks_disable_int(ks); /* set powermode to soft power down to save power */ ks_set_powermode(ks, PMECR_PM_SOFTDOWN); @@ -989,10 +994,9 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) { netdev_tx_t retv = NETDEV_TX_OK; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; - disable_irq(netdev->irq); - ks_disable_int(ks); - spin_lock(&ks->statelock); + spin_lock_irqsave(&ks->statelock, flags); /* Extra space are required: * 4 byte for alignment, 4 for status/length, 4 for CRC @@ -1006,9 +1010,7 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) dev_kfree_skb(skb); } else retv = NETDEV_TX_BUSY; - spin_unlock(&ks->statelock); - ks_enable_int(ks); - enable_irq(netdev->irq); + spin_unlock_irqrestore(&ks->statelock, flags); return retv; } -- GitLab 
From f4bb37a0f6c55e12865b39696ed2c6423a1eeddc Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Sun, 23 Feb 2020 20:03:02 +0530 Subject: [PATCH 0529/1278] mac80211: rx: avoid RCU list traversal under mutex [ Upstream commit 253216ffb2a002a682c6f68bd3adff5b98b71de8 ] local->sta_mtx is held in __ieee80211_check_fast_rx_iface(). No need to use list_for_each_entry_rcu() as it also requires a cond argument to avoid false lockdep warnings when not used in RCU read-side section (with CONFIG_PROVE_RCU_LIST). Therefore use list_for_each_entry(); Signed-off-by: Madhuparna Bhowmik Link: https://lore.kernel.org/r/20200223143302.15390-1-madhuparnabhowmik10@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 31000622376d..7c92b1471c34 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3862,7 +3862,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&local->sta_mtx); - list_for_each_entry_rcu(sta, &local->sta_list, list) { + list_for_each_entry(sta, &local->sta_list, list) { if (sdata != sta->sdata && (!sta->sdata->bss || sta->sdata->bss != sdata->bss)) continue; -- GitLab From d8a4a55bdcf50206de12766400c989de2f840d1a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 24 Feb 2020 12:47:14 -0800 Subject: [PATCH 0530/1278] signal: avoid double atomic counter increments for user accounting [ Upstream commit fda31c50292a5062332fa0343c084bd9f46604d9 ] When queueing a signal, we increment both the users count of pending signals (for RLIMIT_SIGPENDING tracking) and we increment the refcount of the user struct itself (because we keep a reference to the user in the signal structure in order to correctly account for it when freeing). 
That turns out to be fairly expensive, because both of them are atomic updates, and particularly under extreme signal handling pressure on big machines, you can get a lot of cache contention on the user struct. That can then cause horrid cacheline ping-pong when you do these multiple accesses. So change the reference counting to only pin the user for the _first_ pending signal, and to unpin it when the last pending signal is dequeued. That means that when a user sees a lot of concurrent signal queuing - which is the only situation when this matters - the only atomic access needed is generally the 'sigpending' count update. This was noticed because of a particularly odd timing artifact on a dual-socket 96C/192T Cascade Lake platform: when you get into bad contention, on that machine it for some reason seems to be much worse when the contention happens in the upper 32-byte half of the cacheline. As a result, the kernel test robot will-it-scale 'signal1' benchmark had an odd performance regression simply due to random alignment of the 'struct user_struct' (and pointed to a completely unrelated and apparently nonsensical commit for the regression). Avoiding the double increments (and decrements on the dequeueing side, of course) makes for much less contention and hugely improved performance on that will-it-scale microbenchmark. Quoting Feng Tang: "It makes a big difference, that the performance score is tripled! bump from original 17000 to 54000. Also the gap between 5.0-rc6 and 5.0-rc6+Jiri's patch is reduced to around 2%" [ The "2% gap" is the odd cacheline placement difference on that platform: under the extreme contention case, the effect of which half of the cacheline was hot was 5%, so with the reduced contention the odd timing artifact is reduced too ] It does help in the non-contended case too, but is not nearly as noticeable. Reported-and-tested-by: Feng Tang Cc: Eric W.
Biederman Cc: Huang, Ying Cc: Philip Li Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/signal.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 8fee1f2eba2f..c066168f8854 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -379,27 +379,32 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi { struct sigqueue *q = NULL; struct user_struct *user; + int sigpending; /* * Protect access to @t credentials. This can go away when all * callers hold rcu read lock. + * + * NOTE! A pending signal will hold on to the user refcount, + * and we get/put the refcount only when the sigpending count + * changes from/to zero. */ rcu_read_lock(); - user = get_uid(__task_cred(t)->user); - atomic_inc(&user->sigpending); + user = __task_cred(t)->user; + sigpending = atomic_inc_return(&user->sigpending); + if (sigpending == 1) + get_uid(user); rcu_read_unlock(); - if (override_rlimit || - atomic_read(&user->sigpending) <= - task_rlimit(t, RLIMIT_SIGPENDING)) { + if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { q = kmem_cache_alloc(sigqueue_cachep, flags); } else { print_dropped_signal(sig); } if (unlikely(q == NULL)) { - atomic_dec(&user->sigpending); - free_uid(user); + if (atomic_dec_and_test(&user->sigpending)) + free_uid(user); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; @@ -413,8 +418,8 @@ static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) return; - atomic_dec(&q->user->sigpending); - free_uid(q->user); + if (atomic_dec_and_test(&q->user->sigpending)) + free_uid(q->user); kmem_cache_free(sigqueue_cachep, q); } -- GitLab From 085e871e8037849e9c574d86a79aa7b51ee5c952 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Wed, 26 Feb 2020 11:54:35 +0800 Subject: [PATCH 0531/1278] slip: not call free_netdev before rtnl_unlock in slip_open [ 
Upstream commit f596c87005f7b1baeb7d62d9a9e25d68c3dfae10 ] As the description before netdev_run_todo says, we cannot call free_netdev before rtnl_unlock; fix it by reordering the code. Signed-off-by: yangerkun Reviewed-by: Oliver Hartkopp Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/slip/slip.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index b07f367abd91..d7882b548b79 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -867,7 +867,10 @@ static int slip_open(struct tty_struct *tty) tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); sl_free_netdev(sl->dev); + /* do not call free_netdev before rtnl_unlock */ + rtnl_unlock(); free_netdev(sl->dev); + return err; err_exit: rtnl_unlock(); -- GitLab From ebef8d300abea8bc838072d8b32d1106fbf133fa Mon Sep 17 00:00:00 2001 From: Luo bin Date: Thu, 27 Feb 2020 06:34:43 +0000 Subject: [PATCH 0532/1278] hinic: fix a bug of setting hw_ioctxt [ Upstream commit d2ed69ce9ed3477e2a9527e6b89fe4689d99510e ] a reserved field is used to signify prime physical function index in the latest firmware version, so we must assign a value to it correctly Signed-off-by: Luo bin Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c | 1 + drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h | 2 +- drivers/net/ethernet/huawei/hinic/hinic_hw_if.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 79b567447084..46aba02b8672 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -312,6 +312,7 @@ static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth, } hw_ioctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif); + hw_ioctxt.ppf_idx = HINIC_HWIF_PPF_IDX(hwif); hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT; hw_ioctxt.cmdq_depth = 0; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index 0f5563f3b779..a011fd2d2627 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -104,8 +104,8 @@ struct hinic_cmd_hw_ioctxt { u8 rsvd2; u8 rsvd3; + u8 ppf_idx; u8 rsvd4; - u8 rsvd5; u16 rq_depth; u16 rx_buf_sz_idx; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h index 5b4760c0e9f5..f683ccbdfca0 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h @@ -146,6 +146,7 @@ #define HINIC_HWIF_FUNC_IDX(hwif) ((hwif)->attr.func_idx) #define HINIC_HWIF_PCI_INTF(hwif) ((hwif)->attr.pci_intf_idx) #define HINIC_HWIF_PF_IDX(hwif) ((hwif)->attr.pf_idx) +#define HINIC_HWIF_PPF_IDX(hwif) ((hwif)->attr.ppf_idx) #define HINIC_FUNC_TYPE(hwif) ((hwif)->attr.func_type) #define HINIC_IS_PF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PF) -- GitLab From 5930339b092e27e9bf09267ea87f37d6fecd6298 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:23:52 +0000 Subject: [PATCH 0533/1278] net: rmnet: 
fix NULL pointer dereference in rmnet_newlink() [ Upstream commit 93b5cbfa9636d385126f211dca9efa7e3f683202 ] rmnet registers IFLA_LINK interface as a lower interface. But, IFLA_LINK could be NULL. In the current code, rmnet doesn't check IFLA_LINK. So, panic would occur. Test commands: modprobe rmnet ip link add rmnet0 type rmnet mux_id 1 Splat looks like: [ 36.826109][ T1115] general protection fault, probably for non-canonical address 0xdffffc0000000000I [ 36.838817][ T1115] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 36.839908][ T1115] CPU: 1 PID: 1115 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 36.840569][ T1115] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 36.841408][ T1115] RIP: 0010:rmnet_newlink+0x54/0x510 [rmnet] [ 36.841986][ T1115] Code: 83 ec 18 48 c1 e9 03 80 3c 01 00 0f 85 d4 03 00 00 48 8b 6a 28 48 b8 00 00 00 00 00 c [ 36.843923][ T1115] RSP: 0018:ffff8880b7e0f1c0 EFLAGS: 00010247 [ 36.844756][ T1115] RAX: dffffc0000000000 RBX: ffff8880d14cca00 RCX: 1ffff11016fc1e99 [ 36.845859][ T1115] RDX: 0000000000000000 RSI: ffff8880c3d04000 RDI: 0000000000000004 [ 36.846961][ T1115] RBP: 0000000000000000 R08: ffff8880b7e0f8b0 R09: ffff8880b6ac2d90 [ 36.848020][ T1115] R10: ffffffffc0589a40 R11: ffffed1016d585b7 R12: ffffffff88ceaf80 [ 36.848788][ T1115] R13: ffff8880c3d04000 R14: ffff8880b7e0f8b0 R15: ffff8880c3d04000 [ 36.849546][ T1115] FS: 00007f50ab3360c0(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000 [ 36.851784][ T1115] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 36.852422][ T1115] CR2: 000055871afe5ab0 CR3: 00000000ae246001 CR4: 00000000000606e0 [ 36.853181][ T1115] Call Trace: [ 36.853514][ T1115] __rtnl_newlink+0xbdb/0x1270 [ 36.853967][ T1115] ? lock_downgrade+0x6e0/0x6e0 [ 36.854420][ T1115] ? rtnl_link_unregister+0x220/0x220 [ 36.854936][ T1115] ? lock_acquire+0x164/0x3b0 [ 36.855376][ T1115] ? is_bpf_image_address+0xff/0x1d0 [ 36.855884][ T1115] ? 
rtnl_newlink+0x4c/0x90 [ 36.856304][ T1115] ? kernel_text_address+0x111/0x140 [ 36.856857][ T1115] ? __kernel_text_address+0xe/0x30 [ 36.857440][ T1115] ? unwind_get_return_address+0x5f/0xa0 [ 36.858063][ T1115] ? create_prof_cpu_mask+0x20/0x20 [ 36.858644][ T1115] ? arch_stack_walk+0x83/0xb0 [ 36.859171][ T1115] ? stack_trace_save+0x82/0xb0 [ 36.859710][ T1115] ? stack_trace_consume_entry+0x160/0x160 [ 36.860357][ T1115] ? deactivate_slab.isra.78+0x2c5/0x800 [ 36.860928][ T1115] ? kasan_unpoison_shadow+0x30/0x40 [ 36.861520][ T1115] ? kmem_cache_alloc_trace+0x135/0x350 [ 36.862125][ T1115] ? rtnl_newlink+0x4c/0x90 [ 36.864073][ T1115] rtnl_newlink+0x65/0x90 [ ... ] Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 7d8303e45f09..b7df8c1121e3 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -157,6 +157,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, int err = 0; u16 mux_id; + if (!tb[IFLA_LINK]) { + NL_SET_ERR_MSG_MOD(extack, "link not specified"); + return -EINVAL; + } + real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev || !dev) return -ENODEV; -- GitLab From 8419d8e01a6b52bfdfcbce5f5c887643200eeacf Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 21 Feb 2020 23:31:11 -0500 Subject: [PATCH 0534/1278] jbd2: fix data races at struct journal_head [ Upstream commit 6c5d911249290f41f7b50b43344a7520605b1acb ] journal_head::b_transaction and journal_head::b_next_transaction could be accessed concurrently as noticed by KCSAN, LTP: starting fsync04 /dev/zero: Can't open blockdev EXT4-fs (loop0): mounting 
ext3 file system using the ext4 subsystem EXT4-fs (loop0): mounted filesystem with ordered data mode. Opts: (null) ================================================================== BUG: KCSAN: data-race in __jbd2_journal_refile_buffer [jbd2] / jbd2_write_access_granted [jbd2] write to 0xffff99f9b1bd0e30 of 8 bytes by task 25721 on cpu 70: __jbd2_journal_refile_buffer+0xdd/0x210 [jbd2] __jbd2_journal_refile_buffer at fs/jbd2/transaction.c:2569 jbd2_journal_commit_transaction+0x2d15/0x3f20 [jbd2] (inlined by) jbd2_journal_commit_transaction at fs/jbd2/commit.c:1034 kjournald2+0x13b/0x450 [jbd2] kthread+0x1cd/0x1f0 ret_from_fork+0x27/0x50 read to 0xffff99f9b1bd0e30 of 8 bytes by task 25724 on cpu 68: jbd2_write_access_granted+0x1b2/0x250 [jbd2] jbd2_write_access_granted at fs/jbd2/transaction.c:1155 jbd2_journal_get_write_access+0x2c/0x60 [jbd2] __ext4_journal_get_write_access+0x50/0x90 [ext4] ext4_mb_mark_diskspace_used+0x158/0x620 [ext4] ext4_mb_new_blocks+0x54f/0xca0 [ext4] ext4_ind_map_blocks+0xc79/0x1b40 [ext4] ext4_map_blocks+0x3b4/0x950 [ext4] _ext4_get_block+0xfc/0x270 [ext4] ext4_get_block+0x3b/0x50 [ext4] __block_write_begin_int+0x22e/0xae0 __block_write_begin+0x39/0x50 ext4_write_begin+0x388/0xb50 [ext4] generic_perform_write+0x15d/0x290 ext4_buffered_write_iter+0x11f/0x210 [ext4] ext4_file_write_iter+0xce/0x9e0 [ext4] new_sync_write+0x29c/0x3b0 __vfs_write+0x92/0xa0 vfs_write+0x103/0x260 ksys_write+0x9d/0x130 __x64_sys_write+0x4c/0x60 do_syscall_64+0x91/0xb05 entry_SYSCALL_64_after_hwframe+0x49/0xbe 5 locks held by fsync04/25724: #0: ffff99f9911093f8 (sb_writers#13){.+.+}, at: vfs_write+0x21c/0x260 #1: ffff99f9db4c0348 (&sb->s_type->i_mutex_key#15){+.+.}, at: ext4_buffered_write_iter+0x65/0x210 [ext4] #2: ffff99f5e7dfcf58 (jbd2_handle){++++}, at: start_this_handle+0x1c1/0x9d0 [jbd2] #3: ffff99f9db4c0168 (&ei->i_data_sem){++++}, at: ext4_map_blocks+0x176/0x950 [ext4] #4: ffffffff99086b40 (rcu_read_lock){....}, at: jbd2_write_access_granted+0x4e/0x250 
[jbd2] irq event stamp: 1407125 hardirqs last enabled at (1407125): [] __find_get_block+0x107/0x790 hardirqs last disabled at (1407124): [] __find_get_block+0x49/0x790 softirqs last enabled at (1405528): [] __do_softirq+0x34c/0x57c softirqs last disabled at (1405521): [] irq_exit+0xa2/0xc0 Reported by Kernel Concurrency Sanitizer on: CPU: 68 PID: 25724 Comm: fsync04 Tainted: G L 5.6.0-rc2-next-20200221+ #7 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 The plain reads are outside of jh->b_state_lock critical section which result in data races. Fix them by adding pairs of READ|WRITE_ONCE(). Reviewed-by: Jan Kara Signed-off-by: Qian Cai Link: https://lore.kernel.org/r/20200222043111.2227-1-cai@lca.pw Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/jbd2/transaction.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index f2ff141a4479..a355ca418e78 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1050,8 +1050,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh, /* For undo access buffer must have data copied */ if (undo && !jh->b_committed_data) goto out; - if (jh->b_transaction != handle->h_transaction && - jh->b_next_transaction != handle->h_transaction) + if (READ_ONCE(jh->b_transaction) != handle->h_transaction && + READ_ONCE(jh->b_next_transaction) != handle->h_transaction) goto out; /* * There are two reasons for the barrier here: @@ -2466,8 +2466,8 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) * our jh reference and thus __jbd2_journal_file_buffer() must not * take a new one. 
*/ - jh->b_transaction = jh->b_next_transaction; - jh->b_next_transaction = NULL; + WRITE_ONCE(jh->b_transaction, jh->b_next_transaction); + WRITE_ONCE(jh->b_next_transaction, NULL); if (buffer_freed(bh)) jlist = BJ_Forget; else if (jh->b_modified) -- GitLab From 1940bd214aeacfc27d27c082f0285fe3e673cff7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 28 Jan 2020 20:22:13 +0100 Subject: [PATCH 0535/1278] ARM: 8957/1: VDSO: Match ARMv8 timer in cntvct_functional() commit 45939ce292b4b11159719faaf60aba7d58d5fe33 upstream. It is possible for a system with an ARMv8 timer to run a 32-bit kernel. When this happens we will unconditionally have the vDSO code remove the __vdso_gettimeofday and __vdso_clock_gettime symbols because cntvct_functional() returns false since it does not match that compatibility string. Fixes: ecf99a439105 ("ARM: 8331/1: VDSO initialization, mapping, and synchronization") Signed-off-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/vdso.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index f4dd7f9663c1..0001742c131d 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -103,6 +103,8 @@ static bool __init cntvct_functional(void) * this. */ np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); + if (!np) + np = of_find_compatible_node(NULL, NULL, "arm,armv8-timer"); if (!np) goto out_put; -- GitLab From 3b61a9a6519744f9de3538586f75f1325091a861 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Feb 2020 02:04:17 +0100 Subject: [PATCH 0536/1278] ARM: 8958/1: rename missed uaccess .fixup section commit f87b1c49bc675da30d8e1e8f4b60b800312c7b90 upstream. When the uaccess .fixup section was renamed to .text.fixup, one case was missed. Under ld.bfd, the orphaned section was moved close to .text (since they share the "ax" bits), so things would work normally on uaccess faults. 
Under ld.lld, the orphaned section was placed outside the .text section, making it unreachable. Link: https://github.com/ClangBuiltLinux/linux/issues/282 Link: https://bugs.chromium.org/p/chromium/issues/detail?id=1020633#c44 Link: https://lore.kernel.org/r/nycvar.YSQ.7.76.1912032147340.17114@knanqh.ubzr Link: https://lore.kernel.org/lkml/202002071754.F5F073F1D@keescook/ Fixes: c4a84ae39b4a5 ("ARM: 8322/1: keep .text and .fixup regions closer together") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Ard Biesheuvel Reviewed-by: Nick Desaulniers Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/lib/copy_from_user.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 6709a8d33963..f1e34f16cfab 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -100,7 +100,7 @@ ENTRY(arm_copy_from_user) ENDPROC(arm_copy_from_user) - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 0 copy_abort_preamble ldmfd sp!, {r1, r2, r3} -- GitLab From 992a5c90af04da6e1e047f10c5ee5d6d8752a699 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 17 Mar 2020 01:28:45 +0100 Subject: [PATCH 0537/1278] mm: slub: add missing TID bump in kmem_cache_alloc_bulk() commit fd4d9c7d0c71866ec0c2825189ebd2ce35bd95b8 upstream. When kmem_cache_alloc_bulk() attempts to allocate N objects from a percpu freelist of length M, and N > M > 0, it will first remove the M elements from the percpu freelist, then call ___slab_alloc() to allocate the next element and repopulate the percpu freelist. ___slab_alloc() can re-enable IRQs via allocate_slab(), so the TID must be bumped before ___slab_alloc() to properly commit the freelist head change. Fix it by unconditionally bumping c->tid when entering the slowpath. 
Cc: stable@vger.kernel.org Fixes: ebe909e0fdb3 ("slub: improve bulk alloc strategy") Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 07aeb129f3f8..099c7a85ede0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3122,6 +3122,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void *object = c->freelist; if (unlikely(!object)) { + /* + * We may have removed an object from c->freelist using + * the fastpath in the previous iteration; in that case, + * c->tid has not been bumped yet. + * Since ___slab_alloc() may reenable interrupts while + * allocating memory, we should bump c->tid now. + */ + c->tid = next_tid(c->tid); + /* * Invoking slow path likely have side-effect * of re-populating per CPU c->freelist -- GitLab From 62a47c2e063e8ca601e1cd514d6bd27e6befe3e6 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 21 Feb 2020 12:28:38 +0100 Subject: [PATCH 0538/1278] ipv4: ensure rcu_read_lock() in cipso_v4_error() commit 3e72dfdf8227b052393f71d820ec7599909dddc2 upstream. Similarly to commit c543cb4a5f07 ("ipv4: ensure rcu_read_lock() in ipv4_link_failure()"), __ip_options_compile() must be called under rcu protection. Fixes: 3da1ed7ac398 ("net: avoid use IPCB in cipso_v4_error") Suggested-by: Guillaume Nault Signed-off-by: Matteo Croce Acked-by: Paul Moore Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/cipso_ipv4.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index f0165c5f376b..1c21dc5d6dd4 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1738,6 +1738,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; + int res; if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; @@ -1749,7 +1750,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) memset(opt, 0, sizeof(struct ip_options)); opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); - if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL); + rcu_read_unlock(); + + if (res) return; if (gateway) -- GitLab From 01364dad1d4577e27a57729d41053f661bb8a5b9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 20 Mar 2020 10:54:27 +0100 Subject: [PATCH 0539/1278] Linux 4.14.174 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9a524b5c1d55..edc6b62bd892 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 173 +SUBLEVEL = 174 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 9affadc76e49835ea4156294aa95cba8408508fb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 20 Mar 2020 10:48:13 +0100 Subject: [PATCH 0540/1278] UPSTREAM: bpf: Explicitly memset the bpf_attr structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the bpf syscall, we are relying on the compiler to properly zero out the bpf_attr union that we copy userspace data into. 
Unfortunately that doesn't always work properly, padding and other oddities might not be correctly zeroed, and in some tests odd things have been found when the stack is pre-initialized to other values. Fix this by explicitly memsetting the structure to 0 before using it. Reported-by: Maciej Żenczykowski Reported-by: John Stultz Reported-by: Alexander Potapenko Reported-by: Alistair Delva Signed-off-by: Greg Kroah-Hartman Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://android-review.googlesource.com/c/kernel/common/+/1235490 Link: https://lore.kernel.org/bpf/20200320094813.GA421650@kroah.com (cherry picked from commit 8096f229421f7b22433775e928d506f0342e5907) Signed-off-by: Greg Kroah-Hartman Change-Id: I2dc28cd45024da5cc6861ff4a9b25fae389cc6d8 --- kernel/bpf/syscall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 89d58554eb99..071ab8ae8e49 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1583,7 +1583,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { - union bpf_attr attr = {}; + union bpf_attr attr; int err; if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) @@ -1595,6 +1595,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz size = min_t(u32, size, sizeof(attr)); /* copy attributes from user space, may be less than sizeof(bpf_attr) */ + memset(&attr, 0, sizeof(attr)); if (copy_from_user(&attr, uattr, size) != 0) return -EFAULT; -- GitLab From bb8c715f7f5a49a20065b810696401149d00cf70 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 20 Mar 2020 17:22:58 +0100 Subject: [PATCH 0541/1278] UPSTREAM: bpf: Explicitly memset some bpf info structures declared on the stack Trying to initialize a structure with "= {};" will not always clean out all padding locations in a structure. 
So be explicit and call memset to initialize everything for a number of bpf information structures that are then copied from userspace, sometimes from smaller memory locations than the size of the structure. Reported-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200320162258.GA794295@kroah.com (cherry picked from commit 269efb7fc478563a7e7b22590d8076823f4ac82a) Signed-off-by: Greg Kroah-Hartman Change-Id: I52a2cab20aa310085ec104bd811ac4f2b83657b6 --- kernel/bpf/syscall.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 071ab8ae8e49..d346cc46c9d5 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1473,7 +1473,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, union bpf_attr __user *uattr) { struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); - struct bpf_prog_info info = {}; + struct bpf_prog_info info; u32 info_len = attr->info.info_len; char __user *uinsns; u32 ulen; @@ -1484,6 +1484,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, return err; info_len = min_t(u32, sizeof(info), info_len); + memset(&info, 0, sizeof(info)); if (copy_from_user(&info, uinfo, info_len)) return -EFAULT; @@ -1529,7 +1530,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, union bpf_attr __user *uattr) { struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); - struct bpf_map_info info = {}; + struct bpf_map_info info; u32 info_len = attr->info.info_len; int err; @@ -1538,6 +1539,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, return err; info_len = min_t(u32, sizeof(info), info_len); + memset(&info, 0, sizeof(info)); info.type = map->map_type; info.id = map->id; info.key_size = map->key_size; -- GitLab From 63bf2738ec49e6dca98c19381a72f44394f6e589 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 19 Mar 2020 14:45:18 
-0700 Subject: [PATCH 0542/1278] ANDROID: scsi: ufs: add ->map_sg_crypto() variant op Add a variant op ->map_sg_crypto() which runs at the end of ufshcd_map_sg(). This is needed to support inline crypto on nonstandard hardware that enables and configures crypto in the PRD entries (struct ufshcd_sg_entry) instead of in the UTRD (struct utp_transfer_req_desc) which the UFS standard specifies. Bug: 129991660 Change-Id: I92bb52d0022d2e9de79a40d9888e84db4a8116b3 Signed-off-by: Eric Biggers --- drivers/scsi/ufs/ufshcd-crypto.c | 8 ++++++++ drivers/scsi/ufs/ufshcd-crypto.h | 8 ++++++++ drivers/scsi/ufs/ufshcd.c | 2 +- drivers/scsi/ufs/ufshcd.h | 1 + 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c index e3de448c9bbe..6999970235b3 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.c +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -456,6 +456,14 @@ int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, return ufshcd_prepare_lrbp_crypto_spec(hba, cmd, lrbp); } +int ufshcd_map_sg_crypto(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) +{ + if (hba->crypto_vops && hba->crypto_vops->map_sg_crypto) + return hba->crypto_vops->map_sg_crypto(hba, lrbp); + + return 0; +} + int ufshcd_complete_lrbp_crypto(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp) diff --git a/drivers/scsi/ufs/ufshcd-crypto.h b/drivers/scsi/ufs/ufshcd-crypto.h index 95f37c9f7672..f223a06fbf93 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.h +++ b/drivers/scsi/ufs/ufshcd-crypto.h @@ -80,6 +80,8 @@ int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp); +int ufshcd_map_sg_crypto(struct ufs_hba *hba, struct ufshcd_lrb *lrbp); + int ufshcd_complete_lrbp_crypto(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp); @@ -133,6 +135,12 @@ static inline int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, return 0; } +static inline int ufshcd_map_sg_crypto(struct ufs_hba *hba, + 
struct ufshcd_lrb *lrbp) +{ + return 0; +} + static inline bool ufshcd_lrbp_crypto_enabled(struct ufshcd_lrb *lrbp) { return false; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 74be1791e135..1126fa3baf50 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2018,7 +2018,7 @@ static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) lrbp->utr_descriptor_ptr->prd_table_length = 0; } - return 0; + return ufshcd_map_sg_crypto(hba, lrbp); } /** diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 73124e85dada..52b5a070afa8 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -354,6 +354,7 @@ struct ufs_hba_crypto_variant_ops { int (*prepare_lrbp_crypto)(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp); + int (*map_sg_crypto)(struct ufs_hba *hba, struct ufshcd_lrb *lrbp); int (*complete_lrbp_crypto)(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp); -- GitLab From bec540fac475142085d5e8b2950ffa639e6c7eee Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 23 Mar 2020 14:44:24 -0700 Subject: [PATCH 0543/1278] ANDROID: kbuild: do not merge .section..* into .section in modules Sections with double dots, e.g. .data..percpu are named intentionally to avoid matching rules that apply to .section.*. Change module section merging rules to skip these. 
Bug: 151981957 Change-Id: I23787aa40d69da1c6ca622a5f111704d2459e163 Signed-off-by: Sami Tolvanen --- scripts/module-lto.lds | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/module-lto.lds b/scripts/module-lto.lds index f5ee544a877d..5ba0e9461e13 100644 --- a/scripts/module-lto.lds +++ b/scripts/module-lto.lds @@ -12,11 +12,11 @@ SECTIONS { *(.eh_frame) } - .bss : { *(.bss .bss[.0-9a-zA-Z_]*) } - .data : { *(.data .data[.0-9a-zA-Z_]*) } - .rela.data : { *(.rela.data .rela.data[.0-9a-zA-Z_]*) } - .rela.rodata : { *(.rela.rodata .rela.rodata[.0-9a-zA-Z_]*) } - .rela.text : { *(.rela.text .rela.text[.0-9a-zA-Z_]*) } - .rodata : { *(.rodata .rodata[.0-9a-zA-Z_]*) } - .text : { *(.text .text[.0-9a-zA-Z_]*) } + .bss : { *(.bss .bss.[0-9a-zA-Z_]*) } + .data : { *(.data .data.[0-9a-zA-Z_]*) } + .rela.data : { *(.rela.data .rela.data.[0-9a-zA-Z_]*) } + .rela.rodata : { *(.rela.rodata .rela.rodata.[0-9a-zA-Z_]*) } + .rela.text : { *(.rela.text .rela.text.[0-9a-zA-Z_]*) } + .rodata : { *(.rodata .rodata.[0-9a-zA-Z_]*) } + .text : { *(.text .text.[0-9a-zA-Z_]*) } } -- GitLab From 4bdce1e542df13fb66946ac00be944bf5f1025c0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 20 Jun 2019 16:12:37 -0600 Subject: [PATCH 0544/1278] UPSTREAM: coresight: Potential uninitialized variable in probe() (Upstream commit 0530ef6b41e80c5cc979e0e50682302161edb6b7). The "drvdata->atclk" clock is optional, but if it gets set to an error pointer then we accidentally return an uninitialized variable instead of success.
Fixes: 78e6427b4e7b ("coresight: funnel: Support static funnel") Signed-off-by: Dan Carpenter Signed-off-by: Mathieu Poirier Cc: stable Link: https://lore.kernel.org/r/20190620221237.3536-6-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman Bug: 140266694 Bug: 151593592 Signed-off-by: Yabin Cui Change-Id: Ie1f1a45b66f351b53c440d788888c201cb4bce7f --- drivers/hwtracing/coresight/coresight-funnel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index 6b9fce3a94be..82ab1da0cd28 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -269,6 +269,7 @@ static int funnel_probe(struct device *dev, struct resource *res) } pm_runtime_put(dev); + ret = 0; out_disable_clk: if (ret && !IS_ERR_OR_NULL(drvdata->atclk)) -- GitLab From 1de6fe1a466f8c9a4c30f26d4bc35df0ddc4d724 Mon Sep 17 00:00:00 2001 From: Dylan Chang Date: Mon, 23 Mar 2020 14:56:02 +0800 Subject: [PATCH 0545/1278] ANDROID: dm-bow: Fix free_show value is incorrect Fix free_show value is incorrect Change-Id: If7f36a64afa600c980ee8a25393f8716a6ffd474 Signed-off-by: Dylan Chang Bug: 152116739 (cherry picked from commit feff0b2236497c0651baa7f59ea50c2377fff053) Signed-off-by: Paul Lawrence --- drivers/md/dm-bow.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-bow.c b/drivers/md/dm-bow.c index 28df18633853..89913a23cc0c 100644 --- a/drivers/md/dm-bow.c +++ b/drivers/md/dm-bow.c @@ -793,6 +793,7 @@ static int prepare_unchanged_range(struct bow_context *bc, struct bow_range *br, */ original_type = br->type; sector0 = backup_br->sector; + bc->trims_total -= range_size(backup_br); if (backup_br->type == TRIMMED) list_del(&backup_br->trimmed_list); backup_br->type = br->type == SECTOR0_CURRENT ? 
SECTOR0_CURRENT -- GitLab From e83cc6a4d6125ac1f7dfc53d2a43f5ca68b3a624 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Mon, 23 Mar 2020 18:10:19 -0700 Subject: [PATCH 0546/1278] UPSTREAM: ipv6: ndisc: add support for 'PREF64' dns64 prefix identifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is trivial since we already have support for the entirely identical (from the kernel's point of view) RDNSS, DNSSL, etc. that also contain opaque data that needs to be passed down to userspace for further processing. As specified in draft-ietf-6man-ra-pref64-09 (while it is still a draft, it is purely waiting on the RFC Editor for cleanups and publishing): PREF64 option contains lifetime and a (up to) 96-bit IPv6 prefix. The 8-bit identifier of the option type as assigned by the IANA is 38. Since we lack DNS64/NAT64/CLAT support in kernel at the moment, thus this option should also be passed on to userland. See: https://tools.ietf.org/html/draft-ietf-6man-ra-pref64-09 https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xhtml#icmpv6-parameters-5 Cc: Erik Kline Cc: Jen Linkova Cc: Lorenzo Colitti Cc: Michael Haro Signed-off-by: Maciej Żenczykowski Acked-By: Lorenzo Colitti Signed-off-by: David S. 
Miller (cherry picked from commit c24a77edc9a7ac9b5fea75407f197fe1469262f4) Bug: 150648313 Change-Id: I02bff2103194a8171f907e82f811d7ab66962138 --- include/net/ndisc.h | 1 + net/ipv6/ndisc.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index d2a016ef5c41..35d7c887a5b9 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -39,6 +39,7 @@ enum { ND_OPT_DNSSL = 31, /* RFC6106 */ ND_OPT_6CO = 34, /* RFC6775 */ ND_OPT_CAPTIVE_PORTAL = 37, /* RFC7710 */ + ND_OPT_PREF64 = 38, /* RFC-ietf-6man-ra-pref64-09 */ __ND_OPT_MAX }; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 381708112f7d..2241c5f52394 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -197,6 +197,7 @@ static inline int ndisc_is_useropt(const struct net_device *dev, return opt->nd_opt_type == ND_OPT_RDNSS || opt->nd_opt_type == ND_OPT_DNSSL || opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL || + opt->nd_opt_type == ND_OPT_PREF64 || ndisc_ops_is_useropt(dev, opt->nd_opt_type); } -- GitLab From 09950ac31a568fd39d5b226f1cf8771d2ae6bcd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Mon, 23 Mar 2020 18:10:19 -0700 Subject: [PATCH 0547/1278] UPSTREAM: ipv6: ndisc: add support for 'PREF64' dns64 prefix identifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is trivial since we already have support for the entirely identical (from the kernel's point of view) RDNSS, DNSSL, etc. that also contain opaque data that needs to be passed down to userspace for further processing. As specified in draft-ietf-6man-ra-pref64-09 (while it is still a draft, it is purely waiting on the RFC Editor for cleanups and publishing): PREF64 option contains lifetime and a (up to) 96-bit IPv6 prefix. The 8-bit identifier of the option type as assigned by the IANA is 38. Since we lack DNS64/NAT64/CLAT support in kernel at the moment, thus this option should also be passed on to userland. 
See: https://tools.ietf.org/html/draft-ietf-6man-ra-pref64-09 https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xhtml#icmpv6-parameters-5 Cc: Erik Kline Cc: Jen Linkova Cc: Lorenzo Colitti Cc: Michael Haro Signed-off-by: Maciej Żenczykowski Acked-By: Lorenzo Colitti Signed-off-by: David S. Miller (cherry picked from commit c24a77edc9a7ac9b5fea75407f197fe1469262f4) Bug: 150648313 Change-Id: I02bff2103194a8171f907e82f811d7ab66962138 --- include/net/ndisc.h | 1 + net/ipv6/ndisc.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index d2a016ef5c41..35d7c887a5b9 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -39,6 +39,7 @@ enum { ND_OPT_DNSSL = 31, /* RFC6106 */ ND_OPT_6CO = 34, /* RFC6775 */ ND_OPT_CAPTIVE_PORTAL = 37, /* RFC7710 */ + ND_OPT_PREF64 = 38, /* RFC-ietf-6man-ra-pref64-09 */ __ND_OPT_MAX }; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 381708112f7d..2241c5f52394 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -197,6 +197,7 @@ static inline int ndisc_is_useropt(const struct net_device *dev, return opt->nd_opt_type == ND_OPT_RDNSS || opt->nd_opt_type == ND_OPT_DNSSL || opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL || + opt->nd_opt_type == ND_OPT_PREF64 || ndisc_ops_is_useropt(dev, opt->nd_opt_type); } -- GitLab From e771685bad3d8555d7931876463b542b36ff0c83 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Sat, 28 Mar 2020 00:28:08 -0700 Subject: [PATCH 0548/1278] mm/swapfile.c: move inode_lock out of claim_swapfile claim_swapfile() currently keeps the inode locked when it is successful, or the file is already swapfile (with -EBUSY). And, on the other error cases, it does not lock the inode. This inconsistency of the lock state and return value is quite confusing and actually causing a bad unlock balance as below in the "bad_swap" section of __do_sys_swapon(). This commit fixes this issue by moving the inode_lock() and IS_SWAPFILE check out of claim_swapfile().
The inode is unlocked in "bad_swap_unlock_inode" section, so that the inode is ensured to be unlocked at "bad_swap". Thus, error handling codes after the locking now jumps to "bad_swap_unlock_inode" instead of "bad_swap". ===================================== WARNING: bad unlock balance detected! 5.5.0-rc7+ #176 Not tainted ------------------------------------- swapon/4294 is trying to release lock (&sb->s_type->i_mutex_key) at: [] __do_sys_swapon+0x94b/0x3550 but there are no more locks to release! other info that might help us debug this: no locks held by swapon/4294. stack backtrace: CPU: 5 PID: 4294 Comm: swapon Not tainted 5.5.0-rc7-btrfs-zns+ #176 Hardware name: ASUS All Series/H87-PRO, BIOS 2102 07/29/2014 Call Trace: dump_stack+0xa1/0xea ? __do_sys_swapon+0x94b/0x3550 print_unlock_imbalance_bug.cold+0x114/0x123 ? __do_sys_swapon+0x94b/0x3550 lock_release+0x562/0xed0 ? kvfree+0x31/0x40 ? lock_downgrade+0x770/0x770 ? kvfree+0x31/0x40 ? rcu_read_lock_sched_held+0xa1/0xd0 ? rcu_read_lock_bh_held+0xb0/0xb0 up_write+0x2d/0x490 ? kfree+0x293/0x2f0 __do_sys_swapon+0x94b/0x3550 ? putname+0xb0/0xf0 ? kmem_cache_free+0x2e7/0x370 ? do_sys_open+0x184/0x3e0 ? generic_max_swapfile_size+0x40/0x40 ? do_syscall_64+0x27/0x4b0 ? entry_SYSCALL_64_after_hwframe+0x49/0xbe ? lockdep_hardirqs_on+0x38c/0x590 __x64_sys_swapon+0x54/0x80 do_syscall_64+0xa4/0x4b0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f15da0a0dc7 Link: http://lkml.kernel.org/r/20200206090132.154869-1-naohiro.aota@wdc.com Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices") Signed-off-by: Naohiro Aota Reviewed-by: Andrew Morton Reviewed-by: Darrick J.
Wong Tested-by: Qais Youef Cc: Christoph Hellwig Cc: --- mm/swapfile.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index d2bd49059c3a..9dceda94bf8b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2896,10 +2896,6 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) p->bdev = inode->i_sb->s_bdev; } - inode_lock(inode); - if (IS_SWAPFILE(inode)) - return -EBUSY; - return 0; } @@ -3135,36 +3131,40 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) mapping = swap_file->f_mapping; inode = mapping->host; - /* If S_ISREG(inode->i_mode) will do inode_lock(inode); */ error = claim_swapfile(p, inode); if (unlikely(error)) goto bad_swap; + inode_lock(inode); + if (IS_SWAPFILE(inode)) { + error = -EBUSY; + goto bad_swap_unlock_inode; + } /* * Read the swap header. */ if (!mapping->a_ops->readpage) { error = -EINVAL; - goto bad_swap; + goto bad_swap_unlock_inode; } page = read_mapping_page(mapping, 0, swap_file); if (IS_ERR(page)) { error = PTR_ERR(page); - goto bad_swap; + goto bad_swap_unlock_inode; } swap_header = kmap(page); maxpages = read_swap_header(p, swap_header, inode); if (unlikely(!maxpages)) { error = -EINVAL; - goto bad_swap; + goto bad_swap_unlock_inode; } /* OK, set up the swap map and apply the bad block list */ swap_map = vzalloc(maxpages); if (!swap_map) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; } if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) @@ -3186,7 +3186,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) GFP_KERNEL); if (!cluster_info) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; } for (ci = 0; ci < nr_cluster; ci++) @@ -3195,7 +3195,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->percpu_cluster = alloc_percpu(struct percpu_cluster); if (!p->percpu_cluster) { error = -ENOMEM; - goto
bad_swap; + goto bad_swap_unlock_inode; } for_each_possible_cpu(cpu) { struct percpu_cluster *cluster; @@ -3207,13 +3207,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = swap_cgroup_swapon(p->type, maxpages); if (error) - goto bad_swap; + goto bad_swap_unlock_inode; nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map, cluster_info, maxpages, &span); if (unlikely(nr_extents < 0)) { error = nr_extents; - goto bad_swap; + goto bad_swap_unlock_inode; } /* frontswap enabled? set up bit-per-page map for frontswap */ if (IS_ENABLED(CONFIG_FRONTSWAP)) @@ -3252,7 +3252,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = init_swap_address_space(p->type, maxpages); if (error) - goto bad_swap; + goto bad_swap_unlock_inode; /* * Flush any pending IO and dirty mappings before we start using this @@ -3262,7 +3262,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = inode_drain_writes(inode); if (error) { inode->i_flags &= ~S_SWAPFILE; - goto bad_swap; + goto bad_swap_unlock_inode; } mutex_lock(&swapon_mutex); @@ -3287,6 +3287,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = 0; goto out; +bad_swap_unlock_inode: + inode_unlock(inode); bad_swap: free_percpu(p->percpu_cluster); p->percpu_cluster = NULL; @@ -3294,6 +3296,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) set_blocksize(p->bdev, p->old_block_size); blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); } + inode = NULL; destroy_swap_extents(p); swap_cgroup_swapoff(p->type); spin_lock(&swap_lock); @@ -3303,13 +3306,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) vfree(swap_map); kvfree(cluster_info); kvfree(frontswap_map); - if (swap_file) { - if (inode) { - inode_unlock(inode); - inode = NULL; - } + if (swap_file) filp_close(swap_file, NULL); - } out: if (page && !IS_ERR(page)) { kunmap(page); 
-- GitLab From 4065ee1987dd3f367655e77f5cb14a29f4c51db0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:45:12 +0800 Subject: [PATCH 0549/1278] f2fs: fix to wait all node page writeback There is a race condition in which we may fail to wait for writeback of all node pages; fix it. - fsync() - shrink - f2fs_do_sync_file - __write_node_page - set_page_writeback(page#0) : remove DIRTY/TOWRITE flag - f2fs_fsync_node_pages : won't find page #0 as TOWRITE flag was removed - f2fs_wait_on_node_pages_writeback : won't wait page #0 writeback as it was not in fsync_node_list list. - f2fs_add_fsync_node_entry Fixes: 50fa53eccf9f ("f2fs: fix to avoid broken of dnode block list") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 494947d282d5..622353db4d5a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1562,15 +1562,16 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; - set_page_writeback(page); - ClearPageError(page); - + /* should add to global list before clearing PAGECACHE status */ if (f2fs_in_warm_node_list(sbi, page)) { seq = f2fs_add_fsync_node_entry(sbi, page); if (seq_id) *seq_id = seq; } + set_page_writeback(page); + ClearPageError(page); + fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); -- GitLab From 4ef1e03a521be1cb186d516211804abf8ab6284e Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 18 Feb 2020 09:19:07 +0530 Subject: [PATCH 0550/1278] f2fs: fix the panic in do_checkpoint() There could be a scenario where f2fs_sync_meta_pages() will not ensure that all F2FS_DIRTY_META pages are submitted for IO.
Thus, resulting in the below panic in do_checkpoint() - f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && !f2fs_cp_error(sbi)); This can happen in a low-memory condition, where shrinker could also be doing the writepage operation (stack shown below) at the same time when checkpoint is running on another core. schedule down_write f2fs_submit_page_write -> by this time, this page in page cache is tagged as PAGECACHE_TAG_WRITEBACK and PAGECACHE_TAG_DIRTY is cleared, due to which f2fs_sync_meta_pages() cannot sync this page in do_checkpoint() path. f2fs_do_write_meta_page __f2fs_write_meta_page f2fs_write_meta_page shrink_page_list shrink_inactive_list shrink_node_memcg shrink_node kswapd Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 16 +++++++--------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/super.c | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0cbd5e7b13f9..485cae1ccc90 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1250,20 +1250,20 @@ static void unblock_operations(struct f2fs_sb_info *sbi) f2fs_unlock_all(sbi); } -void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) +void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) { DEFINE_WAIT(wait); for (;;) { prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); - if (!get_pages(sbi, F2FS_WB_CP_DATA)) + if (!get_pages(sbi, type)) break; if (unlikely(f2fs_cp_error(sbi))) break; - io_schedule_timeout(5*HZ); + io_schedule_timeout(HZ/50); } finish_wait(&sbi->cp_wait, &wait); } @@ -1384,8 +1384,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Flush all the NAT/SIT pages */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && - !f2fs_cp_error(sbi)); /* * modify checkpoint @@ -1493,11 +1491,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) 
/* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && - !f2fs_cp_error(sbi)); + /* Wait for all dirty meta pages to be submitted for IO */ + f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META); /* wait for previous submitted meta pages writeback */ - f2fs_wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); /* flush all device cache */ err = f2fs_flush_device_cache(sbi); @@ -1506,7 +1504,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* barrier and flush checkpoint cp pack 2 page if it can */ commit_checkpoint(sbi, ckpt, start_blk); - f2fs_wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); /* * invalidate intermediate page cache borrowed from meta inode which are diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 720f2f44ed66..53b4cfe4f5d2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3305,7 +3305,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi); void f2fs_update_dirty_page(struct inode *inode, struct page *page); void f2fs_remove_dirty_inode(struct inode *inode); int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); -void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi); +void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type); int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi); int __init f2fs_create_checkpoint_caches(void); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1ee49e33f480..cdd5152cee89 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1180,7 +1180,7 @@ static void f2fs_put_super(struct super_block *sb) /* our cp_error case, we can wait for any writeback page */ f2fs_flush_merged_writes(sbi); - f2fs_wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); f2fs_bug_on(sbi, 
sbi->fsync_node_num); -- GitLab From e5bab07107b5198895491da4c61d0ebde515c8a6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 20 Feb 2020 20:50:37 -0800 Subject: [PATCH 0551/1278] f2fs: fix leaking uninitialized memory in compressed clusters When the compressed data of a cluster doesn't end on a page boundary, the remainder of the last page must be zeroed in order to avoid leaking uninitialized memory to disk. Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 17e10c4cd880..44fd2d8df14f 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -390,11 +390,15 @@ static int f2fs_compress_pages(struct compress_ctx *cc) for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++) cc->cbuf->reserved[i] = cpu_to_le32(0); + nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE); + + /* zero out any unused part of the last page */ + memset(&cc->cbuf->cdata[cc->clen], 0, + (nr_cpages * PAGE_SIZE) - (cc->clen + COMPRESS_HEADER_SIZE)); + vunmap(cc->cbuf); vunmap(cc->rbuf); - nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE); - for (i = nr_cpages; i < cc->nr_cpages; i++) { f2fs_put_compressed_page(cc->cpages[i]); cc->cpages[i] = NULL; -- GitLab From 43b0ade092231b6b5e3c55ae0dfc330aa2b9e273 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 24 Feb 2020 19:20:17 +0800 Subject: [PATCH 0552/1278] f2fs: fix to avoid NULL pointer dereference Unable to handle kernel NULL pointer dereference at virtual address 00000000 PC is at f2fs_free_dic+0x60/0x2c8 LR is at f2fs_decompress_pages+0x3c4/0x3e8 f2fs_free_dic+0x60/0x2c8 f2fs_decompress_pages+0x3c4/0x3e8 __read_end_io+0x78/0x19c f2fs_post_read_work+0x6c/0x94 process_one_work+0x210/0x48c worker_thread+0x2e8/0x44c kthread+0x110/0x120 ret_from_fork+0x10/0x18 In f2fs_free_dic(), we can not 
use f2fs_put_page(,1) to release dic->tpages[i], as the page's mapping is NULL. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 44fd2d8df14f..ae316742c771 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1142,7 +1142,8 @@ void f2fs_free_dic(struct decompress_io_ctx *dic) for (i = 0; i < dic->cluster_size; i++) { if (dic->rpages[i]) continue; - f2fs_put_page(dic->tpages[i], 1); + unlock_page(dic->tpages[i]); + put_page(dic->tpages[i]); } kfree(dic->tpages); } -- GitLab From a2e0d212941e4549c95f8fd7293a451aa1d660c1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 24 Feb 2020 19:20:18 +0800 Subject: [PATCH 0553/1278] f2fs: recycle unused compress_data.chksum feild In struct compress_data, the chksum field was never used; remove it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 1 - fs/f2fs/f2fs.h | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index ae316742c771..fae3005150fe 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -385,7 +385,6 @@ static int f2fs_compress_pages(struct compress_ctx *cc) } cc->cbuf->clen = cpu_to_le32(cc->clen); - cc->cbuf->chksum = cpu_to_le32(0); for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++) cc->cbuf->reserved[i] = cpu_to_le32(0); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 53b4cfe4f5d2..03c737262916 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1212,10 +1212,9 @@ enum compress_algorithm_type { COMPRESS_MAX, }; -#define COMPRESS_DATA_RESERVED_SIZE 4 +#define COMPRESS_DATA_RESERVED_SIZE 5 struct compress_data { __le32 clen; /* compressed data size */ - __le32 chksum; /* checksum of compressed data */ __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */ u8 cdata[]; /* compressed data */ }; -- GitLab From 22476071300969cca68b9026929eaf0719e7fddc Mon Sep 17 00:00:00 2001 From:
Chao Yu Date: Mon, 24 Feb 2020 19:20:19 +0800 Subject: [PATCH 0554/1278] f2fs: add missing function name in kernel message Otherwise, we cannot tell where a message came from when more than one place prints the same message. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 03c737262916..89b9edc78176 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2208,7 +2208,7 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, dquot_free_inode(inode); } else { if (unlikely(inode->i_blocks == 0)) { - f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + f2fs_warn(sbi, "dec_valid_node_count: inconsistent i_blocks, ino:%lu, iblocks:%llu", inode->i_ino, (unsigned long long)inode->i_blocks); set_sbi_flag(sbi, SBI_NEED_FSCK); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 622353db4d5a..fe63e74bd02a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1186,8 +1186,9 @@ int f2fs_remove_inode_page(struct inode *inode) } if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) { - f2fs_warn(F2FS_I_SB(inode), "Inconsistent i_blocks, ino:%lu, iblocks:%llu", - inode->i_ino, (unsigned long long)inode->i_blocks); + f2fs_warn(F2FS_I_SB(inode), + "f2fs_remove_inode_page: inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, (unsigned long long)inode->i_blocks); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); } -- GitLab From e930e37f2ee05e209b1e86c4bfc0897759391664 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 24 Feb 2020 19:20:16 +0800 Subject: [PATCH 0555/1278] f2fs: fix to avoid potential deadlock Use f2fs_trylock_op() in f2fs_write_compressed_pages() to avoid a potential deadlock, as we did in f2fs_write_single_data_page().
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index fae3005150fe..4550d3531e2e 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -780,7 +780,6 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, .encrypted_page = NULL, .compressed_page = NULL, .submitted = false, - .need_lock = LOCK_RETRY, .io_type = io_type, .io_wbc = wbc, .encrypted = f2fs_encrypted_file(cc->inode), @@ -793,9 +792,10 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, loff_t psize; int i, err; - set_new_dnode(&dn, cc->inode, NULL, NULL, 0); + if (!f2fs_trylock_op(sbi)) + return -EAGAIN; - f2fs_lock_op(sbi); + set_new_dnode(&dn, cc->inode, NULL, NULL, 0); err = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); if (err) -- GitLab From 2d41ac81081fe70a59434c2bd5718ac376754c72 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 19 Mar 2020 07:43:40 -0700 Subject: [PATCH 0556/1278] ANDROID: Incremental fs: Fix two typos Test: incfs_test passes Bug: 151240628 Signed-off-by: Paul Lawrence Change-Id: I627e683b562329fd57aedc8393e22449ff09ee1f (cherry picked from commit 06c715e275dc65e720759cfeacb4120289e2a306) --- fs/incfs/data_mgmt.c | 2 +- fs/incfs/integrity.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index afdb3dfd3355..f260c7620088 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -797,7 +797,7 @@ static int process_file_signature_md(struct incfs_file_signature *sg, int error = 0; struct incfs_df_signature *signature = kzalloc(sizeof(*signature), GFP_NOFS); - void *buf = 0; + void *buf = NULL; ssize_t read; if (!df || !df->df_backing_file_context || diff --git a/fs/incfs/integrity.c b/fs/incfs/integrity.c index f8af9a83ea8a..96e016a91542 100644 --- a/fs/incfs/integrity.c +++ b/fs/incfs/integrity.c @@ -57,7 +57,7 @@ struct 
signature_info { struct mem_range root_hash; }; -static u32 read_u32(u8 **p, u8 *top, u32 *result) +static bool read_u32(u8 **p, u8 *top, u32 *result) { if (*p + sizeof(u32) > top) return false; -- GitLab From 76f5f1ca65594ae7b98ef1cc3c5ae194ea6cbdf4 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 18 Mar 2020 09:28:29 -0700 Subject: [PATCH 0557/1278] ANDROID: Incremental fs: Add INCFS_IOC_GET_FILLED_BLOCKS Test: incfs_test passes Bug: 151240628 Signed-off-by: Paul Lawrence Change-Id: I66d0ba1911adc5d68ed404585222e6a81a7eb94f (cherry picked from commit 8d963bb24076b60cb2de0f2d49deaff1d52e8270) --- fs/incfs/data_mgmt.c | 120 +++++- fs/incfs/data_mgmt.h | 12 +- fs/incfs/format.c | 49 ++- fs/incfs/format.h | 21 +- fs/incfs/vfs.c | 46 ++- include/uapi/linux/incrementalfs.h | 63 ++++ .../selftests/filesystems/incfs/incfs_test.c | 343 ++++++++++++++++++ 7 files changed, 614 insertions(+), 40 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index f260c7620088..1f9016b1a3f5 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -118,8 +118,8 @@ struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) error = mutex_lock_interruptible(&bfc->bc_mutex); if (error) goto out; - error = incfs_read_file_header(bfc, &df->df_metadata_off, - &df->df_id, &size); + error = incfs_read_file_header(bfc, &df->df_metadata_off, &df->df_id, + &size, &df->df_header_flags); mutex_unlock(&bfc->bc_mutex); if (error) @@ -127,7 +127,7 @@ struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) df->df_size = size; if (size > 0) - df->df_block_count = get_blocks_count_for_size(size); + df->df_data_block_count = get_blocks_count_for_size(size); md_records = incfs_scan_metadata_chain(df); if (md_records < 0) @@ -352,7 +352,7 @@ static int get_data_file_block(struct data_file *df, int index, blockmap_off = df->df_blockmap_off; bfc = df->df_backing_file_context; - if (index < 0 || index >= df->df_block_count || blockmap_off 
== 0) + if (index < 0 || blockmap_off == 0) return -EINVAL; error = incfs_read_blockmap_entry(bfc, index, blockmap_off, &bme); @@ -372,6 +372,96 @@ static int get_data_file_block(struct data_file *df, int index, return 0; } +static int copy_one_range(struct incfs_filled_range *range, void __user *buffer, + u32 size, u32 *size_out) +{ + if (*size_out + sizeof(*range) > size) + return -ERANGE; + + if (copy_to_user(((char *)buffer) + *size_out, range, sizeof(*range))) + return -EFAULT; + + *size_out += sizeof(*range); + return 0; +} + +int incfs_get_filled_blocks(struct data_file *df, + struct incfs_get_filled_blocks_args *arg) +{ + int error = 0; + bool in_range = false; + struct incfs_filled_range range; + void *buffer = u64_to_user_ptr(arg->range_buffer); + u32 size = arg->range_buffer_size; + u32 end_index = + arg->end_index ? arg->end_index : df->df_total_block_count; + u32 *size_out = &arg->range_buffer_size_out; + + *size_out = 0; + if (end_index > df->df_total_block_count) + end_index = df->df_total_block_count; + arg->total_blocks_out = df->df_total_block_count; + + if (df->df_header_flags & INCFS_FILE_COMPLETE) { + pr_debug("File marked full, fast get_filled_blocks"); + if (arg->start_index > end_index) { + arg->index_out = arg->start_index; + return 0; + } + + range = (struct incfs_filled_range){ + .begin = arg->start_index, + .end = end_index, + }; + + arg->index_out = end_index; + return copy_one_range(&range, buffer, size, size_out); + } + + for (arg->index_out = arg->start_index; arg->index_out < end_index; + ++arg->index_out) { + struct data_file_block dfb; + + error = get_data_file_block(df, arg->index_out, &dfb); + if (error) + break; + + if (is_data_block_present(&dfb) == in_range) + continue; + + if (!in_range) { + in_range = true; + range.begin = arg->index_out; + } else { + range.end = arg->index_out; + error = copy_one_range(&range, buffer, size, size_out); + if (error) + break; + in_range = false; + } + } + + if (in_range) { + range.end = 
arg->index_out; + error = copy_one_range(&range, buffer, size, size_out); + } + + if (!error && in_range && arg->start_index == 0 && + end_index == df->df_total_block_count && + *size_out == sizeof(struct incfs_filled_range)) { + int result; + + df->df_header_flags |= INCFS_FILE_COMPLETE; + result = incfs_update_file_header_flags( + df->df_backing_file_context, df->df_header_flags); + + /* Log failure only, since it's just a failed optimization */ + pr_debug("Marked file full with result %d", result); + } + + return error; +} + static bool is_read_done(struct pending_read *read) { return atomic_read_acquire(&read->done) != 0; @@ -471,7 +561,7 @@ static int wait_for_data_block(struct data_file *df, int block_index, if (!df || !res_block) return -EFAULT; - if (block_index < 0 || block_index >= df->df_block_count) + if (block_index < 0 || block_index >= df->df_data_block_count) return -EINVAL; if (df->df_blockmap_off <= 0) @@ -641,7 +731,7 @@ int incfs_process_new_data_block(struct data_file *df, bfc = df->df_backing_file_context; mi = df->df_mount_info; - if (block->block_index >= df->df_block_count) + if (block->block_index >= df->df_data_block_count) return -ERANGE; segment = get_file_segment(df, block->block_index); @@ -747,7 +837,7 @@ int incfs_process_new_hash_block(struct data_file *df, if (!error) error = incfs_write_hash_block_to_backing_file( bfc, range(data, block->data_len), block->block_index, - hash_area_base); + hash_area_base, df->df_blockmap_off, df->df_size); mutex_unlock(&bfc->bc_mutex); return error; } @@ -763,9 +853,10 @@ static int process_blockmap_md(struct incfs_blockmap *bm, if (!df) return -EFAULT; - if (df->df_block_count != block_count) + if (df->df_data_block_count > block_count) return -EBADMSG; + df->df_total_block_count = block_count; df->df_blockmap_off = base_off; return error; } @@ -830,7 +921,7 @@ static int process_file_signature_md(struct incfs_file_signature *sg, } hash_tree = incfs_alloc_mtree(range(buf, signature->sig_size), - 
df->df_block_count); + df->df_data_block_count); if (IS_ERR(hash_tree)) { error = PTR_ERR(hash_tree); hash_tree = NULL; @@ -912,6 +1003,17 @@ int incfs_scan_metadata_chain(struct data_file *df) result = records_count; } mutex_unlock(&bfc->bc_mutex); + + if (df->df_hash_tree) { + int hash_block_count = get_blocks_count_for_size( + df->df_hash_tree->hash_tree_area_size); + + if (df->df_data_block_count + hash_block_count != + df->df_total_block_count) + result = -EINVAL; + } else if (df->df_data_block_count != df->df_total_block_count) + result = -EINVAL; + out: kfree(handler); return result; diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 01045403026a..41f74e68187d 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -209,7 +209,14 @@ struct data_file { /* File size in bytes */ loff_t df_size; - int df_block_count; /* File size in DATA_FILE_BLOCK_SIZE blocks */ + /* File header flags */ + u32 df_header_flags; + + /* File size in DATA_FILE_BLOCK_SIZE blocks */ + int df_data_block_count; + + /* Total number of blocks, data + hash */ + int df_total_block_count; struct file_attr n_attr; @@ -256,6 +263,9 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df, int index, int timeout_ms, struct mem_range tmp); +int incfs_get_filled_blocks(struct data_file *df, + struct incfs_get_filled_blocks_args *arg); + int incfs_read_file_signature(struct data_file *df, struct mem_range dst); int incfs_process_new_data_block(struct data_file *df, diff --git a/fs/incfs/format.c b/fs/incfs/format.c index 8c8213ee325d..96f4e3d54f58 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -13,6 +13,7 @@ #include #include "format.h" +#include "data_mgmt.h" struct backing_file_context *incfs_alloc_bfc(struct file *backing_file) { @@ -214,12 +215,23 @@ static int append_md_to_backing_file(struct backing_file_context *bfc, return result; } +int incfs_update_file_header_flags(struct backing_file_context *bfc, u32 flags) +{ + if (!bfc) + 
return -EFAULT; + + return write_to_bf(bfc, &flags, sizeof(flags), + offsetof(struct incfs_file_header, + fh_file_header_flags), + false); +} + /* * Reserve 0-filled space for the blockmap body, and append * incfs_blockmap metadata record pointing to it. */ int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, - u32 block_count, loff_t *map_base_off) + u32 block_count) { struct incfs_blockmap blockmap = {}; int result = 0; @@ -245,12 +257,9 @@ int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, /* Write blockmap metadata record pointing to the body written above. */ blockmap.m_base_offset = cpu_to_le64(file_end); result = append_md_to_backing_file(bfc, &blockmap.m_header); - if (result) { + if (result) /* Error, rollback file changes */ truncate_backing_file(bfc, file_end); - } else if (map_base_off) { - *map_base_off = file_end; - } return result; } @@ -438,12 +447,19 @@ int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, } int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, - struct mem_range block, - int block_index, loff_t hash_area_off) + struct mem_range block, + int block_index, + loff_t hash_area_off, + loff_t bm_base_off, int file_size) { + struct incfs_blockmap_entry bm_entry = {}; + int result; loff_t data_offset = 0; loff_t file_end = 0; - + loff_t bm_entry_off = + bm_base_off + + sizeof(struct incfs_blockmap_entry) * + (block_index + get_blocks_count_for_size(file_size)); if (!bfc) return -EFAULT; @@ -457,7 +473,17 @@ int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, return -EINVAL; } - return write_to_bf(bfc, block.data, block.len, data_offset, false); + result = write_to_bf(bfc, block.data, block.len, data_offset, false); + if (result) + return result; + + bm_entry.me_data_offset_lo = cpu_to_le32((u32)data_offset); + bm_entry.me_data_offset_hi = cpu_to_le16((u16)(data_offset >> 32)); + bm_entry.me_data_size = 
cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE); + bm_entry.me_flags = cpu_to_le16(INCFS_BLOCK_HASH); + + return write_to_bf(bfc, &bm_entry, sizeof(bm_entry), bm_entry_off, + false); } /* Initialize a new image in a given backing file. */ @@ -517,10 +543,9 @@ int incfs_read_blockmap_entries(struct backing_file_context *bfc, return 0; } - int incfs_read_file_header(struct backing_file_context *bfc, loff_t *first_md_off, incfs_uuid_t *uuid, - u64 *file_size) + u64 *file_size, u32 *flags) { ssize_t bytes_read = 0; struct incfs_file_header fh = {}; @@ -554,6 +579,8 @@ int incfs_read_file_header(struct backing_file_context *bfc, *uuid = fh.fh_uuid; if (file_size) *file_size = le64_to_cpu(fh.fh_file_size); + if (flags) + *flags = le32_to_cpu(fh.fh_file_header_flags); return 0; } diff --git a/fs/incfs/format.h b/fs/incfs/format.h index 55e6938b30d6..33e5ea4eba56 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -121,6 +121,10 @@ enum incfs_metadata_type { INCFS_MD_SIGNATURE = 3 }; +enum incfs_file_header_flags { + INCFS_FILE_COMPLETE = 1 << 0, +}; + /* Header included at the beginning of all metadata records on the disk. */ struct incfs_md_header { __u8 h_md_entry_type; @@ -159,8 +163,8 @@ struct incfs_file_header { /* INCFS_DATA_FILE_BLOCK_SIZE */ __le16 fh_data_block_size; - /* Padding, also reserved for future use. */ - __le32 fh_dummy; + /* File flags, from incfs_file_header_flags */ + __le32 fh_file_header_flags; /* Offset of the first metadata record */ __le64 fh_first_md_offset; @@ -178,6 +182,7 @@ struct incfs_file_header { enum incfs_block_map_entry_flags { INCFS_BLOCK_COMPRESSED_LZ4 = (1 << 0), + INCFS_BLOCK_HASH = (1 << 1), }; /* Block map entry pointing to an actual location of the data block. 
*/ @@ -284,7 +289,7 @@ void incfs_free_bfc(struct backing_file_context *bfc); /* Writing stuff */ int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, - u32 block_count, loff_t *map_base_off); + u32 block_count); int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, incfs_uuid_t *uuid, u64 file_size); @@ -295,8 +300,10 @@ int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, u16 flags); int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, - struct mem_range block, - int block_index, loff_t hash_area_off); + struct mem_range block, + int block_index, + loff_t hash_area_off, + loff_t bm_base_off, int file_size); int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, struct mem_range value, struct incfs_file_attr *attr); @@ -304,13 +311,15 @@ int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, struct mem_range sig, u32 tree_size); +int incfs_update_file_header_flags(struct backing_file_context *bfc, u32 flags); + int incfs_make_empty_backing_file(struct backing_file_context *bfc, incfs_uuid_t *uuid, u64 file_size); /* Reading stuff */ int incfs_read_file_header(struct backing_file_context *bfc, loff_t *first_md_off, incfs_uuid_t *uuid, - u64 *file_size); + u64 *file_size, u32 *flags); int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index, loff_t bm_base_off, diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index f911c5eb9290..f531e28a90c8 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -907,19 +907,6 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, if (error) goto out; - block_count = (u32)get_blocks_count_for_size(size); - error = incfs_write_blockmap_to_backing_file(bfc, block_count, NULL); - if (error) - goto out; - - /* This fill has data, reserve space for the block map. 
*/ - if (block_count > 0) { - error = incfs_write_blockmap_to_backing_file( - bfc, block_count, NULL); - if (error) - goto out; - } - if (attr.data && attr.len) { error = incfs_write_file_attr_to_backing_file(bfc, attr, NULL); @@ -927,6 +914,8 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, goto out; } + block_count = (u32)get_blocks_count_for_size(size); + if (user_signature_info) { raw_signature = incfs_copy_signature_info_from_user( user_signature_info, signature_size); @@ -948,8 +937,16 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, bfc, raw_signature, hash_tree->hash_tree_area_size); if (error) goto out; + + block_count += get_blocks_count_for_size( + hash_tree->hash_tree_area_size); } + if (block_count) + error = incfs_write_blockmap_to_backing_file(bfc, block_count); + + if (error) + goto out; out: if (bfc) { mutex_unlock(&bfc->bc_mutex); @@ -1439,6 +1436,27 @@ static long ioctl_read_file_signature(struct file *f, void __user *arg) return error; } +static long ioctl_get_filled_blocks(struct file *f, void __user *arg) +{ + struct incfs_get_filled_blocks_args __user *args_usr_ptr = arg; + struct incfs_get_filled_blocks_args args = {}; + struct data_file *df = get_incfs_data_file(f); + int error; + + if (!df) + return -EINVAL; + + if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) + return -EINVAL; + + error = incfs_get_filled_blocks(df, &args); + + if (copy_to_user(args_usr_ptr, &args, sizeof(args))) + return -EFAULT; + + return error; +} + static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) { struct mount_info *mi = get_mount_info(file_superblock(f)); @@ -1452,6 +1470,8 @@ static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) return ioctl_permit_fill(f, (void __user *)arg); case INCFS_IOC_READ_FILE_SIGNATURE: return ioctl_read_file_signature(f, (void __user *)arg); + case INCFS_IOC_GET_FILLED_BLOCKS: + return ioctl_get_filled_blocks(f, (void 
__user *)arg); default: return -EINVAL; } diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index fd65f575cdf0..0fb1c86d2f9d 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -68,6 +68,30 @@ #define INCFS_IOC_PERMIT_FILL \ _IOW(INCFS_IOCTL_BASE_CODE, 33, struct incfs_permit_fill) +/* + * Fills buffer with ranges of populated blocks + * + * Returns 0 if all ranges written + * error otherwise + * + * Either way, range_buffer_size_out is set to the number + * of bytes written. Should be set to 0 by caller. The ranges + * filled are valid, but if an error was returned there might + * be more ranges to come. + * + * Ranges are ranges of filled blocks: + * + * 1 2 7 9 + * + * means blocks 1, 2, 7, 8, 9 are filled, 0, 3, 4, 5, 6 and 10 on + * are not + * + * If hashing is enabled for the file, the hash blocks are simply + * treated as though they immediately followed the data blocks. + */ +#define INCFS_IOC_GET_FILLED_BLOCKS \ + _IOR(INCFS_IOCTL_BASE_CODE, 34, struct incfs_get_filled_blocks_args) + enum incfs_compression_alg { COMPRESSION_NONE = 0, COMPRESSION_LZ4 = 1 @@ -272,4 +296,43 @@ struct incfs_get_file_sig_args { __u32 file_signature_len_out; }; +struct incfs_filled_range { + __u32 begin; + __u32 end; +}; + +/* + * Request ranges of filled blocks + * Argument for INCFS_IOC_GET_FILLED_BLOCKS + */ +struct incfs_get_filled_blocks_args { + /* + * A buffer to populate with ranges of filled blocks + * + * Equivalent to struct incfs_filled_ranges *range_buffer + */ + __aligned_u64 range_buffer; + + /* Size of range_buffer */ + __u32 range_buffer_size; + + /* Start index to read from */ + __u32 start_index; + + /* + * End index to read to. 0 means read to end. 
This is a range, + * so incfs will read from start_index to end_index - 1 + */ + __u32 end_index; + + /* Actual number of blocks in file */ + __u32 total_blocks_out; + + /* Number of bytes written to range buffer */ + __u32 range_buffer_size_out; + + /* Sector scanned up to, if the call was interrupted */ + __u32 index_out; +}; + #endif /* _UAPI_LINUX_INCREMENTALFS_H */ diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 7031561c0173..df2d224d322f 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -2099,6 +2099,347 @@ static int read_log_test(char *mount_dir) return TEST_FAILURE; } +static int emit_partial_test_file_data(char *mount_dir, struct test_file *file) +{ + int i, j; + int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int *block_indexes = NULL; + int result = 0; + int blocks_written = 0; + + if (file->size == 0) + return 0; + + /* Emit 2 blocks, skip 2 blocks etc*/ + block_indexes = calloc(block_cnt, sizeof(*block_indexes)); + for (i = 0, j = 0; i < block_cnt; ++i) + if ((i & 2) == 0) { + block_indexes[j] = i; + ++j; + } + + for (i = 0; i < j; i += blocks_written) { + blocks_written = emit_test_blocks(mount_dir, file, + block_indexes + i, j - i); + if (blocks_written < 0) { + result = blocks_written; + goto out; + } + if (blocks_written == 0) { + result = -EIO; + goto out; + } + } +out: + free(block_indexes); + return result; +} + +static int validate_ranges(const char *mount_dir, struct test_file *file) +{ + int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + char *filename = concat_file_name(mount_dir, file->name); + int fd; + struct incfs_filled_range ranges[128]; + struct incfs_get_filled_blocks_args fba = { + .range_buffer = ptr_to_u64(ranges), + .range_buffer_size = sizeof(ranges), + }; + int error = TEST_SUCCESS; + int i; + int range_cnt; + + fd = 
open(filename, O_RDONLY); + free(filename); + if (fd <= 0) + return TEST_FAILURE; + + error = ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); + if (error && errno != ERANGE) + goto out; + + if (error && errno == ERANGE && block_cnt < 509) + goto out; + + if (!error && block_cnt >= 509) { + error = -ERANGE; + goto out; + } + + if (fba.total_blocks_out != block_cnt) { + error = -EINVAL; + goto out; + } + + range_cnt = (block_cnt + 3) / 4; + if (range_cnt > 128) + range_cnt = 128; + if (range_cnt != fba.range_buffer_size_out / sizeof(*ranges)) { + error = -ERANGE; + goto out; + } + + error = TEST_SUCCESS; + for (i = 0; i < fba.range_buffer_size_out / sizeof(*ranges) - 1; ++i) + if (ranges[i].begin != i * 4 || ranges[i].end != i * 4 + 2) { + error = -EINVAL; + goto out; + } + + if (ranges[i].begin != i * 4 || + (ranges[i].end != i * 4 + 1 && ranges[i].end != i * 4 + 2)) { + error = -EINVAL; + goto out; + } + + for (i = 0; i < 64; ++i) { + fba.start_index = i * 2; + fba.end_index = i * 2 + 2; + error = ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); + if (error) + goto out; + + if (fba.total_blocks_out != block_cnt) { + error = -EINVAL; + goto out; + } + + if (fba.start_index >= block_cnt) { + if (fba.index_out != fba.start_index) { + printf("Paul: %d, %d\n", (int)fba.index_out, + (int)fba.start_index); + error = -EINVAL; + goto out; + } + + break; + } + + if (i % 2) { + if (fba.range_buffer_size_out != 0) { + error = -EINVAL; + goto out; + } + } else { + if (fba.range_buffer_size_out != sizeof(*ranges)) { + error = -EINVAL; + goto out; + } + + if (ranges[0].begin != i * 2) { + error = -EINVAL; + goto out; + } + + if (ranges[0].end != i * 2 + 1 && + ranges[0].end != i * 2 + 2) { + error = -EINVAL; + goto out; + } + } + } + +out: + close(fd); + return error; +} + +static int get_blocks_test(char *mount_dir) +{ + char *backing_dir; + int cmd_fd = -1; + int i; + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + + backing_dir = 
create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Write data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (emit_file(cmd_fd, NULL, file->name, &file->id, file->size, + NULL)) + goto failure; + + if (emit_partial_test_file_data(mount_dir, file)) + goto failure; + } + + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (validate_ranges(mount_dir, file)) + goto failure; + + /* + * The smallest files are filled completely, so this checks that + * the fast get_filled_blocks path is not causing issues + */ + if (validate_ranges(mount_dir, file)) + goto failure; + } + + close(cmd_fd); + umount(mount_dir); + free(backing_dir); + return TEST_SUCCESS; + +failure: + close(cmd_fd); + umount(mount_dir); + free(backing_dir); + return TEST_FAILURE; +} + +static int emit_partial_test_file_hash(char *mount_dir, struct test_file *file) +{ + int err; + int fd; + struct incfs_fill_blocks fill_blocks = { + .count = 1, + }; + struct incfs_fill_block *fill_block_array = + calloc(fill_blocks.count, sizeof(struct incfs_fill_block)); + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + + if (file->size <= 4096 / 32 * 4096) + return 0; + + if (fill_blocks.count == 0) + return 0; + + if (!fill_block_array) + return -ENOMEM; + fill_blocks.fill_blocks = ptr_to_u64(fill_block_array); + + rnd_buf(data, sizeof(data), 0); + + fill_block_array[0] = + (struct incfs_fill_block){ .block_index = 1, + .data_len = + INCFS_DATA_FILE_BLOCK_SIZE, + .data = ptr_to_u64(data), + .flags = INCFS_BLOCK_FLAGS_HASH }; + + fd = open_file_by_id(mount_dir, file->id, true); + if (fd < 0) { + err = errno; + goto failure; + } + + err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + close(fd); + if (err < fill_blocks.count) + err = errno; + else + err = 0; + +failure: + 
free(fill_block_array); + return err; +} + +static int validate_hash_ranges(const char *mount_dir, struct test_file *file) +{ + char *filename = concat_file_name(mount_dir, file->name); + int fd; + struct incfs_filled_range ranges[128]; + struct incfs_get_filled_blocks_args fba = { + .range_buffer = ptr_to_u64(ranges), + .range_buffer_size = sizeof(ranges), + }; + int error = TEST_SUCCESS; + int file_blocks = (file->size + INCFS_DATA_FILE_BLOCK_SIZE - 1) / + INCFS_DATA_FILE_BLOCK_SIZE; + + if (file->size <= 4096 / 32 * 4096) + return 0; + + fd = open(filename, O_RDONLY); + free(filename); + if (fd <= 0) + return TEST_FAILURE; + + error = ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); + if (error) + goto out; + + if (fba.range_buffer_size_out != sizeof(struct incfs_filled_range)) { + error = -EINVAL; + goto out; + } + + if (ranges[0].begin != file_blocks + 1 || + ranges[0].end != file_blocks + 2) { + error = -EINVAL; + goto out; + } + +out: + close(fd); + return error; +} + +static int get_hash_blocks_test(char *mount_dir) +{ + char *backing_dir; + int cmd_fd = -1; + int i; + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (crypto_emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, file->root_hash, + file->sig.add_data)) + goto failure; + + if (emit_partial_test_file_hash(mount_dir, file)) + goto failure; + } + + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (validate_hash_ranges(mount_dir, file)) + goto failure; + } + + close(cmd_fd); + umount(mount_dir); + free(backing_dir); + return TEST_SUCCESS; + +failure: + close(cmd_fd); + umount(mount_dir); + 
free(backing_dir); + return TEST_FAILURE; +} + static char *setup_mount_dir() { struct stat st; @@ -2166,6 +2507,8 @@ int main(int argc, char *argv[]) MAKE_TEST(multiple_providers_test), MAKE_TEST(hash_tree_test), MAKE_TEST(read_log_test), + MAKE_TEST(get_blocks_test), + MAKE_TEST(get_hash_blocks_test), }; #undef MAKE_TEST -- GitLab From dc4d38d5b5378f5953e94b47dee40fc1eb48f011 Mon Sep 17 00:00:00 2001 From: "glider@google.com" Date: Thu, 12 Mar 2020 16:59:20 +0100 Subject: [PATCH 0558/1278] FROMLIST: arm64: define __alloc_zeroed_user_highpage When running the kernel with init_on_alloc=1, calling the default implementation of __alloc_zeroed_user_highpage() from include/linux/highmem.h leads to double-initialization of the allocated page (first by the page allocator, then by clear_user_page(). Calling alloc_page_vma() with __GFP_ZERO, similarly to e.g. x86, seems to be enough to ensure the user page is zeroed only once. Signed-off-by: Alexander Potapenko Signed-off-by: Catalin Marinas Link: https://lore.kernel.org/linux-arm-kernel/20200312155920.50067-1-glider@google.com/ Signed-off-by: Alexander Potapenko Change-Id: I3ff7c3fb082b420491313e0fdd841ce340989d72 --- arch/arm64/include/asm/page.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 60d02c81a3a2..6b9c3025c817 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -32,6 +32,10 @@ extern void __cpu_copy_user_page(void *to, const void *from, extern void copy_page(void *to, const void *from); extern void clear_page(void *to); +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + #define clear_user_page(addr,vaddr,pg) __cpu_clear_user_page(addr, vaddr) #define copy_user_page(to,from,vaddr,pg) __cpu_copy_user_page(to, from, vaddr) -- GitLab From dfd92c69e3fe2f27362b6ba24fda9ad29ab82bb5 
Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Sun, 29 Mar 2020 23:14:34 -0700 Subject: [PATCH 0559/1278] ANDROID: clang: update to 10.0.5 Bug: 151950245 Signed-off-by: Alistair Delva Change-Id: I81669ba0a201de3e5d0139166d613681cdb1b4e1 --- build.config.common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.config.common b/build.config.common index be3afbc1c6b5..6176b14b9b88 100644 --- a/build.config.common +++ b/build.config.common @@ -3,7 +3,7 @@ KERNEL_DIR=common CC=clang LD=ld.lld -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r377782b/bin +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r377782c/bin BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 EXTRA_CMDS='' -- GitLab From eaa0fdc0fa297521551ce493d55fe6e4a3c59995 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 11 Dec 2019 16:38:57 +0900 Subject: [PATCH 0560/1278] UPSTREAM: usb: gadget: move choice ... endchoice to legacy/Kconfig (Upstream commit 10e5e6c2496354f0afec82dba459339c421badbf.) drivers/usb/gadget/Kconfig includes drivers/usb/gadget/legacy/Kconfig inside the 'choice' block. The current Kconfig allows this, but I'd like to discourage this usage. People tend to mess up the structure without noticing that entire drivers/usb/gadget/legacy/Kconfig is placed in the choice context. In fact, legacy/Kconfig mixes up bool and tristate in the choice, and creates nested choice, etc. This commit does not change the behavior, but it will help people notice how badly this Kconfig file is written. 
Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20191211073857.16780-1-masahiroy@kernel.org Signed-off-by: Greg Kroah-Hartman Bug: 147413187 Signed-off-by: Andrey Konovalov Change-Id: Ib2b96fd9c2acb681b2186eb89ec1f0f2b18d0e89 --- drivers/usb/gadget/Kconfig | 28 ---------------------------- drivers/usb/gadget/legacy/Kconfig | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 9141b8cc9b24..d11762085c98 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -513,34 +513,6 @@ config USB_CONFIGFS_F_TCM Both protocols can work on USB2.0 and USB3.0. UAS utilizes the USB 3.0 feature called streams support. -choice - tristate "USB Gadget precomposed configurations" - default USB_ETH - optional - help - A Linux "Gadget Driver" talks to the USB Peripheral Controller - driver through the abstract "gadget" API. Some other operating - systems call these "client" drivers, of which "class drivers" - are a subset (implementing a USB device class specification). - A gadget driver implements one or more USB functions using - the peripheral hardware. - - Gadget drivers are hardware-neutral, or "platform independent", - except that they sometimes must understand quirks or limitations - of the particular controllers they work with. For example, when - a controller doesn't support alternate configurations or provide - enough of the right types of endpoints, the gadget driver might - not be able work with that controller, or might need to implement - a less common variant of a device class protocol. - - The available choices each represent a single precomposed USB - gadget configuration. In the device model, each option contains - both the device instantiation as a child for a USB gadget - controller, and the relevant drivers for each function declared - by the device. 
- source "drivers/usb/gadget/legacy/Kconfig" -endchoice - endif # USB_GADGET diff --git a/drivers/usb/gadget/legacy/Kconfig b/drivers/usb/gadget/legacy/Kconfig index a12fb459dbd9..75416e54624b 100644 --- a/drivers/usb/gadget/legacy/Kconfig +++ b/drivers/usb/gadget/legacy/Kconfig @@ -13,6 +13,32 @@ # both kinds of controller can also support "USB On-the-Go" (CONFIG_USB_OTG). # +choice + tristate "USB Gadget precomposed configurations" + default USB_ETH + optional + help + A Linux "Gadget Driver" talks to the USB Peripheral Controller + driver through the abstract "gadget" API. Some other operating + systems call these "client" drivers, of which "class drivers" + are a subset (implementing a USB device class specification). + A gadget driver implements one or more USB functions using + the peripheral hardware. + + Gadget drivers are hardware-neutral, or "platform independent", + except that they sometimes must understand quirks or limitations + of the particular controllers they work with. For example, when + a controller doesn't support alternate configurations or provide + enough of the right types of endpoints, the gadget driver might + not be able work with that controller, or might need to implement + a less common variant of a device class protocol. + + The available choices each represent a single precomposed USB + gadget configuration. In the device model, each option contains + both the device instantiation as a child for a USB gadget + controller, and the relevant drivers for each function declared + by the device. + config USB_ZERO tristate "Gadget Zero (DEVELOPMENT)" select USB_LIBCOMPOSITE @@ -490,3 +516,5 @@ config USB_G_WEBCAM Say "y" to link the driver statically, or "m" to build a dynamically linked module called "g_webcam". 
+ +endchoice -- GitLab From f706056cab017dc49dcebfcc0d6706bb774cf329 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 24 Feb 2020 17:13:03 +0100 Subject: [PATCH 0561/1278] BACKPORT: usb: gadget: add raw-gadget interface Conflict: Documentation/usb/index.rst is missing. (Upstream commit f2c2e717642c66f7fe7e5dd69b2e8ff5849f4d10.) USB Raw Gadget is a kernel module that provides a userspace interface for the USB Gadget subsystem. Essentially it allows emulating USB devices from userspace. Enabled with CONFIG_USB_RAW_GADGET. Raw Gadget is currently a strictly debugging feature and shouldn't be used in production. Raw Gadget is similar to GadgetFS, but provides more low-level and direct access to the USB Gadget layer for the userspace. The key differences are: 1. Every USB request is passed to the userspace to get a response, while GadgetFS responds to some USB requests internally based on the provided descriptors. Note, however, that the UDC driver might respond to some requests on its own and never forward them to the Gadget layer. 2. GadgetFS performs some sanity checks on the provided USB descriptors, while Raw Gadget allows you to provide arbitrary data as responses to USB requests. 3. Raw Gadget provides a way to select a UDC device/driver to bind to, while GadgetFS currently binds to the first available UDC. 4. Raw Gadget uses predictable endpoint names (handles) across different UDCs (as long as UDCs have enough endpoints of each required transfer type). 5. Raw Gadget has an ioctl-based interface instead of a filesystem-based one.
Reviewed-by: Greg Kroah-Hartman Signed-off-by: Andrey Konovalov Signed-off-by: Felipe Balbi Bug: 147413187 Signed-off-by: Andrey Konovalov Change-Id: Ib48759bda0651e685160c532bed55b7335a39034 --- Documentation/usb/raw-gadget.rst | 61 ++ drivers/usb/gadget/legacy/Kconfig | 11 + drivers/usb/gadget/legacy/Makefile | 1 + drivers/usb/gadget/legacy/raw_gadget.c | 1078 ++++++++++++++++++++++++ include/uapi/linux/usb/raw_gadget.h | 167 ++++ 5 files changed, 1318 insertions(+) create mode 100644 Documentation/usb/raw-gadget.rst create mode 100644 drivers/usb/gadget/legacy/raw_gadget.c create mode 100644 include/uapi/linux/usb/raw_gadget.h diff --git a/Documentation/usb/raw-gadget.rst b/Documentation/usb/raw-gadget.rst new file mode 100644 index 000000000000..9e78cb858f86 --- /dev/null +++ b/Documentation/usb/raw-gadget.rst @@ -0,0 +1,61 @@ +============== +USB Raw Gadget +============== + +USB Raw Gadget is a kernel module that provides a userspace interface for +the USB Gadget subsystem. Essentially it allows emulating USB devices +from userspace. Enabled with CONFIG_USB_RAW_GADGET. Raw Gadget is +currently a strictly debugging feature and shouldn't be used in +production; use GadgetFS instead. + +Comparison to GadgetFS +~~~~~~~~~~~~~~~~~~~~~~ + +Raw Gadget is similar to GadgetFS, but provides more low-level and +direct access to the USB Gadget layer for the userspace. The key +differences are: + +1. Every USB request is passed to the userspace to get a response, while + GadgetFS responds to some USB requests internally based on the provided + descriptors. Note, however, that the UDC driver might respond to some + requests on its own and never forward them to the Gadget layer. + +2. GadgetFS performs some sanity checks on the provided USB descriptors, + while Raw Gadget allows you to provide arbitrary data as responses to + USB requests. + +3. Raw Gadget provides a way to select a UDC device/driver to bind to, + while GadgetFS currently binds to the first available UDC.
+ +4. Raw Gadget uses predictable endpoint names (handles) across different + UDCs (as long as UDCs have enough endpoints of each required transfer + type). + +5. Raw Gadget has ioctl-based interface instead of a filesystem-based one. + +Userspace interface +~~~~~~~~~~~~~~~~~~~ + +To create a Raw Gadget instance open /dev/raw-gadget. Multiple raw-gadget +instances (bound to different UDCs) can be used at the same time. The +interaction with the opened file happens through the ioctl() calls, see +comments in include/uapi/linux/usb/raw_gadget.h for details. + +The typical usage of Raw Gadget looks like: + +1. Open Raw Gadget instance via /dev/raw-gadget. +2. Initialize the instance via USB_RAW_IOCTL_INIT. +3. Launch the instance with USB_RAW_IOCTL_RUN. +4. In a loop issue USB_RAW_IOCTL_EVENT_FETCH calls to receive events from + Raw Gadget and react to those depending on what kind of USB device + needs to be emulated. + +Potential future improvements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Implement ioctl's for setting/clearing halt status on endpoints. + +- Reporting more events (suspend, resume, etc.) through + USB_RAW_IOCTL_EVENT_FETCH. + +- Support O_NONBLOCK I/O. diff --git a/drivers/usb/gadget/legacy/Kconfig b/drivers/usb/gadget/legacy/Kconfig index 75416e54624b..49b50e6efe22 100644 --- a/drivers/usb/gadget/legacy/Kconfig +++ b/drivers/usb/gadget/legacy/Kconfig @@ -517,4 +517,15 @@ config USB_G_WEBCAM Say "y" to link the driver statically, or "m" to build a dynamically linked module called "g_webcam". +config USB_RAW_GADGET + tristate "USB Raw Gadget" + help + USB Raw Gadget is a kernel module that provides a userspace interface + for the USB Gadget subsystem. Essentially it allows to emulate USB + devices from userspace. See Documentation/usb/raw-gadget.rst for + details. + + Say "y" to link the driver statically, or "m" to build a + dynamically linked module called "raw_gadget". 
+ endchoice diff --git a/drivers/usb/gadget/legacy/Makefile b/drivers/usb/gadget/legacy/Makefile index abd0c3e66a05..4d864bf82799 100644 --- a/drivers/usb/gadget/legacy/Makefile +++ b/drivers/usb/gadget/legacy/Makefile @@ -43,3 +43,4 @@ obj-$(CONFIG_USB_G_WEBCAM) += g_webcam.o obj-$(CONFIG_USB_G_NCM) += g_ncm.o obj-$(CONFIG_USB_G_ACM_MS) += g_acm_ms.o obj-$(CONFIG_USB_GADGET_TARGET) += tcm_usb_gadget.o +obj-$(CONFIG_USB_RAW_GADGET) += raw_gadget.o diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c new file mode 100644 index 000000000000..76406343fbe5 --- /dev/null +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -0,0 +1,1078 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * USB Raw Gadget driver. + * See Documentation/usb/raw-gadget.rst for more details. + * + * Andrey Konovalov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#define DRIVER_DESC "USB Raw Gadget" +#define DRIVER_NAME "raw-gadget" + +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_AUTHOR("Andrey Konovalov"); +MODULE_LICENSE("GPL"); + +/*----------------------------------------------------------------------*/ + +#define RAW_EVENT_QUEUE_SIZE 16 + +struct raw_event_queue { + /* See the comment in raw_event_queue_fetch() for locking details. 
*/ + spinlock_t lock; + struct semaphore sema; + struct usb_raw_event *events[RAW_EVENT_QUEUE_SIZE]; + int size; +}; + +static void raw_event_queue_init(struct raw_event_queue *queue) +{ + spin_lock_init(&queue->lock); + sema_init(&queue->sema, 0); + queue->size = 0; +} + +static int raw_event_queue_add(struct raw_event_queue *queue, + enum usb_raw_event_type type, size_t length, const void *data) +{ + unsigned long flags; + struct usb_raw_event *event; + + spin_lock_irqsave(&queue->lock, flags); + if (WARN_ON(queue->size >= RAW_EVENT_QUEUE_SIZE)) { + spin_unlock_irqrestore(&queue->lock, flags); + return -ENOMEM; + } + event = kmalloc(sizeof(*event) + length, GFP_ATOMIC); + if (!event) { + spin_unlock_irqrestore(&queue->lock, flags); + return -ENOMEM; + } + event->type = type; + event->length = length; + if (event->length) + memcpy(&event->data[0], data, length); + queue->events[queue->size] = event; + queue->size++; + up(&queue->sema); + spin_unlock_irqrestore(&queue->lock, flags); + return 0; +} + +static struct usb_raw_event *raw_event_queue_fetch( + struct raw_event_queue *queue) +{ + unsigned long flags; + struct usb_raw_event *event; + + /* + * This function can be called concurrently. We first check that + * there's at least one event queued by decrementing the semaphore, + * and then take the lock to protect queue struct fields. 
+ */ + if (down_interruptible(&queue->sema)) + return NULL; + spin_lock_irqsave(&queue->lock, flags); + if (WARN_ON(!queue->size)) + return NULL; + event = queue->events[0]; + queue->size--; + memmove(&queue->events[0], &queue->events[1], + queue->size * sizeof(queue->events[0])); + spin_unlock_irqrestore(&queue->lock, flags); + return event; +} + +static void raw_event_queue_destroy(struct raw_event_queue *queue) +{ + int i; + + for (i = 0; i < queue->size; i++) + kfree(queue->events[i]); + queue->size = 0; +} + +/*----------------------------------------------------------------------*/ + +struct raw_dev; + +#define USB_RAW_MAX_ENDPOINTS 32 + +enum ep_state { + STATE_EP_DISABLED, + STATE_EP_ENABLED, +}; + +struct raw_ep { + struct raw_dev *dev; + enum ep_state state; + struct usb_ep *ep; + struct usb_request *req; + bool urb_queued; + bool disabling; + ssize_t status; +}; + +enum dev_state { + STATE_DEV_INVALID = 0, + STATE_DEV_OPENED, + STATE_DEV_INITIALIZED, + STATE_DEV_RUNNING, + STATE_DEV_CLOSED, + STATE_DEV_FAILED +}; + +struct raw_dev { + struct kref count; + spinlock_t lock; + + const char *udc_name; + struct usb_gadget_driver driver; + + /* Reference to misc device: */ + struct device *dev; + + /* Protected by lock: */ + enum dev_state state; + bool gadget_registered; + struct usb_gadget *gadget; + struct usb_request *req; + bool ep0_in_pending; + bool ep0_out_pending; + bool ep0_urb_queued; + ssize_t ep0_status; + struct raw_ep eps[USB_RAW_MAX_ENDPOINTS]; + + struct completion ep0_done; + struct raw_event_queue queue; +}; + +static struct raw_dev *dev_new(void) +{ + struct raw_dev *dev; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return NULL; + /* Matches kref_put() in raw_release(). 
*/ + kref_init(&dev->count); + spin_lock_init(&dev->lock); + init_completion(&dev->ep0_done); + raw_event_queue_init(&dev->queue); + return dev; +} + +static void dev_free(struct kref *kref) +{ + struct raw_dev *dev = container_of(kref, struct raw_dev, count); + int i; + + kfree(dev->udc_name); + kfree(dev->driver.udc_name); + if (dev->req) { + if (dev->ep0_urb_queued) + usb_ep_dequeue(dev->gadget->ep0, dev->req); + usb_ep_free_request(dev->gadget->ep0, dev->req); + } + raw_event_queue_destroy(&dev->queue); + for (i = 0; i < USB_RAW_MAX_ENDPOINTS; i++) { + if (dev->eps[i].state != STATE_EP_ENABLED) + continue; + usb_ep_disable(dev->eps[i].ep); + usb_ep_free_request(dev->eps[i].ep, dev->eps[i].req); + kfree(dev->eps[i].ep->desc); + dev->eps[i].state = STATE_EP_DISABLED; + } + kfree(dev); +} + +/*----------------------------------------------------------------------*/ + +static int raw_queue_event(struct raw_dev *dev, + enum usb_raw_event_type type, size_t length, const void *data) +{ + int ret = 0; + unsigned long flags; + + ret = raw_event_queue_add(&dev->queue, type, length, data); + if (ret < 0) { + spin_lock_irqsave(&dev->lock, flags); + dev->state = STATE_DEV_FAILED; + spin_unlock_irqrestore(&dev->lock, flags); + } + return ret; +} + +static void gadget_ep0_complete(struct usb_ep *ep, struct usb_request *req) +{ + struct raw_dev *dev = req->context; + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + if (req->status) + dev->ep0_status = req->status; + else + dev->ep0_status = req->actual; + if (dev->ep0_in_pending) + dev->ep0_in_pending = false; + else + dev->ep0_out_pending = false; + spin_unlock_irqrestore(&dev->lock, flags); + + complete(&dev->ep0_done); +} + +static int gadget_bind(struct usb_gadget *gadget, + struct usb_gadget_driver *driver) +{ + int ret = 0; + struct raw_dev *dev = container_of(driver, struct raw_dev, driver); + struct usb_request *req; + unsigned long flags; + + if (strcmp(gadget->name, dev->udc_name) != 0) + return 
-ENODEV; + + set_gadget_data(gadget, dev); + req = usb_ep_alloc_request(gadget->ep0, GFP_KERNEL); + if (!req) { + dev_err(&gadget->dev, "usb_ep_alloc_request failed\n"); + set_gadget_data(gadget, NULL); + return -ENOMEM; + } + + spin_lock_irqsave(&dev->lock, flags); + dev->req = req; + dev->req->context = dev; + dev->req->complete = gadget_ep0_complete; + dev->gadget = gadget; + spin_unlock_irqrestore(&dev->lock, flags); + + /* Matches kref_put() in gadget_unbind(). */ + kref_get(&dev->count); + + ret = raw_queue_event(dev, USB_RAW_EVENT_CONNECT, 0, NULL); + if (ret < 0) + dev_err(&gadget->dev, "failed to queue event\n"); + + return ret; +} + +static void gadget_unbind(struct usb_gadget *gadget) +{ + struct raw_dev *dev = get_gadget_data(gadget); + + set_gadget_data(gadget, NULL); + /* Matches kref_get() in gadget_bind(). */ + kref_put(&dev->count, dev_free); +} + +static int gadget_setup(struct usb_gadget *gadget, + const struct usb_ctrlrequest *ctrl) +{ + int ret = 0; + struct raw_dev *dev = get_gadget_data(gadget); + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_RUNNING) { + dev_err(&gadget->dev, "ignoring, device is not running\n"); + ret = -ENODEV; + goto out_unlock; + } + if (dev->ep0_in_pending || dev->ep0_out_pending) { + dev_dbg(&gadget->dev, "stalling, request already pending\n"); + ret = -EBUSY; + goto out_unlock; + } + if ((ctrl->bRequestType & USB_DIR_IN) && ctrl->wLength) + dev->ep0_in_pending = true; + else + dev->ep0_out_pending = true; + spin_unlock_irqrestore(&dev->lock, flags); + + ret = raw_queue_event(dev, USB_RAW_EVENT_CONTROL, sizeof(*ctrl), ctrl); + if (ret < 0) + dev_err(&gadget->dev, "failed to queue event\n"); + goto out; + +out_unlock: + spin_unlock_irqrestore(&dev->lock, flags); +out: + return ret; +} + +/* These are currently unused but present in case UDC driver requires them. 
*/ +static void gadget_disconnect(struct usb_gadget *gadget) { } +static void gadget_suspend(struct usb_gadget *gadget) { } +static void gadget_resume(struct usb_gadget *gadget) { } +static void gadget_reset(struct usb_gadget *gadget) { } + +/*----------------------------------------------------------------------*/ + +static struct miscdevice raw_misc_device; + +static int raw_open(struct inode *inode, struct file *fd) +{ + struct raw_dev *dev; + + /* Nonblocking I/O is not supported yet. */ + if (fd->f_flags & O_NONBLOCK) + return -EINVAL; + + dev = dev_new(); + if (!dev) + return -ENOMEM; + fd->private_data = dev; + dev->state = STATE_DEV_OPENED; + dev->dev = raw_misc_device.this_device; + return 0; +} + +static int raw_release(struct inode *inode, struct file *fd) +{ + int ret = 0; + struct raw_dev *dev = fd->private_data; + unsigned long flags; + bool unregister = false; + + spin_lock_irqsave(&dev->lock, flags); + dev->state = STATE_DEV_CLOSED; + if (!dev->gadget) { + spin_unlock_irqrestore(&dev->lock, flags); + goto out_put; + } + if (dev->gadget_registered) + unregister = true; + dev->gadget_registered = false; + spin_unlock_irqrestore(&dev->lock, flags); + + if (unregister) { + ret = usb_gadget_unregister_driver(&dev->driver); + if (ret != 0) + dev_err(dev->dev, + "usb_gadget_unregister_driver() failed with %d\n", + ret); + /* Matches kref_get() in raw_ioctl_run(). */ + kref_put(&dev->count, dev_free); + } + +out_put: + /* Matches dev_new() in raw_open(). 
*/ + kref_put(&dev->count, dev_free); + return ret; +} + +/*----------------------------------------------------------------------*/ + +static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) +{ + int ret = 0; + struct usb_raw_init arg; + char *udc_driver_name; + char *udc_device_name; + unsigned long flags; + + ret = copy_from_user(&arg, (void __user *)value, sizeof(arg)); + if (ret) + return ret; + + switch (arg.speed) { + case USB_SPEED_UNKNOWN: + arg.speed = USB_SPEED_HIGH; + break; + case USB_SPEED_LOW: + case USB_SPEED_FULL: + case USB_SPEED_HIGH: + case USB_SPEED_SUPER: + break; + default: + return -EINVAL; + } + + udc_driver_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); + if (!udc_driver_name) + return -ENOMEM; + ret = strscpy(udc_driver_name, &arg.driver_name[0], + UDC_NAME_LENGTH_MAX); + if (ret < 0) { + kfree(udc_driver_name); + return ret; + } + ret = 0; + + udc_device_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); + if (!udc_device_name) { + kfree(udc_driver_name); + return -ENOMEM; + } + ret = strscpy(udc_device_name, &arg.device_name[0], + UDC_NAME_LENGTH_MAX); + if (ret < 0) { + kfree(udc_driver_name); + kfree(udc_device_name); + return ret; + } + ret = 0; + + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_OPENED) { + dev_dbg(dev->dev, "fail, device is not opened\n"); + kfree(udc_driver_name); + kfree(udc_device_name); + ret = -EINVAL; + goto out_unlock; + } + dev->udc_name = udc_driver_name; + + dev->driver.function = DRIVER_DESC; + dev->driver.max_speed = arg.speed; + dev->driver.setup = gadget_setup; + dev->driver.disconnect = gadget_disconnect; + dev->driver.bind = gadget_bind; + dev->driver.unbind = gadget_unbind; + dev->driver.suspend = gadget_suspend; + dev->driver.resume = gadget_resume; + dev->driver.reset = gadget_reset; + dev->driver.driver.name = DRIVER_NAME; + dev->driver.udc_name = udc_device_name; + dev->driver.match_existing_only = 1; + + dev->state = STATE_DEV_INITIALIZED; + +out_unlock: + 
spin_unlock_irqrestore(&dev->lock, flags); + return ret; +} + +static int raw_ioctl_run(struct raw_dev *dev, unsigned long value) +{ + int ret = 0; + unsigned long flags; + + if (value) + return -EINVAL; + + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_INITIALIZED) { + dev_dbg(dev->dev, "fail, device is not initialized\n"); + ret = -EINVAL; + goto out_unlock; + } + spin_unlock_irqrestore(&dev->lock, flags); + + ret = usb_gadget_probe_driver(&dev->driver); + + spin_lock_irqsave(&dev->lock, flags); + if (ret) { + dev_err(dev->dev, + "fail, usb_gadget_probe_driver returned %d\n", ret); + dev->state = STATE_DEV_FAILED; + goto out_unlock; + } + dev->gadget_registered = true; + dev->state = STATE_DEV_RUNNING; + /* Matches kref_put() in raw_release(). */ + kref_get(&dev->count); + +out_unlock: + spin_unlock_irqrestore(&dev->lock, flags); + return ret; +} + +static int raw_ioctl_event_fetch(struct raw_dev *dev, unsigned long value) +{ + int ret = 0; + struct usb_raw_event arg; + unsigned long flags; + struct usb_raw_event *event; + uint32_t length; + + ret = copy_from_user(&arg, (void __user *)value, sizeof(arg)); + if (ret) + return ret; + + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_RUNNING) { + dev_dbg(dev->dev, "fail, device is not running\n"); + spin_unlock_irqrestore(&dev->lock, flags); + return -EINVAL; + } + if (!dev->gadget) { + dev_dbg(dev->dev, "fail, gadget is not bound\n"); + spin_unlock_irqrestore(&dev->lock, flags); + return -EBUSY; + } + spin_unlock_irqrestore(&dev->lock, flags); + + event = raw_event_queue_fetch(&dev->queue); + if (!event) { + dev_dbg(&dev->gadget->dev, "event fetching interrupted\n"); + return -EINTR; + } + length = min(arg.length, event->length); + ret = copy_to_user((void __user *)value, event, + sizeof(*event) + length); + return ret; +} + +static void *raw_alloc_io_data(struct usb_raw_ep_io *io, void __user *ptr, + bool get_from_user) +{ + int ret; + void *data; + + ret = 
copy_from_user(io, ptr, sizeof(*io)); + if (ret) + return ERR_PTR(ret); + if (io->ep >= USB_RAW_MAX_ENDPOINTS) + return ERR_PTR(-EINVAL); + if (!usb_raw_io_flags_valid(io->flags)) + return ERR_PTR(-EINVAL); + if (io->length > PAGE_SIZE) + return ERR_PTR(-EINVAL); + if (get_from_user) + data = memdup_user(ptr + sizeof(*io), io->length); + else { + data = kmalloc(io->length, GFP_KERNEL); + if (!data) + data = ERR_PTR(-ENOMEM); + } + return data; +} + +static int raw_process_ep0_io(struct raw_dev *dev, struct usb_raw_ep_io *io, + void *data, bool in) +{ + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_RUNNING) { + dev_dbg(dev->dev, "fail, device is not running\n"); + ret = -EINVAL; + goto out_unlock; + } + if (!dev->gadget) { + dev_dbg(dev->dev, "fail, gadget is not bound\n"); + ret = -EBUSY; + goto out_unlock; + } + if (dev->ep0_urb_queued) { + dev_dbg(&dev->gadget->dev, "fail, urb already queued\n"); + ret = -EBUSY; + goto out_unlock; + } + if ((in && !dev->ep0_in_pending) || + (!in && !dev->ep0_out_pending)) { + dev_dbg(&dev->gadget->dev, "fail, wrong direction\n"); + ret = -EBUSY; + goto out_unlock; + } + if (WARN_ON(in && dev->ep0_out_pending)) { + ret = -ENODEV; + dev->state = STATE_DEV_FAILED; + goto out_done; + } + if (WARN_ON(!in && dev->ep0_in_pending)) { + ret = -ENODEV; + dev->state = STATE_DEV_FAILED; + goto out_done; + } + + dev->req->buf = data; + dev->req->length = io->length; + dev->req->zero = usb_raw_io_flags_zero(io->flags); + dev->ep0_urb_queued = true; + spin_unlock_irqrestore(&dev->lock, flags); + + ret = usb_ep_queue(dev->gadget->ep0, dev->req, GFP_KERNEL); + if (ret) { + dev_err(&dev->gadget->dev, + "fail, usb_ep_queue returned %d\n", ret); + spin_lock_irqsave(&dev->lock, flags); + dev->state = STATE_DEV_FAILED; + goto out_done; + } + + ret = wait_for_completion_interruptible(&dev->ep0_done); + if (ret) { + dev_dbg(&dev->gadget->dev, "wait interrupted\n"); + 
usb_ep_dequeue(dev->gadget->ep0, dev->req); + wait_for_completion(&dev->ep0_done); + spin_lock_irqsave(&dev->lock, flags); + goto out_done; + } + + spin_lock_irqsave(&dev->lock, flags); + ret = dev->ep0_status; + +out_done: + dev->ep0_urb_queued = false; +out_unlock: + spin_unlock_irqrestore(&dev->lock, flags); + return ret; +} + +static int raw_ioctl_ep0_write(struct raw_dev *dev, unsigned long value) +{ + int ret = 0; + void *data; + struct usb_raw_ep_io io; + + data = raw_alloc_io_data(&io, (void __user *)value, true); + if (IS_ERR(data)) + return PTR_ERR(data); + ret = raw_process_ep0_io(dev, &io, data, true); + kfree(data); + return ret; +} + +static int raw_ioctl_ep0_read(struct raw_dev *dev, unsigned long value) +{ + int ret = 0; + void *data; + struct usb_raw_ep_io io; + unsigned int length; + + data = raw_alloc_io_data(&io, (void __user *)value, false); + if (IS_ERR(data)) + return PTR_ERR(data); + ret = raw_process_ep0_io(dev, &io, data, false); + if (ret < 0) { + kfree(data); + return ret; + } + length = min(io.length, (unsigned int)ret); + ret = copy_to_user((void __user *)(value + sizeof(io)), data, length); + kfree(data); + return ret; +} + +static bool check_ep_caps(struct usb_ep *ep, + struct usb_endpoint_descriptor *desc) +{ + switch (usb_endpoint_type(desc)) { + case USB_ENDPOINT_XFER_ISOC: + if (!ep->caps.type_iso) + return false; + break; + case USB_ENDPOINT_XFER_BULK: + if (!ep->caps.type_bulk) + return false; + break; + case USB_ENDPOINT_XFER_INT: + if (!ep->caps.type_int) + return false; + break; + default: + return false; + } + + if (usb_endpoint_dir_in(desc) && !ep->caps.dir_in) + return false; + if (usb_endpoint_dir_out(desc) && !ep->caps.dir_out) + return false; + + return true; +} + +static int raw_ioctl_ep_enable(struct raw_dev *dev, unsigned long value) +{ + int ret = 0, i; + unsigned long flags; + struct usb_endpoint_descriptor *desc; + struct usb_ep *ep = NULL; + + desc = memdup_user((void __user *)value, sizeof(*desc)); + if 
(IS_ERR(desc)) + return PTR_ERR(desc); + + /* + * Endpoints with a maxpacket length of 0 can cause crashes in UDC + * drivers. + */ + if (usb_endpoint_maxp(desc) == 0) { + dev_dbg(dev->dev, "fail, bad endpoint maxpacket\n"); + kfree(desc); + return -EINVAL; + } + + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_RUNNING) { + dev_dbg(dev->dev, "fail, device is not running\n"); + ret = -EINVAL; + goto out_free; + } + if (!dev->gadget) { + dev_dbg(dev->dev, "fail, gadget is not bound\n"); + ret = -EBUSY; + goto out_free; + } + + for (i = 0; i < USB_RAW_MAX_ENDPOINTS; i++) { + if (dev->eps[i].state == STATE_EP_ENABLED) + continue; + break; + } + if (i == USB_RAW_MAX_ENDPOINTS) { + dev_dbg(&dev->gadget->dev, + "fail, no device endpoints available\n"); + ret = -EBUSY; + goto out_free; + } + + gadget_for_each_ep(ep, dev->gadget) { + if (ep->enabled) + continue; + if (!check_ep_caps(ep, desc)) + continue; + ep->desc = desc; + ret = usb_ep_enable(ep); + if (ret < 0) { + dev_err(&dev->gadget->dev, + "fail, usb_ep_enable returned %d\n", ret); + goto out_free; + } + dev->eps[i].req = usb_ep_alloc_request(ep, GFP_ATOMIC); + if (!dev->eps[i].req) { + dev_err(&dev->gadget->dev, + "fail, usb_ep_alloc_request failed\n"); + usb_ep_disable(ep); + ret = -ENOMEM; + goto out_free; + } + dev->eps[i].ep = ep; + dev->eps[i].state = STATE_EP_ENABLED; + ep->driver_data = &dev->eps[i]; + ret = i; + goto out_unlock; + } + + dev_dbg(&dev->gadget->dev, "fail, no gadget endpoints available\n"); + ret = -EBUSY; + +out_free: + kfree(desc); +out_unlock: + spin_unlock_irqrestore(&dev->lock, flags); + return ret; +} + +static int raw_ioctl_ep_disable(struct raw_dev *dev, unsigned long value) +{ + int ret = 0, i = value; + unsigned long flags; + const void *desc; + + if (i < 0 || i >= USB_RAW_MAX_ENDPOINTS) + return -EINVAL; + + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_RUNNING) { + dev_dbg(dev->dev, "fail, device is not running\n"); + ret = -EINVAL; + goto 
out_unlock; + } + if (!dev->gadget) { + dev_dbg(dev->dev, "fail, gadget is not bound\n"); + ret = -EBUSY; + goto out_unlock; + } + if (dev->eps[i].state != STATE_EP_ENABLED) { + dev_dbg(&dev->gadget->dev, "fail, endpoint is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + if (dev->eps[i].disabling) { + dev_dbg(&dev->gadget->dev, + "fail, disable already in progress\n"); + ret = -EINVAL; + goto out_unlock; + } + if (dev->eps[i].urb_queued) { + dev_dbg(&dev->gadget->dev, + "fail, waiting for urb completion\n"); + ret = -EINVAL; + goto out_unlock; + } + dev->eps[i].disabling = true; + spin_unlock_irqrestore(&dev->lock, flags); + + usb_ep_disable(dev->eps[i].ep); + + spin_lock_irqsave(&dev->lock, flags); + usb_ep_free_request(dev->eps[i].ep, dev->eps[i].req); + desc = dev->eps[i].ep->desc; + dev->eps[i].ep = NULL; + dev->eps[i].state = STATE_EP_DISABLED; + kfree(desc); + dev->eps[i].disabling = false; + +out_unlock: + spin_unlock_irqrestore(&dev->lock, flags); + return ret; +} + +static void gadget_ep_complete(struct usb_ep *ep, struct usb_request *req) +{ + struct raw_ep *r_ep = (struct raw_ep *)ep->driver_data; + struct raw_dev *dev = r_ep->dev; + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + if (req->status) + r_ep->status = req->status; + else + r_ep->status = req->actual; + spin_unlock_irqrestore(&dev->lock, flags); + + complete((struct completion *)req->context); +} + +static int raw_process_ep_io(struct raw_dev *dev, struct usb_raw_ep_io *io, + void *data, bool in) +{ + int ret = 0; + unsigned long flags; + struct raw_ep *ep = &dev->eps[io->ep]; + DECLARE_COMPLETION_ONSTACK(done); + + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_RUNNING) { + dev_dbg(dev->dev, "fail, device is not running\n"); + ret = -EINVAL; + goto out_unlock; + } + if (!dev->gadget) { + dev_dbg(dev->dev, "fail, gadget is not bound\n"); + ret = -EBUSY; + goto out_unlock; + } + if (ep->state != STATE_EP_ENABLED) { + dev_dbg(&dev->gadget->dev, 
"fail, endpoint is not enabled\n"); + ret = -EBUSY; + goto out_unlock; + } + if (ep->disabling) { + dev_dbg(&dev->gadget->dev, + "fail, endpoint is already being disabled\n"); + ret = -EBUSY; + goto out_unlock; + } + if (ep->urb_queued) { + dev_dbg(&dev->gadget->dev, "fail, urb already queued\n"); + ret = -EBUSY; + goto out_unlock; + } + if ((in && !ep->ep->caps.dir_in) || (!in && ep->ep->caps.dir_in)) { + dev_dbg(&dev->gadget->dev, "fail, wrong direction\n"); + ret = -EINVAL; + goto out_unlock; + } + + ep->dev = dev; + ep->req->context = &done; + ep->req->complete = gadget_ep_complete; + ep->req->buf = data; + ep->req->length = io->length; + ep->req->zero = usb_raw_io_flags_zero(io->flags); + ep->urb_queued = true; + spin_unlock_irqrestore(&dev->lock, flags); + + ret = usb_ep_queue(ep->ep, ep->req, GFP_KERNEL); + if (ret) { + dev_err(&dev->gadget->dev, + "fail, usb_ep_queue returned %d\n", ret); + spin_lock_irqsave(&dev->lock, flags); + dev->state = STATE_DEV_FAILED; + goto out_done; + } + + ret = wait_for_completion_interruptible(&done); + if (ret) { + dev_dbg(&dev->gadget->dev, "wait interrupted\n"); + usb_ep_dequeue(ep->ep, ep->req); + wait_for_completion(&done); + spin_lock_irqsave(&dev->lock, flags); + goto out_done; + } + + spin_lock_irqsave(&dev->lock, flags); + ret = ep->status; + +out_done: + ep->urb_queued = false; +out_unlock: + spin_unlock_irqrestore(&dev->lock, flags); + return ret; +} + +static int raw_ioctl_ep_write(struct raw_dev *dev, unsigned long value) +{ + int ret = 0; + char *data; + struct usb_raw_ep_io io; + + data = raw_alloc_io_data(&io, (void __user *)value, true); + if (IS_ERR(data)) + return PTR_ERR(data); + ret = raw_process_ep_io(dev, &io, data, true); + kfree(data); + return ret; +} + +static int raw_ioctl_ep_read(struct raw_dev *dev, unsigned long value) +{ + int ret = 0; + char *data; + struct usb_raw_ep_io io; + unsigned int length; + + data = raw_alloc_io_data(&io, (void __user *)value, false); + if (IS_ERR(data)) + return 
PTR_ERR(data); + ret = raw_process_ep_io(dev, &io, data, false); + if (ret < 0) { + kfree(data); + return ret; + } + length = min(io.length, (unsigned int)ret); + ret = copy_to_user((void __user *)(value + sizeof(io)), data, length); + kfree(data); + return ret; +} + +static int raw_ioctl_configure(struct raw_dev *dev, unsigned long value) +{ + int ret = 0; + unsigned long flags; + + if (value) + return -EINVAL; + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_RUNNING) { + dev_dbg(dev->dev, "fail, device is not running\n"); + ret = -EINVAL; + goto out_unlock; + } + if (!dev->gadget) { + dev_dbg(dev->dev, "fail, gadget is not bound\n"); + ret = -EBUSY; + goto out_unlock; + } + usb_gadget_set_state(dev->gadget, USB_STATE_CONFIGURED); + +out_unlock: + spin_unlock_irqrestore(&dev->lock, flags); + return ret; +} + +static int raw_ioctl_vbus_draw(struct raw_dev *dev, unsigned long value) +{ + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_RUNNING) { + dev_dbg(dev->dev, "fail, device is not running\n"); + ret = -EINVAL; + goto out_unlock; + } + if (!dev->gadget) { + dev_dbg(dev->dev, "fail, gadget is not bound\n"); + ret = -EBUSY; + goto out_unlock; + } + usb_gadget_vbus_draw(dev->gadget, 2 * value); + +out_unlock: + spin_unlock_irqrestore(&dev->lock, flags); + return ret; +} + +static long raw_ioctl(struct file *fd, unsigned int cmd, unsigned long value) +{ + struct raw_dev *dev = fd->private_data; + int ret = 0; + + if (!dev) + return -EBUSY; + + switch (cmd) { + case USB_RAW_IOCTL_INIT: + ret = raw_ioctl_init(dev, value); + break; + case USB_RAW_IOCTL_RUN: + ret = raw_ioctl_run(dev, value); + break; + case USB_RAW_IOCTL_EVENT_FETCH: + ret = raw_ioctl_event_fetch(dev, value); + break; + case USB_RAW_IOCTL_EP0_WRITE: + ret = raw_ioctl_ep0_write(dev, value); + break; + case USB_RAW_IOCTL_EP0_READ: + ret = raw_ioctl_ep0_read(dev, value); + break; + case USB_RAW_IOCTL_EP_ENABLE: + ret = 
raw_ioctl_ep_enable(dev, value); + break; + case USB_RAW_IOCTL_EP_DISABLE: + ret = raw_ioctl_ep_disable(dev, value); + break; + case USB_RAW_IOCTL_EP_WRITE: + ret = raw_ioctl_ep_write(dev, value); + break; + case USB_RAW_IOCTL_EP_READ: + ret = raw_ioctl_ep_read(dev, value); + break; + case USB_RAW_IOCTL_CONFIGURE: + ret = raw_ioctl_configure(dev, value); + break; + case USB_RAW_IOCTL_VBUS_DRAW: + ret = raw_ioctl_vbus_draw(dev, value); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/*----------------------------------------------------------------------*/ + +static const struct file_operations raw_fops = { + .open = raw_open, + .unlocked_ioctl = raw_ioctl, + .compat_ioctl = raw_ioctl, + .release = raw_release, + .llseek = no_llseek, +}; + +static struct miscdevice raw_misc_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = DRIVER_NAME, + .fops = &raw_fops, +}; + +module_misc_device(raw_misc_device); diff --git a/include/uapi/linux/usb/raw_gadget.h b/include/uapi/linux/usb/raw_gadget.h new file mode 100644 index 000000000000..00cbded71061 --- /dev/null +++ b/include/uapi/linux/usb/raw_gadget.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * USB Raw Gadget driver. + * + * See Documentation/usb/raw-gadget.rst for more details. + */ + +#ifndef _UAPI__LINUX_USB_RAW_GADGET_H +#define _UAPI__LINUX_USB_RAW_GADGET_H + +#include +#include +#include + +/* Maximum length of driver_name/device_name in the usb_raw_init struct. */ +#define UDC_NAME_LENGTH_MAX 128 + +/* + * struct usb_raw_init - argument for USB_RAW_IOCTL_INIT ioctl. + * @speed: The speed of the emulated USB device, takes the same values as + * the usb_device_speed enum: USB_SPEED_FULL, USB_SPEED_HIGH, etc. + * @driver_name: The name of the UDC driver. + * @device_name: The name of a UDC instance. + * + * The last two fields identify a UDC the gadget driver should bind to. 
+ * For example, Dummy UDC has "dummy_udc" as its driver_name and "dummy_udc.N" + * as its device_name, where N is the index of the Dummy UDC instance. + * At the same time the dwc2 driver that is used on Raspberry Pi Zero, has + * "20980000.usb" as both driver_name and device_name. + */ +struct usb_raw_init { + __u8 driver_name[UDC_NAME_LENGTH_MAX]; + __u8 device_name[UDC_NAME_LENGTH_MAX]; + __u8 speed; +}; + +/* The type of event fetched with the USB_RAW_IOCTL_EVENT_FETCH ioctl. */ +enum usb_raw_event_type { + USB_RAW_EVENT_INVALID = 0, + + /* This event is queued when the driver has bound to a UDC. */ + USB_RAW_EVENT_CONNECT = 1, + + /* This event is queued when a new control request arrived to ep0. */ + USB_RAW_EVENT_CONTROL = 2, + + /* The list might grow in the future. */ +}; + +/* + * struct usb_raw_event - argument for USB_RAW_IOCTL_EVENT_FETCH ioctl. + * @type: The type of the fetched event. + * @length: Length of the data buffer. Updated by the driver and set to the + * actual length of the fetched event data. + * @data: A buffer to store the fetched event data. + * + * Currently the fetched data buffer is empty for USB_RAW_EVENT_CONNECT, + * and contains struct usb_ctrlrequest for USB_RAW_EVENT_CONTROL. + */ +struct usb_raw_event { + __u32 type; + __u32 length; + __u8 data[0]; +}; + +#define USB_RAW_IO_FLAGS_ZERO 0x0001 +#define USB_RAW_IO_FLAGS_MASK 0x0001 + +static int usb_raw_io_flags_valid(__u16 flags) +{ + return (flags & ~USB_RAW_IO_FLAGS_MASK) == 0; +} + +static int usb_raw_io_flags_zero(__u16 flags) +{ + return (flags & USB_RAW_IO_FLAGS_ZERO); +} + +/* + * struct usb_raw_ep_io - argument for USB_RAW_IOCTL_EP0/EP_WRITE/READ ioctls. + * @ep: Endpoint handle as returned by USB_RAW_IOCTL_EP_ENABLE for + * USB_RAW_IOCTL_EP_WRITE/READ. Ignored for USB_RAW_IOCTL_EP0_WRITE/READ. + * @flags: When USB_RAW_IO_FLAGS_ZERO is specified, the zero flag is set on + * the submitted USB request, see include/linux/usb/gadget.h for details.
+ * @length: Length of data. + * @data: Data to send for USB_RAW_IOCTL_EP0/EP_WRITE. Buffer to store received + * data for USB_RAW_IOCTL_EP0/EP_READ. + */ +struct usb_raw_ep_io { + __u16 ep; + __u16 flags; + __u32 length; + __u8 data[0]; +}; + +/* + * Initializes a Raw Gadget instance. + * Accepts a pointer to the usb_raw_init struct as an argument. + * Returns 0 on success or negative error code on failure. + */ +#define USB_RAW_IOCTL_INIT _IOW('U', 0, struct usb_raw_init) + +/* + * Instructs Raw Gadget to bind to a UDC and start emulating a USB device. + * Returns 0 on success or negative error code on failure. + */ +#define USB_RAW_IOCTL_RUN _IO('U', 1) + +/* + * A blocking ioctl that waits for an event and returns fetched event data to + * the user. + * Accepts a pointer to the usb_raw_event struct. + * Returns 0 on success or negative error code on failure. + */ +#define USB_RAW_IOCTL_EVENT_FETCH _IOR('U', 2, struct usb_raw_event) + +/* + * Queues an IN (OUT for READ) urb as a response to the last control request + * received on endpoint 0, provided that was an IN (OUT for READ) request and + * waits until the urb is completed. Copies received data to user for READ. + * Accepts a pointer to the usb_raw_ep_io struct as an argument. + * Returns length of transferred data on success or negative error code on + * failure. + */ +#define USB_RAW_IOCTL_EP0_WRITE _IOW('U', 3, struct usb_raw_ep_io) +#define USB_RAW_IOCTL_EP0_READ _IOWR('U', 4, struct usb_raw_ep_io) + +/* + * Finds an endpoint that supports the transfer type specified in the + * descriptor and enables it. + * Accepts a pointer to the usb_endpoint_descriptor struct as an argument. + * Returns enabled endpoint handle on success or negative error code on failure. + */ +#define USB_RAW_IOCTL_EP_ENABLE _IOW('U', 5, struct usb_endpoint_descriptor) + +/* Disables specified endpoint. + * Accepts endpoint handle as an argument. + * Returns 0 on success or negative error code on failure.
+ */ +#define USB_RAW_IOCTL_EP_DISABLE _IOW('U', 6, __u32) + +/* + * Queues an IN (OUT for READ) urb as a response to the last control request + * received on endpoint usb_raw_ep_io.ep, provided that was an IN (OUT for READ) + * request and waits until the urb is completed. Copies received data to user + * for READ. + * Accepts a pointer to the usb_raw_ep_io struct as an argument. + * Returns length of transferred data on success or negative error code on + * failure. + */ +#define USB_RAW_IOCTL_EP_WRITE _IOW('U', 7, struct usb_raw_ep_io) +#define USB_RAW_IOCTL_EP_READ _IOWR('U', 8, struct usb_raw_ep_io) + +/* + * Switches the gadget into the configured state. + * Returns 0 on success or negative error code on failure. + */ +#define USB_RAW_IOCTL_CONFIGURE _IO('U', 9) + +/* + * Constrains UDC VBUS power usage. + * Accepts current limit in 2 mA units as an argument. + * Returns 0 on success or negative error code on failure. + */ +#define USB_RAW_IOCTL_VBUS_DRAW _IOW('U', 10, __u32) + +#endif /* _UAPI__LINUX_USB_RAW_GADGET_H */ -- GitLab From c1a6370ee684266c191d773106bcd9895ed7e555 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 17 Mar 2020 15:54:31 +0100 Subject: [PATCH 0562/1278] UPSTREAM: usb: raw_gadget: fix compilation warnings in uapi headers (Upstream commit 956ae8df7fed0a2acc7ae934f24493eaeb031f62.)
Mark usb_raw_io_flags_valid() and usb_raw_io_flags_zero() as inline to fix the following warnings: ./usr/include/linux/usb/raw_gadget.h:69:12: warning: unused function 'usb_raw_io_flags_valid' [-Wunused-function] ./usr/include/linux/usb/raw_gadget.h:74:12: warning: unused function 'usb_raw_io_flags_zero' [-Wunused-function] Reported-by: kernelci.org bot Reported-by: Stephen Rothwell Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/6206b80b3810f95bfe1d452de45596609a07b6ea.1584456779.git.andreyknvl@google.com Signed-off-by: Greg Kroah-Hartman Bug: 147413187 Signed-off-by: Andrey Konovalov Change-Id: Ief46cc5076f202fe058508ee11839708395842dd --- include/uapi/linux/usb/raw_gadget.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/usb/raw_gadget.h b/include/uapi/linux/usb/raw_gadget.h index 00cbded71061..ea375082b3ac 100644 --- a/include/uapi/linux/usb/raw_gadget.h +++ b/include/uapi/linux/usb/raw_gadget.h @@ -66,12 +66,12 @@ struct usb_raw_event { #define USB_RAW_IO_FLAGS_ZERO 0x0001 #define USB_RAW_IO_FLAGS_MASK 0x0001 -static int usb_raw_io_flags_valid(__u16 flags) +static inline int usb_raw_io_flags_valid(__u16 flags) { return (flags & ~USB_RAW_IO_FLAGS_MASK) == 0; } -static int usb_raw_io_flags_zero(__u16 flags) +static inline int usb_raw_io_flags_zero(__u16 flags) { return (flags & USB_RAW_IO_FLAGS_ZERO); } -- GitLab From e9c80bd9a5a68801d02623a28406e2449a904a5e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 14 Mar 2020 13:50:49 -0700 Subject: [PATCH 0563/1278] UPSTREAM: fscrypt: add FS_IOC_GET_ENCRYPTION_NONCE ioctl Add an ioctl FS_IOC_GET_ENCRYPTION_NONCE which retrieves the nonce from an encrypted file or directory. The nonce is the 16-byte random value stored in the inode's encryption xattr. It is normally used together with the master key to derive the inode's actual encryption key. The nonces are needed by automated tests that verify the correctness of the ciphertext on-disk. 
Except for the IV_INO_LBLK_64 case, there's no way to replicate a file's ciphertext without knowing that file's nonce. The nonces aren't secret, and the existing ciphertext verification tests in xfstests retrieve them from disk using debugfs or dump.f2fs. But in environments that lack these debugging tools, getting the nonces by manually parsing the filesystem structure would be very hard. To make this important type of testing much easier, let's just add an ioctl that retrieves the nonce. Link: https://lore.kernel.org/r/20200314205052.93294-2-ebiggers@kernel.org Reviewed-by: Theodore Ts'o (cherry picked from commit e98ad464750c0894bc560d10503dae8ff90ccdac) Bug: 151100202 Change-Id: Ieef2c57b9257ae38eb2e5c1a018ca6f325bb62dd Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 11 +++++++++++ fs/crypto/fscrypt_private.h | 20 ++++++++++++++++++++ fs/crypto/keysetup.c | 16 ++-------------- fs/crypto/policy.c | 21 ++++++++++++++++++++- include/linux/fscrypt.h | 6 ++++++ include/uapi/linux/fscrypt.h | 1 + 6 files changed, 60 insertions(+), 15 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index fbcd185d15da..dc444b8d3704 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -633,6 +633,17 @@ from a passphrase or other low-entropy user credential. FS_IOC_GET_ENCRYPTION_PWSALT is deprecated. Instead, prefer to generate and manage any needed salt(s) in userspace. +Getting a file's encryption nonce +--------------------------------- + +Since Linux v5.7, the ioctl FS_IOC_GET_ENCRYPTION_NONCE is supported. +On encrypted files and directories it gets the inode's 16-byte nonce. +On unencrypted files and directories, it fails with ENODATA. + +This ioctl can be useful for automated tests which verify that the +encryption is being done correctly. It is not needed for normal use +of fscrypt. 
+ Adding keys ----------- diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index f78cc4dfb452..f262f823fd58 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -78,6 +78,26 @@ static inline int fscrypt_context_size(const union fscrypt_context *ctx) return 0; } +/* Check whether an fscrypt_context has a recognized version number and size */ +static inline bool fscrypt_context_is_valid(const union fscrypt_context *ctx, + int ctx_size) +{ + return ctx_size >= 1 && ctx_size == fscrypt_context_size(ctx); +} + +/* Retrieve the context's nonce, assuming the context was already validated */ +static inline const u8 *fscrypt_context_nonce(const union fscrypt_context *ctx) +{ + switch (ctx->version) { + case FSCRYPT_CONTEXT_V1: + return ctx->v1.nonce; + case FSCRYPT_CONTEXT_V2: + return ctx->v2.nonce; + } + WARN_ON(1); + return NULL; +} + #undef fscrypt_policy union fscrypt_policy { u8 version; diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index f4c6d8cb4587..e52bbbe68dc8 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -494,20 +494,8 @@ int fscrypt_get_encryption_info(struct inode *inode) goto out; } - switch (ctx.version) { - case FSCRYPT_CONTEXT_V1: - memcpy(crypt_info->ci_nonce, ctx.v1.nonce, - FS_KEY_DERIVATION_NONCE_SIZE); - break; - case FSCRYPT_CONTEXT_V2: - memcpy(crypt_info->ci_nonce, ctx.v2.nonce, - FS_KEY_DERIVATION_NONCE_SIZE); - break; - default: - WARN_ON(1); - res = -EINVAL; - goto out; - } + memcpy(crypt_info->ci_nonce, fscrypt_context_nonce(&ctx), + FS_KEY_DERIVATION_NONCE_SIZE); if (!fscrypt_supported_policy(&crypt_info->ci_policy, inode)) { res = -EINVAL; diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index cf2a9d26ef7d..10ccf945020c 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -258,7 +258,7 @@ int fscrypt_policy_from_context(union fscrypt_policy *policy_u, { memset(policy_u, 0, sizeof(*policy_u)); - if (ctx_size <= 0 || ctx_size != fscrypt_context_size(ctx_u)) + 
if (!fscrypt_context_is_valid(ctx_u, ctx_size)) return -EINVAL; switch (ctx_u->version) { @@ -481,6 +481,25 @@ int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *uarg) } EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_policy_ex); +/* FS_IOC_GET_ENCRYPTION_NONCE: retrieve file's encryption nonce for testing */ +int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg) +{ + struct inode *inode = file_inode(filp); + union fscrypt_context ctx; + int ret; + + ret = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); + if (ret < 0) + return ret; + if (!fscrypt_context_is_valid(&ctx, ret)) + return -EINVAL; + if (copy_to_user(arg, fscrypt_context_nonce(&ctx), + FS_KEY_DERIVATION_NONCE_SIZE)) + return -EFAULT; + return 0; +} +EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_nonce); + /** * fscrypt_has_permitted_context() - is a file's encryption policy permitted * within its directory? diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index db2dbc0b972a..2a9cd6c5f361 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -144,6 +144,7 @@ extern int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); extern int fscrypt_ioctl_get_policy(struct file *, void __user *); extern int fscrypt_ioctl_get_policy_ex(struct file *, void __user *); +extern int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg); extern int fscrypt_has_permitted_context(struct inode *, struct inode *); extern int fscrypt_inherit_context(struct inode *, struct inode *, void *, bool); @@ -302,6 +303,11 @@ static inline int fscrypt_ioctl_get_policy_ex(struct file *filp, return -EOPNOTSUPP; } +static inline int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg) +{ + return -EOPNOTSUPP; +} + static inline int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) { diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index 
0c9a8b81d357..320a6381576e 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -166,6 +166,7 @@ struct fscrypt_get_key_status_arg { #define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg) #define FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS _IOWR('f', 25, struct fscrypt_remove_key_arg) #define FS_IOC_GET_ENCRYPTION_KEY_STATUS _IOWR('f', 26, struct fscrypt_get_key_status_arg) +#define FS_IOC_GET_ENCRYPTION_NONCE _IOR('f', 27, __u8[16]) /**********************************************************************/ -- GitLab From 10e5acfbee42fd3d9bebf8209128d636679d1153 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 14 Mar 2020 13:50:50 -0700 Subject: [PATCH 0564/1278] UPSTREAM: ext4: wire up FS_IOC_GET_ENCRYPTION_NONCE This new ioctl retrieves a file's encryption nonce, which is useful for testing. See the corresponding fs/crypto/ patch for more details. Link: https://lore.kernel.org/r/20200314205052.93294-3-ebiggers@kernel.org Reviewed-by: Theodore Ts'o (cherry picked from commit 7ec9f3b47aba0fe715bf3472ed80e91c37970363) Bug: 151100202 Change-Id: I85350aed66285b92444d37c8cd840fb03d2ca25d Signed-off-by: Eric Biggers --- fs/ext4/ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index f852d90a1562..5239395de225 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1100,6 +1100,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EOPNOTSUPP; return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); + case FS_IOC_GET_ENCRYPTION_NONCE: + if (!ext4_has_feature_encrypt(sb)) + return -EOPNOTSUPP; + return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); + case EXT4_IOC_FSGETXATTR: { struct fsxattr fa; @@ -1243,6 +1248,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_REMOVE_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: case FS_IOC_GET_ENCRYPTION_KEY_STATUS: + case 
FS_IOC_GET_ENCRYPTION_NONCE: case EXT4_IOC_SHUTDOWN: case FS_IOC_GETFSMAP: case FS_IOC_ENABLE_VERITY: -- GitLab From 379237bb9d7b140740b5487ead8d64a3ff0ca8b5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 14 Mar 2020 13:50:51 -0700 Subject: [PATCH 0565/1278] UPSTREAM: f2fs: wire up FS_IOC_GET_ENCRYPTION_NONCE This new ioctl retrieves a file's encryption nonce, which is useful for testing. See the corresponding fs/crypto/ patch for more details. Link: https://lore.kernel.org/r/20200314205052.93294-4-ebiggers@kernel.org (cherry picked from commit ee446e1af4ff90945e2273ae0af322b8b745b8cc) Bug: 151100202 Change-Id: I26037efc77774c04951da887d8557a15ce6a23ed Signed-off-by: Eric Biggers --- fs/f2fs/file.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f77fa8430d6f..e4e4961c3dd3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2440,6 +2440,14 @@ static int f2fs_ioc_get_encryption_key_status(struct file *filp, return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); } +static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg) +{ + if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) + return -EOPNOTSUPP; + + return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); +} + static int f2fs_ioc_gc(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -3392,6 +3400,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_remove_encryption_key_all_users(filp, arg); case FS_IOC_GET_ENCRYPTION_KEY_STATUS: return f2fs_ioc_get_encryption_key_status(filp, arg); + case FS_IOC_GET_ENCRYPTION_NONCE: + return f2fs_ioc_get_encryption_nonce(filp, arg); case F2FS_IOC_GARBAGE_COLLECT: return f2fs_ioc_gc(filp, arg); case F2FS_IOC_GARBAGE_COLLECT_RANGE: @@ -3561,6 +3571,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_REMOVE_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: case 
FS_IOC_GET_ENCRYPTION_KEY_STATUS: + case FS_IOC_GET_ENCRYPTION_NONCE: case F2FS_IOC_GARBAGE_COLLECT: case F2FS_IOC_GARBAGE_COLLECT_RANGE: case F2FS_IOC_WRITE_CHECKPOINT: -- GitLab From c57952b019ae4245dfd0d974405a2e8029af2cb3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 14 Mar 2020 13:50:52 -0700 Subject: [PATCH 0566/1278] UPSTREAM: ubifs: wire up FS_IOC_GET_ENCRYPTION_NONCE This new ioctl retrieves a file's encryption nonce, which is useful for testing. See the corresponding fs/crypto/ patch for more details. Link: https://lore.kernel.org/r/20200314205052.93294-5-ebiggers@kernel.org Change-Id: I8f15e873b41e036bc039119a579beac1b78f937b Bug: 151100202 (cherry picked from commit 861261f2a9cc488c845fc214d9035f7a11094591) Signed-off-by: Eric Biggers --- fs/ubifs/ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 203067d39855..daa59b98c8b4 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -229,6 +229,9 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_GET_ENCRYPTION_KEY_STATUS: return fscrypt_ioctl_get_key_status(file, (void __user *)arg); + case FS_IOC_GET_ENCRYPTION_NONCE: + return fscrypt_ioctl_get_nonce(file, (void __user *)arg); + default: return -ENOTTY; } @@ -251,6 +254,7 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_REMOVE_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: case FS_IOC_GET_ENCRYPTION_KEY_STATUS: + case FS_IOC_GET_ENCRYPTION_NONCE: break; default: return -ENOIOCTLCMD; -- GitLab From 227507aa68648f6926b57ac7338047665c5d4fc1 Mon Sep 17 00:00:00 2001 From: Yuji Sasaki Date: Fri, 14 Feb 2020 13:13:40 +0530 Subject: [PATCH 0567/1278] spi: qup: call spi_qup_pm_resume_runtime before suspending [ Upstream commit 136b5cd2e2f97581ae560cff0db2a3b5369112da ] spi_qup_suspend() will cause synchronous external abort when runtime suspend is enabled and applied, as it tries to access SPI controller 
register while clock is already disabled in spi_qup_pm_suspend_runtime(). Signed-off-by: Yuji sasaki Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20200214074340.2286170-1-vkoul@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-qup.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index 974a8ce58b68..cb74fd1af205 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -1190,6 +1190,11 @@ static int spi_qup_suspend(struct device *device) struct spi_qup *controller = spi_master_get_devdata(master); int ret; + if (pm_runtime_suspended(device)) { + ret = spi_qup_pm_resume_runtime(device); + if (ret) + return ret; + } ret = spi_master_suspend(master); if (ret) return ret; @@ -1198,10 +1203,8 @@ static int spi_qup_suspend(struct device *device) if (ret) return ret; - if (!pm_runtime_suspended(device)) { - clk_disable_unprepare(controller->cclk); - clk_disable_unprepare(controller->iclk); - } + clk_disable_unprepare(controller->cclk); + clk_disable_unprepare(controller->iclk); return 0; } -- GitLab From 9e92bbac2d92c72fff268e0fe447adc3bcc9e28e Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 20 Feb 2020 17:01:32 +0530 Subject: [PATCH 0568/1278] powerpc: Include .BTF section [ Upstream commit cb0cc635c7a9fa8a3a0f75d4d896721819c63add ] Selecting CONFIG_DEBUG_INFO_BTF results in the below warning from ld: ld: warning: orphan section `.BTF' from `.btf.vmlinux.bin.o' being placed in section `.BTF' Include .BTF section in vmlinux explicitly to fix the same. Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200220113132.857132-1-naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/vmlinux.lds.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index b0cf4af7ba84..e4da937d6cf9 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -317,6 +317,12 @@ SECTIONS *(.branch_lt) } +#ifdef CONFIG_DEBUG_INFO_BTF + .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { + *(.BTF) + } +#endif + .opd : AT(ADDR(.opd) - LOAD_OFFSET) { *(.opd) } -- GitLab From 5567dc658363d57edda0a000e5edabaacf4ee7d4 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 28 Jan 2020 12:11:47 +0530 Subject: [PATCH 0569/1278] ARM: dts: dra7: Add "dma-ranges" property to PCIe RC DT nodes [ Upstream commit 27f13774654ea6bd0b6fc9b97cce8d19e5735661 ] 'dma-ranges' in a PCI bridge node does correctly set dma masks for PCI devices not described in the DT. Certain DRA7 platforms (e.g., DRA76) has RAM above 32-bit boundary (accessible with LPAE config) though the PCIe bridge will be able to access only 32-bits. Add 'dma-ranges' property in PCIe RC DT nodes to indicate the host bridge can access only 32 bits. 
Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/dra7.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index fec965009b9f..a40a7af85d02 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -302,6 +302,7 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x20013000 0x13000 0 0xffed000>; + dma-ranges = <0x02000000 0x0 0x00000000 0x00000000 0x1 0x00000000>; bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; @@ -356,6 +357,7 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x30013000 0x13000 0 0xffed000>; + dma-ranges = <0x02000000 0x0 0x00000000 0x00000000 0x1 0x00000000>; bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; -- GitLab From 80211ce55d8ca17deeebdf90171493f597f01bb4 Mon Sep 17 00:00:00 2001 From: Evan Green Date: Tue, 11 Feb 2020 14:37:00 -0800 Subject: [PATCH 0570/1278] spi: pxa2xx: Add CS control clock quirk [ Upstream commit 683f65ded66a9a7ff01ed7280804d2132ebfdf7e ] In some circumstances on Intel LPSS controllers, toggling the LPSS CS control register doesn't actually cause the CS line to toggle. This seems to be failure of dynamic clock gating that occurs after going through a suspend/resume transition, where the controller is sent through a reset transition. This ruins SPI transactions that either rely on delay_usecs, or toggle the CS line without sending data. Whenever CS is toggled, momentarily set the clock gating register to "Force On" to poke the controller into acting on CS. 
Signed-off-by: Rajat Jain Signed-off-by: Evan Green Link: https://lore.kernel.org/r/20200211223700.110252-1-rajatja@google.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-pxa2xx.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index b2245cdce230..5160e16d3a98 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -76,6 +76,10 @@ MODULE_ALIAS("platform:pxa2xx-spi"); #define LPSS_CAPS_CS_EN_SHIFT 9 #define LPSS_CAPS_CS_EN_MASK (0xf << LPSS_CAPS_CS_EN_SHIFT) +#define LPSS_PRIV_CLOCK_GATE 0x38 +#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK 0x3 +#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON 0x3 + struct lpss_config { /* LPSS offset from drv_data->ioaddr */ unsigned offset; @@ -92,6 +96,8 @@ struct lpss_config { unsigned cs_sel_shift; unsigned cs_sel_mask; unsigned cs_num; + /* Quirks */ + unsigned cs_clk_stays_gated : 1; }; /* Keep these sorted with enum pxa_ssp_type */ @@ -162,6 +168,7 @@ static const struct lpss_config lpss_platforms[] = { .tx_threshold_hi = 56, .cs_sel_shift = 8, .cs_sel_mask = 3 << 8, + .cs_clk_stays_gated = true, }, }; @@ -385,6 +392,22 @@ static void lpss_ssp_cs_control(struct driver_data *drv_data, bool enable) else value |= LPSS_CS_CONTROL_CS_HIGH; __lpss_ssp_write_priv(drv_data, config->reg_cs_ctrl, value); + if (config->cs_clk_stays_gated) { + u32 clkgate; + + /* + * Changing CS alone when dynamic clock gating is on won't + * actually flip CS at that time. This ruins SPI transfers + * that specify delays, or have no data. Toggle the clock mode + * to force on briefly to poke the CS pin to move. 
+ */ + clkgate = __lpss_ssp_read_priv(drv_data, LPSS_PRIV_CLOCK_GATE); + value = (clkgate & ~LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK) | + LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON; + + __lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, value); + __lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, clkgate); + } } static void cs_assert(struct driver_data *drv_data) -- GitLab From caf5689f36a7af2f4949068ac95f56b0a055eafa Mon Sep 17 00:00:00 2001 From: Thommy Jakobsson Date: Mon, 24 Feb 2020 17:26:43 +0100 Subject: [PATCH 0571/1278] spi/zynqmp: remove entry that causes a cs glitch [ Upstream commit 5dd8304981ecffa77bb72b1c57c4be5dfe6cfae9 ] In the public interface for chipselect, there is always an entry commented as "Dummy generic FIFO entry" pushed down to the fifo right after the activate/deactivate command. The dummy entry is 0x0, irregardless if the intention was to activate or deactive the cs. This causes the cs line to glitch rather than beeing activated in the case when there was an activate command. This has been observed on oscilloscope, and have caused problems for at least one specific flash device type connected to the qspi port. After the change the glitch is gone and cs goes active when intended. The reason why this worked before (except for the glitch) was because when sending the actual data, the CS bits are once again set. Since most flashes uses mode 0, there is always a half clk period anyway for cs to clk active setup time. If someone would rely on timing from a chip_select call to a transfer_one, it would fail though. It is unknown why the dummy entry was there in the first place, git log seems to be of no help in this case. The reference manual gives no indication of the necessity of this. In fact the lower 8 bits are a setup (or hold in case of deactivate) time expressed in cycles. So this should not be needed to fulfill any setup/hold timings. 
Signed-off-by: Thommy Jakobsson Reviewed-by: Naga Sureshkumar Relli Link: https://lore.kernel.org/r/20200224162643.29102-1-thommyj@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-zynqmp-gqspi.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index 18aeaceee286..d26c0eda2d9e 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -415,9 +415,6 @@ static void zynqmp_qspi_chipselect(struct spi_device *qspi, bool is_high) zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, genfifoentry); - /* Dummy generic FIFO entry */ - zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, 0x0); - /* Manually start the generic FIFO command */ zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) | -- GitLab From 793918d828f64f5413c2a6f118591ff1814cba69 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 27 Feb 2020 08:06:37 +0100 Subject: [PATCH 0572/1278] drm/exynos: dsi: propagate error value and silence meaningless warning [ Upstream commit 0a9d1e3f3f038785ebc72d53f1c409d07f6b4ff5 ] Properly propagate error value from devm_regulator_bulk_get() and don't confuse user with meaningless warning about failure in getting regulators in case of deferred probe. 
Signed-off-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Inki Dae Signed-off-by: Sasha Levin --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 7904ffa9abfb..aef18f807e38 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1739,8 +1739,9 @@ static int exynos_dsi_probe(struct platform_device *pdev) ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(dsi->supplies), dsi->supplies); if (ret) { - dev_info(dev, "failed to get regulators: %d\n", ret); - return -EPROBE_DEFER; + if (ret != -EPROBE_DEFER) + dev_info(dev, "failed to get regulators: %d\n", ret); + return ret; } dsi->clks = devm_kzalloc(dev, -- GitLab From cb18f536e0f013e3cb1fd9cd90c33f9e24d3892e Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 20 Feb 2020 13:30:12 +0100 Subject: [PATCH 0573/1278] drm/exynos: dsi: fix workaround for the legacy clock name [ Upstream commit c0fd99d659ba5582e09625c7a985d63fc2ca74b5 ] Writing to the built-in strings arrays doesn't work if driver is loaded as kernel module. This is also considered as a bad pattern. Fix this by adding a call to clk_get() with legacy clock name. This fixes following kernel oops if driver is loaded as module: Unable to handle kernel paging request at virtual address bf047978 pgd = (ptrval) [bf047978] *pgd=59344811, *pte=5903c6df, *ppte=5903c65f Internal error: Oops: 80f [#1] SMP ARM Modules linked in: mc exynosdrm(+) analogix_dp rtc_s3c exynos_ppmu i2c_gpio CPU: 1 PID: 212 Comm: systemd-udevd Not tainted 5.6.0-rc2-next-20200219 #326 videodev: Linux video capture interface: v2.00 Hardware name: Samsung Exynos (Flattened Device Tree) PC is at exynos_dsi_probe+0x1f0/0x384 [exynosdrm] LR is at exynos_dsi_probe+0x1dc/0x384 [exynosdrm] ... Process systemd-udevd (pid: 212, stack limit = 0x(ptrval)) ... 
[] (exynos_dsi_probe [exynosdrm]) from [] (platform_drv_probe+0x6c/0xa4) [] (platform_drv_probe) from [] (really_probe+0x210/0x350) [] (really_probe) from [] (driver_probe_device+0x60/0x1a0) [] (driver_probe_device) from [] (device_driver_attach+0x58/0x60) [] (device_driver_attach) from [] (__driver_attach+0x80/0xbc) [] (__driver_attach) from [] (bus_for_each_dev+0x68/0xb4) [] (bus_for_each_dev) from [] (bus_add_driver+0x130/0x1e8) [] (bus_add_driver) from [] (driver_register+0x78/0x110) [] (driver_register) from [] (exynos_drm_init+0xe8/0x11c [exynosdrm]) [] (exynos_drm_init [exynosdrm]) from [] (do_one_initcall+0x50/0x220) [] (do_one_initcall) from [] (do_init_module+0x60/0x210) [] (do_init_module) from [] (load_module+0x1c0c/0x2310) [] (load_module) from [] (sys_finit_module+0xac/0xbc) [] (sys_finit_module) from [] (ret_fast_syscall+0x0/0x54) Exception stack(0xd979bfa8 to 0xd979bff0) ... ---[ end trace db16efe05faab470 ]--- Signed-off-by: Marek Szyprowski Reviewed-by: Andrzej Hajda Signed-off-by: Inki Dae Signed-off-by: Sasha Levin --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index aef18f807e38..366c975cde5b 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1754,9 +1754,10 @@ static int exynos_dsi_probe(struct platform_device *pdev) dsi->clks[i] = devm_clk_get(dev, clk_names[i]); if (IS_ERR(dsi->clks[i])) { if (strcmp(clk_names[i], "sclk_mipi") == 0) { - strcpy(clk_names[i], OLD_SCLK_MIPI_CLK_NAME); - i--; - continue; + dsi->clks[i] = devm_clk_get(dev, + OLD_SCLK_MIPI_CLK_NAME); + if (!IS_ERR(dsi->clks[i])) + continue; } dev_info(dev, "failed to get the clock: %s\n", -- GitLab From f746a3d21fb4458c03286ad1aba8a1a16fa7e173 Mon Sep 17 00:00:00 2001 From: luanshi Date: Wed, 26 Feb 2020 13:45:10 +0800 Subject: [PATCH 0574/1278] drivers/perf: arm_pmu_acpi: Fix 
incorrect checking of gicc pointer [ Upstream commit 3ba52ad55b533760a1f65836aa0ec9d35e36bb4f ] Fix bogus NULL checks on the return value of acpi_cpu_get_madt_gicc() by checking for a 0 'gicc->performance_interrupt' value instead. Signed-off-by: Liguang Zhang Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/perf/arm_pmu_acpi.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 3303dd8d8eb5..604e549a9a47 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -25,8 +25,6 @@ static int arm_pmu_acpi_register_irq(int cpu) int gsi, trigger; gicc = acpi_cpu_get_madt_gicc(cpu); - if (WARN_ON(!gicc)) - return -EINVAL; gsi = gicc->performance_interrupt; @@ -65,11 +63,10 @@ static void arm_pmu_acpi_unregister_irq(int cpu) int gsi; gicc = acpi_cpu_get_madt_gicc(cpu); - if (!gicc) - return; gsi = gicc->performance_interrupt; - acpi_unregister_gsi(gsi); + if (gsi) + acpi_unregister_gsi(gsi); } static int arm_pmu_acpi_parse_irqs(void) -- GitLab From f30c7bbc98f35a454acc8ed4e4ac0a096de2f1e8 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 25 Feb 2020 10:44:27 -0800 Subject: [PATCH 0575/1278] altera-stapl: altera_get_note: prevent write beyond end of 'key' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3745488e9d599916a0b40d45d3f30e3d4720288e ] altera_get_note is called from altera_init, where key is kzalloc(33). 
When the allocation functions are annotated to allow the compiler to see the sizes of objects, and with FORTIFY_SOURCE, we see: In file included from drivers/misc/altera-stapl/altera.c:14:0: In function ‘strlcpy’, inlined from ‘altera_init’ at drivers/misc/altera-stapl/altera.c:2189:5: include/linux/string.h:378:4: error: call to ‘__write_overflow’ declared with attribute error: detected write beyond size of object passed as 1st parameter __write_overflow(); ^~~~~~~~~~~~~~~~~~ That refers to this code in altera_get_note: if (key != NULL) strlcpy(key, &p[note_strings + get_unaligned_be32( &p[note_table + (8 * i)])], length); The error triggers because the length of 'key' is 33, but the copy uses length supplied as the 'length' parameter, which is always 256. Split the size parameter into key_len and val_len, and use the appropriate length depending on what is being copied. Detected by compiler error, only compile-tested. Cc: "Igor M. Liplianin" Signed-off-by: Daniel Axtens Link: https://lore.kernel.org/r/20200120074344.504-2-dja@axtens.net Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/202002251042.D898E67AC@keescook Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/altera-stapl/altera.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c index 494e263daa74..b7ee8043a133 100644 --- a/drivers/misc/altera-stapl/altera.c +++ b/drivers/misc/altera-stapl/altera.c @@ -2126,8 +2126,8 @@ static int altera_execute(struct altera_state *astate, return status; } -static int altera_get_note(u8 *p, s32 program_size, - s32 *offset, char *key, char *value, int length) +static int altera_get_note(u8 *p, s32 program_size, s32 *offset, + char *key, char *value, int keylen, int vallen) /* * Gets key and value of NOTE fields in the JBC file. 
* Can be called in two modes: if offset pointer is NULL, @@ -2184,7 +2184,7 @@ static int altera_get_note(u8 *p, s32 program_size, &p[note_table + (8 * i) + 4])]; if (value != NULL) - strlcpy(value, value_ptr, length); + strlcpy(value, value_ptr, vallen); } } @@ -2203,13 +2203,13 @@ static int altera_get_note(u8 *p, s32 program_size, strlcpy(key, &p[note_strings + get_unaligned_be32( &p[note_table + (8 * i)])], - length); + keylen); if (value != NULL) strlcpy(value, &p[note_strings + get_unaligned_be32( &p[note_table + (8 * i) + 4])], - length); + vallen); *offset = i + 1; } @@ -2463,7 +2463,7 @@ int altera_init(struct altera_config *config, const struct firmware *fw) __func__, (format_version == 2) ? "Jam STAPL" : "pre-standardized Jam 1.1"); while (altera_get_note((u8 *)fw->data, fw->size, - &offset, key, value, 256) == 0) + &offset, key, value, 32, 256) == 0) printk(KERN_INFO "%s: NOTE \"%s\" = \"%s\"\n", __func__, key, value); } -- GitLab From 4ec34f2196d125ff781170ddc6c3058c08ec5e73 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 28 Feb 2020 18:00:53 -0500 Subject: [PATCH 0576/1278] dm bio record: save/restore bi_end_io and bi_integrity [ Upstream commit 1b17159e52bb31f982f82a6278acd7fab1d3f67b ] Also, save/restore __bi_remaining in case the bio was used in a BIO_CHAIN (e.g. due to blk_queue_split). 
Suggested-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-bio-record.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h index c82578af56a5..2ea0360108e1 100644 --- a/drivers/md/dm-bio-record.h +++ b/drivers/md/dm-bio-record.h @@ -20,8 +20,13 @@ struct dm_bio_details { struct gendisk *bi_disk; u8 bi_partno; + int __bi_remaining; unsigned long bi_flags; struct bvec_iter bi_iter; + bio_end_io_t *bi_end_io; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + struct bio_integrity_payload *bi_integrity; +#endif }; static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) @@ -30,6 +35,11 @@ static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) bd->bi_partno = bio->bi_partno; bd->bi_flags = bio->bi_flags; bd->bi_iter = bio->bi_iter; + bd->__bi_remaining = atomic_read(&bio->__bi_remaining); + bd->bi_end_io = bio->bi_end_io; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + bd->bi_integrity = bio_integrity(bio); +#endif } static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) @@ -38,6 +48,11 @@ static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) bio->bi_partno = bd->bi_partno; bio->bi_flags = bd->bi_flags; bio->bi_iter = bd->bi_iter; + atomic_set(&bio->__bi_remaining, bd->__bi_remaining); + bio->bi_end_io = bd->bi_end_io; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + bio->bi_integrity = bd->bi_integrity; +#endif } #endif -- GitLab From 7eb4641a6ae2db87b8799c142e77081f4ee3e8b7 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Tue, 3 Mar 2020 14:14:22 -0800 Subject: [PATCH 0577/1278] xenbus: req->body should be updated before req->state [ Upstream commit 1b6a51e86cce38cf4d48ce9c242120283ae2f603 ] The req->body should be updated before req->state is updated and the order should be guaranteed by a barrier. Otherwise, read_reply() might return req->body = NULL. 
Below is sample callstack when the issue is reproduced on purpose by reordering the updates of req->body and req->state and adding delay in code between updates of req->state and req->body. [ 22.356105] general protection fault: 0000 [#1] SMP PTI [ 22.361185] CPU: 2 PID: 52 Comm: xenwatch Not tainted 5.5.0xen+ #6 [ 22.366727] Hardware name: Xen HVM domU, BIOS ... [ 22.372245] RIP: 0010:_parse_integer_fixup_radix+0x6/0x60 ... ... [ 22.392163] RSP: 0018:ffffb2d64023fdf0 EFLAGS: 00010246 [ 22.395933] RAX: 0000000000000000 RBX: 75746e7562755f6d RCX: 0000000000000000 [ 22.400871] RDX: 0000000000000000 RSI: ffffb2d64023fdfc RDI: 75746e7562755f6d [ 22.405874] RBP: 0000000000000000 R08: 00000000000001e8 R09: 0000000000cdcdcd [ 22.410945] R10: ffffb2d6402ffe00 R11: ffff9d95395eaeb0 R12: ffff9d9535935000 [ 22.417613] R13: ffff9d9526d4a000 R14: ffff9d9526f4f340 R15: ffff9d9537654000 [ 22.423726] FS: 0000000000000000(0000) GS:ffff9d953bc80000(0000) knlGS:0000000000000000 [ 22.429898] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 22.434342] CR2: 000000c4206a9000 CR3: 00000001ea3fc002 CR4: 00000000001606e0 [ 22.439645] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 22.444941] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 22.450342] Call Trace: [ 22.452509] simple_strtoull+0x27/0x70 [ 22.455572] xenbus_transaction_start+0x31/0x50 [ 22.459104] netback_changed+0x76c/0xcc1 [xen_netfront] [ 22.463279] ? find_watch+0x40/0x40 [ 22.466156] xenwatch_thread+0xb4/0x150 [ 22.469309] ? wait_woken+0x80/0x80 [ 22.472198] kthread+0x10e/0x130 [ 22.474925] ? kthread_park+0x80/0x80 [ 22.477946] ret_from_fork+0x35/0x40 [ 22.480968] Modules linked in: xen_kbdfront xen_fbfront(+) xen_netfront xen_blkfront [ 22.486783] ---[ end trace a9222030a747c3f7 ]--- [ 22.490424] RIP: 0010:_parse_integer_fixup_radix+0x6/0x60 The virt_rmb() is added in the 'true' path of test_reply(). 
The "while" is changed to "do while" so that test_reply() is used as a read memory barrier. Signed-off-by: Dongli Zhang Link: https://lore.kernel.org/r/20200303221423.21962-1-dongli.zhang@oracle.com Reviewed-by: Julien Grall Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin --- drivers/xen/xenbus/xenbus_comms.c | 2 ++ drivers/xen/xenbus/xenbus_xs.c | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index d239fc3c5e3d..852ed161fc2a 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -313,6 +313,8 @@ static int process_msg(void) req->msg.type = state.msg.type; req->msg.len = state.msg.len; req->body = state.body; + /* write body, then update state */ + virt_wmb(); req->state = xb_req_state_got_reply; req->cb(req); } else diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 3f3b29398ab8..b609c6e08796 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -188,8 +188,11 @@ static bool xenbus_ok(void) static bool test_reply(struct xb_req_data *req) { - if (req->state == xb_req_state_got_reply || !xenbus_ok()) + if (req->state == xb_req_state_got_reply || !xenbus_ok()) { + /* read req->state before all other fields */ + virt_rmb(); return true; + } /* Make sure to reread req->state each time. 
*/ barrier(); @@ -199,7 +202,7 @@ static bool test_reply(struct xb_req_data *req) static void *read_reply(struct xb_req_data *req) { - while (req->state != xb_req_state_got_reply) { + do { wait_event(req->wq, test_reply(req)); if (!xenbus_ok()) @@ -213,7 +216,7 @@ static void *read_reply(struct xb_req_data *req) if (req->err) return ERR_PTR(req->err); - } + } while (req->state != xb_req_state_got_reply); return req->body; } -- GitLab From ccbedf8c1ed38e291259c9a20010f8cb9a3d162f Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Tue, 3 Mar 2020 14:14:23 -0800 Subject: [PATCH 0578/1278] xenbus: req->err should be updated before req->state [ Upstream commit 8130b9d5b5abf26f9927b487c15319a187775f34 ] This patch adds the barrier to guarantee that req->err is always updated before req->state. Otherwise, read_reply() would not return ERR_PTR(req->err) but req->body, when process_writes()->xb_write() is failed. Signed-off-by: Dongli Zhang Link: https://lore.kernel.org/r/20200303221423.21962-2-dongli.zhang@oracle.com Reviewed-by: Julien Grall Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin --- drivers/xen/xenbus/xenbus_comms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 852ed161fc2a..eb5151fc8efa 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -397,6 +397,8 @@ static int process_writes(void) if (state.req->state == xb_req_state_aborted) kfree(state.req); else { + /* write err, then update state */ + virt_wmb(); state.req->state = xb_req_state_got_reply; wake_up(&state.req->wq); } -- GitLab From c074dded84884b2e17e7c0cba378578f28a133ca Mon Sep 17 00:00:00 2001 From: Carlo Nonato Date: Fri, 6 Mar 2020 13:27:31 +0100 Subject: [PATCH 0579/1278] block, bfq: fix overwrite of bfq_group pointer in bfq_find_set_group() [ Upstream commit 14afc59361976c0ba39e3a9589c3eaa43ebc7e1d ] The bfq_find_set_group() function takes as input a blkcg (which 
represents a cgroup) and retrieves the corresponding bfq_group, then it updates the bfq internal group hierarchy (see comments inside the function for why this is needed) and finally it returns the bfq_group. In the hierarchy update cycle, the pointer holding the correct bfq_group that has to be returned is mistakenly used to traverse the hierarchy bottom to top, meaning that in each iteration it gets overwritten with the parent of the current group. Since the update cycle stops at root's children (depth = 2), the overwrite becomes a problem only if the blkcg describes a cgroup at a hierarchy level deeper than that (depth > 2). In this case the root's child that happens to be also an ancestor of the correct bfq_group is returned. The main consequence is that processes contained in a cgroup at depth greater than 2 are wrongly placed in the group described above by BFQ. This commit fixes this problem by using a different bfq_group pointer in the update cycle in order to avoid the overwrite of the variable holding the original group reference.
Reported-by: Kwon Je Oh Signed-off-by: Carlo Nonato Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-cgroup.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index afbbe5750a1f..7d7aee024ece 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -499,12 +499,13 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, */ entity = &bfqg->entity; for_each_entity(entity) { - bfqg = container_of(entity, struct bfq_group, entity); - if (bfqg != bfqd->root_group) { - parent = bfqg_parent(bfqg); + struct bfq_group *curr_bfqg = container_of(entity, + struct bfq_group, entity); + if (curr_bfqg != bfqd->root_group) { + parent = bfqg_parent(curr_bfqg); if (!parent) parent = bfqd->root_group; - bfq_group_set_parent(bfqg, parent); + bfq_group_set_parent(curr_bfqg, parent); } } -- GitLab From 7dc9540c96f04e078cacdd238eb196f3d9f5a5db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 6 Mar 2020 23:13:11 +0100 Subject: [PATCH 0580/1278] parse-maintainers: Mark as executable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 611d61f9ac99dc9e1494473fb90117a960a89dfa ] This makes the script more convenient to run. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- scripts/parse-maintainers.pl | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/parse-maintainers.pl diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl old mode 100644 new mode 100755 -- GitLab From 0481655a2f4ed5fe5744b57c1dbe13f47738b92e Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 5 Feb 2020 19:26:33 +0800 Subject: [PATCH 0581/1278] USB: Disable LPM on WD19's Realtek Hub commit b63e48fb50e1ca71db301ca9082befa6f16c55c4 upstream. 
Realtek Hub (0bda:0x0487) used in Dell Dock WD19 sometimes drops off the bus when bringing underlying ports from U3 to U0. Disabling LPM on the hub during setting link state is not enough, so let's disable LPM completely for this hub. Acked-by: Alan Stern Signed-off-by: Kai-Heng Feng Cc: stable Link: https://lore.kernel.org/r/20200205112633.25995-3-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 64c03e871f2d..f7d57b508b9e 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -229,6 +229,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0b05, 0x17e0), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* Realtek hub in Dell WD19 (Type-C) */ + { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM }, + /* Action Semiconductor flash disk */ { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, -- GitLab From 5ba3b42d45df835413f2d0688074aaddc1efba9b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 13 Mar 2020 13:07:08 +0100 Subject: [PATCH 0582/1278] usb: quirks: add NO_LPM quirk for RTL8153 based ethernet adapters commit 75d7676ead19b1fbb5e0ee934c9ccddcb666b68c upstream. We have been receiving bug reports that ethernet connections over RTL8153 based ethernet adapters stops working after a while with errors like these showing up in dmesg when the ethernet stops working: [12696.189484] r8152 6-1:1.0 enp10s0u1: Tx timeout [12702.333456] r8152 6-1:1.0 enp10s0u1: Tx timeout [12707.965422] r8152 6-1:1.0 enp10s0u1: Tx timeout This has been reported on Dell WD15 docks, Belkin USB-C Express Dock 3.1 docks and with generic USB to ethernet dongles using the RTL8153 chipsets. Some users have tried adding usbcore.quirks=0bda:8153:k to the kernel commandline and all users who have tried this report that this fixes this. 
Also note that we already have an existing NO_LPM quirk for the RTL8153 used in the Microsoft Surface Dock (where it uses a different usb-id). This commit adds a NO_LPM quirk for the generic Realtek RTL8153 0bda:8153 usb-id, fixing the Tx timeout errors on these devices. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=198931 Cc: stable@vger.kernel.org Cc: russianneuromancer@ya.ru Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200313120708.100339-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index f7d57b508b9e..6c4bb47922ac 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -232,6 +232,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Realtek hub in Dell WD19 (Type-C) */ { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM }, + /* Generic RTL8153 based ethernet adapters */ + { USB_DEVICE(0x0bda, 0x8153), .driver_info = USB_QUIRK_NO_LPM }, + /* Action Semiconductor flash disk */ { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, -- GitLab From 16aeb5a8574d95411744012142920413eea0cd9b Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 4 Mar 2020 11:43:10 +0100 Subject: [PATCH 0583/1278] USB: serial: option: add ME910G1 ECM composition 0x110b commit 8e852a7953be2a6ee371449f7257fe15ace6a1fc upstream. 
Add ME910G1 ECM composition 0x110b: tty, tty, tty, ecm Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20200304104310.2938-1-dnlplm@gmail.com Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index eff353de47cd..24a4ed76663a 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1186,6 +1186,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110a, 0xff), /* Telit ME910G1 */ .driver_info = NCTRL(0) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110b, 0xff), /* Telit ME910G1 (ECM) */ + .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), -- GitLab From 1483c946b6cc27b0a7b39d6c066de0bcb259de7a Mon Sep 17 00:00:00 2001 From: Ran Wang Date: Fri, 6 Mar 2020 17:23:28 +0800 Subject: [PATCH 0584/1278] usb: host: xhci-plat: add a shutdown commit b433e340e7565110b0ce9ca4b3e26f4b97a1decf upstream. When loading new kernel via kexec, we need to shutdown host controller to avoid any un-expected memory accessing during new kernel boot. 
Signed-off-by: Ran Wang Cc: stable Tested-by: Stephen Boyd Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/20200306092328.41253-1-ran.wang_1@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 108a212294bf..a7feb6338d97 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -416,6 +416,7 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match); static struct platform_driver usb_xhci_driver = { .probe = xhci_plat_probe, .remove = xhci_plat_remove, + .shutdown = usb_hcd_platform_shutdown, .driver = { .name = "xhci-hcd", .pm = &xhci_plat_pm_ops, -- GitLab From aee8660485cd311c25143c5d24ac8f460822a589 Mon Sep 17 00:00:00 2001 From: Scott Chen Date: Wed, 11 Mar 2020 14:14:23 +0800 Subject: [PATCH 0585/1278] USB: serial: pl2303: add device-id for HP LD381 commit cecc113c1af0dd41ccf265c1fdb84dbd05e63423 upstream. Add a device id for HP LD381 Display LD381: 03f0:0f7f Signed-off-by: Scott Chen Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 1c4d3dbd4635..5051b1dad09e 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -96,6 +96,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220TA_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LD381_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD960_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD960TA_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index fcc2cfc1da28..f0a9eeb6272d 100644 --- 
a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -129,6 +129,7 @@ #define HP_LM920_PRODUCT_ID 0x026b #define HP_TD620_PRODUCT_ID 0x0956 #define HP_LD960_PRODUCT_ID 0x0b39 +#define HP_LD381_PRODUCT_ID 0x0f7f #define HP_LCM220_PRODUCT_ID 0x3139 #define HP_LCM960_PRODUCT_ID 0x3239 #define HP_LD220_PRODUCT_ID 0x3524 -- GitLab From ef3ae20feb82c9c14f76135a34bdaeef5664b6ec Mon Sep 17 00:00:00 2001 From: Alberto Mattea Date: Fri, 6 Mar 2020 17:08:58 +0200 Subject: [PATCH 0586/1278] usb: xhci: apply XHCI_SUSPEND_DELAY to AMD XHCI controller 1022:145c commit 16263abc12d09871156a1c8650fb651f0e552f5e upstream. This controller times out during suspend (S3) with [ 240.521724] xhci_hcd 0000:30:00.3: WARN: xHC save state timeout [ 240.521729] xhci_hcd 0000:30:00.3: ERROR mismatched command completion event thus preventing the system from entering S3. Moreover it remains in an undefined state where some connected devices stop working until a reboot. Apply the XHCI_SUSPEND_DELAY quirk to make it suspend properly. 
CC: stable@vger.kernel.org Signed-off-by: Alberto Mattea Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200306150858.21904-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 908496ed3254..c01a0d1e8b5c 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -140,7 +140,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_AMD_PLL_FIX; if (pdev->vendor == PCI_VENDOR_ID_AMD && - (pdev->device == 0x15e0 || + (pdev->device == 0x145c || + pdev->device == 0x15e0 || pdev->device == 0x15e1 || pdev->device == 0x43bb)) xhci->quirks |= XHCI_SUSPEND_DELAY; -- GitLab From 5fa4cc2319639a359698ef016a2464a8ab13520e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Mar 2020 10:59:22 +0100 Subject: [PATCH 0587/1278] ALSA: line6: Fix endless MIDI read loop commit d683469b3c93d7e2afd39e6e1970f24700eb7a68 upstream. The MIDI input event parser of the LINE6 driver may enter into an endless loop when the unexpected data sequence is given, as it tries to continue the secondary bytes without termination. Also, when the input data is too short, the parser returns a negative error, while the caller doesn't handle it properly. This would lead to the unexpected behavior as well. This patch addresses those issues by checking the return value correctly and handling the one-byte event in the parser properly. The bug was reported by syzkaller. 
Reported-by: syzbot+cce32521ee0a824c21f7@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/000000000000033087059f8f8fa3@google.com Link: https://lore.kernel.org/r/20200309095922.30269-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/line6/driver.c | 2 +- sound/usb/line6/midibuf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index b223de3defc4..bf4eacc53a7d 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -313,7 +313,7 @@ static void line6_data_received(struct urb *urb) line6_midibuf_read(mb, line6->buffer_message, LINE6_MIDI_MESSAGE_MAXLEN); - if (done == 0) + if (done <= 0) break; line6->message_length = done; diff --git a/sound/usb/line6/midibuf.c b/sound/usb/line6/midibuf.c index 36a610ba342e..c931d48801eb 100644 --- a/sound/usb/line6/midibuf.c +++ b/sound/usb/line6/midibuf.c @@ -163,7 +163,7 @@ int line6_midibuf_read(struct midi_buffer *this, unsigned char *data, int midi_length_prev = midibuf_message_length(this->command_prev); - if (midi_length_prev > 0) { + if (midi_length_prev > 1) { midi_length = midi_length_prev - 1; repeat = 1; } else -- GitLab From c56d2421fe55db840c5457f558604e1aad2a1608 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 16 Mar 2020 10:05:05 +0100 Subject: [PATCH 0588/1278] ALSA: seq: virmidi: Fix running status after receiving sysex commit 4384f167ce5fa7241b61bb0984d651bc528ddebe upstream. The virmidi driver handles sysex event exceptionally in a short-cut snd_seq_dump_var_event() call, but this missed the reset of the running status. As a result, it may lead to an incomplete command right after the sysex when an event with the same running status was queued. Fix it by clearing the running status properly via calling snd_midi_event_reset_decode() for that code path. 
Reported-by: Andreas Steinmetz Cc: Link: https://lore.kernel.org/r/3b4a4e0f232b7afbaf0a843f63d0e538e3029bfd.camel@domdv.de Link: https://lore.kernel.org/r/20200316090506.23966-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_virmidi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index 8ebbca554e99..dd958d76ca5d 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -95,6 +95,7 @@ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) continue; snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)snd_rawmidi_receive, vmidi->substream); + snd_midi_event_reset_decode(vmidi->parser); } else { len = snd_midi_event_decode(vmidi->parser, msg, sizeof(msg), ev); if (len > 0) -- GitLab From 76029bc4f6f4d19ad0edf32cb196adec3ba7d419 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 16 Mar 2020 10:05:06 +0100 Subject: [PATCH 0589/1278] ALSA: seq: oss: Fix running status after receiving sysex commit 6c3171ef76a0bad892050f6959a7eac02fb16df7 upstream. This is a similar bug like the previous case for virmidi: the invalid running status is kept after receiving a sysex message. Again the fix is to clear the running status after handling the sysex. 
Cc: Link: https://lore.kernel.org/r/3b4a4e0f232b7afbaf0a843f63d0e538e3029bfd.camel@domdv.de Link: https://lore.kernel.org/r/20200316090506.23966-3-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/oss/seq_oss_midi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c index 9debd1b8fd28..cdfb8f92d554 100644 --- a/sound/core/seq/oss/seq_oss_midi.c +++ b/sound/core/seq/oss/seq_oss_midi.c @@ -615,6 +615,7 @@ send_midi_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, struct seq len = snd_seq_oss_timer_start(dp->timer); if (ev->type == SNDRV_SEQ_EVENT_SYSEX) { snd_seq_oss_readq_sysex(dp->readq, mdev->seq_device, ev); + snd_midi_event_reset_decode(mdev->coder); } else { len = snd_midi_event_decode(mdev->coder, msg, sizeof(msg), ev); if (len > 0) -- GitLab From 5ac3462e19212ceca3f0f62effb4fc7aaf7ea4e1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Mar 2020 09:21:48 +0100 Subject: [PATCH 0590/1278] ALSA: pcm: oss: Avoid plugin buffer overflow commit f2ecf903ef06eb1bbbfa969db9889643d487e73a upstream. Each OSS PCM plugin allocates its internal buffer per pre-calculation of the max buffer size through the chain of plugins (calling src_frames and dst_frames callbacks). This works for most plugins, but the rate plugin might behave incorrectly. The calculation in the rate plugin involves the fractional position, i.e. it may vary depending on the input position. Since the buffer size pre-calculation is always done with the offset zero, it may return a shorter size than it might be; this may result in an out-of-bounds access, as spotted by the fuzzer. This patch addresses those possible buffer overflow accesses by simply setting the upper limit per the given buffer size for each plugin before src_frames() and after dst_frames() calls. 
Reported-by: syzbot+e1fe9f44fb8ecf4fb5dd@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/000000000000b25ea005a02bcf21@google.com Link: https://lore.kernel.org/r/20200309082148.19855-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_plugin.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index b8ab46b8298d..6583eb411f82 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -209,6 +209,8 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p if (stream == SNDRV_PCM_STREAM_PLAYBACK) { plugin = snd_pcm_plug_last(plug); while (plugin && drv_frames > 0) { + if (drv_frames > plugin->buf_frames) + drv_frames = plugin->buf_frames; plugin_prev = plugin->prev; if (plugin->src_frames) drv_frames = plugin->src_frames(plugin, drv_frames); @@ -220,6 +222,8 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p plugin_next = plugin->next; if (plugin->dst_frames) drv_frames = plugin->dst_frames(plugin, drv_frames); + if (drv_frames > plugin->buf_frames) + drv_frames = plugin->buf_frames; plugin = plugin_next; } } else @@ -248,11 +252,15 @@ snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pc if (frames < 0) return frames; } + if (frames > plugin->buf_frames) + frames = plugin->buf_frames; plugin = plugin_next; } } else if (stream == SNDRV_PCM_STREAM_CAPTURE) { plugin = snd_pcm_plug_last(plug); while (plugin) { + if (frames > plugin->buf_frames) + frames = plugin->buf_frames; plugin_prev = plugin->prev; if (plugin->src_frames) { frames = plugin->src_frames(plugin, frames); -- GitLab From 59dfa43ef54a64c366ffee5e4d39f667352095f9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 12 Mar 2020 16:57:30 +0100 Subject: [PATCH 0591/1278] ALSA: pcm: oss: Remove WARNING from snd_pcm_plug_alloc() checks commit 
5461e0530c222129dfc941058be114b5cbc00837 upstream. The return value checks in snd_pcm_plug_alloc() are covered with snd_BUG_ON() macro that may trigger a kernel WARNING depending on the kconfig. But since the error condition can be triggered by a weird user space parameter passed to OSS layer, we shouldn't give the kernel stack trace just for that. As it's a normal error condition, let's remove snd_BUG_ON() macro usage there. Reported-by: syzbot+2a59ee7a9831b264f45e@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20200312155730.7520-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_plugin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 6583eb411f82..40d2d39151bf 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -111,7 +111,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames) while (plugin->next) { if (plugin->dst_frames) frames = plugin->dst_frames(plugin, frames); - if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0)) + if ((snd_pcm_sframes_t)frames <= 0) return -ENXIO; plugin = plugin->next; err = snd_pcm_plugin_alloc(plugin, frames); @@ -123,7 +123,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames) while (plugin->prev) { if (plugin->src_frames) frames = plugin->src_frames(plugin, frames); - if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0)) + if ((snd_pcm_sframes_t)frames <= 0) return -ENXIO; plugin = plugin->prev; err = snd_pcm_plugin_alloc(plugin, frames); -- GitLab From 5d777b76b2eddd3887c57a635643aacab3e437ce Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 14 Feb 2020 17:46:35 +0100 Subject: [PATCH 0592/1278] iio: trigger: stm32-timer: disable master mode when stopping commit 29e8c8253d7d5265f58122c0a7902e26df6c6f61 upstream. Master mode should be disabled when stopping. 
This mainly impacts possible other use-case after timer has been stopped. Currently, master mode remains set (from start routine). Fixes: 6fb34812c2a2 ("iio: stm32 trigger: Add support for TRGO2 triggers") Signed-off-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/trigger/stm32-timer-trigger.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index eb212f8c8879..0e4da570f22f 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -161,7 +161,8 @@ static int stm32_timer_start(struct stm32_timer_trigger *priv, return 0; } -static void stm32_timer_stop(struct stm32_timer_trigger *priv) +static void stm32_timer_stop(struct stm32_timer_trigger *priv, + struct iio_trigger *trig) { u32 ccer, cr1; @@ -179,6 +180,12 @@ static void stm32_timer_stop(struct stm32_timer_trigger *priv) regmap_write(priv->regmap, TIM_PSC, 0); regmap_write(priv->regmap, TIM_ARR, 0); + /* Force disable master mode */ + if (stm32_timer_is_trgo2_name(trig->name)) + regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS2, 0); + else + regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS, 0); + /* Make sure that registers are updated */ regmap_update_bits(priv->regmap, TIM_EGR, TIM_EGR_UG, TIM_EGR_UG); } @@ -197,7 +204,7 @@ static ssize_t stm32_tt_store_frequency(struct device *dev, return ret; if (freq == 0) { - stm32_timer_stop(priv); + stm32_timer_stop(priv, trig); } else { ret = stm32_timer_start(priv, trig, freq); if (ret) -- GitLab From 7934f5c57db8927fad85ed09a9c2df9d7aab0459 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Fri, 14 Feb 2020 12:03:24 +0100 Subject: [PATCH 0593/1278] iio: magnetometer: ak8974: Fix negative raw values in sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 
b500c086e4110829a308c23e83a7cdc65b26228a upstream. At the moment, reading from in_magn_*_raw in sysfs tends to return large values around 65000, even though the output of ak8974 is actually limited to ±32768. This happens because the value is never converted to the signed 16-bit integer variant. Add an explicit cast to s16 to fix this. Fixes: 7c94a8b2ee8c ("iio: magn: add a driver for AK8974") Signed-off-by: Stephan Gerhold Reviewed-by: Linus Waleij Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/magnetometer/ak8974.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c index 0bff76e96950..283ecd4ea800 100644 --- a/drivers/iio/magnetometer/ak8974.c +++ b/drivers/iio/magnetometer/ak8974.c @@ -563,7 +563,7 @@ static int ak8974_read_raw(struct iio_dev *indio_dev, * We read all axes and discard all but one, for optimized * reading, use the triggered buffer. */ - *val = le16_to_cpu(hw_values[chan->address]); + *val = (s16)le16_to_cpu(hw_values[chan->address]); ret = IIO_VAL_INT; } -- GitLab From 374c5761c2d5188cd8a3055ebd554163f1912ef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sun, 15 Mar 2020 17:44:25 +0100 Subject: [PATCH 0594/1278] mmc: sdhci-of-at91: fix cd-gpios for SAMA5D2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 53dd0a7cd65edc83b0c243d1c08377c8b876b2ee upstream. SAMA5D2x doesn't drive CMD line if GPIO is used as CD line (at least SAMA5D27 doesn't). Fix this by forcing card-detect in the module if module-controlled CD is not used. Fixed commit addresses the problem only for non-removable cards. This amends it to also cover gpio-cd case. 
Cc: stable@vger.kernel.org Fixes: 7a1e3f143176 ("mmc: sdhci-of-at91: force card detect value for non removable devices") Signed-off-by: Michał Mirosław Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/8d10950d9940468577daef4772b82a071b204716.1584290561.git.mirq-linux@rere.qmqm.pl Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-at91.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 1dadd460cc8f..78c9ac33b562 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -125,7 +125,8 @@ static void sdhci_at91_reset(struct sdhci_host *host, u8 mask) { sdhci_reset(host, mask); - if (host->mmc->caps & MMC_CAP_NONREMOVABLE) + if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) + || mmc_gpio_get_cd(host->mmc) >= 0) sdhci_at91_set_force_card_detect(host); } @@ -404,8 +405,11 @@ static int sdhci_at91_probe(struct platform_device *pdev) * detection procedure using the SDMCC_CD signal is bypassed. * This bit is reset when a software reset for all command is performed * so we need to implement our own reset function to set back this bit. + * + * WA: SAMA5D2 doesn't drive CMD if using CD GPIO line. */ - if (host->mmc->caps & MMC_CAP_NONREMOVABLE) + if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) + || mmc_gpio_get_cd(host->mmc) >= 0) sdhci_at91_set_force_card_detect(host); pm_runtime_put_autosuspend(&pdev->dev); -- GitLab From 7132b011bc17cb185fdfc701fbaf0ead14e2c715 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Thu, 12 Mar 2020 10:36:52 +0100 Subject: [PATCH 0595/1278] staging: rtl8188eu: Add device id for MERCUSYS MW150US v2 commit bb5786b9286c253557a0115bc8d21879e61b7b94 upstream. This device was added to the stand-alone driver on github. Add it to the staging driver as well. 
Link: https://github.com/lwfinger/rtl8188eu/commit/2141f244c3e7 Signed-off-by: Michael Straube Cc: stable Link: https://lore.kernel.org/r/20200312093652.13918-1-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/usb_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 536453358568..83b5f5c63ae1 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -46,6 +46,7 @@ static const struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x331B)}, /* D-Link DWA-121 rev B1 */ {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ {USB_DEVICE(0x2357, 0x0111)}, /* TP-Link TL-WN727N v5.21 */ + {USB_DEVICE(0x2C4E, 0x0102)}, /* MERCUSYS MW150US v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ {} /* Terminating entry */ -- GitLab From 53b2e7727caef4158f0ad5bfbffa3124b5e369ad Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Fri, 6 Mar 2020 01:30:47 +0100 Subject: [PATCH 0596/1278] staging/speakup: fix get_word non-space look-ahead commit 9d32c0cde4e2d1343dfb88a67b2ec6397705b32b upstream. get_char was erroneously given the address of the pointer to the text instead of the address of the text, thus leading to random crashes when the user requests speaking a word while the current position is on a space character and say_word_ctl is not enabled. 
Reported-on: https://github.com/bytefire/speakup/issues/1 Reported-by: Kirk Reiser Reported-by: Janina Sajka Reported-by: Alexandr Epaneshnikov Reported-by: Gregory Nowak Reported-by: deedra waters Signed-off-by: Samuel Thibault Tested-by: Alexandr Epaneshnikov Tested-by: Gregory Nowak Tested-by: Michael Taboada Cc: stable Link: https://lore.kernel.org/r/20200306003047.thijtmqrnayd3dmw@function Signed-off-by: Greg Kroah-Hartman --- drivers/staging/speakup/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/speakup/main.c b/drivers/staging/speakup/main.c index 56f7be6af1f6..a27f5e9a1ae1 100644 --- a/drivers/staging/speakup/main.c +++ b/drivers/staging/speakup/main.c @@ -567,7 +567,7 @@ static u_long get_word(struct vc_data *vc) return 0; } else if (tmpx < vc->vc_cols - 2 && (ch == SPACE || ch == 0 || (ch < 0x100 && IS_WDLM(ch))) && - get_char(vc, (u_short *)&tmp_pos + 1, &temp) > SPACE) { + get_char(vc, (u_short *)tmp_pos + 1, &temp) > SPACE) { tmp_pos += 2; tmpx++; } else -- GitLab From c750ccb1036bd7f5ed259c0f3d8dc45ecc490dc2 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 17 Mar 2020 08:22:14 +0200 Subject: [PATCH 0597/1278] intel_th: Fix user-visible error codes commit ce666be89a8a09c5924ff08fc32e119f974bdab6 upstream. There are a few places in the driver that end up returning ENOTSUPP to the user, replace those with EINVAL. 
Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Fixes: ba82664c134ef ("intel_th: Add Memory Storage Unit driver") Cc: stable@vger.kernel.org # v4.4+ Link: https://lore.kernel.org/r/20200317062215.15598-6-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 6ebf6a2edb33..ca2717137ad2 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -499,7 +499,7 @@ static int msc_configure(struct msc *msc) lockdep_assert_held(&msc->buf_mutex); if (msc->mode > MSC_MODE_MULTI) - return -ENOTSUPP; + return -EINVAL; if (msc->mode == MSC_MODE_MULTI) msc_buffer_clear_hw_header(msc); @@ -950,7 +950,7 @@ static int msc_buffer_alloc(struct msc *msc, unsigned long *nr_pages, } else if (msc->mode == MSC_MODE_MULTI) { ret = msc_buffer_multi_alloc(msc, nr_pages, nr_wins); } else { - ret = -ENOTSUPP; + ret = -EINVAL; } if (!ret) { @@ -1173,7 +1173,7 @@ static ssize_t intel_th_msc_read(struct file *file, char __user *buf, if (ret >= 0) *ppos = iter->offset; } else { - ret = -ENOTSUPP; + ret = -EINVAL; } put_count: -- GitLab From 3c6157d6e209957bf6d04ada956a1d8ed34903c1 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 17 Mar 2020 08:22:15 +0200 Subject: [PATCH 0598/1278] intel_th: pci: Add Elkhart Lake CPU support commit add492d2e9446a77ede9bb43699ec85ca8fc1aba upstream. This adds support for the Trace Hub in Elkhart Lake CPU. 
Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200317062215.15598-7-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index fc371444407d..b8cbd26b60e1 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -218,6 +218,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4da6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Elkhart Lake CPU */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4529), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Elkhart Lake */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4b26), -- GitLab From 7d2c3a9ce8d50cd3d92ab08e060c80f321cd880c Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 18 Mar 2020 15:26:49 +0000 Subject: [PATCH 0599/1278] rtc: max8907: add missing select REGMAP_IRQ commit 5d892919fdd0cefd361697472d4e1b174a594991 upstream. I have hit the following build error: armv7a-hardfloat-linux-gnueabi-ld: drivers/rtc/rtc-max8907.o: in function `max8907_rtc_probe': rtc-max8907.c:(.text+0x400): undefined reference to `regmap_irq_get_virq' max8907 should select REGMAP_IRQ Fixes: 94c01ab6d7544 ("rtc: add MAX8907 RTC driver") Cc: stable Signed-off-by: Corentin Labbe Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index e0e58f3b1420..68b76e6ddc1e 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -323,6 +323,7 @@ config RTC_DRV_MAX6900 config RTC_DRV_MAX8907 tristate "Maxim MAX8907" depends on MFD_MAX8907 || COMPILE_TEST + select REGMAP_IRQ help If you say yes here you will get support for the RTC of Maxim MAX8907 PMIC. 
-- GitLab From 4c36106eb343ae92251e579c9c7d0ee0f49cbfee Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 6 Mar 2020 17:08:57 +0200 Subject: [PATCH 0600/1278] xhci: Do not open code __print_symbolic() in xhci trace events commit 045706bff837ee89c13f1ace173db71922c1c40b upstream. libtraceevent (used by perf and trace-cmd) failed to parse the xhci_urb_dequeue trace event. This is because the user space trace event format parsing is not a full C compiler. It can handle some basic logic, but is not meant to be able to handle everything C can do. In cases where a trace event field needs to be converted from a number to a string, there's the __print_symbolic() macro that should be used: See samples/trace_events/trace-events-sample.h Some xhci trace events open coded the __print_symbolic() causing the user space tools to fail to parse it. This has to be replaced with __print_symbolic() instead. CC: stable@vger.kernel.org Reported-by: Tzvetomir Stoyanov Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206531 Fixes: 5abdc2e6e12ff ("usb: host: xhci: add urb_enqueue/dequeue/giveback tracers") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200306150858.21904-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-trace.h | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h index 02a1164ca599..a350c46d01e8 100644 --- a/drivers/usb/host/xhci-trace.h +++ b/drivers/usb/host/xhci-trace.h @@ -276,23 +276,12 @@ DECLARE_EVENT_CLASS(xhci_log_urb, ), TP_printk("ep%d%s-%s: urb %p pipe %u slot %d length %d/%d sgs %d/%d stream %d flags %08x", __entry->epnum, __entry->dir_in ? 
"in" : "out", - ({ char *s; - switch (__entry->type) { - case USB_ENDPOINT_XFER_INT: - s = "intr"; - break; - case USB_ENDPOINT_XFER_CONTROL: - s = "control"; - break; - case USB_ENDPOINT_XFER_BULK: - s = "bulk"; - break; - case USB_ENDPOINT_XFER_ISOC: - s = "isoc"; - break; - default: - s = "UNKNOWN"; - } s; }), __entry->urb, __entry->pipe, __entry->slot_id, + __print_symbolic(__entry->type, + { USB_ENDPOINT_XFER_INT, "intr" }, + { USB_ENDPOINT_XFER_CONTROL, "control" }, + { USB_ENDPOINT_XFER_BULK, "bulk" }, + { USB_ENDPOINT_XFER_ISOC, "isoc" }), + __entry->urb, __entry->pipe, __entry->slot_id, __entry->actual, __entry->length, __entry->num_mapped_sgs, __entry->num_sgs, __entry->stream, __entry->flags ) -- GitLab From 6c7bec7874871b0f3596c524feeff2adc2a40595 Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Sat, 21 Mar 2020 18:22:10 -0700 Subject: [PATCH 0601/1278] memcg: fix NULL pointer dereference in __mem_cgroup_usage_unregister_event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7d36665a5886c27ca4c4d0afd3ecc50b400f3587 upstream. An eventfd monitors multiple memory thresholds of the cgroup, closes them, the kernel deletes all events related to this eventfd. 
Before all events are deleted, another eventfd monitors the memory threshold of this cgroup, leading to a crash: BUG: kernel NULL pointer dereference, address: 0000000000000004 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 800000033058e067 P4D 800000033058e067 PUD 3355ce067 PMD 0 Oops: 0002 [#1] SMP PTI CPU: 2 PID: 14012 Comm: kworker/2:6 Kdump: loaded Not tainted 5.6.0-rc4 #3 Hardware name: LENOVO 20AWS01K00/20AWS01K00, BIOS GLET70WW (2.24 ) 05/21/2014 Workqueue: events memcg_event_remove RIP: 0010:__mem_cgroup_usage_unregister_event+0xb3/0x190 RSP: 0018:ffffb47e01c4fe18 EFLAGS: 00010202 RAX: 0000000000000001 RBX: ffff8bb223a8a000 RCX: 0000000000000001 RDX: 0000000000000001 RSI: ffff8bb22fb83540 RDI: 0000000000000001 RBP: ffffb47e01c4fe48 R08: 0000000000000000 R09: 0000000000000010 R10: 000000000000000c R11: 071c71c71c71c71c R12: ffff8bb226aba880 R13: ffff8bb223a8a480 R14: 0000000000000000 R15: 0000000000000000 FS:  0000000000000000(0000) GS:ffff8bb242680000(0000) knlGS:0000000000000000 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000004 CR3: 000000032c29c003 CR4: 00000000001606e0 Call Trace: memcg_event_remove+0x32/0x90 process_one_work+0x172/0x380 worker_thread+0x49/0x3f0 kthread+0xf8/0x130 ret_from_fork+0x35/0x40 CR2: 0000000000000004 We can reproduce this problem in the following ways: 1. We create a new cgroup subdirectory and a new eventfd, and then we monitor multiple memory thresholds of the cgroup through this eventfd. 2. closing this eventfd, and __mem_cgroup_usage_unregister_event () will be called multiple times to delete all events related to this eventfd. The first time __mem_cgroup_usage_unregister_event() is called, the kernel will clear all items related to this eventfd in thresholds-> primary. Since there is currently only one eventfd, thresholds-> primary becomes empty, so the kernel will set thresholds-> primary and thresholds-> spare to NULL. 
If at this time, the user creates a new eventfd and monitors the memory threshold of this cgroup, the kernel will re-initialize thresholds-> primary. Then when __mem_cgroup_usage_unregister_event () is called for the second time, because thresholds-> primary is not empty, the system will access thresholds-> spare, but thresholds-> spare is NULL, which will trigger a crash. In general, the longer it takes to delete all events related to this eventfd, the easier it is to trigger this problem. The solution is to check whether the thresholds associated with the eventfd have been cleared when deleting the event. If so, we do nothing. [akpm@linux-foundation.org: fix comment, per Kirill] Fixes: 907860ed381a ("cgroups: make cftype.unregister_event() void-returning") Signed-off-by: Chunguang Xu Signed-off-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Kirill A. Shutemov Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Link: http://lkml.kernel.org/r/077a6f67-aefa-4591-efec-f2f3af2b0b02@gmail.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5d6ebd1449f0..d8df500e0eea 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3518,7 +3518,7 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, struct mem_cgroup_thresholds *thresholds; struct mem_cgroup_threshold_ary *new; unsigned long usage; - int i, j, size; + int i, j, size, entries; mutex_lock(&memcg->thresholds_lock); @@ -3538,14 +3538,20 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, __mem_cgroup_threshold(memcg, type == _MEMSWAP); /* Calculate new number of threshold */ - size = 0; + size = entries = 0; for (i = 0; i < thresholds->primary->size; i++) { if (thresholds->primary->entries[i].eventfd != eventfd) size++; + else + entries++; } new = thresholds->spare; + /* If no items related to eventfd have been cleared, nothing
to do */ + if (!entries) + goto unlock; + /* Set thresholds array to NULL if we don't have thresholds */ if (!size) { kfree(new); -- GitLab From ffb15296ddb6d61bfd119f18bb34cadc3a2ccf5c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 17 Mar 2020 11:04:09 -0700 Subject: [PATCH 0602/1278] mm: slub: be more careful about the double cmpxchg of freelist commit 5076190daded2197f62fe92cf69674488be44175 upstream. This is just a cleanup addition to Jann's fix to properly update the transaction ID for the slub slowpath in commit fd4d9c7d0c71 ("mm: slub: add missing TID bump.."). The transaction ID is what protects us against any concurrent accesses, but we should really also make sure to make the 'freelist' comparison itself always use the same freelist value that we then used as the new next free pointer. Jann points out that if we do all of this carefully, we could skip the transaction ID update for all the paths that only remove entries from the lists, and only update the TID when adding entries (to avoid the ABA issue with cmpxchg and list handling re-adding a previously seen value). But this patch just does the "make sure to cmpxchg the same value we used" rather than then try to be clever. 
Acked-by: Jann Horn Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 099c7a85ede0..502845b6728c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2943,11 +2943,13 @@ static __always_inline void do_slab_free(struct kmem_cache *s, barrier(); if (likely(page == c->page)) { - set_freepointer(s, tail_obj, c->freelist); + void **freelist = READ_ONCE(c->freelist); + + set_freepointer(s, tail_obj, freelist); if (unlikely(!this_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, - c->freelist, tid, + freelist, tid, head, next_tid(tid)))) { note_cmpxchg_failure("slab_free", s, tid); -- GitLab From f752174191cf364d115bc274c96cfa31ebb67b05 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Sat, 21 Mar 2020 18:22:37 -0700 Subject: [PATCH 0603/1278] mm, slub: prevent kmalloc_node crashes and memory leaks commit 0715e6c516f106ed553828a671d30ad9a3431536 upstream. Sachin reports [1] a crash in SLUB __slab_alloc(): BUG: Kernel NULL pointer dereference on read at 0x000073b0 Faulting instruction address: 0xc0000000003d55f4 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 19 PID: 1 Comm: systemd Not tainted 5.6.0-rc2-next-20200218-autotest #1 NIP: c0000000003d55f4 LR: c0000000003d5b94 CTR: 0000000000000000 REGS: c0000008b37836d0 TRAP: 0300 Not tainted (5.6.0-rc2-next-20200218-autotest) MSR: 8000000000009033 CR: 24004844 XER: 00000000 CFAR: c00000000000dec4 DAR: 00000000000073b0 DSISR: 40000000 IRQMASK: 1 GPR00: c0000000003d5b94 c0000008b3783960 c00000000155d400 c0000008b301f500 GPR04: 0000000000000dc0 0000000000000002 c0000000003443d8 c0000008bb398620 GPR08: 00000008ba2f0000 0000000000000001 0000000000000000 0000000000000000 GPR12: 0000000024004844 c00000001ec52a00 0000000000000000 0000000000000000 GPR16: c0000008a1b20048 c000000001595898 c000000001750c18 0000000000000002 GPR20: 
c000000001750c28 c000000001624470 0000000fffffffe0 5deadbeef0000122 GPR24: 0000000000000001 0000000000000dc0 0000000000000002 c0000000003443d8 GPR28: c0000008b301f500 c0000008bb398620 0000000000000000 c00c000002287180 NIP ___slab_alloc+0x1f4/0x760 LR __slab_alloc+0x34/0x60 Call Trace: ___slab_alloc+0x334/0x760 (unreliable) __slab_alloc+0x34/0x60 __kmalloc_node+0x110/0x490 kvmalloc_node+0x58/0x110 mem_cgroup_css_online+0x108/0x270 online_css+0x48/0xd0 cgroup_apply_control_enable+0x2ec/0x4d0 cgroup_mkdir+0x228/0x5f0 kernfs_iop_mkdir+0x90/0xf0 vfs_mkdir+0x110/0x230 do_mkdirat+0xb0/0x1a0 system_call+0x5c/0x68 This is a PowerPC platform with following NUMA topology: available: 2 nodes (0-1) node 0 cpus: node 0 size: 0 MB node 0 free: 0 MB node 1 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 node 1 size: 35247 MB node 1 free: 30907 MB node distances: node 0 1 0: 10 40 1: 40 10 possible numa nodes: 0-31 This only happens with a mmotm patch "mm/memcontrol.c: allocate shrinker_map on appropriate NUMA node" [2] which effectively calls kmalloc_node for each possible node. SLUB however only allocates kmem_cache_node on online N_NORMAL_MEMORY nodes, and relies on node_to_mem_node to return such valid node for other nodes since commit a561ce00b09e ("slub: fall back to node_to_mem_node() node if allocating on memoryless node"). This is however not true in this configuration where the _node_numa_mem_ array is not initialized for nodes 0 and 2-31, thus it contains zeroes and get_partial() ends up accessing non-allocated kmem_cache_node. A related issue was reported by Bharata (originally by Ramachandran) [3] where a similar PowerPC configuration, but with mainline kernel without patch [2] ends up allocating large amounts of pages by kmalloc-1k kmalloc-512. This seems to have the same underlying issue with node_to_mem_node() not behaving as expected, and might probably also lead to an infinite loop with CONFIG_SLUB_CPU_PARTIAL [4]. 
This patch should fix both issues by not relying on node_to_mem_node() anymore and instead simply falling back to NUMA_NO_NODE, when kmalloc_node(node) is attempted for a node that's not online, or has no usable memory. The "usable memory" condition is also changed from node_present_pages() to N_NORMAL_MEMORY node state, as that is exactly the condition that SLUB uses to allocate kmem_cache_node structures. The check in get_partial() is removed completely, as the checks in ___slab_alloc() are now sufficient to prevent get_partial() being reached with an invalid node. [1] https://lore.kernel.org/linux-next/3381CD91-AB3D-4773-BA04-E7A072A63968@linux.vnet.ibm.com/ [2] https://lore.kernel.org/linux-mm/fff0e636-4c36-ed10-281c-8cdb0687c839@virtuozzo.com/ [3] https://lore.kernel.org/linux-mm/20200317092624.GB22538@in.ibm.com/ [4] https://lore.kernel.org/linux-mm/088b5996-faae-8a56-ef9c-5b567125ae54@suse.cz/ Fixes: a561ce00b09e ("slub: fall back to node_to_mem_node() node if allocating on memoryless node") Reported-by: Sachin Sant Reported-by: PUVICHAKRAVARTHY RAMACHANDRAN Signed-off-by: Vlastimil Babka Signed-off-by: Andrew Morton Tested-by: Sachin Sant Tested-by: Bharata B Rao Reviewed-by: Srikar Dronamraju Cc: Mel Gorman Cc: Michael Ellerman Cc: Michal Hocko Cc: Christopher Lameter Cc: linuxppc-dev@lists.ozlabs.org Cc: Joonsoo Kim Cc: Pekka Enberg Cc: David Rientjes Cc: Kirill Tkhai Cc: Vlastimil Babka Cc: Nathan Lynch Cc: Link: http://lkml.kernel.org/r/20200320115533.9604-1-vbabka@suse.cz Debugged-by: Srikar Dronamraju Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 502845b6728c..958a8f7a3c25 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1923,8 +1923,6 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, if (node == NUMA_NO_NODE) searchnode = numa_mem_id(); - else if (!node_present_pages(node)) - 
searchnode = node_to_mem_node(node); object = get_partial_node(s, get_node(s, searchnode), c, flags); if (object || node != NUMA_NO_NODE) @@ -2521,17 +2519,27 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, struct page *page; page = c->page; - if (!page) + if (!page) { + /* + * if the node is not online or has no normal memory, just + * ignore the node constraint + */ + if (unlikely(node != NUMA_NO_NODE && + !node_state(node, N_NORMAL_MEMORY))) + node = NUMA_NO_NODE; goto new_slab; + } redo: if (unlikely(!node_match(page, node))) { - int searchnode = node; - - if (node != NUMA_NO_NODE && !node_present_pages(node)) - searchnode = node_to_mem_node(node); - - if (unlikely(!node_match(page, searchnode))) { + /* + * same as above but node_match() being false already + * implies node != NUMA_NO_NODE + */ + if (!node_state(node, N_NORMAL_MEMORY)) { + node = NUMA_NO_NODE; + goto redo; + } else { stat(s, ALLOC_NODE_MISMATCH); deactivate_slab(s, page, c->freelist, c); goto new_slab; -- GitLab From a07b91a7c51b5f701961900579e8653527ed1b1b Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sat, 21 Mar 2020 18:22:17 -0700 Subject: [PATCH 0604/1278] page-flags: fix a crash at SetPageError(THP_SWAP) commit d72520ad004a8ce18a6ba6cde317f0081b27365a upstream. Commit bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped out") supported writing THP to a swap device but forgot to upgrade an older commit df8c94d13c7e ("page-flags: define behavior of FS/IO-related flags on compound pages") which could trigger a crash during THP swapping out with DEBUG_VM_PGFLAGS=y, kernel BUG at include/linux/page-flags.h:317! 
page dumped because: VM_BUG_ON_PAGE(1 && PageCompound(page)) page:fffff3b2ec3a8000 refcount:512 mapcount:0 mapping:000000009eb0338c index:0x7f6e58200 head:fffff3b2ec3a8000 order:9 compound_mapcount:0 compound_pincount:0 anon flags: 0x45fffe0000d8454(uptodate|lru|workingset|owner_priv_1|writeback|head|reclaim|swapbacked) end_swap_bio_write() SetPageError(page) VM_BUG_ON_PAGE(1 && PageCompound(page)) bio_endio+0x297/0x560 dec_pending+0x218/0x430 [dm_mod] clone_endio+0xe4/0x2c0 [dm_mod] bio_endio+0x297/0x560 blk_update_request+0x201/0x920 scsi_end_request+0x6b/0x4b0 scsi_io_completion+0x509/0x7e0 scsi_finish_command+0x1ed/0x2a0 scsi_softirq_done+0x1c9/0x1d0 __blk_mqnterrupt+0xf/0x20 Fix by checking PF_NO_TAIL in those places instead. Fixes: bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped out") Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Acked-by: "Huang, Ying" Acked-by: Rafael Aquini Cc: Link: http://lkml.kernel.org/r/20200310235846.1319-1-cai@lca.pw Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5f966c94732b..7cbd8195ffb3 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -264,7 +264,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } __PAGEFLAG(Locked, locked, PF_NO_TAIL) PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) -PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND) +PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) PAGEFLAG(Referenced, referenced, PF_HEAD) TESTCLEARFLAG(Referenced, referenced, PF_HEAD) __SETPAGEFLAG(Referenced, referenced, PF_HEAD) -- GitLab From c3cf92fd9b6352465edfa55c8968eca3cc766b93 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Sat, 21 Mar 2020 18:22:41 -0700 
Subject: [PATCH 0605/1278] x86/mm: split vmalloc_sync_all() commit 763802b53a427ed3cbd419dbba255c414fdd9e7c upstream. Commit 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()") introduced a call to vmalloc_sync_all() in the vunmap() code-path. While this change was necessary to maintain correctness on x86-32-pae kernels, it also adds additional cycles for architectures that don't need it. Specifically on x86-64 with CONFIG_VMAP_STACK=y some people reported severe performance regressions in micro-benchmarks because it now also calls the x86-64 implementation of vmalloc_sync_all() on vunmap(). But the vmalloc_sync_all() implementation on x86-64 is only needed for newly created mappings. To avoid the unnecessary work on x86-64 and to gain the performance back, split up vmalloc_sync_all() into two functions: * vmalloc_sync_mappings(), and * vmalloc_sync_unmappings() Most call-sites to vmalloc_sync_all() only care about new mappings being synchronized. The only exception is the new call-site added in the above mentioned commit. Shile Zhang directed us to a report of an 80% regression in reaim throughput. Fixes: 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()") Reported-by: kernel test robot Reported-by: Shile Zhang Signed-off-by: Joerg Roedel Signed-off-by: Andrew Morton Tested-by: Borislav Petkov Acked-by: Rafael J. 
Wysocki [GHES] Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Link: http://lkml.kernel.org/r/20191009124418.8286-1-joro@8bytes.org Link: https://lists.01.org/hyperkitty/list/lkp@lists.01.org/thread/4D3JPPHBNOSPFK2KEPC6KGKS6J25AIDB/ Link: http://lkml.kernel.org/r/20191113095530.228959-1-shile.zhang@linux.alibaba.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/fault.c | 26 ++++++++++++++++++++++++-- drivers/acpi/apei/ghes.c | 2 +- include/linux/vmalloc.h | 5 +++-- kernel/notifier.c | 2 +- mm/nommu.c | 10 +++++++--- mm/vmalloc.c | 11 +++++++---- 6 files changed, 43 insertions(+), 13 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 27cab342a0b2..1789626cf95e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -272,7 +272,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) return pmd_k; } -void vmalloc_sync_all(void) +static void vmalloc_sync(void) { unsigned long address; @@ -299,6 +299,16 @@ void vmalloc_sync_all(void) } } +void vmalloc_sync_mappings(void) +{ + vmalloc_sync(); +} + +void vmalloc_sync_unmappings(void) +{ + vmalloc_sync(); +} + /* * 32-bit: * @@ -401,11 +411,23 @@ static void dump_pagetable(unsigned long address) #else /* CONFIG_X86_64: */ -void vmalloc_sync_all(void) +void vmalloc_sync_mappings(void) { + /* + * 64-bit mappings might allocate new p4d/pud pages + * that need to be propagated to all tasks' PGDs. + */ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); } +void vmalloc_sync_unmappings(void) +{ + /* + * Unmappings never allocate or free p4d/pud pages. + * No work is required here. 
+ */ +} + /* * 64-bit: * diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index cd6fae6ad4c2..3f9f286088fa 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -201,7 +201,7 @@ static int ghes_estatus_pool_expand(unsigned long len) * New allocation must be visible in all pgd before it can be found by * an NMI allocating from the pool. */ - vmalloc_sync_all(); + vmalloc_sync_mappings(); return gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); } diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 1e5d8c392f15..29ef33498cb6 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -106,8 +106,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); -void vmalloc_sync_all(void); - +void vmalloc_sync_mappings(void); +void vmalloc_sync_unmappings(void); + /* * Lowlevel-APIs (not for driver use!) */ diff --git a/kernel/notifier.c b/kernel/notifier.c index 6196af8a8223..59a1e9b48a6a 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -552,7 +552,7 @@ NOKPROBE_SYMBOL(notify_die); int register_die_notifier(struct notifier_block *nb) { - vmalloc_sync_all(); + vmalloc_sync_mappings(); return atomic_notifier_chain_register(&die_chain, nb); } EXPORT_SYMBOL_GPL(register_die_notifier); diff --git a/mm/nommu.c b/mm/nommu.c index 17c00d93de2e..13fc62d97544 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -450,10 +450,14 @@ void vm_unmap_aliases(void) EXPORT_SYMBOL_GPL(vm_unmap_aliases); /* - * Implement a stub for vmalloc_sync_all() if the architecture chose not to - * have one. + * Implement a stub for vmalloc_sync_[un]mapping() if the architecture + * chose not to have one. 
*/ -void __weak vmalloc_sync_all(void) +void __weak vmalloc_sync_mappings(void) +{ +} + +void __weak vmalloc_sync_unmappings(void) { } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0b8852d80f44..d00961ba0c42 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1769,7 +1769,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, * First make sure the mappings are removed from all page-tables * before they are freed. */ - vmalloc_sync_all(); + vmalloc_sync_unmappings(); /* * In this function, newly allocated vm_struct has VM_UNINITIALIZED @@ -2318,16 +2318,19 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, EXPORT_SYMBOL(remap_vmalloc_range); /* - * Implement a stub for vmalloc_sync_all() if the architecture chose not to - * have one. + * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose + * not to have one. * * The purpose of this function is to make sure the vmalloc area * mappings are identical in all page-tables in the system. */ -void __weak vmalloc_sync_all(void) +void __weak vmalloc_sync_mappings(void) { } +void __weak vmalloc_sync_unmappings(void) +{ +} static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) { -- GitLab From c3024ba3e32637e3a541be4954799f5a096c39bb Mon Sep 17 00:00:00 2001 From: Anthony Mallet Date: Thu, 12 Mar 2020 14:31:00 +0100 Subject: [PATCH 0606/1278] USB: cdc-acm: fix close_delay and closing_wait units in TIOCSSERIAL [ Upstream commit 633e2b2ded739a34bd0fb1d8b5b871f7e489ea29 ] close_delay and closing_wait are specified in hundredth of a second but stored internally in jiffies. Use the jiffies_to_msecs() and msecs_to_jiffies() functions to convert from each other. 
Signed-off-by: Anthony Mallet Cc: stable Link: https://lore.kernel.org/r/20200312133101.7096-1-anthony.mallet@laas.fr Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/class/cdc-acm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 0453f0eb1178..74d0a91e8427 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -926,10 +926,10 @@ static int get_serial_info(struct acm *acm, struct serial_struct __user *info) memset(&tmp, 0, sizeof(tmp)); tmp.xmit_fifo_size = acm->writesize; tmp.baud_base = le32_to_cpu(acm->line.dwDTERate); - tmp.close_delay = acm->port.close_delay / 10; + tmp.close_delay = jiffies_to_msecs(acm->port.close_delay) / 10; tmp.closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ? ASYNC_CLOSING_WAIT_NONE : - acm->port.closing_wait / 10; + jiffies_to_msecs(acm->port.closing_wait) / 10; if (copy_to_user(info, &tmp, sizeof(tmp))) return -EFAULT; @@ -947,9 +947,10 @@ static int set_serial_info(struct acm *acm, if (copy_from_user(&new_serial, newinfo, sizeof(new_serial))) return -EFAULT; - close_delay = new_serial.close_delay * 10; + close_delay = msecs_to_jiffies(new_serial.close_delay * 10); closing_wait = new_serial.closing_wait == ASYNC_CLOSING_WAIT_NONE ? - ASYNC_CLOSING_WAIT_NONE : new_serial.closing_wait * 10; + ASYNC_CLOSING_WAIT_NONE : + msecs_to_jiffies(new_serial.closing_wait * 10); mutex_lock(&acm->port.mutex); -- GitLab From 712337610673846d89b07371b2dce77eb08b12e7 Mon Sep 17 00:00:00 2001 From: Anthony Mallet Date: Thu, 12 Mar 2020 14:31:01 +0100 Subject: [PATCH 0607/1278] USB: cdc-acm: fix rounding error in TIOCSSERIAL [ Upstream commit b401f8c4f492cbf74f3f59c9141e5be3071071bb ] By default, tty_port_init() initializes those parameters to a multiple of HZ. 
For instance in line 69 of tty_port.c: port->close_delay = (50 * HZ) / 100; https://github.com/torvalds/linux/blob/master/drivers/tty/tty_port.c#L69 With e.g. CONFIG_HZ = 250 (as this is the case for Ubuntu 18.04 linux-image-4.15.0-37-generic), the default setting for close_delay is thus 125. When ioctl(fd, TIOCGSERIAL, &s) is executed, the setting returned in user space is '12' (125/10). When ioctl(fd, TIOCSSERIAL, &s) is then executed with the same setting '12', the value is interpreted as '120' which is different from the current setting and an EPERM error may be raised by set_serial_info() if !CAP_SYS_ADMIN. https://github.com/torvalds/linux/blob/master/drivers/usb/class/cdc-acm.c#L919 Fixes: ba2d8ce9db0a6 ("cdc-acm: implement TIOCSSERIAL to avoid blocking close(2)") Signed-off-by: Anthony Mallet Cc: stable Link: https://lore.kernel.org/r/20200312133101.7096-2-anthony.mallet@laas.fr Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/class/cdc-acm.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 74d0a91e8427..30a124b74d45 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -942,6 +942,7 @@ static int set_serial_info(struct acm *acm, { struct serial_struct new_serial; unsigned int closing_wait, close_delay; + unsigned int old_closing_wait, old_close_delay; int retval = 0; if (copy_from_user(&new_serial, newinfo, sizeof(new_serial))) @@ -952,18 +953,24 @@ static int set_serial_info(struct acm *acm, ASYNC_CLOSING_WAIT_NONE : msecs_to_jiffies(new_serial.closing_wait * 10); + /* we must redo the rounding here, so that the values match */ + old_close_delay = jiffies_to_msecs(acm->port.close_delay) / 10; + old_closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ? 
+ ASYNC_CLOSING_WAIT_NONE : + jiffies_to_msecs(acm->port.closing_wait) / 10; + mutex_lock(&acm->port.mutex); - if (!capable(CAP_SYS_ADMIN)) { - if ((close_delay != acm->port.close_delay) || - (closing_wait != acm->port.closing_wait)) + if ((new_serial.close_delay != old_close_delay) || + (new_serial.closing_wait != old_closing_wait)) { + if (!capable(CAP_SYS_ADMIN)) retval = -EPERM; - else - retval = -EOPNOTSUPP; - } else { - acm->port.close_delay = close_delay; - acm->port.closing_wait = closing_wait; - } + else { + acm->port.close_delay = close_delay; + acm->port.closing_wait = closing_wait; + } + } else + retval = -EOPNOTSUPP; mutex_unlock(&acm->port.mutex); return retval; -- GitLab From c9a99f738913f03ab48ddd1220b651e71fbd9e79 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Tue, 10 Apr 2018 11:57:47 +0300 Subject: [PATCH 0608/1278] iio: adc: at91-sama5d2_adc: fix channel configuration for differential channels [ Upstream commit f0c8d1f6dc8eac5a1fbf441c8e080721a7b6c0ff ] When iterating through the channels, the index in the array is not the scan index. Added an xlate function to translate to the proper index. The result of the bug is that the channel array is indexed with a wrong index, thus instead of the proper channel, we access invalid memory, which may lead to invalid results and/or corruption. This will be used also for devicetree channel xlate. 
Fixes: 5e1a1da0f ("iio: adc: at91-sama5d2_adc: add hw trigger and buffer support") Fixes: 073c66201 ("iio: adc: at91-sama5d2_adc: add support for DMA") Signed-off-by: Eugen Hristev Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/at91-sama5d2_adc.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index a70ef7fec95f..0898f40c2b89 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -300,6 +300,27 @@ static const struct iio_chan_spec at91_adc_channels[] = { + AT91_SAMA5D2_DIFF_CHAN_CNT + 1), }; +static int at91_adc_chan_xlate(struct iio_dev *indio_dev, int chan) +{ + int i; + + for (i = 0; i < indio_dev->num_channels; i++) { + if (indio_dev->channels[i].scan_index == chan) + return i; + } + return -EINVAL; +} + +static inline struct iio_chan_spec const * +at91_adc_chan_get(struct iio_dev *indio_dev, int chan) +{ + int index = at91_adc_chan_xlate(indio_dev, chan); + + if (index < 0) + return NULL; + return indio_dev->channels + index; +} + static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) { struct iio_dev *indio = iio_trigger_get_drvdata(trig); @@ -317,8 +338,10 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) at91_adc_writel(st, AT91_SAMA5D2_TRGR, status); for_each_set_bit(bit, indio->active_scan_mask, indio->num_channels) { - struct iio_chan_spec const *chan = indio->channels + bit; + struct iio_chan_spec const *chan = at91_adc_chan_get(indio, bit); + if (!chan) + continue; if (state) { at91_adc_writel(st, AT91_SAMA5D2_CHER, BIT(chan->channel)); @@ -398,8 +421,11 @@ static irqreturn_t at91_adc_trigger_handler(int irq, void *p) u8 bit; for_each_set_bit(bit, indio->active_scan_mask, indio->num_channels) { - struct iio_chan_spec const *chan = indio->channels + bit; + struct iio_chan_spec const *chan = + 
at91_adc_chan_get(indio, bit); + if (!chan) + continue; st->buffer[i] = at91_adc_readl(st, chan->address); i++; } -- GitLab From 9d25ef1abe73f42c6cf40ca6505dee606742ec8b Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Tue, 28 Jan 2020 12:57:39 +0000 Subject: [PATCH 0609/1278] iio: adc: at91-sama5d2_adc: fix differential channels in triggered mode [ Upstream commit a500f3bd787f8224341e44b238f318c407b10897 ] The differential channels require writing the channel offset register (COR). Otherwise they do not work in differential mode. The configuration of COR is missing in triggered mode. Fixes: 5e1a1da0f8c9 ("iio: adc: at91-sama5d2_adc: add hw trigger and buffer support") Signed-off-by: Eugen Hristev Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/at91-sama5d2_adc.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 0898f40c2b89..34639ee2d2ce 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -339,9 +339,24 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) for_each_set_bit(bit, indio->active_scan_mask, indio->num_channels) { struct iio_chan_spec const *chan = at91_adc_chan_get(indio, bit); + u32 cor; if (!chan) continue; + if (state) { + cor = at91_adc_readl(st, AT91_SAMA5D2_COR); + + if (chan->differential) + cor |= (BIT(chan->channel) | + BIT(chan->channel2)) << + AT91_SAMA5D2_COR_DIFF_OFFSET; + else + cor &= ~(BIT(chan->channel) << + AT91_SAMA5D2_COR_DIFF_OFFSET); + + at91_adc_writel(st, AT91_SAMA5D2_COR, cor); + } + if (state) { at91_adc_writel(st, AT91_SAMA5D2_CHER, BIT(chan->channel)); -- GitLab From dafda49dfbf600de7d8d68d79b7b36df59c84584 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 11 Mar 2020 12:41:21 -0700 Subject: [PATCH 0610/1278] kbuild: Disable -Wpointer-to-enum-cast commit 82f2bc2fcc0160d6f82dd1ac64518ae0a4dd183f upstream. 
Clang's -Wpointer-to-int-cast deviates from GCC in that it warns when casting to enums. The kernel does this in certain places, such as device tree matches to set the version of the device being used, which allows the kernel to avoid using a gigantic union. https://elixir.bootlin.com/linux/v5.5.8/source/drivers/ata/ahci_brcm.c#L428 https://elixir.bootlin.com/linux/v5.5.8/source/drivers/ata/ahci_brcm.c#L402 https://elixir.bootlin.com/linux/v5.5.8/source/include/linux/mod_devicetable.h#L264 To avoid a ton of false positive warnings, disable this particular part of the warning, which has been split off into a separate diagnostic so that the entire warning does not need to be turned off for clang. It will be visible under W=1 in case people want to go about fixing these easily and enabling the warning treewide. Cc: stable@vger.kernel.org Link: https://github.com/ClangBuiltLinux/linux/issues/887 Link: https://github.com/llvm/llvm-project/commit/2a41b31fcdfcb67ab7038fc2ffb606fd50b83a84 Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/Makefile.extrawarn | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 8d5357053f86..486e135d3e30 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -72,5 +72,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format) KBUILD_CFLAGS += $(call cc-disable-warning, sign-compare) KBUILD_CFLAGS += $(call cc-disable-warning, format-zero-length) KBUILD_CFLAGS += $(call cc-disable-warning, uninitialized) +KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) endif endif -- GitLab From e52694b56eb6d4b1fe424bda6126b8ce13c246a8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 4 Mar 2020 11:28:31 +0100 Subject: [PATCH 0611/1278] futex: Fix inode life-time issue commit 8019ad13ef7f64be44d4f892af9c840179009254 upstream. As reported by Jann, ihold() does not in fact guarantee inode persistence. 
And instead of making it so, replace the usage of inode pointers with a per boot, machine wide, unique inode identifier. This sequence number is global, but shared (file backed) futexes are rare enough that this should not become a performance issue. Reported-by: Jann Horn Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Greg Kroah-Hartman --- fs/inode.c | 1 + include/linux/fs.h | 1 + include/linux/futex.h | 17 +++++---- kernel/futex.c | 89 ++++++++++++++++++++++++++----------------- 4 files changed, 65 insertions(+), 43 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index d2a700c5efce..17172b616d22 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -135,6 +135,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; + atomic64_set(&inode->i_sequence, 0); atomic_set(&inode->i_count, 1); inode->i_op = &empty_iops; inode->i_fop = &no_open_fops; diff --git a/include/linux/fs.h b/include/linux/fs.h index f0fddf4ea828..a94642fcf93b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -645,6 +645,7 @@ struct inode { struct rcu_head i_rcu; }; u64 i_version; + atomic64_t i_sequence; /* see futex */ atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; diff --git a/include/linux/futex.h b/include/linux/futex.h index a4b6cba699bf..6adb1ccac603 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -34,23 +34,26 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, union futex_key { struct { + u64 i_seq; unsigned long pgoff; - struct inode *inode; - int offset; + unsigned int offset; } shared; struct { + union { + struct mm_struct *mm; + u64 __tmp; + }; unsigned long address; - struct mm_struct *mm; - int offset; + unsigned int offset; } private; struct { + u64 ptr; unsigned long word; - void *ptr; - int offset; + unsigned int offset; } both; }; -#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } 
} +#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } } #ifdef CONFIG_FUTEX enum { diff --git a/kernel/futex.c b/kernel/futex.c index f5aae14c247b..3f1b7c003b70 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -445,7 +445,7 @@ static void get_futex_key_refs(union futex_key *key) switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: - ihold(key->shared.inode); /* implies smp_mb(); (B) */ + smp_mb(); /* explicit smp_mb(); (B) */ break; case FUT_OFF_MMSHARED: futex_get_mm(key); /* implies smp_mb(); (B) */ @@ -479,7 +479,6 @@ static void drop_futex_key_refs(union futex_key *key) switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: - iput(key->shared.inode); break; case FUT_OFF_MMSHARED: mmdrop(key->private.mm); @@ -487,6 +486,46 @@ static void drop_futex_key_refs(union futex_key *key) } } +/* + * Generate a machine wide unique identifier for this inode. + * + * This relies on u64 not wrapping in the life-time of the machine; which with + * 1ns resolution means almost 585 years. + * + * This further relies on the fact that a well formed program will not unmap + * the file while it has a (shared) futex waiting on it. This mapping will have + * a file reference which pins the mount and inode. + * + * If for some reason an inode gets evicted and read back in again, it will get + * a new sequence number and will _NOT_ match, even though it is the exact same + * file. + * + * It is important that match_futex() will never have a false-positive, esp. + * for PI futexes that can mess up the state. The above argues that false-negatives + * are only possible for malformed programs. + */ +static u64 get_inode_sequence_number(struct inode *inode) +{ + static atomic64_t i_seq; + u64 old; + + /* Does the inode already have a sequence number? 
*/ + old = atomic64_read(&inode->i_sequence); + if (likely(old)) + return old; + + for (;;) { + u64 new = atomic64_add_return(1, &i_seq); + if (WARN_ON_ONCE(!new)) + continue; + + old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); + if (old) + return old; + return new; + } +} + /** * get_futex_key() - Get parameters which are the keys for a futex * @uaddr: virtual address of the futex @@ -499,9 +538,15 @@ static void drop_futex_key_refs(union futex_key *key) * * The key words are stored in @key on success. * - * For shared mappings, it's (page->index, file_inode(vma->vm_file), - * offset_within_page). For private mappings, it's (uaddr, current->mm). - * We can usually work out the index without swapping in the page. + * For shared mappings (when @fshared), the key is: + * ( inode->i_sequence, page->index, offset_within_page ) + * [ also see get_inode_sequence_number() ] + * + * For private mappings (or when !@fshared), the key is: + * ( current->mm, address, 0 ) + * + * This allows (cross process, where applicable) identification of the futex + * without keeping the page pinned for the duration of the FUTEX_WAIT. * * lock_page() might sleep, the caller should not hold a spinlock. */ @@ -641,8 +686,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) key->private.mm = mm; key->private.address = address; - get_futex_key_refs(key); /* implies smp_mb(); (B) */ - } else { struct inode *inode; @@ -674,40 +717,14 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) goto again; } - /* - * Take a reference unless it is about to be freed. Previously - * this reference was taken by ihold under the page lock - * pinning the inode in place so i_lock was unnecessary. The - * only way for this check to fail is if the inode was - * truncated in parallel which is almost certainly an - * application bug. In such a case, just retry. 
- * - * We are not calling into get_futex_key_refs() in file-backed - * cases, therefore a successful atomic_inc return below will - * guarantee that get_futex_key() will still imply smp_mb(); (B). - */ - if (!atomic_inc_not_zero(&inode->i_count)) { - rcu_read_unlock(); - put_page(page); - - goto again; - } - - /* Should be impossible but lets be paranoid for now */ - if (WARN_ON_ONCE(inode->i_mapping != mapping)) { - err = -EFAULT; - rcu_read_unlock(); - iput(inode); - - goto out; - } - key->both.offset |= FUT_OFF_INODE; /* inode-based key */ - key->shared.inode = inode; + key->shared.i_seq = get_inode_sequence_number(inode); key->shared.pgoff = basepage_index(tail); rcu_read_unlock(); } + get_futex_key_refs(key); /* implies smp_mb(); (B) */ + out: put_page(page); return err; -- GitLab From 25963a6606d3180b308425a402075a4bb9d07e01 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 8 Mar 2020 19:07:17 +0100 Subject: [PATCH 0612/1278] futex: Unbreak futex hashing commit 8d67743653dce5a0e7aa500fcccb237cde7ad88e upstream. The recent futex inode life time fix changed the ordering of the futex key union struct members, but forgot to adjust the hash function accordingly. As a result the hashing omits the leading 64bit and even hashes beyond the futex key causing a bad hash distribution which led to a ~100% performance regression. Hand in the futex key pointer instead of a random struct member and make the size calculation based on the struct offset.
Fixes: 8019ad13ef7f ("futex: Fix inode life-time issue") Reported-by: Rong Chen Decoded-by: Linus Torvalds Signed-off-by: Thomas Gleixner Tested-by: Rong Chen Link: https://lkml.kernel.org/r/87h7yy90ve.fsf@nanos.tec.linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 3f1b7c003b70..2921ebaa1467 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -401,9 +401,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb) */ static struct futex_hash_bucket *hash_futex(union futex_key *key) { - u32 hash = jhash2((u32*)&key->both.word, - (sizeof(key->both.word)+sizeof(key->both.ptr))/4, + u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, key->both.offset); + return &futex_queues[hash & (futex_hashsize - 1)]; } -- GitLab From afed1a4dbb76c81900f10fd77397fb91ad442702 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 23 Mar 2020 16:21:31 -0400 Subject: [PATCH 0613/1278] Revert "vrf: mark skb for multicast or link-local as enslaved to VRF" This reverts commit 2271c9500434af2a26b2c9eadeb3c0b075409fb5. This patch shouldn't have been backported to 4.14. Signed-off-by: Sasha Levin --- drivers/net/vrf.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index e0cea5c05f0e..03e4fcdfeab7 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -996,23 +996,24 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { int orig_iif = skb->skb_iif; - bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); - bool is_ndisc = ipv6_ndisc_frame(skb); + bool need_strict; - /* loopback, multicast & non-ND link-local traffic; do not push through - * packet taps again. Reset pkt_type for upper layers to process skb + /* loopback traffic; do not push through packet taps again. 
+ * Reset pkt_type for upper layers to process skb */ - if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { + if (skb->pkt_type == PACKET_LOOPBACK) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IP6CB(skb)->flags |= IP6SKB_L3SLAVE; - if (skb->pkt_type == PACKET_LOOPBACK) - skb->pkt_type = PACKET_HOST; + skb->pkt_type = PACKET_HOST; goto out; } - /* if packet is NDISC then keep the ingress interface */ - if (!is_ndisc) { + /* if packet is NDISC or addressed to multicast or link-local + * then keep the ingress interface + */ + need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); + if (!ipv6_ndisc_frame(skb) && !need_strict) { vrf_rx_stats(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; -- GitLab From c4ad116663f3d589d39ceb382dfcafb8feae2960 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 23 Mar 2020 16:21:45 -0400 Subject: [PATCH 0614/1278] Revert "ipv6: Fix handling of LLA with VRF and sockets bound to VRF" This reverts commit 0293f8d1bdd21b3eb71032edb5832f9090dea48e. This patch shouldn't have been backported to 4.14. 
Signed-off-by: Sasha Levin --- net/ipv6/tcp_ipv6.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5ec73cf386df..7b4ce3f9e2f4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -718,7 +718,6 @@ static void tcp_v6_init_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb) { - bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = inet6_sk(sk_listener); @@ -726,7 +725,7 @@ static void tcp_v6_init_req(struct request_sock *req, ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; /* So that link locals have meaning */ - if ((!sk_listener->sk_bound_dev_if || l3_slave) && + if (!sk_listener->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); -- GitLab From 1a2a8799c0df449af1c48d64aed6724117080f02 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 11 Mar 2020 14:13:28 +0800 Subject: [PATCH 0615/1278] ALSA: hda/realtek: Fix pop noise on ALC225 commit 3b36b13d5e69d6f51ff1c55d1b404a74646c9757 upstream. Commit 317d9313925c ("ALSA: hda/realtek - Set default power save node to 0") makes the ALC225 have pop noise on S3 resume and cold boot. So partially revert this commit for ALC225 to fix the regression. 
Fixes: 317d9313925c ("ALSA: hda/realtek - Set default power save node to 0") BugLink: https://bugs.launchpad.net/bugs/1866357 Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20200311061328.17614-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5a7afbeb612d..b500dad33ea9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4687,6 +4687,8 @@ static void alc_determine_headset_type(struct hda_codec *codec) is_ctia = (val & 0x1c02) == 0x1c02; break; case 0x10ec0225: + codec->power_save_node = 1; + /* fall through */ case 0x10ec0295: case 0x10ec0299: alc_process_coef_fw(codec, alc225_pre_hsmode); -- GitLab From 194fb5c02e6b4cf5deabe7f53a761cebc3450b43 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 11 Mar 2020 17:12:44 +0000 Subject: [PATCH 0616/1278] arm64: smp: fix smp_send_stop() behaviour commit d0bab0c39e32d39a8c5cddca72e5b4a3059fe050 upstream. On a system with only one CPU online, when another one CPU panics while starting-up, smp_send_stop() will fail to send any STOP message to the other already online core, resulting in a system still responsive and alive at the end of the panic procedure. [ 186.700083] CPU3: shutdown [ 187.075462] CPU2: shutdown [ 187.162869] CPU1: shutdown [ 188.689998] ------------[ cut here ]------------ [ 188.691645] kernel BUG at arch/arm64/kernel/cpufeature.c:886! 
[ 188.692079] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 188.692444] Modules linked in: [ 188.693031] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.6.0-rc4-00001-g338d25c35a98 #104 [ 188.693175] Hardware name: Foundation-v8A (DT) [ 188.693492] pstate: 200001c5 (nzCv dAIF -PAN -UAO) [ 188.694183] pc : has_cpuid_feature+0xf0/0x348 [ 188.694311] lr : verify_local_elf_hwcaps+0x84/0xe8 [ 188.694410] sp : ffff800011b1bf60 [ 188.694536] x29: ffff800011b1bf60 x28: 0000000000000000 [ 188.694707] x27: 0000000000000000 x26: 0000000000000000 [ 188.694801] x25: 0000000000000000 x24: ffff80001189a25c [ 188.694905] x23: 0000000000000000 x22: 0000000000000000 [ 188.694996] x21: ffff8000114aa018 x20: ffff800011156a38 [ 188.695089] x19: ffff800010c944a0 x18: 0000000000000004 [ 188.695187] x17: 0000000000000000 x16: 0000000000000000 [ 188.695280] x15: 0000249dbde5431e x14: 0262cbe497efa1fa [ 188.695371] x13: 0000000000000002 x12: 0000000000002592 [ 188.695472] x11: 0000000000000080 x10: 00400032b5503510 [ 188.695572] x9 : 0000000000000000 x8 : ffff800010c80204 [ 188.695659] x7 : 00000000410fd0f0 x6 : 0000000000000001 [ 188.695750] x5 : 00000000410fd0f0 x4 : 0000000000000000 [ 188.695836] x3 : 0000000000000000 x2 : ffff8000100939d8 [ 188.695919] x1 : 0000000000180420 x0 : 0000000000180480 [ 188.696253] Call trace: [ 188.696410] has_cpuid_feature+0xf0/0x348 [ 188.696504] verify_local_elf_hwcaps+0x84/0xe8 [ 188.696591] check_local_cpu_capabilities+0x44/0x128 [ 188.696666] secondary_start_kernel+0xf4/0x188 [ 188.697150] Code: 52805001 72a00301 6b01001f 54000ec0 (d4210000) [ 188.698639] ---[ end trace 3f12ca47652f7b72 ]--- [ 188.699160] Kernel panic - not syncing: Attempted to kill the idle task! [ 188.699546] Kernel Offset: disabled [ 188.699828] CPU features: 0x00004,20c02008 [ 188.700012] Memory Limit: none [ 188.700538] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! 
]--- [root@arch ~]# echo Helo Helo [root@arch ~]# cat /proc/cpuinfo | grep proce processor : 0 Make smp_send_stop() account also for the online status of the calling CPU while evaluating how many CPUs are effectively online: this way, the right number of STOPs is sent, so enforcing a proper freeze of the system at the end of panic even under the above conditions. Fixes: 08e875c16a16c ("arm64: SMP support") Reported-by: Dave Martin Acked-by: Mark Rutland Signed-off-by: Cristian Marussi Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/smp.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 909bf3926fd2..c5e20dd48560 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -913,11 +913,22 @@ void tick_broadcast(const struct cpumask *mask) } #endif +/* + * The number of CPUs online, not counting this CPU (which may not be + * fully online and so not counted in num_online_cpus()). 
+ */ +static inline unsigned int num_other_online_cpus(void) +{ + unsigned int this_cpu_online = cpu_online(smp_processor_id()); + + return num_online_cpus() - this_cpu_online; +} + void smp_send_stop(void) { unsigned long timeout; - if (num_online_cpus() > 1) { + if (num_other_online_cpus()) { cpumask_t mask; cpumask_copy(&mask, cpu_online_mask); @@ -930,10 +941,10 @@ void smp_send_stop(void) /* Wait up to one second for other CPUs to stop */ timeout = USEC_PER_SEC; - while (num_online_cpus() > 1 && timeout--) + while (num_other_online_cpus() && timeout--) udelay(1); - if (num_online_cpus() > 1) + if (num_other_online_cpus()) pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", cpumask_pr_args(cpu_online_mask)); } -- GitLab From 2742449ee97b78597f0bc723fd123e1a85e0a17a Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 11 Mar 2020 17:12:45 +0000 Subject: [PATCH 0617/1278] arm64: smp: fix crash_smp_send_stop() behaviour commit f50b7dacccbab2b9e3ef18f52a6dcc18ed2050b9 upstream. On a system configured to trigger a crash_kexec() reboot, when only one CPU is online and another CPU panics while starting-up, crash_smp_send_stop() will fail to send any STOP message to the other already online core, resulting in a failure to freeze it and its registers not being properly saved. Moreover even if the proper messages are sent (case CPUs > 2) it will similarly fail to account for the booting CPU when executing the final stop wait-loop, so potentially resulting in some CPUs not being waited for to shut down before rebooting. A tangible effect of this behaviour can be observed when, after a panic with kexec enabled and loaded, on the following reboot triggered by kexec, the cpu that could not be successfully stopped fails to come back online: [ 362.291022] ------------[ cut here ]------------ [ 362.291525] kernel BUG at arch/arm64/kernel/cpufeature.c:886!
[ 362.292023] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 362.292400] Modules linked in: [ 362.292970] CPU: 3 PID: 0 Comm: swapper/3 Kdump: loaded Not tainted 5.6.0-rc4-00003-gc780b890948a #105 [ 362.293136] Hardware name: Foundation-v8A (DT) [ 362.293382] pstate: 200001c5 (nzCv dAIF -PAN -UAO) [ 362.294063] pc : has_cpuid_feature+0xf0/0x348 [ 362.294177] lr : verify_local_elf_hwcaps+0x84/0xe8 [ 362.294280] sp : ffff800011b1bf60 [ 362.294362] x29: ffff800011b1bf60 x28: 0000000000000000 [ 362.294534] x27: 0000000000000000 x26: 0000000000000000 [ 362.294631] x25: 0000000000000000 x24: ffff80001189a25c [ 362.294718] x23: 0000000000000000 x22: 0000000000000000 [ 362.294803] x21: ffff8000114aa018 x20: ffff800011156a00 [ 362.294897] x19: ffff800010c944a0 x18: 0000000000000004 [ 362.294987] x17: 0000000000000000 x16: 0000000000000000 [ 362.295073] x15: 00004e53b831ae3c x14: 00004e53b831ae3c [ 362.295165] x13: 0000000000000384 x12: 0000000000000000 [ 362.295251] x11: 0000000000000000 x10: 00400032b5503510 [ 362.295334] x9 : 0000000000000000 x8 : ffff800010c7e204 [ 362.295426] x7 : 00000000410fd0f0 x6 : 0000000000000001 [ 362.295508] x5 : 00000000410fd0f0 x4 : 0000000000000000 [ 362.295592] x3 : 0000000000000000 x2 : ffff8000100939d8 [ 362.295683] x1 : 0000000000180420 x0 : 0000000000180480 [ 362.296011] Call trace: [ 362.296257] has_cpuid_feature+0xf0/0x348 [ 362.296350] verify_local_elf_hwcaps+0x84/0xe8 [ 362.296424] check_local_cpu_capabilities+0x44/0x128 [ 362.296497] secondary_start_kernel+0xf4/0x188 [ 362.296998] Code: 52805001 72a00301 6b01001f 54000ec0 (d4210000) [ 362.298652] SMP: stopping secondary CPUs [ 362.300615] Starting crashdump kernel... [ 362.301168] Bye! 
[ 0.000000] Booting Linux on physical CPU 0x0000000003 [0x410fd0f0] [ 0.000000] Linux version 5.6.0-rc4-00003-gc780b890948a (crimar01@e120937-lin) (gcc version 8.3.0 (GNU Toolchain for the A-profile Architecture 8.3-2019.03 (arm-rel-8.36))) #105 SMP PREEMPT Fri Mar 6 17:00:42 GMT 2020 [ 0.000000] Machine model: Foundation-v8A [ 0.000000] earlycon: pl11 at MMIO 0x000000001c090000 (options '') [ 0.000000] printk: bootconsole [pl11] enabled ..... [ 0.138024] rcu: Hierarchical SRCU implementation. [ 0.153472] its@2f020000: unable to locate ITS domain [ 0.154078] its@2f020000: Unable to locate ITS domain [ 0.157541] EFI services will not be available. [ 0.175395] smp: Bringing up secondary CPUs ... [ 0.209182] psci: failed to boot CPU1 (-22) [ 0.209377] CPU1: failed to boot: -22 [ 0.274598] Detected PIPT I-cache on CPU2 [ 0.278707] GICv3: CPU2: found redistributor 1 region 0:0x000000002f120000 [ 0.285212] CPU2: Booted secondary processor 0x0000000001 [0x410fd0f0] [ 0.369053] Detected PIPT I-cache on CPU3 [ 0.372947] GICv3: CPU3: found redistributor 2 region 0:0x000000002f140000 [ 0.378664] CPU3: Booted secondary processor 0x0000000002 [0x410fd0f0] [ 0.401707] smp: Brought up 1 node, 3 CPUs [ 0.404057] SMP: Total of 3 processors activated. Make crash_smp_send_stop() account also for the online status of the calling CPU while evaluating how many CPUs are effectively online: this way the right number of STOPs is sent and all other stopped-cores's registers are properly saved. 
Fixes: 78fd584cdec05 ("arm64: kdump: implement machine_crash_shutdown()") Acked-by: Mark Rutland Signed-off-by: Cristian Marussi Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/smp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index c5e20dd48560..cfb5a6ad7865 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -965,13 +965,17 @@ void crash_smp_send_stop(void) cpus_stopped = 1; - if (num_online_cpus() == 1) + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. + */ + if (num_other_online_cpus() == 0) return; cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); pr_crit("SMP: stopping secondary CPUs\n"); smp_cross_call(&mask, IPI_CPU_CRASH_STOP); -- GitLab From bff9bc14148c35e59260449f6a1d0ebe6e8fde1b Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Thu, 5 Mar 2020 00:25:09 +0100 Subject: [PATCH 0618/1278] drm/bridge: dw-hdmi: fix AVI frame colorimetry commit e8dca30f7118461d47e1c3510d0e31b277439151 upstream. CTA-861-F explicitly states that for RGB colorspace colorimetry should be set to "none". Fix that. 
Acked-by: Laurent Pinchart Fixes: def23aa7e982 ("drm: bridge: dw-hdmi: Switch to V4L bus format and encodings") Signed-off-by: Jernej Skrabec Link: https://patchwork.freedesktop.org/patch/msgid/20200304232512.51616-2-jernej.skrabec@siol.net Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 46 +++++++++++++---------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index cc1094f90125..96cf64d0ee82 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1348,28 +1348,34 @@ static void hdmi_config_AVI(struct dw_hdmi *hdmi, struct drm_display_mode *mode) frame.colorspace = HDMI_COLORSPACE_RGB; /* Set up colorimetry */ - switch (hdmi->hdmi_data.enc_out_encoding) { - case V4L2_YCBCR_ENC_601: - if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601) - frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; - else + if (!hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) { + switch (hdmi->hdmi_data.enc_out_encoding) { + case V4L2_YCBCR_ENC_601: + if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601) + frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; + else + frame.colorimetry = HDMI_COLORIMETRY_ITU_601; + frame.extended_colorimetry = + HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; + break; + case V4L2_YCBCR_ENC_709: + if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709) + frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; + else + frame.colorimetry = HDMI_COLORIMETRY_ITU_709; + frame.extended_colorimetry = + HDMI_EXTENDED_COLORIMETRY_XV_YCC_709; + break; + default: /* Carries no data */ frame.colorimetry = HDMI_COLORIMETRY_ITU_601; + frame.extended_colorimetry = + HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; + break; + } + } else { + frame.colorimetry = HDMI_COLORIMETRY_NONE; frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; - break; - case V4L2_YCBCR_ENC_709: - if 
(hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709) - frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; - else - frame.colorimetry = HDMI_COLORIMETRY_ITU_709; - frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_709; - break; - default: /* Carries no data */ - frame.colorimetry = HDMI_COLORIMETRY_ITU_601; - frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; - break; + HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; } frame.scan_mode = HDMI_SCAN_MODE_NONE; -- GitLab From dacad5570ef403172e3cc2e01b6e51ae99591e5d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 12 Mar 2020 12:01:50 +0100 Subject: [PATCH 0619/1278] staging: greybus: loopback_test: fix potential path truncation commit f16023834863932f95dfad13fac3fc47f77d2f29 upstream. Newer GCC warns about a possible truncation of a generated sysfs path name as we're concatenating a directory path with a file name and placing the result in a buffer that is half the size of the maximum length of the directory path (which is user controlled). loopback_test.c: In function 'open_poll_files': loopback_test.c:651:31: warning: '%s' directive output may be truncated writing up to 511 bytes into a region of size 255 [-Wformat-truncation=] 651 | snprintf(buf, sizeof(buf), "%s%s", dev->sysfs_entry, "iteration_count"); | ^~ loopback_test.c:651:3: note: 'snprintf' output between 16 and 527 bytes into a destination of size 255 651 | snprintf(buf, sizeof(buf), "%s%s", dev->sysfs_entry, "iteration_count"); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix this by making sure the buffer is large enough for the concatenated strings.
Fixes: 6b0658f68786 ("greybus: tools: Add tools directory to greybus repo and add loopback") Fixes: 9250c0ee2626 ("greybus: Loopback_test: use poll instead of inotify") Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20200312110151.22028-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/tools/loopback_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c index fbe589fca840..ddea50523d56 100644 --- a/drivers/staging/greybus/tools/loopback_test.c +++ b/drivers/staging/greybus/tools/loopback_test.c @@ -644,7 +644,7 @@ int find_loopback_devices(struct loopback_test *t) static int open_poll_files(struct loopback_test *t) { struct loopback_device *dev; - char buf[MAX_STR_LEN]; + char buf[MAX_SYSFS_PATH + MAX_STR_LEN]; char dummy; int fds_idx = 0; int i; -- GitLab From e3bc8d886b40801abde9e01b85157994171be3bb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 12 Mar 2020 12:01:51 +0100 Subject: [PATCH 0620/1278] staging: greybus: loopback_test: fix potential path truncations commit ae62cf5eb2792d9a818c2d93728ed92119357017 upstream. Newer GCC warns about possible truncations of two generated path names as we're concatenating the configurable sysfs and debugfs path prefixes with a filename and placing the results in buffers of the same size as the maximum length of the prefixes. snprintf(d->name, MAX_STR_LEN, "gb_loopback%u", dev_id); snprintf(d->sysfs_entry, MAX_SYSFS_PATH, "%s%s/", t->sysfs_prefix, d->name); snprintf(d->debugfs_entry, MAX_SYSFS_PATH, "%sraw_latency_%s", t->debugfs_prefix, d->name); Fix this by separating the maximum path length from the maximum prefix length and reducing the latter enough to fit the generated strings. Note that we also need to reduce the device-name buffer size as GCC isn't smart enough to figure out that we ever only used MAX_STR_LEN bytes of it. 
Fixes: 6b0658f68786 ("greybus: tools: Add tools directory to greybus repo and add loopback") Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20200312110151.22028-4-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/tools/loopback_test.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c index ddea50523d56..b6aa70b94f33 100644 --- a/drivers/staging/greybus/tools/loopback_test.c +++ b/drivers/staging/greybus/tools/loopback_test.c @@ -20,6 +20,7 @@ #include #define MAX_NUM_DEVICES 10 +#define MAX_SYSFS_PREFIX 0x80 #define MAX_SYSFS_PATH 0x200 #define CSV_MAX_LINE 0x1000 #define SYSFS_MAX_INT 0x20 @@ -68,7 +69,7 @@ struct loopback_results { }; struct loopback_device { - char name[MAX_SYSFS_PATH]; + char name[MAX_STR_LEN]; char sysfs_entry[MAX_SYSFS_PATH]; char debugfs_entry[MAX_SYSFS_PATH]; struct loopback_results results; @@ -94,8 +95,8 @@ struct loopback_test { int stop_all; int poll_count; char test_name[MAX_STR_LEN]; - char sysfs_prefix[MAX_SYSFS_PATH]; - char debugfs_prefix[MAX_SYSFS_PATH]; + char sysfs_prefix[MAX_SYSFS_PREFIX]; + char debugfs_prefix[MAX_SYSFS_PREFIX]; struct timespec poll_timeout; struct loopback_device devices[MAX_NUM_DEVICES]; struct loopback_results aggregate_results; @@ -914,10 +915,10 @@ int main(int argc, char *argv[]) t.iteration_max = atoi(optarg); break; case 'S': - snprintf(t.sysfs_prefix, MAX_SYSFS_PATH, "%s", optarg); + snprintf(t.sysfs_prefix, MAX_SYSFS_PREFIX, "%s", optarg); break; case 'D': - snprintf(t.debugfs_prefix, MAX_SYSFS_PATH, "%s", optarg); + snprintf(t.debugfs_prefix, MAX_SYSFS_PREFIX, "%s", optarg); break; case 'm': t.mask = atol(optarg); @@ -968,10 +969,10 @@ int main(int argc, char *argv[]) } if (!strcmp(t.sysfs_prefix, "")) - snprintf(t.sysfs_prefix, MAX_SYSFS_PATH, "%s", sysfs_prefix); + snprintf(t.sysfs_prefix, MAX_SYSFS_PREFIX, "%s", sysfs_prefix); if 
(!strcmp(t.debugfs_prefix, "")) - snprintf(t.debugfs_prefix, MAX_SYSFS_PATH, "%s", debugfs_prefix); + snprintf(t.debugfs_prefix, MAX_SYSFS_PREFIX, "%s", debugfs_prefix); ret = find_loopback_devices(&t); if (ret) -- GitLab From 0207ef3379517de58c1f9d6cbb894145c94cc7fa Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 28 Nov 2018 16:00:05 -0500 Subject: [PATCH 0621/1278] Revert "drm/dp_mst: Skip validating ports during destruction, just ref" commit 9765635b30756eb74e05e260ac812659c296cd28 upstream. This reverts commit: c54c7374ff44 ("drm/dp_mst: Skip validating ports during destruction, just ref") ugh. In drm_dp_destroy_connector_work(), we have a pretty good chance of freeing the actual struct drm_dp_mst_port. However, after destroying things we send a hotplug through (*mgr->cbs->hotplug)(mgr) which is where the problems start. For i915, this calls all the way down to the fbcon probing helpers, which start trying to access the port in a modeset. [ 45.062001] ================================================================== [ 45.062112] BUG: KASAN: use-after-free in ex_handler_refcount+0x146/0x180 [ 45.062196] Write of size 4 at addr ffff8882b4b70968 by task kworker/3:1/53 [ 45.062325] CPU: 3 PID: 53 Comm: kworker/3:1 Kdump: loaded Tainted: G O 4.20.0-rc4Lyude-Test+ #3 [ 45.062442] Hardware name: LENOVO 20BWS1KY00/20BWS1KY00, BIOS JBET71WW (1.35 ) 09/14/2018 [ 45.062554] Workqueue: events drm_dp_destroy_connector_work [drm_kms_helper] [ 45.062641] Call Trace: [ 45.062685] dump_stack+0xbd/0x15a [ 45.062735] ? dump_stack_print_info.cold.0+0x1b/0x1b [ 45.062801] ? printk+0x9f/0xc5 [ 45.062847] ? kmsg_dump_rewind_nolock+0xe4/0xe4 [ 45.062909] ? ex_handler_refcount+0x146/0x180 [ 45.062970] print_address_description+0x71/0x239 [ 45.063036] ? ex_handler_refcount+0x146/0x180 [ 45.063095] kasan_report.cold.5+0x242/0x30b [ 45.063155] __asan_report_store4_noabort+0x1c/0x20 [ 45.063313] ex_handler_refcount+0x146/0x180 [ 45.063371] ? 
ex_handler_clear_fs+0xb0/0xb0 [ 45.063428] fixup_exception+0x98/0xd7 [ 45.063484] ? raw_notifier_call_chain+0x20/0x20 [ 45.063548] do_trap+0x6d/0x210 [ 45.063605] ? _GLOBAL__sub_I_65535_1_drm_dp_aux_unregister_devnode+0x2f/0x1c6 [drm_kms_helper] [ 45.063732] do_error_trap+0xc0/0x170 [ 45.063802] ? _GLOBAL__sub_I_65535_1_drm_dp_aux_unregister_devnode+0x2f/0x1c6 [drm_kms_helper] [ 45.063929] do_invalid_op+0x3b/0x50 [ 45.063997] ? _GLOBAL__sub_I_65535_1_drm_dp_aux_unregister_devnode+0x2f/0x1c6 [drm_kms_helper] [ 45.064103] invalid_op+0x14/0x20 [ 45.064162] RIP: 0010:_GLOBAL__sub_I_65535_1_drm_dp_aux_unregister_devnode+0x2f/0x1c6 [drm_kms_helper] [ 45.064274] Code: 00 48 c7 c7 80 fe 53 a0 48 89 e5 e8 5b 6f 26 e1 5d c3 48 8d 0e 0f 0b 48 8d 0b 0f 0b 48 8d 0f 0f 0b 48 8d 0f 0f 0b 49 8d 4d 00 <0f> 0b 49 8d 0e 0f 0b 48 8d 08 0f 0b 49 8d 4d 00 0f 0b 48 8d 0b 0f [ 45.064569] RSP: 0018:ffff8882b789ee10 EFLAGS: 00010282 [ 45.064637] RAX: ffff8882af47ae70 RBX: ffff8882af47aa60 RCX: ffff8882b4b70968 [ 45.064723] RDX: ffff8882af47ae70 RSI: 0000000000000008 RDI: ffff8882b788bdb8 [ 45.064808] RBP: ffff8882b789ee28 R08: ffffed1056f13db4 R09: ffffed1056f13db3 [ 45.064894] R10: ffffed1056f13db3 R11: ffff8882b789ed9f R12: ffff8882af47ad28 [ 45.064980] R13: ffff8882b4b70968 R14: ffff8882acd86728 R15: ffff8882b4b75dc8 [ 45.065084] drm_dp_mst_reset_vcpi_slots+0x12/0x80 [drm_kms_helper] [ 45.065225] intel_mst_disable_dp+0xda/0x180 [i915] [ 45.065361] intel_encoders_disable.isra.107+0x197/0x310 [i915] [ 45.065498] haswell_crtc_disable+0xbe/0x400 [i915] [ 45.065622] ? i9xx_disable_plane+0x1c0/0x3e0 [i915] [ 45.065750] intel_atomic_commit_tail+0x74e/0x3e60 [i915] [ 45.065884] ? intel_pre_plane_update+0xbc0/0xbc0 [i915] [ 45.065968] ? drm_atomic_helper_swap_state+0x88b/0x1d90 [drm_kms_helper] [ 45.066054] ? kasan_check_write+0x14/0x20 [ 45.066165] ? i915_gem_track_fb+0x13a/0x330 [i915] [ 45.066277] ? i915_sw_fence_complete+0xe9/0x140 [i915] [ 45.066406] ? 
__i915_sw_fence_complete+0xc50/0xc50 [i915] [ 45.066540] intel_atomic_commit+0x72e/0xef0 [i915] [ 45.066635] ? drm_dev_dbg+0x200/0x200 [drm] [ 45.066764] ? intel_atomic_commit_tail+0x3e60/0x3e60 [i915] [ 45.066898] ? intel_atomic_commit_tail+0x3e60/0x3e60 [i915] [ 45.067001] drm_atomic_commit+0xc4/0xf0 [drm] [ 45.067074] restore_fbdev_mode_atomic+0x562/0x780 [drm_kms_helper] [ 45.067166] ? drm_fb_helper_debug_leave+0x690/0x690 [drm_kms_helper] [ 45.067249] ? kasan_check_read+0x11/0x20 [ 45.067324] restore_fbdev_mode+0x127/0x4b0 [drm_kms_helper] [ 45.067364] ? kasan_check_read+0x11/0x20 [ 45.067406] drm_fb_helper_restore_fbdev_mode_unlocked+0x164/0x200 [drm_kms_helper] [ 45.067462] ? drm_fb_helper_hotplug_event+0x30/0x30 [drm_kms_helper] [ 45.067508] ? kasan_check_write+0x14/0x20 [ 45.070360] ? mutex_unlock+0x22/0x40 [ 45.073748] drm_fb_helper_set_par+0xb2/0xf0 [drm_kms_helper] [ 45.075846] drm_fb_helper_hotplug_event.part.33+0x1cd/0x290 [drm_kms_helper] [ 45.078088] drm_fb_helper_hotplug_event+0x1c/0x30 [drm_kms_helper] [ 45.082614] intel_fbdev_output_poll_changed+0x9f/0x140 [i915] [ 45.087069] drm_kms_helper_hotplug_event+0x67/0x90 [drm_kms_helper] [ 45.089319] intel_dp_mst_hotplug+0x37/0x50 [i915] [ 45.091496] drm_dp_destroy_connector_work+0x510/0x6f0 [drm_kms_helper] [ 45.093675] ? drm_dp_update_payload_part1+0x1220/0x1220 [drm_kms_helper] [ 45.095851] ? kasan_check_write+0x14/0x20 [ 45.098473] ? kasan_check_read+0x11/0x20 [ 45.101155] ? strscpy+0x17c/0x530 [ 45.103808] ? __switch_to_asm+0x34/0x70 [ 45.106456] ? syscall_return_via_sysret+0xf/0x7f [ 45.109711] ? read_word_at_a_time+0x20/0x20 [ 45.113138] ? __switch_to_asm+0x40/0x70 [ 45.116529] ? __switch_to_asm+0x34/0x70 [ 45.119891] ? __switch_to_asm+0x40/0x70 [ 45.123224] ? __switch_to_asm+0x34/0x70 [ 45.126540] ? __switch_to_asm+0x34/0x70 [ 45.129824] process_one_work+0x88d/0x15d0 [ 45.133172] ? pool_mayday_timeout+0x850/0x850 [ 45.136459] ? pci_mmcfg_check_reserved+0x110/0x128 [ 45.139739] ? 
wake_q_add+0xb0/0xb0 [ 45.143010] ? check_preempt_wakeup+0x652/0x1050 [ 45.146304] ? worker_enter_idle+0x29e/0x740 [ 45.149589] ? __schedule+0x1ec0/0x1ec0 [ 45.152937] ? kasan_check_read+0x11/0x20 [ 45.156179] ? _raw_spin_lock_irq+0xa3/0x130 [ 45.159382] ? _raw_read_unlock_irqrestore+0x30/0x30 [ 45.162542] ? kasan_check_write+0x14/0x20 [ 45.165657] worker_thread+0x1a5/0x1470 [ 45.168725] ? set_load_weight+0x2e0/0x2e0 [ 45.171755] ? process_one_work+0x15d0/0x15d0 [ 45.174806] ? __switch_to_asm+0x34/0x70 [ 45.177645] ? __switch_to_asm+0x40/0x70 [ 45.180323] ? __switch_to_asm+0x34/0x70 [ 45.182936] ? __switch_to_asm+0x40/0x70 [ 45.185539] ? __switch_to_asm+0x34/0x70 [ 45.188100] ? __switch_to_asm+0x40/0x70 [ 45.190628] ? __schedule+0x7d4/0x1ec0 [ 45.193143] ? save_stack+0xa9/0xd0 [ 45.195632] ? kasan_check_write+0x10/0x20 [ 45.198162] ? kasan_kmalloc+0xc4/0xe0 [ 45.200609] ? kmem_cache_alloc_trace+0xdd/0x190 [ 45.203046] ? kthread+0x9f/0x3b0 [ 45.205470] ? ret_from_fork+0x35/0x40 [ 45.207876] ? unwind_next_frame+0x43/0x50 [ 45.210273] ? __save_stack_trace+0x82/0x100 [ 45.212658] ? deactivate_slab.isra.67+0x3d4/0x580 [ 45.215026] ? default_wake_function+0x35/0x50 [ 45.217399] ? kasan_check_read+0x11/0x20 [ 45.219825] ? _raw_spin_lock_irqsave+0xae/0x140 [ 45.222174] ? __lock_text_start+0x8/0x8 [ 45.224521] ? replenish_dl_entity.cold.62+0x4f/0x4f [ 45.226868] ? __kthread_parkme+0x87/0xf0 [ 45.229200] kthread+0x2f7/0x3b0 [ 45.231557] ? process_one_work+0x15d0/0x15d0 [ 45.233923] ? 
kthread_park+0x120/0x120 [ 45.236249] ret_from_fork+0x35/0x40 [ 45.240875] Allocated by task 242: [ 45.243136] save_stack+0x43/0xd0 [ 45.245385] kasan_kmalloc+0xc4/0xe0 [ 45.247597] kmem_cache_alloc_trace+0xdd/0x190 [ 45.249793] drm_dp_add_port+0x1e0/0x2170 [drm_kms_helper] [ 45.252000] drm_dp_send_link_address+0x4a7/0x740 [drm_kms_helper] [ 45.254389] drm_dp_check_and_send_link_address+0x1a7/0x210 [drm_kms_helper] [ 45.256803] drm_dp_mst_link_probe_work+0x6f/0xb0 [drm_kms_helper] [ 45.259200] process_one_work+0x88d/0x15d0 [ 45.261597] worker_thread+0x1a5/0x1470 [ 45.264038] kthread+0x2f7/0x3b0 [ 45.266371] ret_from_fork+0x35/0x40 [ 45.270937] Freed by task 53: [ 45.273170] save_stack+0x43/0xd0 [ 45.275382] __kasan_slab_free+0x139/0x190 [ 45.277604] kasan_slab_free+0xe/0x10 [ 45.279826] kfree+0x99/0x1b0 [ 45.282044] drm_dp_free_mst_port+0x4a/0x60 [drm_kms_helper] [ 45.284330] drm_dp_destroy_connector_work+0x43e/0x6f0 [drm_kms_helper] [ 45.286660] process_one_work+0x88d/0x15d0 [ 45.288934] worker_thread+0x1a5/0x1470 [ 45.291231] kthread+0x2f7/0x3b0 [ 45.293547] ret_from_fork+0x35/0x40 [ 45.298206] The buggy address belongs to the object at ffff8882b4b70968 which belongs to the cache kmalloc-2k of size 2048 [ 45.303047] The buggy address is located 0 bytes inside of 2048-byte region [ffff8882b4b70968, ffff8882b4b71168) [ 45.308010] The buggy address belongs to the page: [ 45.310477] page:ffffea000ad2dc00 count:1 mapcount:0 mapping:ffff8882c080cf40 index:0x0 compound_mapcount: 0 [ 45.313051] flags: 0x8000000000010200(slab|head) [ 45.315635] raw: 8000000000010200 ffffea000aac2808 ffffea000abe8608 ffff8882c080cf40 [ 45.318300] raw: 0000000000000000 00000000000d000d 00000001ffffffff 0000000000000000 [ 45.320966] page dumped because: kasan: bad access detected [ 45.326312] Memory state around the buggy address: [ 45.329085] ffff8882b4b70800: fb fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 45.331845] ffff8882b4b70880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 
45.334584] >ffff8882b4b70900: fc fc fc fc fc fc fc fc fc fc fc fc fc fb fb fb [ 45.337302] ^ [ 45.340061] ffff8882b4b70980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 45.342910] ffff8882b4b70a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 45.345748] ================================================================== So, this definitely isn't a fix that we want. This being said; there's no real easy fix for this problem because of some of the catch-22's of the MST helpers' current design. For starters; we always need to validate a port with drm_dp_get_validated_port_ref(), but validation relies on the lifetime of the port in the actual topology. So once the port is gone, it can't be validated again. If we were to try to make the payload helpers not use port validation, then we'd cause another problem: if the port isn't validated, it could be freed and we'd just start causing more KASAN issues. There are already hacks that attempt to workaround this in drm_dp_mst_destroy_connector_work() by re-initializing the kref so that it can be used again and its memory can be freed once the VCPI helpers finish removing the port's respective payloads. But none of these really do anything helpful since the port still can't be validated since it's gone from the topology. Also, that workaround is immensely confusing to read through. What really needs to be done in order to fix this is to teach DRM how to track the lifetime of the structs for MST ports and branch devices separately from their lifetime in the actual topology. Simply put; this means having two different krefs: one that removes the port/branch device from the topology, and one that finally calls kfree().
This would let us simplify things, since we'd now be able to keep ports around without having to keep them in the topology at the same time, which is exactly what we need in order to teach our VCPI helpers to only validate ports when it's actually necessary without running the risk of trying to use unallocated memory. Such a fix is on its way, but for now let's play it safe and just revert this. If this bug has been around for well over a year, we can wait a little while to get an actual proper fix here. Signed-off-by: Lyude Paul Fixes: c54c7374ff44 ("drm/dp_mst: Skip validating ports during destruction, just ref") Cc: Daniel Vetter Cc: Sean Paul Cc: Jerry Zuo Cc: Harry Wentland Cc: stable@vger.kernel.org # v4.6+ Acked-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20181128210005.24434-1-lyude@redhat.com Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 9d94c306c8ca..c8c83f84aced 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -982,20 +982,9 @@ static struct drm_dp_mst_port *drm_dp_mst_get_port_ref_locked(struct drm_dp_mst_ static struct drm_dp_mst_port *drm_dp_get_validated_port_ref(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port) { struct drm_dp_mst_port *rport = NULL; - mutex_lock(&mgr->lock); - /* - * Port may or may not be 'valid' but we don't care about that when - * destroying the port and we are guaranteed that the port pointer - * will be valid until we've finished - */ - if (current_work() == &mgr->destroy_connector_work) { - kref_get(&port->kref); - rport = port; - } else if (mgr->mst_primary) { - rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary, - port); - } + if (mgr->mst_primary) + rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary, port);
mutex_unlock(&mgr->lock); return rport; } -- GitLab From b597b0c42f6515203c3f678814612aa31dd96a16 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 21 Mar 2020 06:46:50 +0000 Subject: [PATCH 0622/1278] hsr: fix general protection fault in hsr_addr_is_self() [ Upstream commit 3a303cfdd28d5f930a307c82e8a9d996394d5ebd ] The port->hsr is used in the hsr_handle_frame(), which is a callback of rx_handler. hsr master and slaves are initialized in hsr_add_port(). This function initializes several pointers, which includes port->hsr after registering rx_handler. So, in the rx_handler routine, un-initialized pointer would be used. In order to fix this, pointers should be initialized before registering rx_handler. Test commands: ip netns del left ip netns del right modprobe -rv veth modprobe -rv hsr killall ping modprobe hsr ip netns add left ip netns add right ip link add veth0 type veth peer name veth1 ip link add veth2 type veth peer name veth3 ip link add veth4 type veth peer name veth5 ip link set veth1 netns left ip link set veth3 netns right ip link set veth4 netns left ip link set veth5 netns right ip link set veth0 up ip link set veth2 up ip link set veth0 address fc:00:00:00:00:01 ip link set veth2 address fc:00:00:00:00:02 ip netns exec left ip link set veth1 up ip netns exec left ip link set veth4 up ip netns exec right ip link set veth3 up ip netns exec right ip link set veth5 up ip link add hsr0 type hsr slave1 veth0 slave2 veth2 ip a a 192.168.100.1/24 dev hsr0 ip link set hsr0 up ip netns exec left ip link add hsr1 type hsr slave1 veth1 slave2 veth4 ip netns exec left ip a a 192.168.100.2/24 dev hsr1 ip netns exec left ip link set hsr1 up ip netns exec left ip n a 192.168.100.1 dev hsr1 lladdr \ fc:00:00:00:00:01 nud permanent ip netns exec left ip n r 192.168.100.1 dev hsr1 lladdr \ fc:00:00:00:00:01 nud permanent for i in {1..100} do ip netns exec left ping 192.168.100.1 & done ip netns exec left hping3 192.168.100.1 -2 --flood & ip netns exec right ip 
link add hsr2 type hsr slave1 veth3 slave2 veth5 ip netns exec right ip a a 192.168.100.3/24 dev hsr2 ip netns exec right ip link set hsr2 up ip netns exec right ip n a 192.168.100.1 dev hsr2 lladdr \ fc:00:00:00:00:02 nud permanent ip netns exec right ip n r 192.168.100.1 dev hsr2 lladdr \ fc:00:00:00:00:02 nud permanent for i in {1..100} do ip netns exec right ping 192.168.100.1 & done ip netns exec right hping3 192.168.100.1 -2 --flood & while : do ip link add hsr0 type hsr slave1 veth0 slave2 veth2 ip a a 192.168.100.1/24 dev hsr0 ip link set hsr0 up ip link del hsr0 done Splat looks like: [ 120.954938][ C0] general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1]I [ 120.957761][ C0] KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] [ 120.959064][ C0] CPU: 0 PID: 1511 Comm: hping3 Not tainted 5.6.0-rc5+ #460 [ 120.960054][ C0] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 120.962261][ C0] RIP: 0010:hsr_addr_is_self+0x65/0x2a0 [hsr] [ 120.963149][ C0] Code: 44 24 18 70 73 2f c0 48 c1 eb 03 48 8d 04 13 c7 00 f1 f1 f1 f1 c7 40 04 00 f2 f2 f2 4 [ 120.966277][ C0] RSP: 0018:ffff8880d9c09af0 EFLAGS: 00010206 [ 120.967293][ C0] RAX: 0000000000000006 RBX: 1ffff1101b38135f RCX: 0000000000000000 [ 120.968516][ C0] RDX: dffffc0000000000 RSI: ffff8880d17cb208 RDI: 0000000000000000 [ 120.969718][ C0] RBP: 0000000000000030 R08: ffffed101b3c0e3c R09: 0000000000000001 [ 120.972203][ C0] R10: 0000000000000001 R11: ffffed101b3c0e3b R12: 0000000000000000 [ 120.973379][ C0] R13: ffff8880aaf80100 R14: ffff8880aaf800f2 R15: ffff8880aaf80040 [ 120.974410][ C0] FS: 00007f58e693f740(0000) GS:ffff8880d9c00000(0000) knlGS:0000000000000000 [ 120.979794][ C0] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 120.980773][ C0] CR2: 00007ffcb8b38f29 CR3: 00000000afe8e001 CR4: 00000000000606f0 [ 120.981945][ C0] Call Trace: [ 120.982411][ C0] [ 120.982848][ C0] ? 
hsr_add_node+0x8c0/0x8c0 [hsr] [ 120.983522][ C0] ? rcu_read_lock_held+0x90/0xa0 [ 120.984159][ C0] ? rcu_read_lock_sched_held+0xc0/0xc0 [ 120.984944][ C0] hsr_handle_frame+0x1db/0x4e0 [hsr] [ 120.985597][ C0] ? hsr_nl_nodedown+0x2b0/0x2b0 [hsr] [ 120.986289][ C0] __netif_receive_skb_core+0x6bf/0x3170 [ 120.992513][ C0] ? check_chain_key+0x236/0x5d0 [ 120.993223][ C0] ? do_xdp_generic+0x1460/0x1460 [ 120.993875][ C0] ? register_lock_class+0x14d0/0x14d0 [ 120.994609][ C0] ? __netif_receive_skb_one_core+0x8d/0x160 [ 120.995377][ C0] __netif_receive_skb_one_core+0x8d/0x160 [ 120.996204][ C0] ? __netif_receive_skb_core+0x3170/0x3170 [ ... ] Reported-by: syzbot+fcf5dd39282ceb27108d@syzkaller.appspotmail.com Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/hsr/hsr_slave.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 5fee6ec7c93d..b215df0bce0e 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -152,16 +152,16 @@ int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev, if (port == NULL) return -ENOMEM; + port->hsr = hsr; + port->dev = dev; + port->type = type; + if (type != HSR_PT_MASTER) { res = hsr_portdev_setup(dev, port); if (res) goto fail_dev_setup; } - port->hsr = hsr; - port->dev = dev; - port->type = type; - list_add_tail_rcu(&port->port_list, &hsr->ports); synchronize_rcu(); -- GitLab From dccb6b9bebdc2f5868c1860eea3010b0bab7166b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sun, 22 Mar 2020 13:51:13 -0400 Subject: [PATCH 0623/1278] macsec: restrict to ethernet devices [ Upstream commit b06d072ccc4b1acd0147b17914b7ad1caa1818bb ] Only attach macsec to ethernet devices. Syzbot was able to trigger a KMSAN warning in macsec_handle_frame by attaching to a phonet device. Macvlan has a similar check in macvlan_port_create. 
v1->v2 - fix commit message typo Reported-by: syzbot Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macsec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index c2c3ce5653db..5959e8817a1b 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -3219,6 +3220,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev, real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; + if (real_dev->type != ARPHRD_ETHER) + return -EINVAL; dev->priv_flags |= IFF_MACSEC; -- GitLab From c5980c71536ae46b69fada2ff4018afbaa088e4b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 22 Mar 2020 13:58:50 -0700 Subject: [PATCH 0624/1278] net: dsa: Fix duplicate frames flooded by learning [ Upstream commit 0e62f543bed03a64495bd2651d4fe1aa4bcb7fe5 ] When both the switch and the bridge are learning about new addresses, switch ports attached to the bridge would see duplicate ARP frames because both entities would attempt to send them. Fixes: 5037d532b83d ("net: dsa: add Broadcom tag RX/TX handler") Reported-by: Maxime Bizon Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/dsa/tag_brcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index de92fc1fc3be..b3b918afd212 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -134,6 +134,8 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port].netdev; + skb->offload_fwd_mark = 1; + return skb; } -- GitLab From 7645f4037d1ae561842cce0df120d6988c78205c Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 16 Mar 2020 22:56:36 +0800 Subject: [PATCH 0625/1278] net: mvneta: Fix the case where the last poll did not process all rx [ Upstream commit 065fd83e1be2e1ba0d446a257fd86a3cc7bddb51 ] For the case where the last mvneta_poll did not process all RX packets, we need to OR in the pp->cause_rx_tx or port->cause_rx_tx before calculating the rx_queue. Fixes: 2dcf75e2793c ("net: mvneta: Associate RX queues with each CPU") Signed-off-by: Jisheng Zhang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index a0c64b30f81a..a115e51dc211 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2759,11 +2759,10 @@ static int mvneta_poll(struct napi_struct *napi, int budget) /* For the case where the last mvneta_poll did not process all * RX packets */ - rx_queue = fls(((cause_rx_tx >> 8) & 0xff)); - cause_rx_tx |= pp->neta_armada3700 ?
pp->cause_rx_tx : port->cause_rx_tx; + rx_queue = fls(((cause_rx_tx >> 8) & 0xff)); if (rx_queue) { rx_queue = rx_queue - 1; if (pp->bm_priv) -- GitLab From 2975472e042e0bbfeeabddc5023cb8c011ec5a07 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 13 Mar 2020 12:18:09 -0400 Subject: [PATCH 0626/1278] net/packet: tpacket_rcv: avoid a producer race condition [ Upstream commit 61fad6816fc10fb8793a925d5c1256d1c3db0cd2 ] PACKET_RX_RING can cause multiple writers to access the same slot if a fast writer wraps the ring while a slow writer is still copying. This is particularly likely with few, large slots (e.g., GSO packets). Synchronize kernel thread ownership of rx ring slots with a bitmap. Writers acquire a slot race-free by testing tp_status TP_STATUS_KERNEL while holding the sk receive queue lock. They release this lock before copying and set tp_status to TP_STATUS_USER to release to userspace when done. During copying, another writer may take the lock, also see TP_STATUS_KERNEL, and start writing to the same slot. Introduce a new rx_owner_map bitmap with a bit per slot. To acquire a slot, test and set with the lock held. To release race-free, update tp_status and owner bit as a transaction, so take the lock again. This is one of a variety of discussed options (see Link below): * instead of a shadow ring, embed the data in the slot itself, such as in tp_padding. But any test for this field may match a value left by userspace, causing deadlock. * avoid the lock on release. This leaves a small race if releasing the shadow slot before setting TP_STATUS_USER. The below reproducer showed that this race is not academic. If releasing the slot after tp_status, the race is more subtle. See the first link for details. * add a new tp_status TP_KERNEL_OWNED to avoid the transactional store of two fields. But, legacy applications may interpret all non-zero tp_status as owned by the user. As libpcap does. So this is possible only opt-in by newer processes.
It can be added as an optional mode. * embed the struct at the tail of pg_vec to avoid extra allocation. The implementation proved no less complex than a separate field. The additional locking cost on release adds contention, no different than scaling on multicore or multiqueue h/w. In practice, below reproducer nor small packet tcpdump showed a noticeable change in perf report in cycles spent in spinlock. Where contention is problematic, packet sockets support mitigation through PACKET_FANOUT. And we can consider adding opt-in state TP_KERNEL_OWNED. Easy to reproduce by running multiple netperf or similar TCP_STREAM flows concurrently with `tcpdump -B 129 -n greater 60000`. Based on an earlier patchset by Jon Rosen. See links below. I believe this issue goes back to the introduction of tpacket_rcv, which predates git history. Link: https://www.mail-archive.com/netdev@vger.kernel.org/msg237222.html Suggested-by: Jon Rosen Signed-off-by: Willem de Bruijn Signed-off-by: Jon Rosen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 21 +++++++++++++++++++++ net/packet/internal.h | 5 ++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 387589a4a340..102750bd751c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2204,6 +2204,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct timespec ts; __u32 ts_status; bool is_drop_n_account = false; + unsigned int slot_id = 0; bool do_vnet = false; /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. 
@@ -2300,6 +2301,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (!h.raw) goto drop_n_account; + if (po->tp_version <= TPACKET_V2) { + slot_id = po->rx_ring.head; + if (test_bit(slot_id, po->rx_ring.rx_owner_map)) + goto drop_n_account; + __set_bit(slot_id, po->rx_ring.rx_owner_map); + } + if (do_vnet && virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), @@ -2405,7 +2413,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, #endif if (po->tp_version <= TPACKET_V2) { + spin_lock(&sk->sk_receive_queue.lock); __packet_set_status(po, h.raw, status); + __clear_bit(slot_id, po->rx_ring.rx_owner_map); + spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); } else { prb_clear_blk_fill_status(&po->rx_ring); @@ -4298,6 +4309,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, { struct pgv *pg_vec = NULL; struct packet_sock *po = pkt_sk(sk); + unsigned long *rx_owner_map = NULL; int was_running, order = 0; struct packet_ring_buffer *rb; struct sk_buff_head *rb_queue; @@ -4383,6 +4395,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } break; default: + if (!tx_ring) { + rx_owner_map = bitmap_alloc(req->tp_frame_nr, + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + if (!rx_owner_map) + goto out_free_pg_vec; + } break; } } @@ -4412,6 +4430,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = 0; spin_lock_bh(&rb_queue->lock); swap(rb->pg_vec, pg_vec); + if (po->tp_version <= TPACKET_V2) + swap(rb->rx_owner_map, rx_owner_map); rb->frame_max = (req->tp_frame_nr - 1); rb->head = 0; rb->frame_size = req->tp_frame_size; @@ -4443,6 +4463,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } out_free_pg_vec: + bitmap_free(rx_owner_map); if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: diff --git a/net/packet/internal.h b/net/packet/internal.h index c70a2794456f..f10294800aaf 100644 --- 
a/net/packet/internal.h +++ b/net/packet/internal.h @@ -70,7 +70,10 @@ struct packet_ring_buffer { unsigned int __percpu *pending_refcnt; - struct tpacket_kbdq_core prb_bdqc; + union { + unsigned long *rx_owner_map; + struct tpacket_kbdq_core prb_bdqc; + }; }; extern struct mutex fanout_mutex; -- GitLab From ee86fd16c8c6c4336c744c141f82b19a9768ebd7 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Fri, 20 Mar 2020 21:46:14 +0100 Subject: [PATCH 0627/1278] net: qmi_wwan: add support for ASKEY WWHC050 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 12a5ba5a1994568d4ceaff9e78c6b0329d953386 ] ASKEY WWHC050 is a mcie LTE modem. The oem configuration states: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1690 ProdID=7588 Rev=ff.ff S: Manufacturer=Android S: Product=Android S: SerialNumber=813f0eef6e6e C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) 
Sub=06 Prot=50 Driver=(none) E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=125us Tested on openwrt distribution. Signed-off-by: Cezary Jackiewicz Signed-off-by: Pawel Dembicki Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index a8d5561afc7d..e028e03765a5 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1139,6 +1139,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1435, 0xd181, 4)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd181, 5)}, /* Wistron NeWeb D18Q1 */ {QMI_QUIRK_SET_DTR(0x1508, 0x1001, 4)}, /* Fibocom NL668 series */ + {QMI_FIXED_INTF(0x1690, 0x7588, 4)}, /* ASKEY WWHC050 */ {QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */ {QMI_FIXED_INTF(0x16d8, 0x6007, 0)}, /* CMOTech CHE-628S */ {QMI_FIXED_INTF(0x16d8, 0x6008, 0)}, /* CMOTech CMU-301 */ -- GitLab From f0c92f59cf528bc1b872f2ca91b01e128a2af3e6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 13 Mar 2020 22:29:54 -0700 Subject: [PATCH 0628/1278] net_sched: cls_route: remove the right filter from hashtable [ Upstream commit ef299cc3fa1a9e1288665a9fdc8bff55629fd359 ] route4_change() allocates a new filter and copies values from the old one. After the new filter is inserted into the hash table, the old filter should be removed and freed, as the final step of the update. However, the current code mistakenly removes the new one. This looks apparently wrong to me, and it causes double "free" and use-after-free too, as reported by syzbot. 
Reported-and-tested-by: syzbot+f9b32aaacd60305d9687@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+2f8c233f131943d6056d@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+9c2df9fd5e9445b74e01@syzkaller.appspotmail.com Fixes: 1109c00547fc ("net: sched: RCU cls_route") Cc: Jamal Hadi Salim Cc: Jiri Pirko Cc: John Fastabend Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index ac9a5b8825b9..4f133faa9e60 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -539,8 +539,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, fp = &b->ht[h]; for (pfp = rtnl_dereference(*fp); pfp; fp = &pfp->next, pfp = rtnl_dereference(*fp)) { - if (pfp == f) { - *fp = f->next; + if (pfp == fold) { + rcu_assign_pointer(*fp, fold->next); break; } } -- GitLab From 9f8b6c44be178c2498a00b270872a6e30e7c8266 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 11 Mar 2020 22:42:28 -0700 Subject: [PATCH 0629/1278] net_sched: keep alloc_hash updated after hash allocation [ Upstream commit 0d1c3530e1bd38382edef72591b78e877e0edcd3 ] In commit 599be01ee567 ("net_sched: fix an OOB access in cls_tcindex") I moved cp->hash calculation before the first tcindex_alloc_perfect_hash(), but cp->alloc_hash is left untouched. This difference could lead to another out of bound access. cp->alloc_hash should always be the size allocated, we should update it after this tcindex_alloc_perfect_hash(). Reported-and-tested-by: syzbot+dcc34d54d68ef7d2d53d@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+c72da7b9ed57cde6fca2@syzkaller.appspotmail.com Fixes: 599be01ee567 ("net_sched: fix an OOB access in cls_tcindex") Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_tcindex.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 796b4e1beb12..c2d2c054a4e4 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -375,6 +375,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (tcindex_alloc_perfect_hash(cp) < 0) goto errout; + cp->alloc_hash = cp->hash; for (i = 0; i < min(cp->hash, p->hash); i++) cp->perfect[i].res = p->perfect[i].res; balloc = 1; -- GitLab From acfd29fa0fe247d47837eabd7e801522d6177917 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sat, 21 Mar 2020 15:36:19 +0100 Subject: [PATCH 0630/1278] net: stmmac: dwmac-rk: fix error path in rk_gmac_probe [ Upstream commit 9de9aa487daff7a5c73434c24269b44ed6a428e6 ] Make sure we clean up devicetree related configuration also when clock init fails. Fixes: fecd4d7eef8b ("net: stmmac: dwmac-rk: Add integrated PHY support") Signed-off-by: Emil Renner Berthing Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 712b5eb3507a..4156cf007b53 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1362,7 +1362,7 @@ static int rk_gmac_probe(struct platform_device *pdev) ret = rk_gmac_clk_init(plat_dat); if (ret) - return ret; + goto err_remove_config_dt; ret = rk_gmac_powerup(plat_dat->bsp_priv); if (ret) -- GitLab From dc58386288a4270a6fe996707278f81489e20b5b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 20 Mar 2020 16:21:17 +0300 Subject: [PATCH 0631/1278] NFC: fdp: Fix a signedness bug in fdp_nci_send_patch() [ Upstream commit 0dcdf9f64028ec3b75db6b691560f8286f3898bf ] The nci_conn_max_data_pkt_payload_size() function sometimes returns -EPROTO so "max_size" needs to be signed for the error handling to work. We can make "payload_size" an int as well. Fixes: a06347c04c13 ("NFC: Add Intel Fields Peak NFC solution driver") Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/fdp/fdp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index ec50027b0d8b..3195bae1685f 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -192,7 +192,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type) const struct firmware *fw; struct sk_buff *skb; unsigned long len; - u8 max_size, payload_size; + int max_size, payload_size; int rc = 0; if ((type == NCI_PATCH_TYPE_OTP && !info->otp_patch) || @@ -215,8 +215,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type) while (len) { - payload_size = min_t(unsigned long, (unsigned long) max_size, - len); + payload_size = min_t(unsigned long, max_size, len); skb = nci_skb_alloc(ndev, (NCI_CTRL_HDR_SIZE + payload_size), GFP_KERNEL); -- GitLab From 795536e997419cc81c6c4a08e49d7cac7141ca9d Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 21 Mar 2020 14:08:29 +0100 Subject: [PATCH 0632/1278] slcan: not call free_netdev before rtnl_unlock in slcan_open [ Upstream commit 2091a3d42b4f339eaeed11228e0cbe9d4f92f558 ] As described in the comment before netdev_run_todo(), we cannot call free_netdev() before rtnl_unlock(); fix it by reordering the code. This patch is a 1:1 copy of upstream slip.c commit f596c87005f7 ("slip: not call free_netdev before rtnl_unlock in slip_open"). Reported-by: yangerkun Signed-off-by: Oliver Hartkopp Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/slcan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 35564a9561b7..7c37b96ff22a 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -621,7 +621,10 @@ static int slcan_open(struct tty_struct *tty) tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); slc_free_netdev(sl->dev); + /* do not call free_netdev before rtnl_unlock */ + rtnl_unlock(); free_netdev(sl->dev); + return err; err_exit: rtnl_unlock(); -- GitLab From f9c47737687ad4270ea5c38113b8c9bcd077601d Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Sun, 22 Mar 2020 16:40:02 -0400 Subject: [PATCH 0633/1278] bnxt_en: fix memory leaks in bnxt_dcbnl_ieee_getets() [ Upstream commit 62d4073e86e62e316bea2c53e77db10418fd5dd7 ] The allocated ieee_ets structure goes out of scope without being freed, leaking memory. Appropriate result codes should be returned so that callers do not rely on invalid data passed by reference. Also cache the ETS config retrieved from the device so that it doesn't need to be freed. The balance of the code was clearly written with the intent of having the results of querying the hardware cached in the device structure. The commensurate store was evidently missed though. Fixes: 7df4ae9fe855 ("bnxt_en: Implement DCBNL to support host-based DCBX.") Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index fed37cd9ae1d..125e22ffe2ae 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -387,24 +387,26 @@ static int bnxt_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets) { struct bnxt *bp = netdev_priv(dev); struct ieee_ets *my_ets = bp->ieee_ets; + int rc; ets->ets_cap = bp->max_tc; if (!my_ets) { - int rc; - if (bp->dcbx_cap & DCB_CAP_DCBX_HOST) return 0; my_ets = kzalloc(sizeof(*my_ets), GFP_KERNEL); if (!my_ets) - return 0; + return -ENOMEM; rc = bnxt_hwrm_queue_cos2bw_qcfg(bp, my_ets); if (rc) - return 0; + goto error; rc = bnxt_hwrm_queue_pri2cos_qcfg(bp, my_ets); if (rc) - return 0; + goto error; + + /* cache result */ + bp->ieee_ets = my_ets; } ets->cbs = my_ets->cbs; @@ -413,6 +415,9 @@ static int bnxt_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets) memcpy(ets->tc_tsa, my_ets->tc_tsa, sizeof(ets->tc_tsa)); memcpy(ets->prio_tc, my_ets->prio_tc, sizeof(ets->prio_tc)); return 0; +error: + kfree(my_ets); + return rc; } static int bnxt_dcbnl_ieee_setets(struct net_device *dev, struct ieee_ets *ets) -- GitLab From bee69eac4713b94126b7f9555acf64767c6f6852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20van=20Dorst?= Date: Thu, 19 Mar 2020 14:47:56 +0100 Subject: [PATCH 0634/1278] net: dsa: mt7530: Change the LINK bit to reflect the link status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 22259471b51925353bd7b16f864c79fdd76e425e ] Andrew reported: After a number of network port link up/down changes, sometimes the switch port gets stuck in a state where it thinks it is still transmitting packets but the cpu port is not actually transmitting 
anymore. In this state you will see a message on the console "mtk_soc_eth 1e100000.ethernet eth0: transmit timed out" and the Tx counter in ifconfig will be incrementing on virtual port, but not incrementing on cpu port. The issue is that MAC TX/RX status has no impact on the link status or queue manager of the switch. So the queue manager just queues up packets of a disabled port and sends out pause frames when the queue is full. Change the LINK bit to reflect the link status. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Reported-by: Andrew Smith Signed-off-by: René van Dorst Reviewed-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mt7530.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 3b073e152237..58c16aa00a70 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -549,7 +549,7 @@ mt7530_mib_reset(struct dsa_switch *ds) static void mt7530_port_set_status(struct mt7530_priv *priv, int port, int enable) { - u32 mask = PMCR_TX_EN | PMCR_RX_EN; + u32 mask = PMCR_TX_EN | PMCR_RX_EN | PMCR_FORCE_LNK; if (enable) mt7530_set(priv, MT7530_PMCR_P(port), mask); -- GitLab From b200a210a6509af627f5e88e60a241a8867048e2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 18 Mar 2020 13:28:09 +0000 Subject: [PATCH 0635/1278] vxlan: check return value of gro_cells_init() [ Upstream commit 384d91c267e621e0926062cfb3f20cb72dc16928 ] gro_cells_init() returns an error if memory allocation fails. But the vxlan module doesn't check the return value of gro_cells_init(). Fixes: 58ce31cca1ff ("vxlan: GRO support at tunnel layer") Signed-off-by: Taehee Yoo Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 4d97a7b5fe3c..927d62c76a60 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2454,10 +2454,19 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan, /* Setup stats when device is created */ static int vxlan_init(struct net_device *dev) { + struct vxlan_dev *vxlan = netdev_priv(dev); + int err; + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + err = gro_cells_init(&vxlan->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + return 0; } @@ -2717,8 +2726,6 @@ static void vxlan_setup(struct net_device *dev) vxlan->dev = dev; - gro_cells_init(&vxlan->gro_cells, dev); - for (h = 0; h < FDB_HASH_SIZE; ++h) INIT_HLIST_HEAD(&vxlan->fdb_head[h]); } -- GitLab From 16da9c5d0e1887cff73a1877d75e154f08043208 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 13 Mar 2020 06:50:14 +0000 Subject: [PATCH 0636/1278] hsr: use rcu_read_lock() in hsr_get_node_{list/status}() [ Upstream commit 173756b86803655d70af7732079b3aa935e6ab68 ] hsr_get_node_{list/status}() are not under rtnl_lock() because they are callback functions of generic netlink. But they use __dev_get_by_index() without rtnl_lock(). So, it would use unsafe data. In order to fix it, rcu_read_lock() and dev_get_by_index_rcu() are used instead of __dev_get_by_index(). Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/hsr/hsr_framereg.c | 10 ++-------- net/hsr/hsr_netlink.c | 43 +++++++++++++++++++++--------------------- 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 6705420b3111..d7206581145d 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -468,13 +468,9 @@ int hsr_get_node_data(struct hsr_priv *hsr, struct hsr_port *port; unsigned long tdiff; - - rcu_read_lock(); node = find_node_by_AddrA(&hsr->node_db, addr); - if (!node) { - rcu_read_unlock(); - return -ENOENT; /* No such entry */ - } + if (!node) + return -ENOENT; ether_addr_copy(addr_b, node->MacAddressB); @@ -509,7 +505,5 @@ int hsr_get_node_data(struct hsr_priv *hsr, *addr_b_ifindex = -1; } - rcu_read_unlock(); - return 0; } diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index b9cce0fd5696..f7f96b2050a5 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -259,17 +259,16 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) if (!na) goto invalid; - hsr_dev = __dev_get_by_index(genl_info_net(info), - nla_get_u32(info->attrs[HSR_A_IFINDEX])); + rcu_read_lock(); + hsr_dev = dev_get_by_index_rcu(genl_info_net(info), + nla_get_u32(info->attrs[HSR_A_IFINDEX])); if (!hsr_dev) - goto invalid; + goto rcu_unlock; if (!is_hsr_master(hsr_dev)) - goto invalid; - + goto rcu_unlock; /* Send reply */ - - skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb_out) { res = -ENOMEM; goto fail; @@ -321,12 +320,10 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) res = nla_put_u16(skb_out, HSR_A_IF1_SEQ, hsr_node_if1_seq); if (res < 0) goto nla_put_failure; - rcu_read_lock(); port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); if (port) res = nla_put_u32(skb_out, HSR_A_IF1_IFINDEX, port->dev->ifindex); - rcu_read_unlock(); if (res < 0) goto nla_put_failure; @@ -336,20 
+333,22 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) res = nla_put_u16(skb_out, HSR_A_IF2_SEQ, hsr_node_if2_seq); if (res < 0) goto nla_put_failure; - rcu_read_lock(); port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); if (port) res = nla_put_u32(skb_out, HSR_A_IF2_IFINDEX, port->dev->ifindex); - rcu_read_unlock(); if (res < 0) goto nla_put_failure; + rcu_read_unlock(); + genlmsg_end(skb_out, msg_head); genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid); return 0; +rcu_unlock: + rcu_read_unlock(); invalid: netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL); return 0; @@ -359,6 +358,7 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) /* Fall through */ fail: + rcu_read_unlock(); return res; } @@ -385,17 +385,16 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) if (!na) goto invalid; - hsr_dev = __dev_get_by_index(genl_info_net(info), - nla_get_u32(info->attrs[HSR_A_IFINDEX])); + rcu_read_lock(); + hsr_dev = dev_get_by_index_rcu(genl_info_net(info), + nla_get_u32(info->attrs[HSR_A_IFINDEX])); if (!hsr_dev) - goto invalid; + goto rcu_unlock; if (!is_hsr_master(hsr_dev)) - goto invalid; - + goto rcu_unlock; /* Send reply */ - - skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb_out) { res = -ENOMEM; goto fail; @@ -415,14 +414,11 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) hsr = netdev_priv(hsr_dev); - rcu_read_lock(); pos = hsr_get_next_node(hsr, NULL, addr); while (pos) { res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr); - if (res < 0) { - rcu_read_unlock(); + if (res < 0) goto nla_put_failure; - } pos = hsr_get_next_node(hsr, pos, addr); } rcu_read_unlock(); @@ -432,6 +428,8 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) return 0; +rcu_unlock: + rcu_read_unlock(); invalid: netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, 
NULL); return 0; @@ -441,6 +439,7 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) /* Fall through */ fail: + rcu_read_unlock(); return res; } -- GitLab From 718742a0efd85417076321b7ed73b08fabdf46a6 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 13 Mar 2020 06:50:24 +0000 Subject: [PATCH 0637/1278] hsr: add restart routine into hsr_get_node_list() [ Upstream commit ca19c70f5225771c05bcdcb832b4eb84d7271c5e ] The hsr_get_node_list() is to send node addresses to the userspace. If there are so many nodes, it could fail because of buffer size. In order to avoid this failure, the restart routine is added. Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/hsr/hsr_netlink.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index f7f96b2050a5..3f2fe3d50328 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -366,16 +366,14 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) */ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) { - /* For receiving */ - struct nlattr *na; + unsigned char addr[ETH_ALEN]; struct net_device *hsr_dev; - - /* For sending */ struct sk_buff *skb_out; - void *msg_head; struct hsr_priv *hsr; - void *pos; - unsigned char addr[ETH_ALEN]; + bool restart = false; + struct nlattr *na; + void *pos = NULL; + void *msg_head; int res; if (!info) @@ -393,8 +391,9 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) if (!is_hsr_master(hsr_dev)) goto rcu_unlock; +restart: /* Send reply */ - skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + skb_out = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb_out) { res = -ENOMEM; goto fail; @@ -408,17 +407,28 @@ static int 
hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) goto nla_put_failure; } - res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex); - if (res < 0) - goto nla_put_failure; + if (!restart) { + res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex); + if (res < 0) + goto nla_put_failure; + } hsr = netdev_priv(hsr_dev); - pos = hsr_get_next_node(hsr, NULL, addr); + if (!pos) + pos = hsr_get_next_node(hsr, NULL, addr); while (pos) { res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr); - if (res < 0) + if (res < 0) { + if (res == -EMSGSIZE) { + genlmsg_end(skb_out, msg_head); + genlmsg_unicast(genl_info_net(info), skb_out, + info->snd_portid); + restart = true; + goto restart; + } goto nla_put_failure; + } pos = hsr_get_next_node(hsr, pos, addr); } rcu_read_unlock(); @@ -435,7 +445,7 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) return 0; nla_put_failure: - kfree_skb(skb_out); + nlmsg_free(skb_out); /* Fall through */ fail: -- GitLab From 4ba7450cf490f5852632a8ebda61233bcf4e8191 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 13 Mar 2020 06:50:33 +0000 Subject: [PATCH 0638/1278] hsr: set .netnsok flag [ Upstream commit 09e91dbea0aa32be02d8877bd50490813de56b9a ] The hsr module has been supporting the list and status command. (HSR_C_GET_NODE_LIST and HSR_C_GET_NODE_STATUS) These commands send node information to the user-space via generic netlink. But, in the non-init_net namespace, these commands are not allowed because .netnsok flag is false. So, there is no way to get node information in the non-init_net namespace. Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/hsr/hsr_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 3f2fe3d50328..37708dabebd1 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -476,6 +476,7 @@ static struct genl_family hsr_genl_family __ro_after_init = { .name = "HSR", .version = 1, .maxattr = HSR_A_MAX, + .netnsok = true, .module = THIS_MODULE, .ops = hsr_ops, .n_ops = ARRAY_SIZE(hsr_ops), -- GitLab From 956b657b5774eb87fff6a4996f651b6ad93afe0a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 9 Oct 2018 17:48:15 +0200 Subject: [PATCH 0639/1278] net: ipv4: don't let PMTU updates increase route MTU [ Upstream commit 28d35bcdd3925e7293408cdb8aa5f2aac5f0d6e3 ] When an MTU update with PMTU smaller than net.ipv4.route.min_pmtu is received, we must clamp its value. However, we can receive a PMTU exception with PMTU < old_mtu < ip_rt_min_pmtu, which would lead to an increase in PMTU. To fix this, take the smallest of the old MTU and ip_rt_min_pmtu. Before this patch, in case of an update, the exception's MTU would always change. Now, an exception can have only its lock flag updated, but not the MTU, so we need to add a check on locking to the following "is this exception getting updated, or close to expiring?" test. Fixes: d52e5a7e7ca4 ("ipv4: lock mtu in fnhe when received PMTU < net.ipv4.route.min_pmtu") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/ipv4/route.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8b855d3eec9e..05fe1d007544 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1014,21 +1014,22 @@ out: kfree_skb(skb); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; + u32 old_mtu = ipv4_mtu(dst); struct fib_result res; bool lock = false; if (ip_mtu_locked(dst)) return; - if (ipv4_mtu(dst) < mtu) + if (old_mtu < mtu) return; if (mtu < ip_rt_min_pmtu) { lock = true; - mtu = ip_rt_min_pmtu; + mtu = min(old_mtu, ip_rt_min_pmtu); } - if (rt->rt_pmtu == mtu && + if (rt->rt_pmtu == mtu && !lock && time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) return; -- GitLab From e244073116c7681accc86b32b10ad47036aac4e2 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 30 Jan 2020 13:34:49 +0300 Subject: [PATCH 0640/1278] cgroup-v1: cgroup_pidlist_next should update position index [ Upstream commit db8dd9697238be70a6b4f9d0284cd89f59c0e070 ] if seq_file .next function does not change position index, read after some lseek can generate unexpected output. # mount | grep cgroup # dd if=/mnt/cgroup.procs bs=1 # normal output ... 1294 1295 1296 1304 1382 584+0 records in 584+0 records out 584 bytes copied dd: /mnt/cgroup.procs: cannot skip to specified offset 83 <<< generates end of last line 1383 <<< ...
and whole last line once again 0+1 records in 0+1 records out 8 bytes copied dd: /mnt/cgroup.procs: cannot skip to specified offset 1386 <<< generates last line anyway 0+1 records in 0+1 records out 5 bytes copied https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- kernel/cgroup/cgroup-v1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index a2c05d2476ac..d14896518089 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -501,6 +501,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) */ p++; if (p >= end) { + (*pos)++; return NULL; } else { *pos = *p; -- GitLab From 2c1f4d27781351a85333c267c9a06f41ba526921 Mon Sep 17 00:00:00 2001 From: Mike Gilbert Date: Wed, 26 Feb 2020 14:33:59 -0500 Subject: [PATCH 0641/1278] cpupower: avoid multiple definition with gcc -fno-common [ Upstream commit 2de7fb60a4740135e03cf55c1982e393ccb87b6b ] Building cpupower with -fno-common in CFLAGS results in errors due to multiple definitions of the 'cpu_count' and 'start_time' variables. ./utils/idle_monitor/snb_idle.o:./utils/idle_monitor/cpupower-monitor.h:28: multiple definition of `cpu_count'; ./utils/idle_monitor/nhm_idle.o:./utils/idle_monitor/cpupower-monitor.h:28: first defined here ... ./utils/idle_monitor/cpuidle_sysfs.o:./utils/idle_monitor/cpuidle_sysfs.c:22: multiple definition of `start_time'; ./utils/idle_monitor/amd_fam14h_idle.o:./utils/idle_monitor/amd_fam14h_idle.c:85: first defined here The -fno-common option will be enabled by default in GCC 10. 
Bug: https://bugs.gentoo.org/707462 Signed-off-by: Mike Gilbert Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c | 2 +- tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 2 +- tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c | 2 ++ tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c index 2116df9ad832..c097a3748674 100644 --- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c @@ -83,7 +83,7 @@ static struct pci_access *pci_acc; static struct pci_dev *amd_fam14h_pci_dev; static int nbp1_entered; -struct timespec start_time; +static struct timespec start_time; static unsigned long long timediff; #ifdef DEBUG diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index 5b3205f16217..5277df27191f 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -21,7 +21,7 @@ struct cpuidle_monitor cpuidle_sysfs_monitor; static unsigned long long **previous_count; static unsigned long long **current_count; -struct timespec start_time; +static struct timespec start_time; static unsigned long long timediff; static int cpuidle_get_count_percent(unsigned int id, double *percent, diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index 05f953f0f0a0..80a21cb67d94 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -29,6 +29,8 @@ struct cpuidle_monitor *all_monitors[] = { 0 }; +int cpu_count; + static struct cpuidle_monitor 
*monitors[MONITORS_MAX]; static unsigned int avail_monitors; diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h index 9e43f3371fbc..3558bbae2b5d 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h @@ -18,7 +18,7 @@ #define CSTATE_NAME_LEN 5 #define CSTATE_DESC_LEN 60 -int cpu_count; +extern int cpu_count; /* Hard to define the right names ...: */ enum power_range_e { -- GitLab From 06bd8b3078947d5d9ba9f0ccee8335a62f59b414 Mon Sep 17 00:00:00 2001 From: Dajun Jin Date: Mon, 2 Mar 2020 20:24:21 -0800 Subject: [PATCH 0642/1278] drivers/of/of_mdio.c:fix of_mdiobus_register() [ Upstream commit 209c65b61d94344522c41a83cd6ce51aac5fd0a4 ] When registers a phy_device successful, should terminate the loop or the phy_device would be registered in other addr. If there are multiple PHYs without reg properties, it will go wrong. Signed-off-by: Dajun Jin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/of/of_mdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index fe26697d3bd7..69da2f6896da 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -259,6 +259,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) rc = of_mdiobus_register_phy(mdio, child, addr); if (rc && rc != -ENODEV) goto unregister; + break; } } } -- GitLab From ef76f1e9da864b3b39762d7f46e6d7a5cddae4c9 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Wed, 19 Feb 2020 12:01:29 -0700 Subject: [PATCH 0643/1278] cgroup1: don't call release_agent when it is "" [ Upstream commit 2e5383d7904e60529136727e49629a82058a5607 ] Older (and maybe current) versions of systemd set release_agent to "" when shutting down, but do not set notify_on_release to 0. 
Since 64e90a8acb85 ("Introduce STATIC_USERMODEHELPER to mediate call_usermodehelper()"), we filter out such calls when the user mode helper path is "". However, when used in conjunction with an actual (i.e. non "") STATIC_USERMODEHELPER, the path is never "", so the real usermode helper will be called with argv[0] == "". Let's avoid this by not invoking the release_agent when it is "". Signed-off-by: Tycho Andersen Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- kernel/cgroup/cgroup-v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index d14896518089..545f29c5268d 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -824,7 +824,7 @@ void cgroup1_release_agent(struct work_struct *work) pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); - if (!pathbuf || !agentbuf) + if (!pathbuf || !agentbuf || !strlen(agentbuf)) goto out; spin_lock_irq(&css_set_lock); -- GitLab From d2f8bfe3e4dc854367c5d508d7f42d9164601ae4 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 4 Mar 2020 18:04:25 +0200 Subject: [PATCH 0644/1278] dt-bindings: net: FMan erratum A050385 [ Upstream commit 26d5bb9e4c4b541c475751e015072eb2cbf70d15 ] FMAN DMA read or writes under heavy traffic load may cause FMAN internal resource leak; thus stopping further packet processing. The FMAN internal queue can overflow when FMAN splits single read or write transactions into multiple smaller transactions such that more than 17 AXI transactions are in flight from FMAN to interconnect. When the FMAN internal queue overflows, it can stall further packet processing. The issue can occur with any one of the following three conditions: 1. FMAN AXI transaction crosses 4K address boundary (Errata A010022) 2. FMAN DMA address for an AXI transaction is not 16 byte aligned, i.e. the last 4 bits of an address are non-zero 3. 
Scatter Gather (SG) frames have more than one SG buffer in the SG list and any one of the buffers, except the last buffer in the SG list has data size that is not a multiple of 16 bytes, i.e., other than 16, 32, 48, 64, etc. With any one of the above three conditions present, there is likelihood of stalled FMAN packet processing, especially under stress with multiple ports injecting line-rate traffic. To avoid situations that stall FMAN packet processing, all of the above three conditions must be avoided; therefore, configure the system with the following rules: 1. Frame buffers must not span a 4KB address boundary, unless the frame start address is 256 byte aligned 2. All FMAN DMA start addresses (for example, BMAN buffer address, FD[address] + FD[offset]) are 16B aligned 3. SG table and buffer addresses are 16B aligned and the size of SG buffers are multiple of 16 bytes, except for the last SG buffer that can be of any size. Additional workaround notes: - Address alignment of 64 bytes is recommended for maximally efficient system bus transactions (although 16 byte alignment is sufficient to avoid the stall condition) - To support frame sizes that are larger than 4K bytes, there are two options: 1. Large single buffer frames that span a 4KB page boundary can be converted into SG frames to avoid transaction splits at the 4KB boundary, 2. Align the large single buffer to 256B address boundaries, ensure that the frame address plus offset is 256B aligned. - If software generated SG frames have buffers that are unaligned and with random non-multiple of 16 byte lengths, before transmitting such frames via FMAN, frames will need to be copied into a new single buffer or multiple buffer SG frame that is compliant with the three rules listed above. Signed-off-by: Madalin Bucur Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- Documentation/devicetree/bindings/net/fsl-fman.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt index df873d1f3b7c..2aaae210317b 100644 --- a/Documentation/devicetree/bindings/net/fsl-fman.txt +++ b/Documentation/devicetree/bindings/net/fsl-fman.txt @@ -110,6 +110,13 @@ PROPERTIES Usage: required Definition: See soc/fsl/qman.txt and soc/fsl/bman.txt +- fsl,erratum-a050385 + Usage: optional + Value type: boolean + Definition: A boolean property. Indicates the presence of the + erratum A050385 which indicates that DMA transactions that are + split can result in a FMan lock. + ============================================================================= FMan MURAM Node -- GitLab From 99ce027ad999a9fc9ac6c9bc0251733daf8228be Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 4 Mar 2020 18:04:26 +0200 Subject: [PATCH 0645/1278] arm64: dts: ls1043a: FMan erratum A050385 [ Upstream commit b54d3900862374e1bb2846e6b39d79c896c0b200 ] The LS1043A SoC is affected by the A050385 erratum stating that FMAN DMA read or writes under heavy traffic load may cause FMAN internal resource leak thus stopping further packet processing. Signed-off-by: Madalin Bucur Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi index 169e171407a6..acd205ef329f 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi @@ -21,6 +21,8 @@ }; &fman0 { + fsl,erratum-a050385; + /* these aliases provide the FMan ports mapping */ enet0: ethernet@e0000 { }; -- GitLab From cdbdda398ad19e5d9b497cc13b20ada53cf8df41 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 4 Mar 2020 18:04:27 +0200 Subject: [PATCH 0646/1278] fsl/fman: detect FMan erratum A050385 [ Upstream commit b281f7b93b258ce1419043bbd898a29254d5c9c7 ] Detect the presence of the A050385 erratum. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fman/Kconfig | 28 +++++++++++++++++++++ drivers/net/ethernet/freescale/fman/fman.c | 18 +++++++++++++ drivers/net/ethernet/freescale/fman/fman.h | 5 ++++ 3 files changed, 51 insertions(+) diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig index 8870a9a798ca..91437b94bfcb 100644 --- a/drivers/net/ethernet/freescale/fman/Kconfig +++ b/drivers/net/ethernet/freescale/fman/Kconfig @@ -8,3 +8,31 @@ config FSL_FMAN help Freescale Data-Path Acceleration Architecture Frame Manager (FMan) support + +config DPAA_ERRATUM_A050385 + bool + depends on ARM64 && FSL_DPAA + default y + help + DPAA FMan erratum A050385 software workaround implementation: + align buffers, data start, SG fragment length to avoid FMan DMA + splits. + FMAN DMA read or writes under heavy traffic load may cause FMAN + internal resource leak thus stopping further packet processing. 
+ The FMAN internal queue can overflow when FMAN splits single + read or write transactions into multiple smaller transactions + such that more than 17 AXI transactions are in flight from FMAN + to interconnect. When the FMAN internal queue overflows, it can + stall further packet processing. The issue can occur with any + one of the following three conditions: + 1. FMAN AXI transaction crosses 4K address boundary (Errata + A010022) + 2. FMAN DMA address for an AXI transaction is not 16 byte + aligned, i.e. the last 4 bits of an address are non-zero + 3. Scatter Gather (SG) frames have more than one SG buffer in + the SG list and any one of the buffers, except the last + buffer in the SG list has data size that is not a multiple + of 16 bytes, i.e., other than 16, 32, 48, 64, etc. + With any one of the above three conditions present, there is + likelihood of stalled FMAN packet processing, especially under + stress with multiple ports injecting line-rate traffic. diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index 97425d94e280..9080d2332d03 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -1,5 +1,6 @@ /* * Copyright 2008-2015 Freescale Semiconductor Inc. 
+ * Copyright 2020 NXP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -566,6 +567,10 @@ struct fman_cfg { u32 qmi_def_tnums_thresh; }; +#ifdef CONFIG_DPAA_ERRATUM_A050385 +static bool fman_has_err_a050385; +#endif + static irqreturn_t fman_exceptions(struct fman *fman, enum fman_exceptions exception) { @@ -2517,6 +2522,14 @@ struct fman *fman_bind(struct device *fm_dev) } EXPORT_SYMBOL(fman_bind); +#ifdef CONFIG_DPAA_ERRATUM_A050385 +bool fman_has_errata_a050385(void) +{ + return fman_has_err_a050385; +} +EXPORT_SYMBOL(fman_has_errata_a050385); +#endif + static irqreturn_t fman_err_irq(int irq, void *handle) { struct fman *fman = (struct fman *)handle; @@ -2843,6 +2856,11 @@ static struct fman *read_dts_node(struct platform_device *of_dev) goto fman_free; } +#ifdef CONFIG_DPAA_ERRATUM_A050385 + fman_has_err_a050385 = + of_property_read_bool(fm_node, "fsl,erratum-a050385"); +#endif + return fman; fman_node_put: diff --git a/drivers/net/ethernet/freescale/fman/fman.h b/drivers/net/ethernet/freescale/fman/fman.h index bfa02e0014ae..693401994fa2 100644 --- a/drivers/net/ethernet/freescale/fman/fman.h +++ b/drivers/net/ethernet/freescale/fman/fman.h @@ -1,5 +1,6 @@ /* * Copyright 2008-2015 Freescale Semiconductor Inc. 
+ * Copyright 2020 NXP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -397,6 +398,10 @@ u16 fman_get_max_frm(void); int fman_get_rx_extra_headroom(void); +#ifdef CONFIG_DPAA_ERRATUM_A050385 +bool fman_has_errata_a050385(void); +#endif + struct fman *fman_bind(struct device *dev); #endif /* __FM_H */ -- GitLab From 4b7eb7a4693dd93bf5db8714da7410c6423324d3 Mon Sep 17 00:00:00 2001 From: Wen Xiong Date: Fri, 6 Mar 2020 09:57:28 -0600 Subject: [PATCH 0647/1278] scsi: ipr: Fix softlockup when rescanning devices in petitboot [ Upstream commit 394b61711f3ce33f75bf70a3e22938464a13b3ee ] When trying to rescan disks in petitboot shell, we hit the following softlockup stacktrace: Kernel panic - not syncing: System is deadlocked on memory [ 241.223394] CPU: 32 PID: 693 Comm: sh Not tainted 5.4.16-openpower1 #1 [ 241.223406] Call Trace: [ 241.223415] [c0000003f07c3180] [c000000000493fc4] dump_stack+0xa4/0xd8 (unreliable) [ 241.223432] [c0000003f07c31c0] [c00000000007d4ac] panic+0x148/0x3cc [ 241.223446] [c0000003f07c3260] [c000000000114b10] out_of_memory+0x468/0x4c4 [ 241.223461] [c0000003f07c3300] [c0000000001472b0] __alloc_pages_slowpath+0x594/0x6d8 [ 241.223476] [c0000003f07c3420] [c00000000014757c] __alloc_pages_nodemask+0x188/0x1a4 [ 241.223492] [c0000003f07c34a0] [c000000000153e10] alloc_pages_current+0xcc/0xd8 [ 241.223508] [c0000003f07c34e0] [c0000000001577ac] alloc_slab_page+0x30/0x98 [ 241.223524] [c0000003f07c3520] [c0000000001597fc] new_slab+0x138/0x40c [ 241.223538] [c0000003f07c35f0] [c00000000015b204] ___slab_alloc+0x1e4/0x404 [ 241.223552] [c0000003f07c36c0] [c00000000015b450] __slab_alloc+0x2c/0x48 [ 241.223566] [c0000003f07c36f0] [c00000000015b754] kmem_cache_alloc_node+0x9c/0x1b4 [ 241.223582] [c0000003f07c3760] [c000000000218c48] blk_alloc_queue_node+0x34/0x270 [ 241.223599] [c0000003f07c37b0] [c000000000226574] blk_mq_init_queue+0x2c/0x78 [ 241.223615] 
[c0000003f07c37e0] [c0000000002ff710] scsi_mq_alloc_queue+0x28/0x70 [ 241.223631] [c0000003f07c3810] [c0000000003005b8] scsi_alloc_sdev+0x184/0x264 [ 241.223647] [c0000003f07c38a0] [c000000000300ba0] scsi_probe_and_add_lun+0x288/0xa3c [ 241.223663] [c0000003f07c3a00] [c000000000301768] __scsi_scan_target+0xcc/0x478 [ 241.223679] [c0000003f07c3b20] [c000000000301c64] scsi_scan_channel.part.9+0x74/0x7c [ 241.223696] [c0000003f07c3b70] [c000000000301df4] scsi_scan_host_selected+0xe0/0x158 [ 241.223712] [c0000003f07c3bd0] [c000000000303f04] store_scan+0x104/0x114 [ 241.223727] [c0000003f07c3cb0] [c0000000002d5ac4] dev_attr_store+0x30/0x4c [ 241.223741] [c0000003f07c3cd0] [c0000000001dbc34] sysfs_kf_write+0x64/0x78 [ 241.223756] [c0000003f07c3cf0] [c0000000001da858] kernfs_fop_write+0x170/0x1b8 [ 241.223773] [c0000003f07c3d40] [c0000000001621fc] __vfs_write+0x34/0x60 [ 241.223787] [c0000003f07c3d60] [c000000000163c2c] vfs_write+0xa8/0xcc [ 241.223802] [c0000003f07c3db0] [c000000000163df4] ksys_write+0x70/0xbc [ 241.223816] [c0000003f07c3e20] [c00000000000b40c] system_call+0x5c/0x68 As a part of the scan process Linux will allocate and configure a scsi_device for each target to be scanned. If the device is not present, then the scsi_device is torn down. As a part of scsi_device teardown a workqueue item will be scheduled and the lockups we see are because there are 250k workqueue items to be processed. According to the specification of SIS-64 sas controller, max_channel should be decreased on SIS-64 adapters to 4. The patch fixes softlockup issue. Thanks for Oliver Halloran's help with debugging and explanation! Link: https://lore.kernel.org/r/1583510248-23672-1-git-send-email-wenxiong@linux.vnet.ibm.com Signed-off-by: Wen Xiong Signed-off-by: Martin K.
Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ipr.c | 3 ++- drivers/scsi/ipr.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 35d54ee1c5c7..b172f0a02083 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -9962,6 +9962,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, ioa_cfg->max_devs_supported = ipr_max_devs; if (ioa_cfg->sis64) { + host->max_channel = IPR_MAX_SIS64_BUSES; host->max_id = IPR_MAX_SIS64_TARGETS_PER_BUS; host->max_lun = IPR_MAX_SIS64_LUNS_PER_TARGET; if (ipr_max_devs > IPR_MAX_SIS64_DEVS) @@ -9970,6 +9971,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, + ((sizeof(struct ipr_config_table_entry64) * ioa_cfg->max_devs_supported))); } else { + host->max_channel = IPR_VSET_BUS; host->max_id = IPR_MAX_NUM_TARGETS_PER_BUS; host->max_lun = IPR_MAX_NUM_LUNS_PER_TARGET; if (ipr_max_devs > IPR_MAX_PHYSICAL_DEVS) @@ -9979,7 +9981,6 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, * ioa_cfg->max_devs_supported))); } - host->max_channel = IPR_VSET_BUS; host->unique_id = host->host_no; host->max_cmd_len = IPR_MAX_CDB_LEN; host->can_queue = ioa_cfg->max_cmds; diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 085e6c90f9e6..89b36987ff30 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -1306,6 +1306,7 @@ struct ipr_resource_entry { #define IPR_ARRAY_VIRTUAL_BUS 0x1 #define IPR_VSET_VIRTUAL_BUS 0x2 #define IPR_IOAFP_VIRTUAL_BUS 0x3 +#define IPR_MAX_SIS64_BUSES 0x4 #define IPR_GET_RES_PHYS_LOC(res) \ (((res)->bus << 24) | ((res)->target << 8) | (res)->lun) -- GitLab From 584051f19b982ef1934dce74c31679d6c977c0cc Mon Sep 17 00:00:00 2001 From: Nicolas Cavallari Date: Thu, 5 Mar 2020 15:04:09 +0100 Subject: [PATCH 0648/1278] mac80211: Do not send mesh HWMP PREQ if HWMP is disabled [ Upstream commit ba32679cac50c38fdf488296f96b1f3175532b8e ] When trying to transmit to an unknown destination, the mesh code would unconditionally transmit 
a HWMP PREQ even if HWMP is not the current path selection algorithm. Signed-off-by: Nicolas Cavallari Link: https://lore.kernel.org/r/20200305140409.12204-1-cavallar@lri.fr Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mesh_hwmp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 994dde6e5f9d..986e9b6b961d 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1137,7 +1137,8 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, } } - if (!(mpath->flags & MESH_PATH_RESOLVING)) + if (!(mpath->flags & MESH_PATH_RESOLVING) && + mesh_path_sel_is_hwmp(sdata)) mesh_queue_preq(mpath, PREQ_Q_F_START); if (skb_queue_len(&mpath->frame_queue) >= MESH_FRAME_QUEUE_LEN) -- GitLab From 29ccf2e4f77693654efa2145d4247046e45a4604 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 10 Mar 2020 15:06:54 -0700 Subject: [PATCH 0649/1278] dpaa_eth: Remove unnecessary boolean expression in dpaa_get_headroom [ Upstream commit 7395f62d95aafacdb9bd4996ec2f95b4a655d7e6 ] Clang warns: drivers/net/ethernet/freescale/dpaa/dpaa_eth.c:2860:9: warning: converting the result of '?:' with integer constants to a boolean always evaluates to 'true' [-Wtautological-constant-compare] return DPAA_FD_DATA_ALIGNMENT ? ALIGN(headroom, ^ drivers/net/ethernet/freescale/dpaa/dpaa_eth.c:131:34: note: expanded from macro 'DPAA_FD_DATA_ALIGNMENT' \#define DPAA_FD_DATA_ALIGNMENT (fman_has_errata_a050385() ? 64 : 16) ^ 1 warning generated. This was exposed by commit 3c68b8fffb48 ("dpaa_eth: FMan erratum A050385 workaround") even though it appears to have been an issue since the introductory commit 9ad1a3749333 ("dpaa_eth: add support for DPAA Ethernet") since DPAA_FD_DATA_ALIGNMENT has never been able to be zero. Just replace the whole boolean expression with the true branch, as it is always been true. 
Link: https://github.com/ClangBuiltLinux/linux/issues/928 Signed-off-by: Nathan Chancellor Reviewed-by: Madalin Bucur Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 39b8b6730e77..67246d42c3d9 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2646,9 +2646,7 @@ static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl) headroom = (u16)(bl->priv_data_size + DPAA_PARSE_RESULTS_SIZE + DPAA_TIME_STAMP_SIZE + DPAA_HASH_RESULTS_SIZE); - return DPAA_FD_DATA_ALIGNMENT ? ALIGN(headroom, - DPAA_FD_DATA_ALIGNMENT) : - headroom; + return ALIGN(headroom, DPAA_FD_DATA_ALIGNMENT); } static int dpaa_eth_probe(struct platform_device *pdev) -- GitLab From 45aa094b494162605b3024c8b5328faa298d0e1d Mon Sep 17 00:00:00 2001 From: Dominik Czarnota Date: Mon, 9 Mar 2020 16:22:50 +0100 Subject: [PATCH 0650/1278] sxgbe: Fix off by one in samsung driver strncpy size arg [ Upstream commit f3cc008bf6d59b8d93b4190e01d3e557b0040e15 ] This patch fixes an off-by-one error in the strncmp size argument in drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c. The issue is that in: strncmp(opt, "eee_timer:", 6) the passed string literal: "eee_timer:" has 10 bytes (without the NULL byte) and the passed size argument is 6. As a result, the logic will also accept other, malformed strings, e.g. "eee_tiXXX:". This bug doesn't seem to have any security impact since it's present in the module's cmdline parsing code. Signed-off-by: Dominik Czarnota Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 89831adb8eb7..6d27eec85fce 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -2284,7 +2284,7 @@ static int __init sxgbe_cmdline_opt(char *str) if (!str || !*str) return -EINVAL; while ((opt = strsep(&str, ",")) != NULL) { - if (!strncmp(opt, "eee_timer:", 6)) { + if (!strncmp(opt, "eee_timer:", 10)) { if (kstrtoint(opt + 10, 0, &eee_timer)) goto err; } -- GitLab From e5c83bb6a4213f4a134c8e8f700895c103e3653c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Jul 2018 15:16:51 +0100 Subject: [PATCH 0651/1278] arm64: ptrace: map SPSR_ELx<->PSR for compat tasks commit 76fc52bd07d3e9cb708f1a50b60c825c96acd606 upstream. The SPSR_ELx format for exceptions taken from AArch32 is slightly different to the AArch32 PSR format. Map between the two in the compat ptrace code. 
Signed-off-by: Mark Rutland Fixes: 7206dc93a58fb764 ("arm64: Expose Arm v8.4 features") Cc: Catalin Marinas Cc: Suzuki Poulose Cc: Will Deacon Signed-off-by: Will Deacon Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/ptrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e230b4dff960..e8574b95bda8 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -833,6 +833,7 @@ static int compat_gpr_get(struct task_struct *target, break; case 16: reg = task_pt_regs(target)->pstate; + reg = pstate_to_compat_psr(reg); break; case 17: reg = task_pt_regs(target)->orig_x0; @@ -900,6 +901,7 @@ static int compat_gpr_set(struct task_struct *target, newregs.pc = reg; break; case 16: + reg = compat_psr_to_pstate(reg); newregs.pstate = reg; break; case 17: -- GitLab From b2452cc1fae35cd35d8f836af744d3e9f72d505e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Jul 2018 15:16:50 +0100 Subject: [PATCH 0652/1278] arm64: compat: map SPSR_ELx<->PSR for signals commit 25dc2c80cfa33153057aa94984855acd57adf92a upstream. The SPSR_ELx format for exceptions taken from AArch32 differs from the AArch32 PSR format. Thus, we must translate between the two when setting up a compat sigframe, or restoring context from a compat sigframe. 
Signed-off-by: Mark Rutland Fixes: 7206dc93a58fb764 ("arm64: Expose Arm v8.4 features") Cc: Catalin Marinas Cc: Suzuki Poulose Cc: Will Deacon Signed-off-by: Will Deacon Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/signal32.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index e09bf5d15606..3832750cee8b 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -321,6 +321,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, int err; sigset_t set; struct compat_aux_sigframe __user *aux; + unsigned long psr; err = get_sigset_t(&set, &sf->uc.uc_sigmask); if (err == 0) { @@ -344,7 +345,9 @@ static int compat_restore_sigframe(struct pt_regs *regs, __get_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); __get_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); __get_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); - __get_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + __get_user_error(psr, &sf->uc.uc_mcontext.arm_cpsr, err); + + regs->pstate = compat_psr_to_pstate(psr); /* * Avoid compat_sys_sigreturn() restarting. 
@@ -500,6 +503,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { struct compat_aux_sigframe __user *aux; + unsigned long psr = pstate_to_compat_psr(regs->pstate); int err = 0; __put_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err); @@ -518,7 +522,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, __put_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); __put_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); __put_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); - __put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + __put_user_error(psr, &sf->uc.uc_mcontext.arm_cpsr, err); __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); /* set the compat FSR WnR */ -- GitLab From 64219b370e89acbdd7385ddc5e2c759c7b5809b7 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Sat, 29 Jun 2019 23:22:33 +0200 Subject: [PATCH 0653/1278] ftrace/x86: Anotate text_mutex split between ftrace_arch_code_modify_post_process() and ftrace_arch_code_modify_prepare() commit 074376ac0e1d1fcd4fafebca86ee6158e7c20680 upstream. ftrace_arch_code_modify_prepare() is acquiring text_mutex, while the corresponding release is happening in ftrace_arch_code_modify_post_process(). This has already been documented in the code, but let's also make the fact that this is intentional clear to the semantic analysis tools such as sparse. 
Link: http://lkml.kernel.org/r/nycvar.YFH.7.76.1906292321170.27227@cbobk.fhfr.pm Fixes: 39611265edc1a ("ftrace/x86: Add a comment to why we take text_mutex in ftrace_arch_code_modify_prepare()") Fixes: d5b844a2cf507 ("ftrace/x86: Remove possible deadlock between register_kprobe() and ftrace_run_update_code()") Signed-off-by: Jiri Kosina Signed-off-by: Steven Rostedt (VMware) Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/ftrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index ccc2b9d2956a..387340b1f6db 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -36,6 +36,7 @@ #ifdef CONFIG_DYNAMIC_FTRACE int ftrace_arch_code_modify_prepare(void) + __acquires(&text_mutex) { mutex_lock(&text_mutex); set_kernel_text_rw(); @@ -44,6 +45,7 @@ int ftrace_arch_code_modify_prepare(void) } int ftrace_arch_code_modify_post_process(void) + __releases(&text_mutex) { set_all_modules_text_ro(); set_kernel_text_ro(); -- GitLab From 841550de1b945dca2f81cab8094e912413d8ce16 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Mon, 4 Nov 2019 23:00:48 +0800 Subject: [PATCH 0654/1278] i2c: hix5hd2: add missed clk_disable_unprepare in remove commit e1b9f99ff8c40bba6e59de9ad4a659447b1e4112 upstream. The driver forgets to disable and unprepare clk when remove. Add a call to clk_disable_unprepare to fix it. 
Signed-off-by: Chuhong Yuan Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-hix5hd2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-hix5hd2.c b/drivers/i2c/busses/i2c-hix5hd2.c index bb68957d3da5..aa5c55bd8b11 100644 --- a/drivers/i2c/busses/i2c-hix5hd2.c +++ b/drivers/i2c/busses/i2c-hix5hd2.c @@ -498,6 +498,7 @@ static int hix5hd2_i2c_remove(struct platform_device *pdev) i2c_del_adapter(&priv->adap); pm_runtime_disable(priv->dev); pm_runtime_set_suspended(priv->dev); + clk_disable_unprepare(priv->clk); return 0; } -- GitLab From 374477e3f122b838cd402308a85f5acc0270d24f Mon Sep 17 00:00:00 2001 From: Yussuf Khalil Date: Sat, 7 Mar 2020 14:16:31 -0800 Subject: [PATCH 0655/1278] Input: synaptics - enable RMI on HP Envy 13-ad105ng commit 1369d0abe469fb4cdea8a5bce219d38cb857a658 upstream. This laptop (and perhaps other variants of the same model) reports an SMBus-capable Synaptics touchpad. Everything (including suspend and resume) works fine when RMI is enabled via the kernel command line, so let's add it to the whitelist. 
Signed-off-by: Yussuf Khalil Link: https://lore.kernel.org/r/20200307213508.267187-1-dev@pp3345.net Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 5f764e0993a4..2bca84f4c2b2 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -189,6 +189,7 @@ static const char * const smbus_pnp_ids[] = { "SYN3052", /* HP EliteBook 840 G4 */ "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ + "SYN3257", /* HP Envy 13-ad105ng */ NULL }; -- GitLab From d3dbedd4b90bcc9c53d5cdc3de7fe3ab55727c59 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Tue, 24 Mar 2020 15:53:50 -0700 Subject: [PATCH 0656/1278] Input: avoid BIT() macro usage in the serio.h UAPI header commit 52afa505a03d914081f40cb869a3248567a57573 upstream. The commit 19ba1eb15a2a ("Input: psmouse - add a custom serio protocol to send extra information") introduced usage of the BIT() macro for SERIO_* flags; this macro is not provided in UAPI headers. Replace it with the similarly defined _BITUL() macro defined in <linux/const.h>.
Fixes: 19ba1eb15a2a ("Input: psmouse - add a custom serio protocol to send extra information") Signed-off-by: Eugene Syromiatnikov Cc: # v5.0+ Link: https://lore.kernel.org/r/20200324041341.GA32335@asgard.redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serio.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/serio.h b/include/uapi/linux/serio.h index a0cac1d8670d..1937915be413 100644 --- a/include/uapi/linux/serio.h +++ b/include/uapi/linux/serio.h @@ -9,7 +9,7 @@ #ifndef _UAPI_SERIO_H #define _UAPI_SERIO_H - +#include <linux/const.h> #include <linux/ioctl.h> #define SPIOCSTYPE _IOW('q', 0x01, unsigned long) @@ -18,10 +18,10 @@ /* * bit masks for use in "interrupt" flags (3rd argument) */ -#define SERIO_TIMEOUT BIT(0) -#define SERIO_PARITY BIT(1) -#define SERIO_FRAME BIT(2) -#define SERIO_OOB_DATA BIT(3) +#define SERIO_TIMEOUT _BITUL(0) +#define SERIO_PARITY _BITUL(1) +#define SERIO_FRAME _BITUL(2) +#define SERIO_OOB_DATA _BITUL(3) /* * Serio types -- GitLab From a1f30f0091492f32b32fda362d26eb149fde5a2d Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 13 Mar 2020 11:47:17 +0200 Subject: [PATCH 0657/1278] ARM: dts: dra7: Add bus_dma_limit for L3 bus commit cfb5d65f25959f724081bae8445a0241db606af6 upstream. The L3 interconnect's memory map is from 0x0 to 0xffffffff. Out of this, System memory (SDRAM) can be accessed from 0x80000000 to 0xffffffff (2GB) DRA7 does support 4GB of SDRAM but upper 2GB can only be accessed by the MPU subsystem. Add the dma-ranges property to reflect the physical address limit of the L3 bus. Issues were observed only with SATA on DRA7-EVM with 4GB RAM and CONFIG_ARM_LPAE enabled. This is because the controller supports 64-bit DMA and its driver sets the dma_mask to 64-bit thus resulting in DMA accesses beyond L3 limit of 2G. Setting the correct bus_dma_limit fixes the issue.
Signed-off-by: Roger Quadros Cc: stable@kernel.org Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dra7.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index a40a7af85d02..f271c564d57d 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -137,6 +137,7 @@ #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x0 0x0 0xc0000000>; + dma-ranges = <0x80000000 0x0 0x80000000 0x80000000>; ti,hwmods = "l3_main_1", "l3_main_2"; reg = <0x0 0x44000000 0x0 0x1000000>, <0x0 0x45000000 0x0 0x1000>; -- GitLab From e71f8b6b21ce2b540dd2f20e45dc41a59960686e Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 16 Mar 2020 12:27:31 +0200 Subject: [PATCH 0658/1278] ARM: dts: omap5: Add bus_dma_limit for L3 bus commit dfa7ea303f56a3a8b1ed3b91ef35af2da67ca4ee upstream. The L3 interconnect's memory map is from 0x0 to 0xffffffff. Out of this, System memory (SDRAM) can be accessed from 0x80000000 to 0xffffffff (2GB) OMAP5 does support 4GB of SDRAM but upper 2GB can only be accessed by the MPU subsystem. Add the dma-ranges property to reflect the physical address limit of the L3 bus. 
Cc: stable@kernel.org Signed-off-by: Roger Quadros Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap5.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index eaff2a5751dd..bc3f53c79e9d 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -131,6 +131,7 @@ #address-cells = <1>; #size-cells = <1>; ranges = <0 0 0 0xc0000000>; + dma-ranges = <0x80000000 0x0 0x80000000 0x80000000>; ti,hwmods = "l3_main_1", "l3_main_2", "l3_main_3"; reg = <0 0x44000000 0 0x2000>, <0 0x44800000 0 0x3000>, -- GitLab From 4c572d8b5fd01af0b6a1e41dff733e97fffcf7fd Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Feb 2020 00:42:01 +0900 Subject: [PATCH 0659/1278] perf probe: Do not depend on dwfl_module_addrsym() commit 1efde2754275dbd9d11c6e0132a4f09facf297ab upstream. Do not depend on dwfl_module_addrsym() because it can fail on user-space shared libraries. Actually, the same bug was fixed by commit 664fee3dc379 ("perf probe: Do not use dwfl_module_addrsym if dwarf_diename finds symbol name"), but commit 07d369857808 ("perf probe: Fix wrong address verification") reverted to get actual symbol address from symtab. This fixes it again by getting the symbol address from the DIE, and only if the DIE has only an address range, it uses dwfl_module_addrsym().
Fixes: 07d369857808 ("perf probe: Fix wrong address verification) Reported-by: Alexandre Ghiti Signed-off-by: Masami Hiramatsu Tested-by: Alexandre Ghiti Cc: Alexander Shishkin Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sasha Levin Link: http://lore.kernel.org/lkml/158281812176.476.14164573830975116234.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/probe-finder.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 893193bd28c1..ae0feea4e8b5 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -626,14 +626,19 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, return -EINVAL; } - /* Try to get actual symbol name from symtab */ - symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); + if (dwarf_entrypc(sp_die, &eaddr) == 0) { + /* If the DIE has entrypc, use it. */ + symbol = dwarf_diename(sp_die); + } else { + /* Try to get actual symbol name and address from symtab */ + symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); + eaddr = sym.st_value; + } if (!symbol) { pr_warning("Failed to find symbol at 0x%lx\n", (unsigned long)paddr); return -ENOENT; } - eaddr = sym.st_value; tp->offset = (unsigned long)(paddr - eaddr); tp->address = (unsigned long)paddr; -- GitLab From 395c716a8a17eb2e0d7200297d1c5a118ab70294 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 7 Mar 2020 03:32:58 +0900 Subject: [PATCH 0660/1278] tools: Let O= makes handle a relative path with -C option commit be40920fbf1003c38ccdc02b571e01a75d890c82 upstream. 
When I tried to compile tools/perf from the top directory with the -C option, the O= option didn't work correctly if I passed a relative path: $ make O=BUILD -C tools/perf/ make: Entering directory '/home/mhiramat/ksrc/linux/tools/perf' BUILD: Doing 'make -j8' parallel build ../scripts/Makefile.include:4: *** O=/home/mhiramat/ksrc/linux/tools/perf/BUILD does not exist. Stop. make: *** [Makefile:70: all] Error 2 make: Leaving directory '/home/mhiramat/ksrc/linux/tools/perf' The O= directory existence check failed because the check script ran in the build target directory instead of the directory where I ran the make command. To fix that, once change directory to $(PWD) and check O= directory, since the PWD is set to where the make command runs. Fixes: c883122acc0d ("perf tools: Let O= makes handle relative paths") Reported-by: Randy Dunlap Signed-off-by: Masami Hiramatsu Cc: Andrew Morton Cc: Borislav Petkov Cc: Geert Uytterhoeven Cc: Jiri Olsa Cc: Masahiro Yamada Cc: Michal Marek Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt (VMware) Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/158351957799.3363.15269768530697526765.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/Makefile | 2 +- tools/scripts/Makefile.include | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 7902a5681fc8..b8fc7d972be9 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -35,7 +35,7 @@ endif # Only pass canonical directory names as the output directory: # ifneq ($(O),) - FULL_O := $(shell readlink -f $(O) || echo $(O)) + FULL_O := $(shell cd $(PWD); readlink -f $(O) || echo $(O)) endif # diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 71dc7efc7efa..df247f39d7c5 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 ifneq ($(O),) ifeq 
($(origin O), command line) - dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) - ABSOLUTE_O := $(shell cd $(O) ; pwd) + dummy := $(if $(shell cd $(PWD); test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) + ABSOLUTE_O := $(shell cd $(PWD); cd $(O) ; pwd) OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/) COMMAND_O := O=$(ABSOLUTE_O) ifeq ($(objtree),) -- GitLab From 88f7a6aa7fb9aa5076b65489146045dac865f1d3 Mon Sep 17 00:00:00 2001 From: Dirk Mueller Date: Tue, 14 Jan 2020 18:53:41 +0100 Subject: [PATCH 0661/1278] scripts/dtc: Remove redundant YYLOC global declaration commit e33a814e772cdc36436c8c188d8c42d019fda639 upstream. gcc 10 will default to -fno-common, which causes this error at link time: (.text+0x0): multiple definition of `yylloc'; dtc-lexer.lex.o (symbol from plugin):(.text+0x0): first defined here This is because both dtc-lexer as well as dtc-parser define the same global symbol yylloc. Before with -fcommon those were merged into one definition. The proper solution would be to mark this as "extern", however that leads to: dtc-lexer.l:26:16: error: redundant redeclaration of 'yylloc' [-Werror=redundant-decls] 26 | extern YYLTYPE yylloc; | ^~~~~~ In file included from dtc-lexer.l:24: dtc-parser.tab.h:127:16: note: previous declaration of 'yylloc' was here 127 | extern YYLTYPE yylloc; | ^~~~~~ cc1: all warnings being treated as errors which means the declaration is completely redundant and can just be dropped.
Signed-off-by: Dirk Mueller Signed-off-by: David Gibson [robh: cherry-pick from upstream] Cc: stable@vger.kernel.org Signed-off-by: Rob Herring [nc: Also apply to dtc-lexer.lex.c_shipped due to a lack of e039139be8c2, where dtc-lexer.l started being used] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- scripts/dtc/dtc-lexer.l | 1 - scripts/dtc/dtc-lexer.lex.c_shipped | 1 - 2 files changed, 2 deletions(-) diff --git a/scripts/dtc/dtc-lexer.l b/scripts/dtc/dtc-lexer.l index fd825ebba69c..24af54997758 100644 --- a/scripts/dtc/dtc-lexer.l +++ b/scripts/dtc/dtc-lexer.l @@ -38,7 +38,6 @@ LINECOMMENT "//".*\n #include "srcpos.h" #include "dtc-parser.tab.h" -YYLTYPE yylloc; extern bool treesource_error; /* CAUTION: this will stop working if we ever use yyless() or yyunput() */ diff --git a/scripts/dtc/dtc-lexer.lex.c_shipped b/scripts/dtc/dtc-lexer.lex.c_shipped index 64c243772398..9db3a409c507 100644 --- a/scripts/dtc/dtc-lexer.lex.c_shipped +++ b/scripts/dtc/dtc-lexer.lex.c_shipped @@ -631,7 +631,6 @@ char *yytext; #include "srcpos.h" #include "dtc-parser.tab.h" -YYLTYPE yylloc; extern bool treesource_error; /* CAUTION: this will stop working if we ever use yyless() or yyunput() */ -- GitLab From 7a15a7feb7d5a8ad1bc6ba208bd021c13682a985 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 24 Mar 2020 11:16:15 -0400 Subject: [PATCH 0662/1278] scsi: sd: Fix optimal I/O size for devices that change reported values commit ea697a8bf5a4161e59806fab14f6e4a46dc7dcb0 upstream. Some USB bridge devices will return a default set of characteristics during initialization. And then, once an attached drive has spun up, substitute the actual parameters reported by the drive. According to the SCSI spec, the device should return a UNIT ATTENTION in case any reported parameters change. But in this case the change is made silently after a small window where default values are reported. 
Commit a83da8a4509d ("scsi: sd: Optimal I/O size should be a multiple of physical block size") validated the reported optimal I/O size against the physical block size to overcome problems with devices reporting nonsensical transfer sizes. However, this validation did not account for the fact that aforementioned devices will return default values during a brief window during spin-up. The subsequent change in reported characteristics would invalidate the checking that had previously been performed. Unset a previously configured optimal I/O size should the sanity checking fail on subsequent revalidate attempts. Link: https://lore.kernel.org/r/33fb522e-4f61-1b76-914f-c9e6a3553c9b@gmail.com Cc: Bryan Gurney Cc: Reported-by: Bernhard Sulzer Tested-by: Bernhard Sulzer Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e8c2afbb82e9..49d0720a0b7d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3212,9 +3212,11 @@ static int sd_revalidate_disk(struct gendisk *disk) if (sd_validate_opt_xfer_size(sdkp, dev_max)) { q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks); rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks); - } else + } else { + q->limits.io_opt = 0; rw_max = min_not_zero(logical_to_sectors(sdp, dev_max), (sector_t)BLK_DEF_MAX_SECTORS); + } /* Do not exceed controller limit */ rw_max = min(rw_max, queue_max_hw_sectors(q)); -- GitLab From 1ec47ff0525c4a530dc7783cb28044179334a4cc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Mar 2020 15:51:35 +0100 Subject: [PATCH 0663/1278] mac80211: mark station unauthorized before key removal commit b16798f5b907733966fd1a558fca823b3c67e4a1 upstream. If a station is still marked as authorized, mark it as no longer so before removing its keys. 
This allows frames transmitted to it to be rejected, providing additional protection against leaking plain text data during the disconnection flow. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200326155133.ccb4fb0bb356.If48f0f0504efdcf16b8921f48c6d3bb2cb763c99@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/sta_info.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 627dc642f894..77ab9cc1a230 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -3,6 +3,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH + * Copyright (C) 2018-2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -951,6 +952,11 @@ static void __sta_info_destroy_part2(struct sta_info *sta) might_sleep(); lockdep_assert_held(&local->sta_mtx); + while (sta->sta_state == IEEE80211_STA_AUTHORIZED) { + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + WARN_ON_ONCE(ret); + } + /* now keys can no longer be reached */ ieee80211_free_sta_keys(local, sta); -- GitLab From 0c4787e06956718d0431166b751459243bf1bba7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Mar 2020 12:12:22 +0100 Subject: [PATCH 0664/1278] gpiolib: acpi: Correct comment for HP x2 10 honor_wakeup quirk commit efaa87fa0947d525cf7c075316adde4e3ac7720b upstream. Commit aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") added a quirk for some models of the HP x2 10 series. There are 2 issues with the comment describing the quirk: 1) The comment claims the DMI quirk applies to all Cherry Trail based HP x2 10 models. 
In the meantime I have learned that there are at least 3 models of the HP x2 10: Bay Trail SoC + AXP288 PMIC Cherry Trail SoC + AXP288 PMIC Cherry Trail SoC + TI PMIC And this quirk's DMI matches only match the Cherry Trail SoC + TI PMIC model, which is good because we want a slightly different quirk for the others. This commit updates the comment to make it clear that the quirk is only for the Cherry Trail SoC + TI PMIC models. 2) The comment says that it is ok to disable wakeup on all ACPI GPIO event handlers, because there is only the one for the embedded-controller events. This is not true, there also is a handler for the special INT0002 device which is related to USB wakeups. We need to also disable wakeups on that one because the device turns off the USB-keyboard built into the dock when closing the lid. The XHCI controller takes a while to notice this, so it only notices it when already suspended, causing a spurious wakeup because of this. So disabling wakeup on all handlers is the right thing to do, but not because there only is the one handler for the EC events. This commit updates the comment to correctly reflect this. Fixes: aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200302111225.6641-1-hdegoede@redhat.com Acked-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-acpi.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index c7b9125c8ec2..b34b02741a03 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1348,12 +1348,14 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { }, { /* - * Various HP X2 10 Cherry Trail models use an external - * embedded-controller connected via I2C + an ACPI GPIO - * event handler.
The embedded controller generates various - * spurious wakeup events when suspended. So disable wakeup - * for its handler (it uses the only ACPI GPIO event handler). - * This breaks wakeup when opening the lid, the user needs + * HP X2 10 models with Cherry Trail SoC + TI PMIC use an + * external embedded-controller connected via I2C + an ACPI GPIO + * event handler on INT33FF:01 pin 0, causing spurious wakeups. + * When suspending by closing the LID, the power to the USB + * keyboard is turned off, causing INT0002 ACPI events to + * trigger once the XHCI controller notices the keyboard is + * gone. So INT0002 events cause spurious wakeups too. Ignoring + * EC wakes breaks wakeup when opening the lid, the user needs * to press the power-button to wakeup the system. The * alternative is suspend simply not working, which is worse. */ -- GitLab From f4b10cc220f7a18f4d004a63dceee9d3e4c5596e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Mar 2020 12:12:23 +0100 Subject: [PATCH 0665/1278] gpiolib: acpi: Rework honor_wakeup option into an ignore_wake option commit 2ccb21f5516afef5e251184eeefbf36db90206d7 upstream. Commit aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") was added to deal with spurious wakeups on one specific model of the HP x2 10 series. The approach taken there was to add a bool controlling wakeup support for all ACPI GPIO events. This was sufficient for the specific HP x2 10 model the commit was trying to fix, but in the mean time other models have turned up which need a similar workaround to avoid spurious wakeups from suspend, but only for one of the pins on which the ACPI tables request ACPI GPIO events. 
Since the honor_wakeup option was added to be able to ignore wake events, the name was perhaps not the best, this commit renames it to ignore_wake and changes it to a string with the following format: gpiolib_acpi.ignore_wake=controller@pin[,controller@pin[,...]] This allows working around spurious wakeup issues on a per pin basis. This commit also reworks the existing quirk for the HP x2 10 so that it functions as before. Note: -This removes the honor_wakeup parameter. This has only been upstream for a short time and to the best of my knowledge there are no users using this module parameter. -The controller@pin[,controller@pin[,...]] syntax is based on an existing kernel module parameter using the same controller@pin format. That version uses ';' as separator, but in practice that is problematic because grub2 cannot handle this without taking special care to escape the ';', so here we are using a ',' as separator instead which does not have this issue. Fixes: aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200302111225.6641-2-hdegoede@redhat.com Acked-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-acpi.c | 96 +++++++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 20 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index b34b02741a03..b0124be9baff 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -24,18 +24,21 @@ #include "gpiolib.h" -#define QUIRK_NO_EDGE_EVENTS_ON_BOOT 0x01l -#define QUIRK_NO_WAKEUP 0x02l - static int run_edge_events_on_boot = -1; module_param(run_edge_events_on_boot, int, 0444); MODULE_PARM_DESC(run_edge_events_on_boot, "Run edge _AEI event-handlers at boot: 0=no, 1=yes, -1=auto"); -static int honor_wakeup = -1; -module_param(honor_wakeup, int, 0444); -MODULE_PARM_DESC(honor_wakeup, - 
"Honor the ACPI wake-capable flag: 0=no, 1=yes, -1=auto"); +static char *ignore_wake; +module_param(ignore_wake, charp, 0444); +MODULE_PARM_DESC(ignore_wake, + "controller@pin combos on which to ignore the ACPI wake flag " + "ignore_wake=controller@pin[,controller@pin[,...]]"); + +struct acpi_gpiolib_dmi_quirk { + bool no_edge_events_on_boot; + char *ignore_wake; +}; /** * struct acpi_gpio_event - ACPI GPIO event handler data @@ -262,6 +265,57 @@ static void acpi_gpiochip_request_irqs(struct acpi_gpio_chip *acpi_gpio) acpi_gpiochip_request_irq(acpi_gpio, event); } +static bool acpi_gpio_in_ignore_list(const char *controller_in, int pin_in) +{ + const char *controller, *pin_str; + int len, pin; + char *endp; + + controller = ignore_wake; + while (controller) { + pin_str = strchr(controller, '@'); + if (!pin_str) + goto err; + + len = pin_str - controller; + if (len == strlen(controller_in) && + strncmp(controller, controller_in, len) == 0) { + pin = simple_strtoul(pin_str + 1, &endp, 10); + if (*endp != 0 && *endp != ',') + goto err; + + if (pin == pin_in) + return true; + } + + controller = strchr(controller, ','); + if (controller) + controller++; + } + + return false; +err: + pr_err_once("Error invalid value for gpiolib_acpi.ignore_wake: %s\n", + ignore_wake); + return false; +} + +static bool acpi_gpio_irq_is_wake(struct device *parent, + struct acpi_resource_gpio *agpio) +{ + int pin = agpio->pin_table[0]; + + if (agpio->wake_capable != ACPI_WAKE_CAPABLE) + return false; + + if (acpi_gpio_in_ignore_list(dev_name(parent), pin)) { + dev_info(parent, "Ignoring wakeup on pin %d\n", pin); + return false; + } + + return true; +} + static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, void *context) { @@ -347,7 +401,7 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, event->handle = evt_handle; event->handler = handler; event->irq = irq; - event->irq_is_wake = honor_wakeup && agpio->wake_capable == ACPI_WAKE_CAPABLE; + 
event->irq_is_wake = acpi_gpio_irq_is_wake(chip->parent, agpio); event->pin = pin; event->desc = desc; @@ -1331,7 +1385,9 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "MINIX"), DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"), }, - .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .no_edge_events_on_boot = true, + }, }, { /* @@ -1344,7 +1400,9 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"), DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"), }, - .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .no_edge_events_on_boot = true, + }, }, { /* @@ -1363,33 +1421,31 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"), }, - .driver_data = (void *)QUIRK_NO_WAKEUP, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "INT33FF:01@0,INT0002:00@2", + }, }, {} /* Terminating entry */ }; static int acpi_gpio_setup_params(void) { + const struct acpi_gpiolib_dmi_quirk *quirk = NULL; const struct dmi_system_id *id; - long quirks = 0; id = dmi_first_match(gpiolib_acpi_quirks); if (id) - quirks = (long)id->driver_data; + quirk = id->driver_data; if (run_edge_events_on_boot < 0) { - if (quirks & QUIRK_NO_EDGE_EVENTS_ON_BOOT) + if (quirk && quirk->no_edge_events_on_boot) run_edge_events_on_boot = 0; else run_edge_events_on_boot = 1; } - if (honor_wakeup < 0) { - if (quirks & QUIRK_NO_WAKEUP) - honor_wakeup = 0; - else - honor_wakeup = 1; - } + if (ignore_wake == NULL && quirk && quirk->ignore_wake) + ignore_wake = quirk->ignore_wake; return 0; } -- GitLab From ab2e808622a430c931933623e5abd7cd488afc46 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Mar 2020 12:12:24 +0100 Subject: [PATCH 0666/1278] gpiolib: acpi: Add quirk to ignore EC wakeups on HP x2 10 BYT + AXP288 
model commit 0e91506ba00730f088961a8d39f8693b0f8e3fea upstream. Commit aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") was added to deal with spurious wakeups on one specific model of the HP x2 10 series. In the mean time I have learned that there are at least 3 different HP x2 10 models: Bay Trail SoC + AXP288 PMIC Cherry Trail SoC + AXP288 PMIC Cherry Trail SoC + TI PMIC And the original quirk is only correct for (and only matches the) Cherry Trail SoC + TI PMIC model. The Bay Trail SoC + AXP288 PMIC model has different DMI strings, has the external EC interrupt on a different GPIO pin and only needs to ignore wakeups on the EC interrupt, the INT0002 device works fine on this model. This commit adds an extra DMI based quirk for the HP x2 10 BYT + AXP288 model, ignoring wakeups for ACPI GPIO events on the EC interrupt pin on this model. This fixes spurious wakeups from suspend on this model. Fixes: aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200302111225.6641-3-hdegoede@redhat.com Acked-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-acpi.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index b0124be9baff..319d8b07e4e9 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1425,6 +1425,21 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { .ignore_wake = "INT33FF:01@0,INT0002:00@2", }, }, + { + /* + * HP X2 10 models with Bay Trail SoC + AXP288 PMIC use an + * external embedded-controller connected via I2C + an ACPI GPIO + * event handler on INT33FC:02 pin 28, causing spurious wakeups. 
+ */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), + DMI_MATCH(DMI_BOARD_NAME, "815D"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "INT33FC:02@28", + }, + }, {} /* Terminating entry */ }; -- GitLab From 7d77f1b6281520de0ce6aa83011f16e56cc34cdd Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 13 Mar 2020 08:47:05 -0400 Subject: [PATCH 0667/1278] RDMA/core: Ensure security pkey modify is not lost commit 2d47fbacf2725a67869f4d3634c2415e7dfab2f4 upstream. The following modify sequence (loosely based on ipoib) will lose a pkey modification: - Modify (pkey index, port) - Modify (new pkey index, NO port) After the first modify, the qp_pps list will have saved the pkey and the unit on the main list. During the second modify, get_new_pps() will fetch the port from qp_pps and read the new pkey index from qp_attr->pkey_index. The state will still be zero, or IB_PORT_PKEY_NOT_VALID. Because of the invalid state, the new values will never replace the one in the qp pps list, losing the new pkey. This happens because the following if statements will never correct the state because the first term will be false. If the code had been executed, it would incorrectly overwrite valid values. if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) new_pps->main.state = IB_PORT_PKEY_VALID; if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) { new_pps->main.port_num = qp_pps->main.port_num; new_pps->main.pkey_index = qp_pps->main.pkey_index; if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) new_pps->main.state = IB_PORT_PKEY_VALID; } Fix by joining the two if statements with an or test to see if qp_pps is non-NULL and in the correct state.
Fixes: 1dd017882e01 ("RDMA/core: Fix protection fault in get_pkey_idx_qp_list") Link: https://lore.kernel.org/r/20200313124704.14982.55907.stgit@awfm-01.aw.intel.com Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/security.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index ce8e3009344a..f4770601e6bf 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -348,16 +348,11 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, else if (qp_pps) new_pps->main.pkey_index = qp_pps->main.pkey_index; - if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) + if (((qp_attr_mask & IB_QP_PKEY_INDEX) && + (qp_attr_mask & IB_QP_PORT)) || + (qp_pps && qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)) new_pps->main.state = IB_PORT_PKEY_VALID; - if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) { - new_pps->main.port_num = qp_pps->main.port_num; - new_pps->main.pkey_index = qp_pps->main.pkey_index; - if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) - new_pps->main.state = IB_PORT_PKEY_VALID; - } - if (qp_attr_mask & IB_QP_ALT_PATH) { new_pps->alt.port_num = qp_attr->alt_port_num; new_pps->alt.pkey_index = qp_attr->alt_pkey_index; -- GitLab From cd195db9143211386cbf521e2bea8484a0ed6b90 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 13 Mar 2020 20:33:07 +0000 Subject: [PATCH 0668/1278] genirq: Fix reference leaks on irq affinity notifiers commit df81dfcfd6991d547653d46c051bac195cd182c1 upstream. The handling of notify->work did not properly maintain notify->kref in two cases: 1) where the work was already scheduled, another irq_set_affinity_locked() would get the ref and (no-op-ly) schedule the work. 
Thus when irq_affinity_notify() ran, it would drop the original ref but not the additional one. 2) when cancelling the (old) work in irq_set_affinity_notifier(), if there was outstanding work a ref had been got for it but was never put. Fix both by checking the return values of the work handling functions (schedule_work() for (1) and cancel_work_sync() for (2)) and put the extra ref if the return value indicates preexisting work. Fixes: cd7eab44e994 ("genirq: Add IRQ affinity notifiers") Fixes: 59c39840f5ab ("genirq: Prevent use-after-free and work list corruption") Signed-off-by: Edward Cree Signed-off-by: Thomas Gleixner Acked-by: Ben Hutchings Link: https://lkml.kernel.org/r/24f5983f-2ab5-e83a-44ee-a45b5f9300f5@solarflare.com Signed-off-by: Greg Kroah-Hartman --- kernel/irq/manage.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 037e8fc1b008..5277949e82e0 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -224,7 +224,11 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, if (desc->affinity_notify) { kref_get(&desc->affinity_notify->kref); - schedule_work(&desc->affinity_notify->work); + if (!schedule_work(&desc->affinity_notify->work)) { + /* Work was already scheduled, drop our extra ref */ + kref_put(&desc->affinity_notify->kref, + desc->affinity_notify->release); + } } irqd_set(data, IRQD_AFFINITY_SET); @@ -324,7 +328,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) raw_spin_unlock_irqrestore(&desc->lock, flags); if (old_notify) { - cancel_work_sync(&old_notify->work); + if (cancel_work_sync(&old_notify->work)) { + /* Pending work had a ref, put that one too */ + kref_put(&old_notify->kref, old_notify->release); + } kref_put(&old_notify->kref, old_notify->release); } -- GitLab From a6001a07098525ada40c5c3e63c811e69a11c565 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Sun, 2 Feb 2020 13:19:34 +0200 
Subject: [PATCH 0669/1278] xfrm: handle NETDEV_UNREGISTER for xfrm device commit 03891f820c2117b19e80b370281eb924a09cf79f upstream. This patch handles the asynchronous unregister device event so the device IPsec offload resources can be cleanly released. Fixes: e4db5b61c572 ("xfrm: policy: remove pcpu policy cache") Signed-off-by: Raed Salem Reviewed-by: Boris Pismenny Reviewed-by: Saeed Mahameed Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 4e458fd9236a..c58557193527 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -187,6 +187,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void return xfrm_dev_feat_change(dev); case NETDEV_DOWN: + case NETDEV_UNREGISTER: return xfrm_dev_down(dev); } return NOTIFY_DONE; -- GitLab From 27993a6ac82cd57e8fa8088315736bd4e5ba8a1a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Feb 2020 17:00:27 +0100 Subject: [PATCH 0670/1278] vti[6]: fix packet tx through bpf_redirect() in XinY cases commit f1ed10264ed6b66b9cd5e8461cffce69be482356 upstream. I forgot the 4in6/6in4 cases in my previous patch. Let's fix them.
Fixes: 95224166a903 ("vti[6]: fix packet tx through bpf_redirect()") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/ipv4/Kconfig | 1 + net/ipv4/ip_vti.c | 38 ++++++++++++++++++++++++++++++-------- net/ipv6/ip6_vti.c | 32 +++++++++++++++++++++++++------- 3 files changed, 56 insertions(+), 15 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index f48fe6fc7e8c..4abc4ba733bf 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -297,6 +297,7 @@ config SYN_COOKIES config NET_IPVTI tristate "Virtual (secure) IP: tunneling" + depends on IPV6 || IPV6=n select INET_TUNNEL select NET_IP_TUNNEL depends on INET_XFRM_MODE_TUNNEL diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 59384ffe89f7..b930ab5cf140 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -208,17 +208,39 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, int mtu; if (!dst) { - struct rtable *rt; - - fl->u.ip4.flowi4_oif = dev->ifindex; - fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; - rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); - if (IS_ERR(rt)) { + switch (skb->protocol) { + case htons(ETH_P_IP): { + struct rtable *rt; + + fl->u.ip4.flowi4_oif = dev->ifindex; + fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; + rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + dst = &rt->dst; + skb_dst_set(skb, dst); + break; + } +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + fl->u.ip6.flowi6_oif = dev->ifindex; + fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; + dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); + if (dst->error) { + dst_release(dst); + dst = NULL; + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + skb_dst_set(skb, dst); + break; +#endif + default: dev->stats.tx_carrier_errors++; goto tx_error_icmp; } - dst = &rt->dst; - skb_dst_set(skb, dst); } dst_hold(dst); diff --git 
a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 396a0f61f5f8..2f0217657ba2 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -454,15 +454,33 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) int mtu; if (!dst) { - fl->u.ip6.flowi6_oif = dev->ifindex; - fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; - dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); - if (dst->error) { - dst_release(dst); - dst = NULL; + switch (skb->protocol) { + case htons(ETH_P_IP): { + struct rtable *rt; + + fl->u.ip4.flowi4_oif = dev->ifindex; + fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; + rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); + if (IS_ERR(rt)) + goto tx_err_link_failure; + dst = &rt->dst; + skb_dst_set(skb, dst); + break; + } + case htons(ETH_P_IPV6): + fl->u.ip6.flowi6_oif = dev->ifindex; + fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; + dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); + if (dst->error) { + dst_release(dst); + dst = NULL; + goto tx_err_link_failure; + } + skb_dst_set(skb, dst); + break; + default: goto tx_err_link_failure; } - skb_dst_set(skb, dst); } dst_hold(dst); -- GitLab From 1046035d3b8b461194ce693d7f8e33e99f4f4abe Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 22 Mar 2020 14:49:06 +0200 Subject: [PATCH 0671/1278] RDMA/mlx5: Block delay drop to unprivileged users commit ba80013fba656b9830ef45cd40a6a1e44707f47a upstream. It has been discovered that this feature can globally block the RX port, so it should be allowed for highly privileged users only. 
Fixes: 03404e8ae652("IB/mlx5: Add support to dropless RQ") Link: https://lore.kernel.org/r/20200322124906.1173790-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/qp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 84c962820aa2..0cb60072c82f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4896,6 +4896,10 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, if (udata->outlen && udata->outlen < min_resp_len) return ERR_PTR(-EINVAL); + if (!capable(CAP_SYS_RAWIO) && + init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) + return ERR_PTR(-EPERM); + dev = to_mdev(pd->device); switch (init_attr->wq_type) { case IB_WQT_RQ: -- GitLab From c91b46394b340ed790d6aed76f0044c1c7838de0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 9 Feb 2020 21:15:29 +0800 Subject: [PATCH 0672/1278] xfrm: fix uctx len check in verify_sec_ctx_len commit 171d449a028573b2f0acdc7f31ecbb045391b320 upstream. It's not sufficient to do 'uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len)' check only, as uctx->len may be greater than nla_len(rt), in which case it will cause slab-out-of-bounds when accessing uctx->ctx_str later. This patch is to fix it by return -EINVAL when uctx->len > nla_len(rt). 
Fixes: df71837d5024 ("[LSM-IPSec]: Security association restriction.") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 339a070da597..81d96a664afa 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -109,7 +109,8 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs) return 0; uctx = nla_data(rt); - if (uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len)) + if (uctx->len > nla_len(rt) || + uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len)) return -EINVAL; return 0; -- GitLab From 25106012e91a2399c487f495f81a48186f5a6a73 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 9 Feb 2020 21:16:38 +0800 Subject: [PATCH 0673/1278] xfrm: add the missing verify_sec_ctx_len check in xfrm_add_acquire commit a1a7e3a36e01ca6e67014f8cf673cb8e47be5550 upstream. Without doing verify_sec_ctx_len() check in xfrm_add_acquire(), it may be out-of-bounds to access uctx->ctx_str with uctx->ctx_len, as noticed by syz: BUG: KASAN: slab-out-of-bounds in selinux_xfrm_alloc_user+0x237/0x430 Read of size 768 at addr ffff8880123be9b4 by task syz-executor.1/11650 Call Trace: dump_stack+0xe8/0x16e print_address_description.cold.3+0x9/0x23b kasan_report.cold.4+0x64/0x95 memcpy+0x1f/0x50 selinux_xfrm_alloc_user+0x237/0x430 security_xfrm_policy_alloc+0x5c/0xb0 xfrm_policy_construct+0x2b1/0x650 xfrm_add_acquire+0x21d/0xa10 xfrm_user_rcv_msg+0x431/0x6f0 netlink_rcv_skb+0x15a/0x410 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x50e/0x6a0 netlink_sendmsg+0x8ae/0xd40 sock_sendmsg+0x133/0x170 ___sys_sendmsg+0x834/0x9a0 __sys_sendmsg+0x100/0x1e0 do_syscall_64+0xe5/0x660 entry_SYSCALL_64_after_hwframe+0x6a/0xdf So fix it by adding the missing verify_sec_ctx_len check there. 
Fixes: 980ebd25794f ("[IPSEC]: Sync series - acquire insert") Reported-by: Hangbin Liu Signed-off-by: Xin Long Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 81d96a664afa..86084086a472 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2212,6 +2212,9 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, xfrm_mark_get(attrs, &mark); err = verify_newpolicy_info(&ua->policy); + if (err) + goto free_state; + err = verify_sec_ctx_len(attrs); if (err) goto free_state; -- GitLab From dc0ea9b710102ef628a26663d892031a2c381549 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 23 Mar 2020 15:32:39 +0800 Subject: [PATCH 0674/1278] xfrm: policy: Fix doulbe free in xfrm_policy_timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4c59406ed00379c8663f8663d82b2537467ce9d7 upstream. After xfrm_add_policy adds a policy, its ref is 2, then xfrm_policy_timer read_lock xp->walk.dead is 0 .... mod_timer() xfrm_policy_kill policy->walk.dead = 1 .... del_timer(&policy->timer) xfrm_pol_put //ref is 1 xfrm_pol_put //ref is 0 xfrm_policy_destroy call_rcu xfrm_pol_hold //ref is 1 read_unlock xfrm_pol_put //ref is 0 xfrm_policy_destroy call_rcu xfrm_policy_destroy is called twice, which may lead to a double free. Call Trace: RIP: 0010:refcount_warn_saturate+0x161/0x210 ... xfrm_policy_timer+0x522/0x600 call_timer_fn+0x1b3/0x5e0 ? __xfrm_decode_session+0x2990/0x2990 ? msleep+0xb0/0xb0 ? _raw_spin_unlock_irq+0x24/0x40 ? __xfrm_decode_session+0x2990/0x2990 ? __xfrm_decode_session+0x2990/0x2990 run_timer_softirq+0x5c5/0x10e0 Fix this by using write_lock_bh in xfrm_policy_kill.
Fixes: ea2dea9dacc2 ("xfrm: remove policy lock when accessing policy->walk.dead") Signed-off-by: YueHaibing Acked-by: Timo Teräs Acked-by: Herbert Xu Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b5006a091fd6..f76bb5237df3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -301,7 +301,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy); static void xfrm_policy_kill(struct xfrm_policy *policy) { + write_lock_bh(&policy->lock); policy->walk.dead = 1; + write_unlock_bh(&policy->lock); atomic_inc(&policy->genid); -- GitLab From ba2bc76526e19cc6d67bbaa3c979ab3c576d71e9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 23 Mar 2020 14:27:16 +0100 Subject: [PATCH 0675/1278] netfilter: nft_fwd_netdev: validate family and chain type commit 76a109fac206e158eb3c967af98c178cff738e6a upstream. Make sure the forward action is only used from ingress. 
Fixes: 39e6dea28adc ("netfilter: nf_tables: add forward expression to the netdev family") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_fwd_netdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index ce13a50b9189..ee190fa4dc34 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -62,6 +62,13 @@ static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) return -1; } +static int nft_fwd_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS)); +} + static struct nft_expr_type nft_fwd_netdev_type; static const struct nft_expr_ops nft_fwd_netdev_ops = { .type = &nft_fwd_netdev_type, @@ -69,6 +76,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = { .eval = nft_fwd_netdev_eval, .init = nft_fwd_netdev_init, .dump = nft_fwd_netdev_dump, + .validate = nft_fwd_validate, }; static struct nft_expr_type nft_fwd_netdev_type __read_mostly = { -- GitLab From ae943d0066fe0896fc342d9d3141a38f48c33764 Mon Sep 17 00:00:00 2001 From: Torsten Hilbrich Date: Wed, 11 Mar 2020 11:19:06 +0100 Subject: [PATCH 0676/1278] vti6: Fix memory leak of skb if input policy check fails commit 2a9de3af21aa8c31cd68b0b39330d69f8c1e59df upstream. The vti6_rcv function performs some tests on the retrieved tunnel including checking the IP protocol, the XFRM input policy, the source and destination address. In all but one places the skb is released in the error case. When the input policy check fails the network packet is leaked. Using the same goto-label discard in this case to fix this problem. 
Fixes: ed1efb2aefbb ("ipv6: Add support for IPsec virtual tunnel interfaces") Signed-off-by: Torsten Hilbrich Reviewed-by: Nicolas Dichtel Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_vti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 2f0217657ba2..207bf342e995 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -315,7 +315,7 @@ static int vti6_rcv(struct sk_buff *skb) if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { rcu_read_unlock(); - return 0; + goto discard; } ipv6h = ipv6_hdr(skb); -- GitLab From 1c3a33390d209732725371dc9a510e293c0cef10 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 6 Aug 2018 15:20:19 -0700 Subject: [PATCH 0677/1278] Input: raydium_i2c_ts - use true and false for boolean values [ Upstream commit 6cad4e269e25dddd7260a53e9d9d90ba3a3cc35a ] Return statements in functions returning bool should use true or false instead of an integer value. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/raydium_i2c_ts.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 4f1d3fd5d412..172f66e9da2d 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -466,7 +466,7 @@ static bool raydium_i2c_boot_trigger(struct i2c_client *client) } } - return 0; + return false; } static bool raydium_i2c_fw_trigger(struct i2c_client *client) @@ -492,7 +492,7 @@ static bool raydium_i2c_fw_trigger(struct i2c_client *client) } } - return 0; + return false; } static int raydium_i2c_check_path(struct i2c_client *client) -- GitLab From c97a86b9ea96f3c72c1b5288f8b25f21bf7fad20 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 6 Mar 2020 11:50:51 -0800 Subject: [PATCH 0678/1278] Input: raydium_i2c_ts - fix error codes in raydium_i2c_boot_trigger() [ Upstream commit 32cf3a610c35cb21e3157f4bbf29d89960e30a36 ] These functions are supposed to return negative error codes but instead it returns true on failure and false on success. The error codes are eventually propagated back to user space. 
Fixes: 48a2b783483b ("Input: add Raydium I2C touchscreen driver") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20200303101306.4potflz7na2nn3od@kili.mountain Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/raydium_i2c_ts.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 172f66e9da2d..7da44956555e 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -441,7 +441,7 @@ static int raydium_i2c_write_object(struct i2c_client *client, return 0; } -static bool raydium_i2c_boot_trigger(struct i2c_client *client) +static int raydium_i2c_boot_trigger(struct i2c_client *client) { static const u8 cmd[7][6] = { { 0x08, 0x0C, 0x09, 0x00, 0x50, 0xD7 }, @@ -466,10 +466,10 @@ static bool raydium_i2c_boot_trigger(struct i2c_client *client) } } - return false; + return 0; } -static bool raydium_i2c_fw_trigger(struct i2c_client *client) +static int raydium_i2c_fw_trigger(struct i2c_client *client) { static const u8 cmd[5][11] = { { 0, 0x09, 0x71, 0x0C, 0x09, 0x00, 0x50, 0xD7, 0, 0, 0 }, @@ -492,7 +492,7 @@ static bool raydium_i2c_fw_trigger(struct i2c_client *client) } } - return false; + return 0; } static int raydium_i2c_check_path(struct i2c_client *client) -- GitLab From 7225ad08985b405a93d0ad8600693d9ed1d79e29 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 13 Mar 2020 13:36:01 +0000 Subject: [PATCH 0679/1278] afs: Fix some tracing details [ Upstream commit 4636cf184d6d9a92a56c2554681ea520dd4fe49a ] Fix a couple of tracelines to indicate the usage count after the atomic op, not the usage count before it to be consistent with other afs and rxrpc trace lines. Change the wording of the afs_call_trace_work trace ID label from "WORK" to "QUEUE" to reflect the fact that it's queueing work, not doing work. 
Fixes: 341f741f04be ("afs: Refcount the afs_call struct") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- fs/afs/rxrpc.c | 4 ++-- include/trace/events/afs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ccc9c708a860..7dc9c78a1c31 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -173,7 +173,7 @@ void afs_put_call(struct afs_call *call) int n = atomic_dec_return(&call->usage); int o = atomic_read(&afs_outstanding_calls); - trace_afs_call(call, afs_call_trace_put, n + 1, o, + trace_afs_call(call, afs_call_trace_put, n, o, __builtin_return_address(0)); ASSERTCMP(n, >=, 0); @@ -619,7 +619,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, u = __atomic_add_unless(&call->usage, 1, 0); if (u != 0) { - trace_afs_call(call, afs_call_trace_wake, u, + trace_afs_call(call, afs_call_trace_wake, u + 1, atomic_read(&afs_outstanding_calls), __builtin_return_address(0)); diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 8b95c16b7045..0978bdae2243 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -40,7 +40,7 @@ enum afs_call_trace { EM(afs_call_trace_free, "FREE ") \ EM(afs_call_trace_put, "PUT ") \ EM(afs_call_trace_wake, "WAKE ") \ - E_(afs_call_trace_work, "WORK ") + E_(afs_call_trace_work, "QUEUE") /* * Export enum symbols via userspace. -- GitLab From aa7ad24c67fbbbcb5d33e5825da28f6ab1a1de62 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Wed, 25 Mar 2020 06:44:17 +0100 Subject: [PATCH 0680/1278] USB: serial: option: add support for ASKEY WWHC050 commit 007d20dca2376a751b1dad03442f118438b7e65e upstream. ASKEY WWHC050 is a mcie LTE modem. 
The oem configuration states: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1690 ProdID=7588 Rev=ff.ff S: Manufacturer=Android S: Product=Android S: SerialNumber=813f0eef6e6e C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none) E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=125us Tested on openwrt distribution. 
Co-developed-by: Cezary Jackiewicz Signed-off-by: Cezary Jackiewicz Signed-off-by: Pawel Dembicki Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 24a4ed76663a..bed20de9ab92 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1994,6 +1994,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ + { USB_DEVICE_INTERFACE_CLASS(0x1690, 0x7588, 0xff), /* ASKEY WWHC050 */ + .driver_info = RSVD(1) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2031, 0xff), /* Olicard 600 */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2060, 0xff), /* BroadMobi BM818 */ -- GitLab From 5c30e53bf3f524df0b5313df3986885e71134990 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Wed, 25 Mar 2020 06:44:18 +0100 Subject: [PATCH 0681/1278] USB: serial: option: add BroadMobi BM806U commit 6cb2669cb97fc4fdf526127159ac59caae052247 upstream. BroadMobi BM806U is an Qualcomm MDM9225 based 3G/4G modem. Tested hardware BM806U is mounted on D-Link DWR-921-C3 router. T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2020 ProdID=2033 Rev= 2.28 S: Manufacturer=Mobile Connect S: Product=Mobile Connect S: SerialNumber=f842866cfd5a C:* #Ifs= 5 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) 
MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Co-developed-by: Cezary Jackiewicz Signed-off-by: Cezary Jackiewicz Signed-off-by: Pawel Dembicki Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index bed20de9ab92..50350516e76b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1998,6 +1998,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(1) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2031, 0xff), /* Olicard 600 */ .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2033, 0xff), /* BroadMobi BM806U */ + .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2060, 0xff), /* BroadMobi BM818 */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ -- GitLab From 72a89da07addd6bff8a6d755cbe72850d18322fa Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Wed, 25 Mar 2020 06:44:19 +0100 Subject: [PATCH 0682/1278] USB: serial: option: add Wistron Neweb D19Q1 commit dfee7e2f478346b12ea651d5c28b069f6a4af563 upstream. This modem is embedded on dlink dwr-960 router. 
The oem configuration states: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1435 ProdID=d191 Rev=ff.ff S: Manufacturer=Android S: Product=Android S: SerialNumber=0123456789ABCDEF C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) 
Sub=06 Prot=50 Driver=(none) E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=125us Tested on openwrt distribution Signed-off-by: Pawel Dembicki Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 50350516e76b..3621bde2a0ed 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1994,6 +1994,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ + { USB_DEVICE_INTERFACE_CLASS(0x1435, 0xd191, 0xff), /* Wistron Neweb D19Q1 */ + .driver_info = RSVD(1) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x1690, 0x7588, 0xff), /* ASKEY WWHC050 */ .driver_info = RSVD(1) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2031, 0xff), /* Olicard 600 */ -- GitLab From 57194c6fd8c478f468f1aa7ae2175ea77c4d8de1 Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Fri, 27 Mar 2020 16:03:50 +0100 Subject: [PATCH 0683/1278] USB: cdc-acm: restore capability check order commit 62d65bdd9d05158aa2547f8ef72375535f3bc6e3 upstream. commit b401f8c4f492c ("USB: cdc-acm: fix rounding error in TIOCSSERIAL") introduced a regression by changing the order of capability and close settings change checks. When running with CAP_SYS_ADMIN setting the close settings to the values already set resulted in -EOPNOTSUPP. Fix this by changing the check order back to how it was before. 
Fixes: b401f8c4f492c ("USB: cdc-acm: fix rounding error in TIOCSSERIAL") Cc: Anthony Mallet Cc: stable Cc: Oliver Neukum Signed-off-by: Matthias Reichl Link: https://lore.kernel.org/r/20200327150350.3657-1-hias@horus.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 30a124b74d45..5e171e45c685 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -961,16 +961,16 @@ static int set_serial_info(struct acm *acm, mutex_lock(&acm->port.mutex); - if ((new_serial.close_delay != old_close_delay) || - (new_serial.closing_wait != old_closing_wait)) { - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) { + if ((new_serial.close_delay != old_close_delay) || + (new_serial.closing_wait != old_closing_wait)) retval = -EPERM; - else { - acm->port.close_delay = close_delay; - acm->port.closing_wait = closing_wait; - } - } else - retval = -EOPNOTSUPP; + else + retval = -EOPNOTSUPP; + } else { + acm->port.close_delay = close_delay; + acm->port.closing_wait = closing_wait; + } mutex_unlock(&acm->port.mutex); return retval; -- GitLab From e68fb968fdd1d6f8c8558907edf1ce0b33a8108c Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Wed, 25 Mar 2020 15:52:37 +0800 Subject: [PATCH 0684/1278] USB: serial: io_edgeport: fix slab-out-of-bounds read in edge_interrupt_callback commit 57aa9f294b09463492f604feaa5cc719beaace32 upstream. Fix slab-out-of-bounds read in the interrupt-URB completion handler. The boundary condition should be (length - 1) as we access data[position + 1]. 
Reported-and-tested-by: syzbot+37ba33391ad5f3935bbd@syzkaller.appspotmail.com Signed-off-by: Qiujun Huang Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_edgeport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 3705b64ab948..45d5e5c899e1 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -714,7 +714,7 @@ static void edge_interrupt_callback(struct urb *urb) /* grab the txcredits for the ports if available */ position = 2; portNumber = 0; - while ((position < length) && + while ((position < length - 1) && (portNumber < edge_serial->serial->num_ports)) { txCredits = data[position] | (data[position+1] << 8); if (txCredits) { -- GitLab From 5bece4940e5075e10e2a44a4bad2754a363f8d74 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 16 Mar 2020 16:11:35 -0500 Subject: [PATCH 0685/1278] usb: musb: fix crash with highmen PIO and usbmon commit 52974d94a206ce428d9d9b6eaa208238024be82a upstream. When handling a PIO bulk transfer with highmem buffer, a temporary mapping is assigned to urb->transfer_buffer. After the transfer is complete, an invalid address is left behind in this pointer. This is not ordinarily a problem since nothing touches that buffer before the urb is released. However, when usbmon is active, usbmon_urb_complete() calls (indirectly) mon_bin_get_data() which does access the transfer buffer if it is set. To prevent an invalid memory access here, reset urb->transfer_buffer to NULL when finished (musb_host_rx()), or do not set it at all (musb_host_tx()). 
Fixes: 8e8a55165469 ("usb: musb: host: Handle highmem in PIO mode") Signed-off-by: Mans Rullgard Cc: stable@vger.kernel.org Signed-off-by: Bin Liu Link: https://lore.kernel.org/r/20200316211136.2274-8-b-liu@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 802388bb42ba..3ec0752e67ac 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -1494,10 +1494,7 @@ void musb_host_tx(struct musb *musb, u8 epnum) * We need to map sg if the transfer_buffer is * NULL. */ - if (!urb->transfer_buffer) - qh->use_sg = true; - - if (qh->use_sg) { + if (!urb->transfer_buffer) { /* sg_miter_start is already done in musb_ep_program */ if (!sg_miter_next(&qh->sg_miter)) { dev_err(musb->controller, "error: sg list empty\n"); @@ -1505,9 +1502,8 @@ void musb_host_tx(struct musb *musb, u8 epnum) status = -EINVAL; goto done; } - urb->transfer_buffer = qh->sg_miter.addr; length = min_t(u32, length, qh->sg_miter.length); - musb_write_fifo(hw_ep, length, urb->transfer_buffer); + musb_write_fifo(hw_ep, length, qh->sg_miter.addr); qh->sg_miter.consumed = length; sg_miter_stop(&qh->sg_miter); } else { @@ -1516,11 +1512,6 @@ void musb_host_tx(struct musb *musb, u8 epnum) qh->segsize = length; - if (qh->use_sg) { - if (offset + length >= urb->transfer_buffer_length) - qh->use_sg = false; - } - musb_ep_select(mbase, epnum); musb_writew(epio, MUSB_TXCSR, MUSB_TXCSR_H_WZC_BITS | MUSB_TXCSR_TXPKTRDY); @@ -2038,8 +2029,10 @@ void musb_host_rx(struct musb *musb, u8 epnum) urb->actual_length += xfer_len; qh->offset += xfer_len; if (done) { - if (qh->use_sg) + if (qh->use_sg) { qh->use_sg = false; + urb->transfer_buffer = NULL; + } if (urb->status == -EINPROGRESS) urb->status = status; -- GitLab From a693e94b98fc2a029f92952ca5034285fae501ee Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Jan 2020 
17:35:08 +0100 Subject: [PATCH 0686/1278] media: flexcop-usb: fix endpoint sanity check commit bca243b1ce0e46be26f7c63b5591dfbb41f558e5 upstream. commit 1b976fc6d684 ("media: b2c2-flexcop-usb: add sanity checking") added an endpoint sanity check to address a NULL-pointer dereference on probe. Unfortunately the check was done on the current altsetting which was later changed. Fix this by moving the sanity check to after the altsetting is changed. Fixes: 1b976fc6d684 ("media: b2c2-flexcop-usb: add sanity checking") Cc: Oliver Neukum Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/b2c2/flexcop-usb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/b2c2/flexcop-usb.c b/drivers/media/usb/b2c2/flexcop-usb.c index 427cda457af6..5104678f29b7 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.c +++ b/drivers/media/usb/b2c2/flexcop-usb.c @@ -510,6 +510,9 @@ static int flexcop_usb_init(struct flexcop_usb *fc_usb) return ret; } + if (fc_usb->uintf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + switch (fc_usb->udev->speed) { case USB_SPEED_LOW: err("cannot handle USB speed because it is too slow."); @@ -543,9 +546,6 @@ static int flexcop_usb_probe(struct usb_interface *intf, struct flexcop_device *fc = NULL; int ret; - if (intf->cur_altsetting->desc.bNumEndpoints < 1) - return -ENODEV; - if ((fc = flexcop_device_kmalloc(sizeof(struct flexcop_usb))) == NULL) { err("out of memory\n"); return -ENOMEM; -- GitLab From 1ebcd216ebcc993a6f5b67dd7e35bcc8b79660b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Jan 2020 18:18:18 +0100 Subject: [PATCH 0687/1278] media: usbtv: fix control-message timeouts commit 536f561d871c5781bc33d26d415685211b94032e upstream. The driver was issuing synchronous uninterruptible control requests without using a timeout. 
This could lead to the driver hanging on various user requests due to a malfunctioning (or malicious) device until the device is physically disconnected. The USB upper limit of five seconds per request should be more than enough. Fixes: f3d27f34fdd7 ("[media] usbtv: Add driver for Fushicai USBTV007 video frame grabber") Fixes: c53a846c48f2 ("[media] usbtv: add video controls") Cc: stable # 3.11 Signed-off-by: Johan Hovold Acked-by: Lubomir Rintel Reviewed-by: Greg Kroah-Hartman Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/usbtv/usbtv-core.c | 2 +- drivers/media/usb/usbtv/usbtv-video.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/usbtv/usbtv-core.c b/drivers/media/usb/usbtv/usbtv-core.c index 68df16b3ce72..50a61143898b 100644 --- a/drivers/media/usb/usbtv/usbtv-core.c +++ b/drivers/media/usb/usbtv/usbtv-core.c @@ -56,7 +56,7 @@ int usbtv_set_regs(struct usbtv *usbtv, const u16 regs[][2], int size) ret = usb_control_msg(usbtv->udev, pipe, USBTV_REQUEST_REG, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, index, NULL, 0, 0); + value, index, NULL, 0, USB_CTRL_GET_TIMEOUT); if (ret < 0) return ret; } diff --git a/drivers/media/usb/usbtv/usbtv-video.c b/drivers/media/usb/usbtv/usbtv-video.c index 3668a04359e8..7c23d82313a8 100644 --- a/drivers/media/usb/usbtv/usbtv-video.c +++ b/drivers/media/usb/usbtv/usbtv-video.c @@ -720,7 +720,8 @@ static int usbtv_s_ctrl(struct v4l2_ctrl *ctrl) ret = usb_control_msg(usbtv->udev, usb_rcvctrlpipe(usbtv->udev, 0), USBTV_CONTROL_REG, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0, USBTV_BASE + 0x0244, (void *)data, 3, 0); + 0, USBTV_BASE + 0x0244, (void *)data, 3, + USB_CTRL_GET_TIMEOUT); if (ret < 0) goto error; } @@ -771,7 +772,7 @@ static int usbtv_s_ctrl(struct v4l2_ctrl *ctrl) ret = usb_control_msg(usbtv->udev, usb_sndctrlpipe(usbtv->udev, 0), USBTV_CONTROL_REG, USB_DIR_OUT | USB_TYPE_VENDOR | 
USB_RECIP_DEVICE, - 0, index, (void *)data, size, 0); + 0, index, (void *)data, size, USB_CTRL_SET_TIMEOUT); error: if (ret < 0) -- GitLab From cf850f45cb26dde4d008f1deeb952e908d950c40 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 21 Mar 2020 13:00:11 -0500 Subject: [PATCH 0688/1278] staging: rtl8188eu: Add ASUS USB-N10 Nano B1 to device table commit 38ef48f7d4b7342f145a1b4f96023bde99aeb245 upstream. The ASUS USB-N10 Nano B1 has been reported as a new RTL8188EU device. Add it to the device tables. Signed-off-by: Larry Finger Reported-by: kovi Cc: Stable Link: https://lore.kernel.org/r/20200321180011.26153-1-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/usb_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 83b5f5c63ae1..30615b8fb657 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -40,6 +40,7 @@ static const struct usb_device_id rtw_usb_id_tbl[] = { /****** 8188EUS ********/ {USB_DEVICE(0x056e, 0x4008)}, /* Elecom WDC-150SU2M */ {USB_DEVICE(0x07b8, 0x8179)}, /* Abocom - Abocom */ + {USB_DEVICE(0x0B05, 0x18F0)}, /* ASUS USB-N10 Nano B1 */ {USB_DEVICE(0x2001, 0x330F)}, /* DLink DWA-125 REV D1 */ {USB_DEVICE(0x2001, 0x3310)}, /* Dlink DWA-123 REV D1 */ {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ -- GitLab From 9775295b0fae1500ea0fa7f1fc74fdd4e1c5895b Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Wed, 25 Mar 2020 15:06:46 +0800 Subject: [PATCH 0689/1278] staging: wlan-ng: fix ODEBUG bug in prism2sta_disconnect_usb commit a1f165a6b738f0c9d744bad4af7a53909278f5fc upstream. We should cancel hw->usb_work before kfree(hw). 
Reported-by: syzbot+6d2e7f6fa90e27be9d62@syzkaller.appspotmail.com Signed-off-by: Qiujun Huang Cc: stable Link: https://lore.kernel.org/r/1585120006-30042-1-git-send-email-hqjagain@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/prism2usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/wlan-ng/prism2usb.c b/drivers/staging/wlan-ng/prism2usb.c index b5ba176004c1..d8d86761b790 100644 --- a/drivers/staging/wlan-ng/prism2usb.c +++ b/drivers/staging/wlan-ng/prism2usb.c @@ -180,6 +180,7 @@ static void prism2sta_disconnect_usb(struct usb_interface *interface) cancel_work_sync(&hw->link_bh); cancel_work_sync(&hw->commsqual_bh); + cancel_work_sync(&hw->usb_work); /* Now we complete any outstanding commands * and tell everyone who is waiting for their -- GitLab From a146689b6121d28830e613a1c93b8983f8848057 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Thu, 26 Mar 2020 21:18:50 +0800 Subject: [PATCH 0690/1278] staging: wlan-ng: fix use-after-free Read in hfa384x_usbin_callback commit 1165dd73e811a07d947aee218510571f516081f6 upstream. We can't handle the case length > WLAN_DATA_MAXLEN, because the size of rxfrm->data is WLAN_DATA_MAXLEN (2312) and we can't read more than that. 
Thanks-to: Hillf Danton Reported-and-tested-by: syzbot+7d42d68643a35f71ac8a@syzkaller.appspotmail.com Signed-off-by: Qiujun Huang Cc: stable Link: https://lore.kernel.org/r/20200326131850.17711-1-hqjagain@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/hfa384x_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/wlan-ng/hfa384x_usb.c b/drivers/staging/wlan-ng/hfa384x_usb.c index 2a22e448a2b3..fb1a76c4c927 100644 --- a/drivers/staging/wlan-ng/hfa384x_usb.c +++ b/drivers/staging/wlan-ng/hfa384x_usb.c @@ -3495,6 +3495,8 @@ static void hfa384x_int_rxmonitor(struct wlandevice *wlandev, WLAN_HDR_A4_LEN + WLAN_DATA_MAXLEN + WLAN_CRC_LEN)) { pr_debug("overlen frm: len=%zd\n", skblen - sizeof(struct p80211_caphdr)); + + return; } skb = dev_alloc_skb(skblen); -- GitLab From bd6c7cd12c7ece54e6531ae7c3195ac8578933f0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 7 Mar 2020 18:38:49 -0800 Subject: [PATCH 0691/1278] libfs: fix infoleak in simple_attr_read() commit a65cab7d7f05c2061a3e2490257d3086ff3202c6 upstream. Reading from a debugfs file at a nonzero position, without first reading at position 0, leaks uninitialized memory to userspace. It's a bit tricky to do this, since lseek() and pread() aren't allowed on these files, and write() doesn't update the position on them. But writing to them with splice() *does* update the position: #define _GNU_SOURCE 1 #include <fcntl.h> #include <stdio.h> #include <unistd.h> int main() { int pipes[2], fd, n, i; char buf[32]; pipe(pipes); write(pipes[1], "0", 1); fd = open("/sys/kernel/debug/fault_around_bytes", O_RDWR); splice(pipes[0], NULL, fd, NULL, 1, 0); n = read(fd, buf, sizeof(buf)); for (i = 0; i < n; i++) printf("%02x", buf[i]); printf("\n"); } Output: 5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a30 Fix the infoleak by making simple_attr_read() always fill simple_attr::get_buf if it hasn't been filled yet. 
Reported-by: syzbot+fcab69d1ada3e8d6f06b@syzkaller.appspotmail.com Reported-by: Alexander Potapenko Fixes: acaefc25d21f ("[PATCH] libfs: add simple attribute files") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Acked-by: Kees Cook Link: https://lore.kernel.org/r/20200308023849.988264-1-ebiggers@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 49623301e5f0..cb9310b091f5 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -802,7 +802,7 @@ int simple_attr_open(struct inode *inode, struct file *file, { struct simple_attr *attr; - attr = kmalloc(sizeof(*attr), GFP_KERNEL); + attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; @@ -842,9 +842,11 @@ ssize_t simple_attr_read(struct file *file, char __user *buf, if (ret) return ret; - if (*ppos) { /* continued read */ + if (*ppos && attr->get_buf[0]) { + /* continued read */ size = strlen(attr->get_buf); - } else { /* first read */ + } else { + /* first read */ u64 val; ret = attr->get(attr->data, &val); if (ret) -- GitLab From ea7d13d02a76c76dd3a0ee7bda97bb4361473b62 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Jan 2020 17:35:09 +0100 Subject: [PATCH 0692/1278] media: ov519: add missing endpoint sanity checks commit 998912346c0da53a6dbb71fab3a138586b596b30 upstream. Make sure to check that we have at least one endpoint before accessing the endpoint array to avoid dereferencing a NULL-pointer on stream start. Note that these sanity checks are not redundant as the driver is mixing looking up altsettings by index and by number, which need not coincide. 
Fixes: 1876bb923c98 ("V4L/DVB (12079): gspca_ov519: add support for the ov511 bridge") Fixes: b282d87332f5 ("V4L/DVB (12080): gspca_ov519: Fix ov518+ with OV7620AE (Trust spacecam 320)") Cc: stable # 2.6.31 Cc: Hans de Goede Signed-off-by: Johan Hovold Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/gspca/ov519.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/media/usb/gspca/ov519.c b/drivers/media/usb/gspca/ov519.c index 8106a47a0dd0..b51d2de1aca8 100644 --- a/drivers/media/usb/gspca/ov519.c +++ b/drivers/media/usb/gspca/ov519.c @@ -3478,6 +3478,11 @@ static void ov511_mode_init_regs(struct sd *sd) return; } + if (alt->desc.bNumEndpoints < 1) { + sd->gspca_dev.usb_err = -ENODEV; + return; + } + packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); reg_w(sd, R51x_FIFO_PSIZE, packet_size >> 5); @@ -3604,6 +3609,11 @@ static void ov518_mode_init_regs(struct sd *sd) return; } + if (alt->desc.bNumEndpoints < 1) { + sd->gspca_dev.usb_err = -ENODEV; + return; + } + packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); ov518_reg_w32(sd, R51x_FIFO_PSIZE, packet_size & ~7, 2); -- GitLab From d3af052c88a7d1415ec3eb6e816e4b8576dd2475 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Jan 2020 17:35:12 +0100 Subject: [PATCH 0693/1278] media: dib0700: fix rc endpoint lookup commit f52981019ad8d6718de79b425a574c6bddf81f7c upstream. Make sure to use the current alternate setting when verifying the interface descriptors to avoid submitting an URB to an invalid endpoint. Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. 
Fixes: c4018fa2e4c0 ("[media] dib0700: fix RC support on Hauppauge Nova-TD") Cc: stable # 3.16 Signed-off-by: Johan Hovold Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/dib0700_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c index 1ee7ec558293..33dd54c8fa04 100644 --- a/drivers/media/usb/dvb-usb/dib0700_core.c +++ b/drivers/media/usb/dvb-usb/dib0700_core.c @@ -821,7 +821,7 @@ int dib0700_rc_setup(struct dvb_usb_device *d, struct usb_interface *intf) /* Starting in firmware 1.20, the RC info is provided on a bulk pipe */ - if (intf->altsetting[0].desc.bNumEndpoints < rc_ep + 1) + if (intf->cur_altsetting->desc.bNumEndpoints < rc_ep + 1) return -ENODEV; purb = usb_alloc_urb(0, GFP_KERNEL); @@ -841,7 +841,7 @@ int dib0700_rc_setup(struct dvb_usb_device *d, struct usb_interface *intf) * Some devices like the Hauppauge NovaTD model 52009 use an interrupt * endpoint, while others use a bulk one. */ - e = &intf->altsetting[0].endpoint[rc_ep].desc; + e = &intf->cur_altsetting->endpoint[rc_ep].desc; if (usb_endpoint_dir_in(e)) { if (usb_endpoint_xfer_bulk(e)) { pipe = usb_rcvbulkpipe(d->udev, rc_ep); -- GitLab From b4ac6fbe5b1a3cb83d84cb9f7ab8df71d104f535 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Jan 2020 17:35:10 +0100 Subject: [PATCH 0694/1278] media: stv06xx: add missing descriptor sanity checks commit 485b06aadb933190f4bc44e006076bc27a23f205 upstream. Make sure to check that we have two alternate settings and at least one endpoint before accessing the second altsetting structure and dereferencing the endpoint arrays. This specifically avoids dereferencing NULL-pointers or corrupting memory when a device does not have the expected descriptors. 
Note that the sanity checks in stv06xx_start() and pb0100_start() are not redundant as the driver is mixing looking up altsettings by index and by number, which may not coincide. Fixes: 8668d504d72c ("V4L/DVB (12082): gspca_stv06xx: Add support for st6422 bridge and sensor") Fixes: c0b33bdc5b8d ("[media] gspca-stv06xx: support bandwidth changing") Cc: stable # 2.6.31 Cc: Hans de Goede Signed-off-by: Johan Hovold Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/gspca/stv06xx/stv06xx.c | 19 ++++++++++++++++++- .../media/usb/gspca/stv06xx/stv06xx_pb0100.c | 4 ++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/gspca/stv06xx/stv06xx.c b/drivers/media/usb/gspca/stv06xx/stv06xx.c index e72c3e1ab9ff..9caa5ef9d9e0 100644 --- a/drivers/media/usb/gspca/stv06xx/stv06xx.c +++ b/drivers/media/usb/gspca/stv06xx/stv06xx.c @@ -289,6 +289,9 @@ static int stv06xx_start(struct gspca_dev *gspca_dev) return -EIO; } + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); err = stv06xx_write_bridge(sd, STV_ISO_SIZE_L, packet_size); if (err < 0) @@ -313,11 +316,21 @@ static int stv06xx_start(struct gspca_dev *gspca_dev) static int stv06xx_isoc_init(struct gspca_dev *gspca_dev) { + struct usb_interface_cache *intfc; struct usb_host_interface *alt; struct sd *sd = (struct sd *) gspca_dev; + intfc = gspca_dev->dev->actconfig->intf_cache[0]; + + if (intfc->num_altsetting < 2) + return -ENODEV; + + alt = &intfc->altsetting[1]; + + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + /* Start isoc bandwidth "negotiation" at max isoc bandwidth */ - alt = &gspca_dev->dev->actconfig->intf_cache[0]->altsetting[1]; alt->endpoint[0].desc.wMaxPacketSize = cpu_to_le16(sd->sensor->max_packet_size[gspca_dev->curr_mode]); @@ -330,6 +343,10 @@ static int stv06xx_isoc_nego(struct gspca_dev *gspca_dev) struct usb_host_interface *alt; struct 
sd *sd = (struct sd *) gspca_dev; + /* + * Existence of altsetting and endpoint was verified in + * stv06xx_isoc_init() + */ alt = &gspca_dev->dev->actconfig->intf_cache[0]->altsetting[1]; packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); min_packet_size = sd->sensor->min_packet_size[gspca_dev->curr_mode]; diff --git a/drivers/media/usb/gspca/stv06xx/stv06xx_pb0100.c b/drivers/media/usb/gspca/stv06xx/stv06xx_pb0100.c index e1ce96e9405f..8d855b2756ba 100644 --- a/drivers/media/usb/gspca/stv06xx/stv06xx_pb0100.c +++ b/drivers/media/usb/gspca/stv06xx/stv06xx_pb0100.c @@ -194,6 +194,10 @@ static int pb0100_start(struct sd *sd) alt = usb_altnum_to_altsetting(intf, sd->gspca_dev.alt); if (!alt) return -ENODEV; + + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); /* If we don't have enough bandwidth use a lower framerate */ -- GitLab From e251c592321cb82e5f31b06542b2f1f6e81cb2b8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Jan 2020 17:35:11 +0100 Subject: [PATCH 0695/1278] media: xirlink_cit: add missing descriptor sanity checks commit a246b4d547708f33ff4d4b9a7a5dbac741dc89d8 upstream. Make sure to check that we have two alternate settings and at least one endpoint before accessing the second altsetting structure and dereferencing the endpoint arrays. This specifically avoids dereferencing NULL-pointers or corrupting memory when a device does not have the expected descriptors. Note that the sanity check in cit_get_packet_size() is not redundant as the driver is mixing looking up altsettings by index and by number, which may not coincide. 
Fixes: 659fefa0eb17 ("V4L/DVB: gspca_xirlink_cit: Add support for camera with a bcd version of 0.01") Fixes: 59f8b0bf3c12 ("V4L/DVB: gspca_xirlink_cit: support bandwidth changing for devices with 1 alt setting") Cc: stable # 2.6.37 Cc: Hans de Goede Signed-off-by: Johan Hovold Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/gspca/xirlink_cit.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/gspca/xirlink_cit.c b/drivers/media/usb/gspca/xirlink_cit.c index 68656e7986c7..765a5d03e7cc 100644 --- a/drivers/media/usb/gspca/xirlink_cit.c +++ b/drivers/media/usb/gspca/xirlink_cit.c @@ -1451,6 +1451,9 @@ static int cit_get_packet_size(struct gspca_dev *gspca_dev) return -EIO; } + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + return le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); } @@ -2634,6 +2637,7 @@ static int sd_start(struct gspca_dev *gspca_dev) static int sd_isoc_init(struct gspca_dev *gspca_dev) { + struct usb_interface_cache *intfc; struct usb_host_interface *alt; int max_packet_size; @@ -2649,8 +2653,17 @@ static int sd_isoc_init(struct gspca_dev *gspca_dev) break; } + intfc = gspca_dev->dev->actconfig->intf_cache[0]; + + if (intfc->num_altsetting < 2) + return -ENODEV; + + alt = &intfc->altsetting[1]; + + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + /* Start isoc bandwidth "negotiation" at max isoc bandwidth */ - alt = &gspca_dev->dev->actconfig->intf_cache[0]->altsetting[1]; alt->endpoint[0].desc.wMaxPacketSize = cpu_to_le16(max_packet_size); return 0; @@ -2673,6 +2686,9 @@ static int sd_isoc_nego(struct gspca_dev *gspca_dev) break; } + /* + * Existence of altsetting and endpoint was verified in sd_isoc_init() + */ alt = &gspca_dev->dev->actconfig->intf_cache[0]->altsetting[1]; packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); if (packet_size <= min_packet_size) -- GitLab From 
07dc42ff9b9c38eae221b36acda7134ab8670af8 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 26 Mar 2020 15:51:34 +0100 Subject: [PATCH 0696/1278] mac80211: Check port authorization in the ieee80211_tx_dequeue() case commit ce2e1ca703071723ca2dd94d492a5ab6d15050da upstream. mac80211 used to check port authorization in the Data frame enqueue case when going through start_xmit(). However, that authorization status may change while the frame is waiting in a queue. Add a similar check in the dequeue case to avoid sending previously accepted frames after authorization change. This provides additional protection against potential leaking of frames after a station has been disconnected and the keys for it are being removed. Cc: stable@vger.kernel.org Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20200326155133.ced84317ea29.I34d4c47cd8cc8a4042b38a76f16a601fbcbfd9b3@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 09c7aa519ca8..2feec8e2d11e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3451,8 +3451,25 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, tx.skb = skb; tx.sdata = vif_to_sdata(info->control.vif); - if (txq->sta) + if (txq->sta) { tx.sta = container_of(txq->sta, struct sta_info, sta); + /* + * Drop unicast frames to unauthorised stations unless they are + * EAPOL frames from the local station. 
+ */ + if (unlikely(!ieee80211_vif_is_mesh(&tx.sdata->vif) && + tx.sdata->vif.type != NL80211_IFTYPE_OCB && + !is_multicast_ether_addr(hdr->addr1) && + !test_sta_flag(tx.sta, WLAN_STA_AUTHORIZED) && + (!(info->control.flags & + IEEE80211_TX_CTRL_PORT_CTRL_PROTO) || + !ether_addr_equal(tx.sdata->vif.addr, + hdr->addr2)))) { + I802_DEBUG_INC(local->tx_handlers_drop_unauth_port); + ieee80211_free_txskb(&local->hw, skb); + goto begin; + } + } /* * The key can be removed while the packet was queued, so need to call -- GitLab From 8ad73f9e86bdb079043868e3543d302b57068b80 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 29 Mar 2020 22:50:06 +0200 Subject: [PATCH 0697/1278] mac80211: fix authentication with iwlwifi/mvm commit be8c827f50a0bcd56361b31ada11dc0a3c2fd240 upstream. The original patch didn't copy the ieee80211_is_data() condition because on most drivers the management frames don't go through this path. However, they do on iwlwifi/mvm, so we do need to keep the condition here. Cc: stable@vger.kernel.org Fixes: ce2e1ca70307 ("mac80211: Check port authorization in the ieee80211_tx_dequeue() case") Signed-off-by: Johannes Berg Signed-off-by: David S. Miller Cc: Woody Suwalski Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2feec8e2d11e..513d071ccac7 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3457,7 +3457,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, * Drop unicast frames to unauthorised stations unless they are * EAPOL frames from the local station. 
*/ - if (unlikely(!ieee80211_vif_is_mesh(&tx.sdata->vif) && + if (unlikely(ieee80211_is_data(hdr->frame_control) && + !ieee80211_vif_is_mesh(&tx.sdata->vif) && tx.sdata->vif.type != NL80211_IFTYPE_OCB && !is_multicast_ether_addr(hdr->addr1) && !test_sta_flag(tx.sta, WLAN_STA_AUTHORIZED) && -- GitLab From 56676fb5e9e61aa216759cf05fb55edd45a67c14 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 19 Feb 2020 08:39:43 +0100 Subject: [PATCH 0698/1278] vt: selection, introduce vc_is_sel commit dce05aa6eec977f1472abed95ccd71276b9a3864 upstream. Avoid global variables (namely sel_cons) by introducing vc_is_sel. It checks whether the parameter is the current selection console. This will help putting sel_cons to a struct later. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20200219073951.16151-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c | 5 +++++ drivers/tty/vt/vt.c | 7 ++++--- drivers/tty/vt/vt_ioctl.c | 2 +- include/linux/selection.h | 4 +++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 91ffe3f2b8a0..8687b17f6cf0 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -83,6 +83,11 @@ void clear_selection(void) } } +bool vc_is_sel(struct vc_data *vc) +{ + return vc == sel_cons; +} + /* * User settable table: what characters are to be considered alphabetic? * 128 bits. Locked by the console lock. 
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 826433af4bdd..1cbe89e5338c 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -584,8 +584,9 @@ static void hide_softcursor(struct vc_data *vc) static void hide_cursor(struct vc_data *vc) { - if (vc == sel_cons) + if (vc_is_sel(vc)) clear_selection(); + vc->vc_sw->con_cursor(vc, CM_ERASE); hide_softcursor(vc); } @@ -595,7 +596,7 @@ static void set_cursor(struct vc_data *vc) if (!con_is_fg(vc) || console_blanked || vc->vc_mode == KD_GRAPHICS) return; if (vc->vc_deccm) { - if (vc == sel_cons) + if (vc_is_sel(vc)) clear_selection(); add_softcursor(vc); if ((vc->vc_cursor_type & 0x0f) != 1) @@ -873,7 +874,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, if (!newscreen) return -ENOMEM; - if (vc == sel_cons) + if (vc_is_sel(vc)) clear_selection(); old_rows = vc->vc_rows; diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index c320fefab360..b46b76f80f22 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -43,7 +43,7 @@ char vt_dont_switch; extern struct tty_driver *console_driver; #define VT_IS_IN_USE(i) (console_driver->ttys[i] && console_driver->ttys[i]->count) -#define VT_BUSY(i) (VT_IS_IN_USE(i) || i == fg_console || vc_cons[i].d == sel_cons) +#define VT_BUSY(i) (VT_IS_IN_USE(i) || i == fg_console || vc_is_sel(vc_cons[i].d)) /* * Console (vt and kd) routines, as defined by USL SVR4 manual, and by diff --git a/include/linux/selection.h b/include/linux/selection.h index 5b278ce99d8d..35937a61da06 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -13,8 +13,8 @@ struct tty_struct; -extern struct vc_data *sel_cons; struct tty_struct; +struct vc_data; extern void clear_selection(void); extern int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty); @@ -23,6 +23,8 @@ extern int sel_loadlut(char __user *p); extern int mouse_reporting(void); extern void mouse_report(struct tty_struct * tty, int 
butt, int mrx, int mry); +bool vc_is_sel(struct vc_data *vc); + extern int console_blanked; extern const unsigned char color_table[]; -- GitLab From 8d1c9fea39a3a55af0735a4e304d23df7f02dde1 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 19 Feb 2020 08:39:44 +0100 Subject: [PATCH 0699/1278] vt: ioctl, switch VT_IS_IN_USE and VT_BUSY to inlines commit e587e8f17433ddb26954f0edf5b2f95c42155ae9 upstream. These two were macros. Switch them to static inlines, so that it's more understandable what they are doing. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20200219073951.16151-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt_ioctl.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index b46b76f80f22..9391a2b7ede7 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -40,10 +40,25 @@ #include char vt_dont_switch; -extern struct tty_driver *console_driver; -#define VT_IS_IN_USE(i) (console_driver->ttys[i] && console_driver->ttys[i]->count) -#define VT_BUSY(i) (VT_IS_IN_USE(i) || i == fg_console || vc_is_sel(vc_cons[i].d)) +static inline bool vt_in_use(unsigned int i) +{ + extern struct tty_driver *console_driver; + + return console_driver->ttys[i] && console_driver->ttys[i]->count; +} + +static inline bool vt_busy(int i) +{ + if (vt_in_use(i)) + return true; + if (i == fg_console) + return true; + if (vc_is_sel(vc_cons[i].d)) + return true; + + return false; +} /* * Console (vt and kd) routines, as defined by USL SVR4 manual, and by @@ -289,7 +304,7 @@ static int vt_disallocate(unsigned int vc_num) int ret = 0; console_lock(); - if (VT_BUSY(vc_num)) + if (vt_busy(vc_num)) ret = -EBUSY; else if (vc_num) vc = vc_deallocate(vc_num); @@ -311,7 +326,7 @@ static void vt_disallocate_all(void) console_lock(); for (i = 1; i < MAX_NR_CONSOLES; i++) - if (!VT_BUSY(i)) + if (!vt_busy(i)) vc[i] = vc_deallocate(i); else vc[i] 
= NULL; @@ -648,7 +663,7 @@ int vt_ioctl(struct tty_struct *tty, state = 1; /* /dev/tty0 is always open */ for (i = 0, mask = 2; i < MAX_NR_CONSOLES && mask; ++i, mask <<= 1) - if (VT_IS_IN_USE(i)) + if (vt_in_use(i)) state |= mask; ret = put_user(state, &vtstat->v_state); } @@ -661,7 +676,7 @@ int vt_ioctl(struct tty_struct *tty, case VT_OPENQRY: /* FIXME: locking ? - but then this is a stupid API */ for (i = 0; i < MAX_NR_CONSOLES; ++i) - if (! VT_IS_IN_USE(i)) + if (!vt_in_use(i)) break; uival = i < MAX_NR_CONSOLES ? (i+1) : -1; goto setint; -- GitLab From 56a5db17b2985e01e0fa425b119bb7586c0ece28 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 19 Feb 2020 08:39:48 +0100 Subject: [PATCH 0700/1278] vt: switch vt_dont_switch to bool commit f400991bf872debffb01c46da882dc97d7e3248e upstream. vt_dont_switch is pure boolean, no need for whole char. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20200219073951.16151-6-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt_ioctl.c | 6 +++--- include/linux/vt_kern.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 9391a2b7ede7..09c2c91b1f80 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -39,7 +39,7 @@ #include #include -char vt_dont_switch; +bool vt_dont_switch; static inline bool vt_in_use(unsigned int i) { @@ -1026,12 +1026,12 @@ int vt_ioctl(struct tty_struct *tty, case VT_LOCKSWITCH: if (!capable(CAP_SYS_TTY_CONFIG)) return -EPERM; - vt_dont_switch = 1; + vt_dont_switch = true; break; case VT_UNLOCKSWITCH: if (!capable(CAP_SYS_TTY_CONFIG)) return -EPERM; - vt_dont_switch = 0; + vt_dont_switch = false; break; case VT_GETHIFONTMASK: ret = put_user(vc->vc_hi_font_mask, diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 3fd07912909c..a3de234d3350 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -142,7 +142,7 @@ static inline bool 
vt_force_oops_output(struct vc_data *vc) return false; } -extern char vt_dont_switch; +extern bool vt_dont_switch; extern int default_utf8; extern int global_cursor_default; -- GitLab From ac7136b9f15740d5f17a017a5febdf875239a3ea Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 24 Feb 2020 00:03:26 -0800 Subject: [PATCH 0701/1278] vt: vt_ioctl: remove unnecessary console allocation checks commit 1aa6e058dd6cd04471b1f21298270014daf48ac9 upstream. The vc_cons_allocated() checks in vt_ioctl() and vt_compat_ioctl() are unnecessary because they can only be reached by calling ioctl() on an open tty, which implies the corresponding virtual console is allocated. And even if the virtual console *could* be freed concurrently, then these checks would be broken since they aren't done under console_lock, and the vc_data is dereferenced before them anyway. So, remove these unneeded checks to avoid confusion. Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20200224080326.295046-1-ebiggers@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt_ioctl.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 09c2c91b1f80..476745cb806c 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -350,22 +350,13 @@ int vt_ioctl(struct tty_struct *tty, { struct vc_data *vc = tty->driver_data; struct console_font_op op; /* used in multiple places here */ - unsigned int console; + unsigned int console = vc->vc_num; unsigned char ucval; unsigned int uival; void __user *up = (void __user *)arg; int i, perm; int ret = 0; - console = vc->vc_num; - - - if (!vc_cons_allocated(console)) { /* impossible? */ - ret = -ENOIOCTLCMD; - goto out; - } - - /* * To have permissions to do most of the vt ioctls, we either have * to be the owner of the tty, or have CAP_SYS_TTY_CONFIG. 
@@ -1195,18 +1186,10 @@ long vt_compat_ioctl(struct tty_struct *tty, { struct vc_data *vc = tty->driver_data; struct console_font_op op; /* used in multiple places here */ - unsigned int console; void __user *up = (void __user *)arg; int perm; int ret = 0; - console = vc->vc_num; - - if (!vc_cons_allocated(console)) { /* impossible? */ - ret = -ENOIOCTLCMD; - goto out; - } - /* * To have permissions to do most of the vt ioctls, we either have * to be the owner of the tty, or have CAP_SYS_TTY_CONFIG. @@ -1266,7 +1249,7 @@ long vt_compat_ioctl(struct tty_struct *tty, arg = (unsigned long)compat_ptr(arg); goto fallback; } -out: + return ret; fallback: -- GitLab From b9eb60a0ef3971101c94f9cddb09708c2f900b35 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 21 Mar 2020 20:43:04 -0700 Subject: [PATCH 0702/1278] vt: vt_ioctl: fix VT_DISALLOCATE freeing in-use virtual console commit ca4463bf8438b403596edd0ec961ca0d4fbe0220 upstream. The VT_DISALLOCATE ioctl can free a virtual console while tty_release() is still running, causing a use-after-free in con_shutdown(). This occurs because VT_DISALLOCATE considers a virtual console's 'struct vc_data' to be unused as soon as the corresponding tty's refcount hits 0. But actually it may be still being closed. Fix this by making vc_data be reference-counted via the embedded 'struct tty_port'. A newly allocated virtual console has refcount 1. Opening it for the first time increments the refcount to 2. Closing it for the last time decrements the refcount (in tty_operations::cleanup() so that it happens late enough), as does VT_DISALLOCATE. 
Reproducer: #include #include #include #include int main() { if (fork()) { for (;;) close(open("/dev/tty5", O_RDWR)); } else { int fd = open("/dev/tty10", O_RDWR); for (;;) ioctl(fd, VT_DISALLOCATE, 5); } } KASAN report: BUG: KASAN: use-after-free in con_shutdown+0x76/0x80 drivers/tty/vt/vt.c:3278 Write of size 8 at addr ffff88806a4ec108 by task syz_vt/129 CPU: 0 PID: 129 Comm: syz_vt Not tainted 5.6.0-rc2 #11 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20191223_100556-anatol 04/01/2014 Call Trace: [...] con_shutdown+0x76/0x80 drivers/tty/vt/vt.c:3278 release_tty+0xa8/0x410 drivers/tty/tty_io.c:1514 tty_release_struct+0x34/0x50 drivers/tty/tty_io.c:1629 tty_release+0x984/0xed0 drivers/tty/tty_io.c:1789 [...] Allocated by task 129: [...] kzalloc include/linux/slab.h:669 [inline] vc_allocate drivers/tty/vt/vt.c:1085 [inline] vc_allocate+0x1ac/0x680 drivers/tty/vt/vt.c:1066 con_install+0x4d/0x3f0 drivers/tty/vt/vt.c:3229 tty_driver_install_tty drivers/tty/tty_io.c:1228 [inline] tty_init_dev+0x94/0x350 drivers/tty/tty_io.c:1341 tty_open_by_driver drivers/tty/tty_io.c:1987 [inline] tty_open+0x3ca/0xb30 drivers/tty/tty_io.c:2035 [...] Freed by task 130: [...] kfree+0xbf/0x1e0 mm/slab.c:3757 vt_disallocate drivers/tty/vt/vt_ioctl.c:300 [inline] vt_ioctl+0x16dc/0x1e30 drivers/tty/vt/vt_ioctl.c:818 tty_ioctl+0x9db/0x11b0 drivers/tty/tty_io.c:2660 [...] 
Fixes: 4001d7b7fc27 ("vt: push down the tty lock so we can see what is left to tackle") Cc: # v3.4+ Reported-by: syzbot+522643ab5729b0421998@syzkaller.appspotmail.com Acked-by: Jiri Slaby Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20200322034305.210082-2-ebiggers@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 23 ++++++++++++++++++++++- drivers/tty/vt/vt_ioctl.c | 12 ++++-------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 1cbe89e5338c..46defa3be9a4 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -751,6 +751,17 @@ static void visual_init(struct vc_data *vc, int num, int init) vc->vc_screenbuf_size = vc->vc_rows * vc->vc_size_row; } +static void vc_port_destruct(struct tty_port *port) +{ + struct vc_data *vc = container_of(port, struct vc_data, port); + + kfree(vc); +} + +static const struct tty_port_operations vc_port_ops = { + .destruct = vc_port_destruct, +}; + int vc_allocate(unsigned int currcons) /* return 0 on success */ { struct vt_notifier_param param; @@ -776,6 +787,7 @@ int vc_allocate(unsigned int currcons) /* return 0 on success */ vc_cons[currcons].d = vc; tty_port_init(&vc->port); + vc->port.ops = &vc_port_ops; INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); visual_init(vc, currcons, 1); @@ -2895,6 +2907,7 @@ static int con_install(struct tty_driver *driver, struct tty_struct *tty) tty->driver_data = vc; vc->port.tty = tty; + tty_port_get(&vc->port); if (!tty->winsize.ws_row && !tty->winsize.ws_col) { tty->winsize.ws_row = vc_cons[currcons].d->vc_rows; @@ -2930,6 +2943,13 @@ static void con_shutdown(struct tty_struct *tty) console_unlock(); } +static void con_cleanup(struct tty_struct *tty) +{ + struct vc_data *vc = tty->driver_data; + + tty_port_put(&vc->port); +} + static int default_color = 7; /* white */ static int default_italic_color = 2; // green (ASCII) static int default_underline_color = 3; // cyan (ASCII) @@ -3054,7 
+3074,8 @@ static const struct tty_operations con_ops = { .throttle = con_throttle, .unthrottle = con_unthrottle, .resize = vt_resize, - .shutdown = con_shutdown + .shutdown = con_shutdown, + .cleanup = con_cleanup, }; static struct cdev vc0_cdev; diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 476745cb806c..a8d46a5bf6c3 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -310,10 +310,8 @@ static int vt_disallocate(unsigned int vc_num) vc = vc_deallocate(vc_num); console_unlock(); - if (vc && vc_num >= MIN_NR_CONSOLES) { - tty_port_destroy(&vc->port); - kfree(vc); - } + if (vc && vc_num >= MIN_NR_CONSOLES) + tty_port_put(&vc->port); return ret; } @@ -333,10 +331,8 @@ static void vt_disallocate_all(void) console_unlock(); for (i = 1; i < MAX_NR_CONSOLES; i++) { - if (vc[i] && i >= MIN_NR_CONSOLES) { - tty_port_destroy(&vc[i]->port); - kfree(vc[i]); - } + if (vc[i] && i >= MIN_NR_CONSOLES) + tty_port_put(&vc[i]->port); } } -- GitLab From ebba196c3959d7d00476bba4efae3483dd8fe0ba Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 21 Mar 2020 20:43:05 -0700 Subject: [PATCH 0703/1278] vt: vt_ioctl: fix use-after-free in vt_in_use() commit 7cf64b18b0b96e751178b8d0505d8466ff5a448f upstream. vt_in_use() dereferences console_driver->ttys[i] without proper locking. This is broken because the tty can be closed and freed concurrently. We could fix this by using 'READ_ONCE(console_driver->ttys[i]) != NULL' and skipping the check of tty_struct::count. But, looking at console_driver->ttys[i] isn't really appropriate anyway because even if it is NULL the tty can still be in the process of being closed. Instead, fix it by making vt_in_use() require console_lock() and check whether the vt is allocated and has port refcount > 1. This works since following the patch "vt: vt_ioctl: fix VT_DISALLOCATE freeing in-use virtual console" the port refcount is incremented while the vt is open. 
Reproducer (very unreliable, but it worked for me after a few minutes): #include #include int main() { int fd, nproc; struct vt_stat state; char ttyname[16]; fd = open("/dev/tty10", O_RDONLY); for (nproc = 1; nproc < 8; nproc *= 2) fork(); for (;;) { sprintf(ttyname, "/dev/tty%d", rand() % 8); close(open(ttyname, O_RDONLY)); ioctl(fd, VT_GETSTATE, &state); } } KASAN report: BUG: KASAN: use-after-free in vt_in_use drivers/tty/vt/vt_ioctl.c:48 [inline] BUG: KASAN: use-after-free in vt_ioctl+0x1ad3/0x1d70 drivers/tty/vt/vt_ioctl.c:657 Read of size 4 at addr ffff888065722468 by task syz-vt2/132 CPU: 0 PID: 132 Comm: syz-vt2 Not tainted 5.6.0-rc5-00130-g089b6d3654916 #13 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20191223_100556-anatol 04/01/2014 Call Trace: [...] vt_in_use drivers/tty/vt/vt_ioctl.c:48 [inline] vt_ioctl+0x1ad3/0x1d70 drivers/tty/vt/vt_ioctl.c:657 tty_ioctl+0x9db/0x11b0 drivers/tty/tty_io.c:2660 [...] Allocated by task 136: [...] kzalloc include/linux/slab.h:669 [inline] alloc_tty_struct+0x96/0x8a0 drivers/tty/tty_io.c:2982 tty_init_dev+0x23/0x350 drivers/tty/tty_io.c:1334 tty_open_by_driver drivers/tty/tty_io.c:1987 [inline] tty_open+0x3ca/0xb30 drivers/tty/tty_io.c:2035 [...] Freed by task 41: [...] kfree+0xbf/0x200 mm/slab.c:3757 free_tty_struct+0x8d/0xb0 drivers/tty/tty_io.c:177 release_one_tty+0x22d/0x2f0 drivers/tty/tty_io.c:1468 process_one_work+0x7f1/0x14b0 kernel/workqueue.c:2264 worker_thread+0x8b/0xc80 kernel/workqueue.c:2410 [...] 
Fixes: 4001d7b7fc27 ("vt: push down the tty lock so we can see what is left to tackle") Cc: # v3.4+ Acked-by: Jiri Slaby Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20200322034305.210082-3-ebiggers@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt_ioctl.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index a8d46a5bf6c3..699ad55e3ec6 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -43,9 +43,15 @@ bool vt_dont_switch; static inline bool vt_in_use(unsigned int i) { - extern struct tty_driver *console_driver; + const struct vc_data *vc = vc_cons[i].d; - return console_driver->ttys[i] && console_driver->ttys[i]->count; + /* + * console_lock must be held to prevent the vc from being deallocated + * while we're checking whether it's in-use. + */ + WARN_CONSOLE_UNLOCKED(); + + return vc && kref_read(&vc->port.kref) > 1; } static inline bool vt_busy(int i) @@ -643,15 +649,16 @@ int vt_ioctl(struct tty_struct *tty, struct vt_stat __user *vtstat = up; unsigned short state, mask; - /* Review: FIXME: Console lock ? */ if (put_user(fg_console + 1, &vtstat->v_active)) ret = -EFAULT; else { state = 1; /* /dev/tty0 is always open */ + console_lock(); /* required by vt_in_use() */ for (i = 0, mask = 2; i < MAX_NR_CONSOLES && mask; ++i, mask <<= 1) if (vt_in_use(i)) state |= mask; + console_unlock(); ret = put_user(state, &vtstat->v_state); } break; @@ -661,10 +668,11 @@ int vt_ioctl(struct tty_struct *tty, * Returns the first available (non-opened) console. */ case VT_OPENQRY: - /* FIXME: locking ? - but then this is a stupid API */ + console_lock(); /* required by vt_in_use() */ for (i = 0; i < MAX_NR_CONSOLES; ++i) if (!vt_in_use(i)) break; + console_unlock(); uival = i < MAX_NR_CONSOLES ? 
(i+1) : -1; goto setint; -- GitLab From f1a1c911032ddd270ebd7357e6d3fa425957110f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Georg=20M=C3=BCller?= Date: Mon, 3 Feb 2020 21:11:06 +0100 Subject: [PATCH 0704/1278] platform/x86: pmc_atom: Add Lex 2I385SW to critclk_systems DMI table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 95b31e35239e5e1689e3d965d692a313c71bd8ab upstream. The Lex 2I385SW board has two Intel I211 ethernet controllers. Without this patch, only the first port is usable. The second port fails to start with the following message: igb: probe of 0000:02:00.0 failed with error -2 Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") Tested-by: Georg Müller Signed-off-by: Georg Müller Reviewed-by: Hans de Goede Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/pmc_atom.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 74997194fd88..92205b90c25c 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -443,6 +443,14 @@ static const struct dmi_system_id critclk_systems[] = { DMI_MATCH(DMI_PRODUCT_NAME, "3I380D"), }, }, + { + /* pmc_plt_clk* - are used for ethernet controllers */ + .ident = "Lex 2I385SW", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Lex BayTrail"), + DMI_MATCH(DMI_PRODUCT_NAME, "2I385SW"), + }, + }, { /* pmc_plt_clk* - are used for ethernet controllers */ .ident = "Beckhoff CB3163", -- GitLab From ba1ebf3aef04922bfbe549bb5254765379d62f77 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 20 Mar 2020 10:48:13 +0100 Subject: [PATCH 0705/1278] bpf: Explicitly memset the bpf_attr structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8096f229421f7b22433775e928d506f0342e5907 upstream. 
For the bpf syscall, we are relying on the compiler to properly zero out the bpf_attr union that we copy userspace data into. Unfortunately that doesn't always work properly, padding and other oddities might not be correctly zeroed, and in some tests odd things have been found when the stack is pre-initialized to other values. Fix this by explicitly memsetting the structure to 0 before using it. Reported-by: Maciej Żenczykowski Reported-by: John Stultz Reported-by: Alexander Potapenko Reported-by: Alistair Delva Signed-off-by: Greg Kroah-Hartman Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://android-review.googlesource.com/c/kernel/common/+/1235490 Link: https://lore.kernel.org/bpf/20200320094813.GA421650@kroah.com Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/syscall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f5c1d5479ba3..8081e233a564 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1474,7 +1474,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { - union bpf_attr attr = {}; + union bpf_attr attr; int err; if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) @@ -1486,6 +1486,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz size = min_t(u32, size, sizeof(attr)); /* copy attributes from user space, may be less than sizeof(bpf_attr) */ + memset(&attr, 0, sizeof(attr)); if (copy_from_user(&attr, uattr, size) != 0) return -EFAULT; -- GitLab From 7855a721d9db4a2c0a64758ac08e73aa627fcecd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 20 Mar 2020 17:22:58 +0100 Subject: [PATCH 0706/1278] bpf: Explicitly memset some bpf info structures declared on the stack commit 5c6f25887963f15492b604dd25cb149c501bbabf upstream. 
Trying to initialize a structure with "= {};" will not always clean out all padding locations in a structure. So be explicit and call memset to initialize everything for a number of bpf information structures that are then copied from userspace, sometimes from smaller memory locations than the size of the structure. Reported-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200320162258.GA794295@kroah.com Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/syscall.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8081e233a564..21073682061d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1364,7 +1364,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, union bpf_attr __user *uattr) { struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); - struct bpf_prog_info info = {}; + struct bpf_prog_info info; u32 info_len = attr->info.info_len; char __user *uinsns; u32 ulen; @@ -1375,6 +1375,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, return err; info_len = min_t(u32, sizeof(info), info_len); + memset(&info, 0, sizeof(info)); if (copy_from_user(&info, uinfo, info_len)) return -EFAULT; @@ -1420,7 +1421,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, union bpf_attr __user *uattr) { struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); - struct bpf_map_info info = {}; + struct bpf_map_info info; u32 info_len = attr->info.info_len; int err; @@ -1429,6 +1430,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, return err; info_len = min_t(u32, sizeof(info), info_len); + memset(&info, 0, sizeof(info)); info.type = map->map_type; info.id = map->id; info.key_size = map->key_size; -- GitLab From f36ca3a8c3a808178181c9b1556a73114997249d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Mar 2020 12:12:25 +0100 Subject: 
[PATCH 0707/1278] gpiolib: acpi: Add quirk to ignore EC wakeups on HP x2 10 CHT + AXP288 model commit 0c625ccfe6f754d0896b8881f5c85bcb81699f1f upstream. There are at least 3 models of the HP x2 10 models: Bay Trail SoC + AXP288 PMIC Cherry Trail SoC + AXP288 PMIC Cherry Trail SoC + TI PMIC Like on the other HP x2 10 models we need to ignore wakeup for ACPI GPIO events on the external embedded-controller pin to avoid spurious wakeups on the HP x2 10 CHT + AXP288 model too. This commit adds an extra DMI based quirk for the HP x2 10 CHT + AXP288 model, ignoring wakeups for ACPI GPIO events on the EC interrupt pin on this model. This fixes spurious wakeups from suspend on this model. Fixes: aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") Reported-and-tested-by: Marc Lehmann Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200302111225.6641-4-hdegoede@redhat.com Acked-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-acpi.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 319d8b07e4e9..7c06f4541c5d 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1440,6 +1440,21 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { .ignore_wake = "INT33FC:02@28", }, }, + { + /* + * HP X2 10 models with Cherry Trail SoC + AXP288 PMIC use an + * external embedded-controller connected via I2C + an ACPI GPIO + * event handler on INT33FF:01 pin 0, causing spurious wakeups. 
+ */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), + DMI_MATCH(DMI_BOARD_NAME, "813E"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "INT33FF:01@0", + }, + }, {} /* Terminating entry */ }; -- GitLab From 14f307ec369c074208f51bb5e4e45d83bf415506 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 25 Mar 2020 15:25:47 +0100 Subject: [PATCH 0708/1278] net: ks8851-ml: Fix IO operations, again commit 8262e6f9b1034ede34548a04dec4c302d92c9497 upstream. This patch reverts 58292104832f ("net: ks8851-ml: Fix 16-bit IO operation") and edacb098ea9c ("net: ks8851-ml: Fix 16-bit data access"), because it turns out these were only necessary due to buggy hardware. This patch adds a check for such a buggy hardware to prevent any such mistakes again. While working further on the KS8851 driver, it came to light that the KS8851-16MLL is capable of switching bus endianness by a hardware strap, EESK pin. If this strap is incorrect, the IO accesses require such endian swapping as is being reverted by this patch. Such swapping also impacts the performance significantly. Hence, in addition to removing it, detect that the hardware is broken, report to user, and fail to bind with such hardware. Fixes: 58292104832f ("net: ks8851-ml: Fix 16-bit IO operation") Fixes: edacb098ea9c ("net: ks8851-ml: Fix 16-bit data access") Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/micrel/ks8851_mll.c | 56 ++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index c699a779757e..1b0e1fc7825f 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -474,6 +474,50 @@ static int msg_enable; * chip is busy transferring packet data (RX/TX FIFO accesses). */ +/** + * ks_check_endian - Check whether endianness of the bus is correct + * @ks : The chip information + * + * The KS8851-16MLL EESK pin allows selecting the endianness of the 16bit + * bus. To maintain optimum performance, the bus endianness should be set + * such that it matches the endianness of the CPU. + */ + +static int ks_check_endian(struct ks_net *ks) +{ + u16 cider; + + /* + * Read CIDER register first, however read it the "wrong" way around. + * If the endian strap on the KS8851-16MLL in incorrect and the chip + * is operating in different endianness than the CPU, then the meaning + * of BE[3:0] byte-enable bits is also swapped such that: + * BE[3,2,1,0] becomes BE[1,0,3,2] + * + * Luckily for us, the byte-enable bits are the top four MSbits of + * the address register and the CIDER register is at offset 0xc0. + * Hence, by reading address 0xc0c0, which is not impacted by endian + * swapping, we assert either BE[3:2] or BE[1:0] while reading the + * CIDER register. + * + * If the bus configuration is correct, reading 0xc0c0 asserts + * BE[3:2] and this read returns 0x0000, because to read register + * with bottom two LSbits of address set to 0, BE[1:0] must be + * asserted. + * + * If the bus configuration is NOT correct, reading 0xc0c0 asserts + * BE[1:0] and this read returns non-zero 0x8872 value. 
+ */ + iowrite16(BE3 | BE2 | KS_CIDER, ks->hw_addr_cmd); + cider = ioread16(ks->hw_addr); + if (!cider) + return 0; + + netdev_err(ks->netdev, "incorrect EESK endian strap setting\n"); + + return -EINVAL; +} + /** * ks_rdreg16 - read 16 bit register from device * @ks : The chip information @@ -484,7 +528,7 @@ static int msg_enable; static u16 ks_rdreg16(struct ks_net *ks, int offset) { - ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); return ioread16(ks->hw_addr); } @@ -499,7 +543,7 @@ static u16 ks_rdreg16(struct ks_net *ks, int offset) static void ks_wrreg16(struct ks_net *ks, int offset, u16 value) { - ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); iowrite16(value, ks->hw_addr); } @@ -515,7 +559,7 @@ static inline void ks_inblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - *wptr++ = be16_to_cpu(ioread16(ks->hw_addr)); + *wptr++ = (u16)ioread16(ks->hw_addr); } /** @@ -529,7 +573,7 @@ static inline void ks_outblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - iowrite16(cpu_to_be16(*wptr++), ks->hw_addr); + iowrite16(*wptr++, ks->hw_addr); } static void ks_disable_int(struct ks_net *ks) @@ -1539,6 +1583,10 @@ static int ks8851_probe(struct platform_device *pdev) goto err_free; } + err = ks_check_endian(ks); + if (err) + goto err_free; + netdev->irq = platform_get_irq(pdev, 0); if ((int)netdev->irq < 0) { -- GitLab From e89a0cfbdcc80add9c1b9a3f1c138bee982ac959 Mon Sep 17 00:00:00 2001 From: Ilie Halip Date: Thu, 19 Mar 2020 23:45:28 +0200 Subject: [PATCH 0709/1278] arm64: alternative: fix build with clang integrated assembler commit 6f5459da2b8736720afdbd67c4bd2d1edba7d0e3 upstream. 
Building an arm64 defconfig with clang's integrated assembler, this error occurs: <instantiation>:2:2: error: unrecognized instruction mnemonic _ASM_EXTABLE 9999b, 9f ^ arch/arm64/mm/cache.S:50:1: note: while in macro instantiation user_alt 9f, "dc cvau, x4", "dc civac, x4", 0 ^ While GNU as seems fine with case-sensitive macro instantiations, clang doesn't, so use the actual macro name (_asm_extable) as in the rest of the file. Also checked that the generated assembly matches the GCC output. Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Fixes: 290622efc76e ("arm64: fix "dc cvau" cache operation on errata-affected core") Link: https://github.com/ClangBuiltLinux/linux/issues/924 Signed-off-by: Ilie Halip Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/alternative.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 4cd4a793dc32..4ed869845a23 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -216,7 +216,7 @@ alternative_endif .macro user_alt, label, oldinstr, newinstr, cond 9999: alternative_insn "\oldinstr", "\newinstr", \cond - _ASM_EXTABLE 9999b, \label + _asm_extable 9999b, \label .endm /* -- GitLab From aefc7ec47022d223f4551f1391ed02392c933f07 Mon Sep 17 00:00:00 2001 From: disconnect3d Date: Mon, 9 Mar 2020 11:48:53 +0100 Subject: [PATCH 0710/1278] perf map: Fix off by one in strncpy() size argument commit db2c549407d4a76563c579e4768f7d6d32afefba upstream. This patch fixes an off-by-one error in the strncmp() size argument in tools/perf/util/map.c. The issue is that in: strncmp(filename, "/system/lib/", 11) the passed string literal: "/system/lib/" has 12 bytes (without the NULL byte) and the passed size argument is 11. As a result, the logic won't match the ending "/" byte and will pass filepaths that are stored in other directories e.g.
"/system/libmalicious/bin" or just "/system/libmalicious". This functionality seems to be present only on Android. I assume the /system/ directory is only writable by the root user, so I don't think this bug has much (or any) security impact. Fixes: eca818369996 ("perf tools: Add automatic remapping of Android libraries") Signed-off-by: disconnect3d Cc: Alexander Shishkin Cc: Changbin Du Cc: Jiri Olsa Cc: John Keeping Cc: Mark Rutland Cc: Michael Lentine Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200309104855.3775-1-dominik.b.czarnota@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 63db9872c880..a49f27aa0c95 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -90,7 +90,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) return true; } - if (!strncmp(filename, "/system/lib/", 11)) { + if (!strncmp(filename, "/system/lib/", 12)) { char *ndk, *app; const char *arch; size_t ndk_length; -- GitLab From 53cdc9f5a25ae224af22a031b6eabca569b288f6 Mon Sep 17 00:00:00 2001 From: Sungbo Eo Date: Sat, 21 Mar 2020 23:36:53 +0900 Subject: [PATCH 0711/1278] ARM: dts: oxnas: Fix clear-mask property commit deeabb4c1341a12bf8b599e6a2f4cfa4fd74738c upstream. Disable all rps-irq interrupts during driver initialization to prevent an accidental interrupt on GIC. 
Fixes: 84316f4ef141 ("ARM: boot: dts: Add Oxford Semiconductor OX810SE dtsi") Fixes: 38d4a53733f5 ("ARM: dts: Add support for OX820 and Pogoplug V3") Signed-off-by: Sungbo Eo Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/ox810se.dtsi | 4 ++-- arch/arm/boot/dts/ox820.dtsi | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/ox810se.dtsi b/arch/arm/boot/dts/ox810se.dtsi index 46aa6db8353a..3d2f91234f1a 100644 --- a/arch/arm/boot/dts/ox810se.dtsi +++ b/arch/arm/boot/dts/ox810se.dtsi @@ -322,8 +322,8 @@ interrupt-controller; reg = <0 0x200>; #interrupt-cells = <1>; - valid-mask = <0xFFFFFFFF>; - clear-mask = <0>; + valid-mask = <0xffffffff>; + clear-mask = <0xffffffff>; }; timer0: timer@200 { diff --git a/arch/arm/boot/dts/ox820.dtsi b/arch/arm/boot/dts/ox820.dtsi index 459207536a46..8355cb034525 100644 --- a/arch/arm/boot/dts/ox820.dtsi +++ b/arch/arm/boot/dts/ox820.dtsi @@ -239,8 +239,8 @@ reg = <0 0x200>; interrupts = ; #interrupt-cells = <1>; - valid-mask = <0xFFFFFFFF>; - clear-mask = <0>; + valid-mask = <0xffffffff>; + clear-mask = <0xffffffff>; }; timer0: timer@200 { -- GitLab From 93431d69e69969bd589a809a5cb967fd94fb52c9 Mon Sep 17 00:00:00 2001 From: Nick Hudson Date: Thu, 12 Mar 2020 09:03:45 +0000 Subject: [PATCH 0712/1278] ARM: bcm2835-rpi-zero-w: Add missing pinctrl name commit 6687c201fdc3139315c2ea7ef96c157672805cdc upstream. Define the sdhci pinctrl state as "default" so it gets applied correctly and to match all other RPis. 
Fixes: 2c7c040c73e9 ("ARM: dts: bcm2835: Add Raspberry Pi Zero W") Signed-off-by: Nick Hudson Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm2835-rpi-zero-w.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts index b8565fc33eea..e5f2cca86f04 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts @@ -118,6 +118,7 @@ &sdhci { #address-cells = <1>; #size-cells = <0>; + pinctrl-names = "default"; pinctrl-0 = <&emmc_gpio34 &gpclk2_gpio43>; mmc-pwrseq = <&wifi_pwrseq>; non-removable; -- GitLab From a34bb888980be1fa40117512c34cbf1da3231056 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 16 Mar 2020 14:05:57 +0200 Subject: [PATCH 0713/1278] arm64: dts: ls1043a-rdb: correct RGMII delay mode to rgmii-id commit 4022d808c45277693ea86478fab1f081ebf997e8 upstream. The correct setting for the RGMII ports on LS1043ARDB is to enable delay on both Rx and Tx so the interface mode used must be PHY_INTERFACE_MODE_RGMII_ID. Since commit 1b3047b5208a80 ("net: phy: realtek: add support for configuring the RX delay on RTL8211F") the Realtek 8211F PHY driver has control over the RGMII RX delay and it is disabling it for RGMII_TXID. The LS1043ARDB uses two such PHYs in RGMII_ID mode but in the device tree the mode was described as "rgmii_txid". This issue was not apparent at the time as the PHY driver took the same action for RGMII_TXID and RGMII_ID back then but it became visible (RX no longer working) after the above patch. Changing the phy-connection-type to "rgmii-id" to address the issue. Fixes: bf02f2ffe59c ("arm64: dts: add LS1043A DPAA FMan support") Signed-off-by: Madalin Bucur Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts index 3dc0c8e9663d..3aead63e5475 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts @@ -155,12 +155,12 @@ ethernet@e4000 { phy-handle = <&rgmii_phy1>; - phy-connection-type = "rgmii-txid"; + phy-connection-type = "rgmii-id"; }; ethernet@e6000 { phy-handle = <&rgmii_phy2>; - phy-connection-type = "rgmii-txid"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { -- GitLab From 80bf2f8eeede04bcfb52a81f373b3598347c67b0 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 16 Mar 2020 14:05:58 +0200 Subject: [PATCH 0714/1278] arm64: dts: ls1046ardb: set RGMII interfaces to RGMII_ID mode commit d79e9d7c1e4ba5f95f2ff3541880c40ea9722212 upstream. The correct setting for the RGMII ports on LS1046ARDB is to enable delay on both Rx and Tx so the interface mode used must be PHY_INTERFACE_MODE_RGMII_ID. Since commit 1b3047b5208a80 ("net: phy: realtek: add support for configuring the RX delay on RTL8211F") the Realtek 8211F PHY driver has control over the RGMII RX delay and it is disabling it for RGMII_TXID. The LS1046ARDB uses two such PHYs in RGMII_ID mode but in the device tree the mode was described as "rgmii". Changing the phy-connection-type to "rgmii-id" to address the issue. Fixes: 3fa395d2c48a ("arm64: dts: add LS1046A DPAA FMan nodes") Signed-off-by: Madalin Bucur Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts index 5dc2782e2a58..e775e59d0370 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts @@ -162,12 +162,12 @@ &fman0 { ethernet@e4000 { phy-handle = <&rgmii_phy1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e6000 { phy-handle = <&rgmii_phy2>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { -- GitLab From 4520f06b03ae667e442da1ab9351fd28cd7ac598 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 Apr 2020 16:34:38 +0200 Subject: [PATCH 0715/1278] Linux 4.14.175 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index edc6b62bd892..3b792208fbc2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 174 +SUBLEVEL = 175 EXTRAVERSION = NAME = Petit Gorille -- GitLab From ec4174eff9f58c34c94a2151c32e3fcaa0a43eff Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 17 Mar 2020 14:17:44 -0700 Subject: [PATCH 0716/1278] ANDROID: kbuild: fix module linker script flags for LTO KBUILD_LDS_MODULE didn't exist in 4.19, it was added in upstream commit 10df06385582 ("kbuild: rebuild modules when module linker scripts are updated"), which means the module-lto.lds linker script is not actually passed to the linker. Append the flags directly to KBUILD_LDFLAGS_MODULE instead. 
Bug: 151700304 Fixes: 6cea04778e12 ("ANDROID: kbuild: merge module sections with LTO") Change-Id: I600db54d2ff9cd4e287913e8ddd463a20741a4a3 Signed-off-by: Sami Tolvanen --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b6386c308244..26cf64841fca 100644 --- a/Makefile +++ b/Makefile @@ -847,7 +847,7 @@ LD_FLAGS_LTO_CLANG := -mllvm -import-instr-limit=5 KBUILD_LDFLAGS += $(LD_FLAGS_LTO_CLANG) KBUILD_LDFLAGS_MODULE += $(LD_FLAGS_LTO_CLANG) -KBUILD_LDS_MODULE += $(srctree)/scripts/module-lto.lds +KBUILD_LDFLAGS_MODULE += -T $(srctree)/scripts/module-lto.lds # allow disabling only clang LTO where needed DISABLE_LTO_CLANG := -fno-lto -- GitLab From bf3b3bbdfcb8d25c872653215269bb4a22fbfe49 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 31 Mar 2020 14:32:27 -0700 Subject: [PATCH 0717/1278] ANDROID: kbuild: ensure __cfi_check is correctly aligned On modules with no executable code, LLVM generates a __cfi_check stub, but won't align it to page size as expected. This change ensures the function is at the beginning of the .text section and correctly aligned for the CFI shadow. 
Bug: 148458318 Change-Id: I85ea31fa851bc23988f649b021b3ac7e9d9dcb38 Signed-off-by: Sami Tolvanen --- Makefile | 2 +- scripts/Makefile | 2 ++ scripts/{module-lto.lds => module-lto.lds.S} | 14 +++++++++++++- 3 files changed, 16 insertions(+), 2 deletions(-) rename scripts/{module-lto.lds => module-lto.lds.S} (68%) diff --git a/Makefile b/Makefile index 26cf64841fca..a5b0dfcaeb70 100644 --- a/Makefile +++ b/Makefile @@ -847,7 +847,7 @@ LD_FLAGS_LTO_CLANG := -mllvm -import-instr-limit=5 KBUILD_LDFLAGS += $(LD_FLAGS_LTO_CLANG) KBUILD_LDFLAGS_MODULE += $(LD_FLAGS_LTO_CLANG) -KBUILD_LDFLAGS_MODULE += -T $(srctree)/scripts/module-lto.lds +KBUILD_LDFLAGS_MODULE += -T scripts/module-lto.lds # allow disabling only clang LTO where needed DISABLE_LTO_CLANG := -fno-lto diff --git a/scripts/Makefile b/scripts/Makefile index 25ab143cbe14..c1d31dd3d760 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -30,6 +30,8 @@ always := $(hostprogs-y) $(hostprogs-m) # The following hostprogs-y programs are only build on demand hostprogs-y += unifdef +extra-$(CONFIG_LTO_CLANG) += module-lto.lds + # These targets are used internally to avoid "is up to date" messages PHONY += build_unifdef build_unifdef: $(obj)/unifdef diff --git a/scripts/module-lto.lds b/scripts/module-lto.lds.S similarity index 68% rename from scripts/module-lto.lds rename to scripts/module-lto.lds.S index 5ba0e9461e13..c0f4fdeb84a0 100644 --- a/scripts/module-lto.lds +++ b/scripts/module-lto.lds.S @@ -1,3 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include + /* * With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and * -ffunction-sections, which increases the size of the final module. 
@@ -18,5 +21,14 @@ SECTIONS { .rela.rodata : { *(.rela.rodata .rela.rodata.[0-9a-zA-Z_]*) } .rela.text : { *(.rela.text .rela.text.[0-9a-zA-Z_]*) } .rodata : { *(.rodata .rodata.[0-9a-zA-Z_]*) } - .text : { *(.text .text.[0-9a-zA-Z_]*) } + + /* + * With CFI_CLANG, ensure __cfi_check is at the beginning of the + * .text section, and that the section is aligned to page size. + */ + .text : ALIGN(PAGE_SIZE) { + *(.text.__cfi_check) + *(.text .text.[0-9a-zA-Z_]* .text..L.cfi*) + } + } -- GitLab From a4cad4eb80676d4917c62768b3c4f155669c5a71 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 26 Mar 2020 12:53:11 -0700 Subject: [PATCH 0718/1278] ANDROID: Incremental fs: Fix four resource bugs Without these, you can't unmount a volume on which incfs was mounted and the tests run. Also incfs_tests would fail sporadically without the fix to test_inode Test: Run incfs_test and unmount underlying volume 1000 times Bug: 152636070 Signed-off-by: Paul Lawrence Change-Id: I88f11f5d4269c22d9073e5eb671d0c7cc4629f6c (cherry picked from commit c062bc8e769f0f6e47cc41fb9b7ab30e3e7f2689) --- fs/incfs/vfs.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index f531e28a90c8..2708a99f0ae2 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -349,8 +349,8 @@ static int inode_test(struct inode *inode, void *opaque) return (node->n_backing_inode == backing_inode) && inode->i_ino == search->ino; - } - return 1; + } else + return inode->i_ino == search->ino; } static int inode_set(struct inode *inode, void *opaque) @@ -896,6 +896,7 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, } bfc = incfs_alloc_bfc(new_file); + fput(new_file); if (IS_ERR(bfc)) { error = PTR_ERR(bfc); bfc = NULL; @@ -1678,6 +1679,7 @@ static int final_file_delete(struct mount_info *mi, if (d_really_is_positive(index_file_dentry)) error = incfs_unlink(index_file_dentry); out: + dput(index_file_dentry); if (error) pr_debug("incfs: 
delete_file_from_index err:%d\n", error); return error; @@ -1980,6 +1982,7 @@ static void dentry_release(struct dentry *d) if (di) path_put(&di->backing_path); + kfree(d->d_fsdata); d->d_fsdata = NULL; } @@ -2191,7 +2194,7 @@ struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, path_put(&backing_dir_path); sb->s_flags |= SB_ACTIVE; - pr_debug("infs: mount\n"); + pr_debug("incfs: mount\n"); return dget(sb->s_root); err: sb->s_fs_info = NULL; @@ -2217,7 +2220,7 @@ static int incfs_remount_fs(struct super_block *sb, int *flags, char *data) pr_debug("incfs: new timeout_ms=%d", options.read_timeout_ms); } - pr_debug("infs: remount\n"); + pr_debug("incfs: remount\n"); return 0; } @@ -2225,7 +2228,7 @@ void incfs_kill_sb(struct super_block *sb) { struct mount_info *mi = sb->s_fs_info; - pr_debug("infs: unmount\n"); + pr_debug("incfs: unmount\n"); incfs_free_mount_info(mi); generic_shutdown_super(sb); } -- GitLab From c5d2219efd8df80fba9ab557e45a845c2b54e5bf Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Thu, 2 Apr 2020 13:56:04 -0700 Subject: [PATCH 0719/1278] ANDROID: Fix wq fp check for CFI builds A previous change added a test on the wrong config flag; rename CFI to CFI_CLANG. 
Bug: 145210207 Change-Id: Id8aead2eb2c75ad6442d10165f6cb86ccfb9c2f9 Signed-off-by: Alistair Delva --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c6bd56031d25..863d53901660 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1532,7 +1532,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct work_struct *work = &dwork->work; WARN_ON_ONCE(!wq); -#ifndef CONFIG_CFI +#ifndef CONFIG_CFI_CLANG WARN_ON_ONCE(timer->function != delayed_work_timer_fn); #endif WARN_ON_ONCE(timer->data != (unsigned long)dwork); -- GitLab From df5824ee40f027ffb0300462376dc38247778e75 Mon Sep 17 00:00:00 2001 From: Yurii Zubrytskyi Date: Sun, 29 Mar 2020 14:13:32 -0700 Subject: [PATCH 0720/1278] ANDROID: Incremental fs: get_filled_blocks: better index_out When returning incomplete results index_out has to be usable to call the function again and resume from the same location. This means that if the output buffer was too small the function needs to check for that when encountering the _beginning_ of a next output range, not the end of it. 
Otherwise resuming from the end of the range that didn't fit into the buffer would cause the call to never return that range + Make the backing file header flags update thread safe Bug: 152691988 Test: libincfs-test, incfs_test passes Signed-off-by: Yurii Zubrytskyi Change-Id: I351156beba0b74e1942a39117279d3fcdb5e0c78 Signed-off-by: Paul Lawrence --- fs/incfs/data_mgmt.c | 69 +++++++++++++++++++++++++++++++++++++------- fs/incfs/format.c | 2 +- fs/incfs/format.h | 2 +- 3 files changed, 61 insertions(+), 12 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 1f9016b1a3f5..bc0aa420f3ef 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -372,11 +372,19 @@ static int get_data_file_block(struct data_file *df, int index, return 0; } +static int check_room_for_one_range(u32 size, u32 size_out) +{ + if (size_out + sizeof(struct incfs_filled_range) > size) + return -ERANGE; + return 0; +} + static int copy_one_range(struct incfs_filled_range *range, void __user *buffer, u32 size, u32 *size_out) { - if (*size_out + sizeof(*range) > size) - return -ERANGE; + int error = check_room_for_one_range(size, *size_out); + if (error) + return error; if (copy_to_user(((char *)buffer) + *size_out, range, sizeof(*range))) return -EFAULT; @@ -385,6 +393,34 @@ static int copy_one_range(struct incfs_filled_range *range, void __user *buffer, return 0; } +static int update_file_header_flags(struct data_file *df, u32 bits_to_reset, + u32 bits_to_set) +{ + int result; + u32 new_flags; + struct backing_file_context *bfc; + + if (!df) + return -EFAULT; + bfc = df->df_backing_file_context; + if (!bfc) + return -EFAULT; + + result = mutex_lock_interruptible(&bfc->bc_mutex); + if (result) + return result; + + new_flags = (df->df_header_flags & ~bits_to_reset) | bits_to_set; + if (new_flags != df->df_header_flags) { + df->df_header_flags = new_flags; + result = incfs_write_file_header_flags(bfc, new_flags); + } + + mutex_unlock(&bfc->bc_mutex); + + return result; 
+} + int incfs_get_filled_blocks(struct data_file *df, struct incfs_get_filled_blocks_args *arg) { @@ -408,14 +444,22 @@ int incfs_get_filled_blocks(struct data_file *df, arg->index_out = arg->start_index; return 0; } + arg->index_out = arg->start_index; + + error = check_room_for_one_range(size, *size_out); + if (error) + return error; range = (struct incfs_filled_range){ .begin = arg->start_index, .end = end_index, }; + error = copy_one_range(&range, buffer, size, size_out); + if (error) + return error; arg->index_out = end_index; - return copy_one_range(&range, buffer, size, size_out); + return 0; } for (arg->index_out = arg->start_index; arg->index_out < end_index; @@ -430,13 +474,20 @@ int incfs_get_filled_blocks(struct data_file *df, continue; if (!in_range) { + error = check_room_for_one_range(size, *size_out); + if (error) + break; in_range = true; range.begin = arg->index_out; } else { range.end = arg->index_out; error = copy_one_range(&range, buffer, size, size_out); - if (error) + if (error) { + /* there will be another try out of the loop, + * it will reset the index_out if it fails too + */ break; + } in_range = false; } } @@ -444,17 +495,15 @@ int incfs_get_filled_blocks(struct data_file *df, if (in_range) { range.end = arg->index_out; error = copy_one_range(&range, buffer, size, size_out); + if (error) + arg->index_out = range.begin; } if (!error && in_range && arg->start_index == 0 && end_index == df->df_total_block_count && *size_out == sizeof(struct incfs_filled_range)) { - int result; - - df->df_header_flags |= INCFS_FILE_COMPLETE; - result = incfs_update_file_header_flags( - df->df_backing_file_context, df->df_header_flags); - + int result = + update_file_header_flags(df, 0, INCFS_FILE_COMPLETE); /* Log failure only, since it's just a failed optimization */ pr_debug("Marked file full with result %d", result); } diff --git a/fs/incfs/format.c b/fs/incfs/format.c index 96f4e3d54f58..1a7c4646a291 100644 --- a/fs/incfs/format.c +++ 
b/fs/incfs/format.c @@ -215,7 +215,7 @@ static int append_md_to_backing_file(struct backing_file_context *bfc, return result; } -int incfs_update_file_header_flags(struct backing_file_context *bfc, u32 flags) +int incfs_write_file_header_flags(struct backing_file_context *bfc, u32 flags) { if (!bfc) return -EFAULT; diff --git a/fs/incfs/format.h b/fs/incfs/format.h index 33e5ea4eba56..deb5ca5bb0da 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -311,7 +311,7 @@ int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, struct mem_range sig, u32 tree_size); -int incfs_update_file_header_flags(struct backing_file_context *bfc, u32 flags); +int incfs_write_file_header_flags(struct backing_file_context *bfc, u32 flags); int incfs_make_empty_backing_file(struct backing_file_context *bfc, incfs_uuid_t *uuid, u64 file_size); -- GitLab From 1a000625086fd90f1f55acd79c3798ca81d314a4 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 31 Mar 2020 15:05:33 -0700 Subject: [PATCH 0721/1278] ANDROID: Incremental fs: Fix crash polling 0 size read_log When read log is 0 sized, we still need to init the wait queue to avoid kernel panics if someone does decide to poll on the read log. 
Test: Added test for this condition, incfs_test crashes With fix, incfs_test doesn't crash Signed-off-by: Paul Lawrence Bug: 152909243 Change-Id: Ic3250523bb7ddb1839f8e95852c17103e5ffb782 --- fs/incfs/data_mgmt.c | 2 +- .../selftests/filesystems/incfs/incfs_test.c | 55 ++++++++++++++++--- 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index bc0aa420f3ef..d30bb4bc477c 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -34,13 +34,13 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, mutex_init(&mi->mi_dir_struct_mutex); mutex_init(&mi->mi_pending_reads_mutex); init_waitqueue_head(&mi->mi_pending_reads_notif_wq); + init_waitqueue_head(&mi->mi_log.ml_notif_wq); INIT_LIST_HEAD(&mi->mi_reads_list_head); if (options->read_log_pages != 0) { size_t buf_size = PAGE_SIZE * options->read_log_pages; spin_lock_init(&mi->mi_log.rl_writer_lock); - init_waitqueue_head(&mi->mi_log.ml_notif_wq); mi->mi_log.rl_size = buf_size / sizeof(*mi->mi_log.rl_ring_buf); mi->mi_log.rl_ring_buf = kzalloc(buf_size, GFP_NOFS); diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index df2d224d322f..f1a239f6376d 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -1930,7 +1930,8 @@ static int hash_tree_test(char *mount_dir) return TEST_FAILURE; } -static int validate_logs(char *mount_dir, int log_fd, struct test_file *file) +static int validate_logs(char *mount_dir, int log_fd, struct test_file *file, + bool no_rlog) { uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; struct incfs_pending_read_info prs[100] = {}; @@ -1957,7 +1958,19 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file) goto failure; } - read_count = wait_for_pending_reads(log_fd, 0, prs, prs_size); + read_count = + wait_for_pending_reads(log_fd, no_rlog ? 
10 : 0, prs, prs_size); + if (no_rlog) { + if (read_count == 0) + goto success; + if (read_count < 0) + ksft_print_msg("Error reading logged reads %s.\n", + strerror(-read_count)); + else + ksft_print_msg("Somehow read empty logs.\n"); + goto failure; + } + if (read_count < 0) { ksft_print_msg("Error reading logged reads %s.\n", strerror(-read_count)); @@ -2001,6 +2014,8 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file) goto failure; } } + +success: close(fd); return TEST_SUCCESS; @@ -2029,7 +2044,7 @@ static int read_log_test(char *mount_dir) goto failure; log_fd = open_log_file(mount_dir); - if (cmd_fd < 0) + if (log_fd < 0) ksft_print_msg("Can't open log file.\n"); /* Write data. */ @@ -2048,7 +2063,7 @@ static int read_log_test(char *mount_dir) for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (validate_logs(mount_dir, log_fd, file)) + if (validate_logs(mount_dir, log_fd, file, false)) goto failure; } @@ -2069,20 +2084,46 @@ static int read_log_test(char *mount_dir) goto failure; log_fd = open_log_file(mount_dir); - if (cmd_fd < 0) + if (log_fd < 0) ksft_print_msg("Can't open log file.\n"); /* Validate data again */ for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (validate_logs(mount_dir, log_fd, file)) + if (validate_logs(mount_dir, log_fd, file, false)) goto failure; } - /* Final unmount */ + /* + * Unmount and mount again with no read log to make sure poll + * doesn't crash + */ close(cmd_fd); close(log_fd); + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=0") != + 0) + goto failure; + + log_fd = open_log_file(mount_dir); + if (log_fd < 0) + ksft_print_msg("Can't open log file.\n"); + + /* Validate data again - note should fail this time */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (validate_logs(mount_dir, log_fd, file, 
true)) + goto failure; + } + + /* Final unmount */ + close(log_fd); free(backing_dir); if (umount(mount_dir) != 0) { print_error("Can't unmout FS"); -- GitLab From ecd6f86bed262b0fd0dd0128ecddb9865f3c5f59 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 1 Apr 2020 10:15:12 -0700 Subject: [PATCH 0722/1278] ANDROID: Incremental fs: Protect get_fill_block, and add a field Since INCFS_IOC_GET_FILLED_BLOCKS potentially leaks information about usage patterns, and is only useful to someone filling the file, best protect it in the same way as INCFS_IOC_FILL_BLOCKS. Add useful field data_blocks_out as well Test: incfs_test passes Bug: 152983639 Signed-off-by: Paul Lawrence Change-Id: I126a8cf711e56592479093e9aadbfd0e7f700752 --- fs/incfs/data_mgmt.c | 1 + fs/incfs/vfs.c | 3 ++ include/uapi/linux/incrementalfs.h | 3 ++ .../selftests/filesystems/incfs/incfs_test.c | 52 +++++++++++++++++++ 4 files changed, 59 insertions(+) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index d30bb4bc477c..7d2276c8316d 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -437,6 +437,7 @@ int incfs_get_filled_blocks(struct data_file *df, if (end_index > df->df_total_block_count) end_index = df->df_total_block_count; arg->total_blocks_out = df->df_total_block_count; + arg->data_blocks_out = df->df_data_block_count; if (df->df_header_flags & INCFS_FILE_COMPLETE) { pr_debug("File marked full, fast get_filled_blocks"); diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 2708a99f0ae2..9fcc3352ee37 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -1447,6 +1447,9 @@ static long ioctl_get_filled_blocks(struct file *f, void __user *arg) if (!df) return -EINVAL; + if ((uintptr_t)f->private_data != CAN_FILL) + return -EPERM; + if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) return -EINVAL; diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 0fb1c86d2f9d..ac775b64bdcf 100644 --- a/include/uapi/linux/incrementalfs.h +++
b/include/uapi/linux/incrementalfs.h @@ -328,6 +328,9 @@ struct incfs_get_filled_blocks_args { /* Actual number of blocks in file */ __u32 total_blocks_out; + /* The number of data blocks in file */ + __u32 data_blocks_out; + /* Number of bytes written to range buffer */ __u32 range_buffer_size_out; diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index f1a239f6376d..639e2fb1544c 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -2189,12 +2189,29 @@ static int validate_ranges(const char *mount_dir, struct test_file *file) int error = TEST_SUCCESS; int i; int range_cnt; + int cmd_fd = -1; + struct incfs_permit_fill permit_fill; fd = open(filename, O_RDONLY); free(filename); if (fd <= 0) return TEST_FAILURE; + error = ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); + if (error != -1 || errno != EPERM) { + ksft_print_msg("INCFS_IOC_GET_FILLED_BLOCKS not blocked\n"); + error = -EPERM; + goto out; + } + + cmd_fd = open_commands_file(mount_dir); + permit_fill.file_descriptor = fd; + if (ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { + print_error("INCFS_IOC_PERMIT_FILL failed"); + return -EPERM; + goto out; + } + error = ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); if (error && errno != ERANGE) goto out; @@ -2212,6 +2229,11 @@ static int validate_ranges(const char *mount_dir, struct test_file *file) goto out; } + if (fba.data_blocks_out != block_cnt) { + error = -EINVAL; + goto out; + } + range_cnt = (block_cnt + 3) / 4; if (range_cnt > 128) range_cnt = 128; @@ -2282,6 +2304,7 @@ static int validate_ranges(const char *mount_dir, struct test_file *file) out: close(fd); + close(cmd_fd); return error; } @@ -2392,6 +2415,7 @@ static int emit_partial_test_file_hash(char *mount_dir, struct test_file *file) static int validate_hash_ranges(const char *mount_dir, struct test_file *file) { + int block_cnt = 1 + 
(file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; char *filename = concat_file_name(mount_dir, file->name); int fd; struct incfs_filled_range ranges[128]; @@ -2402,6 +2426,8 @@ static int validate_hash_ranges(const char *mount_dir, struct test_file *file) int error = TEST_SUCCESS; int file_blocks = (file->size + INCFS_DATA_FILE_BLOCK_SIZE - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int cmd_fd = -1; + struct incfs_permit_fill permit_fill; if (file->size <= 4096 / 32 * 4096) return 0; @@ -2411,10 +2437,35 @@ static int validate_hash_ranges(const char *mount_dir, struct test_file *file) if (fd <= 0) return TEST_FAILURE; + error = ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); + if (error != -1 || errno != EPERM) { + ksft_print_msg("INCFS_IOC_GET_FILLED_BLOCKS not blocked\n"); + error = -EPERM; + goto out; + } + + cmd_fd = open_commands_file(mount_dir); + permit_fill.file_descriptor = fd; + if (ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { + print_error("INCFS_IOC_PERMIT_FILL failed"); + return -EPERM; + goto out; + } + error = ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); if (error) goto out; + if (fba.total_blocks_out <= block_cnt) { + error = -EINVAL; + goto out; + } + + if (fba.data_blocks_out != block_cnt) { + error = -EINVAL; + goto out; + } + if (fba.range_buffer_size_out != sizeof(struct incfs_filled_range)) { error = -EINVAL; goto out; @@ -2427,6 +2478,7 @@ static int validate_hash_ranges(const char *mount_dir, struct test_file *file) } out: + close(cmd_fd); close(fd); return error; } -- GitLab From d73d0b4d33a807bb72f8f6abda4160c5a842348c Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 1 Apr 2020 16:04:39 -0700 Subject: [PATCH 0723/1278] ANDROID: Incremental fs: Fix remount Bug: 153017385 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: I13f3a3c91d746d725e0e21b1e2bcfe0a64a13716 --- fs/incfs/data_mgmt.c | 35 ++++++++++++------ fs/incfs/data_mgmt.h | 3 ++ fs/incfs/vfs.c | 7 ++-- .../selftests/filesystems/incfs/incfs_test.c | 37 
+++++++++++++++---- .../selftests/filesystems/incfs/utils.c | 5 ++- .../selftests/filesystems/incfs/utils.h | 2 +- 6 files changed, 63 insertions(+), 26 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 7d2276c8316d..caa5770f7f4c 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -27,7 +27,6 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, return ERR_PTR(-ENOMEM); mi->mi_sb = sb; - mi->mi_options = *options; mi->mi_backing_dir_path = *backing_dir_path; mi->mi_owner = get_current_cred(); path_get(&mi->mi_backing_dir_path); @@ -35,26 +34,38 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, mutex_init(&mi->mi_pending_reads_mutex); init_waitqueue_head(&mi->mi_pending_reads_notif_wq); init_waitqueue_head(&mi->mi_log.ml_notif_wq); + spin_lock_init(&mi->mi_log.rl_writer_lock); INIT_LIST_HEAD(&mi->mi_reads_list_head); + error = incfs_realloc_mount_info(mi, options); + if (error) + goto err; + + return mi; + +err: + incfs_free_mount_info(mi); + return ERR_PTR(error); +} + +int incfs_realloc_mount_info(struct mount_info *mi, + struct mount_options *options) +{ + kfree(mi->mi_log.rl_ring_buf); + mi->mi_log.rl_ring_buf = NULL; + mi->mi_log.rl_size = 0; + + mi->mi_options = *options; if (options->read_log_pages != 0) { size_t buf_size = PAGE_SIZE * options->read_log_pages; - spin_lock_init(&mi->mi_log.rl_writer_lock); - mi->mi_log.rl_size = buf_size / sizeof(*mi->mi_log.rl_ring_buf); mi->mi_log.rl_ring_buf = kzalloc(buf_size, GFP_NOFS); - if (!mi->mi_log.rl_ring_buf) { - error = -ENOMEM; - goto err; - } + if (!mi->mi_log.rl_ring_buf) + return -ENOMEM; } - return mi; - -err: - incfs_free_mount_info(mi); - return ERR_PTR(error); + return 0; } void incfs_free_mount_info(struct mount_info *mi) diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 41f74e68187d..3cdb95a7661a 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -249,6 +249,9 @@ struct mount_info 
*incfs_alloc_mount_info(struct super_block *sb, struct mount_options *options, struct path *backing_dir_path); +int incfs_realloc_mount_info(struct mount_info *mi, + struct mount_options *options); + void incfs_free_mount_info(struct mount_info *mi); struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf); diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 9fcc3352ee37..cb4c787c825d 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -2218,10 +2218,9 @@ static int incfs_remount_fs(struct super_block *sb, int *flags, char *data) if (err) return err; - if (mi->mi_options.read_timeout_ms != options.read_timeout_ms) { - mi->mi_options.read_timeout_ms = options.read_timeout_ms; - pr_debug("incfs: new timeout_ms=%d", options.read_timeout_ms); - } + err = incfs_realloc_mount_info(mi, &options); + if (err) + return err; pr_debug("incfs: remount\n"); return 0; diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 639e2fb1544c..f9661a9eb3fa 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -2029,14 +2029,14 @@ static int read_log_test(char *mount_dir) struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; int i = 0; - int cmd_fd = -1, log_fd = -1; + int cmd_fd = -1, log_fd = -1, drop_caches = -1; char *backing_dir; backing_dir = create_backing_dir(mount_dir); if (!backing_dir) goto failure; - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) goto failure; cmd_fd = open_commands_file(mount_dir); @@ -2076,7 +2076,7 @@ static int read_log_test(char *mount_dir) goto failure; } - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) goto failure; cmd_fd = open_commands_file(mount_dir); @@ -2106,8 +2106,8 @@ 
static int read_log_test(char *mount_dir) goto failure; } - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=0") != - 0) + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=0", + false) != 0) goto failure; log_fd = open_log_file(mount_dir); @@ -2122,6 +2122,29 @@ static int read_log_test(char *mount_dir) goto failure; } + /* + * Remount and check that logs start working again + */ + drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY); + if (drop_caches == -1) + goto failure; + i = write(drop_caches, "3", 1); + close(drop_caches); + if (i != 1) + goto failure; + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=4", + true) != 0) + goto failure; + + /* Validate data again */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (validate_logs(mount_dir, log_fd, file, false)) + goto failure; + } + /* Final unmount */ close(log_fd); free(backing_dir); @@ -2320,7 +2343,7 @@ static int get_blocks_test(char *mount_dir) if (!backing_dir) goto failure; - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) goto failure; cmd_fd = open_commands_file(mount_dir); @@ -2495,7 +2518,7 @@ static int get_hash_blocks_test(char *mount_dir) if (!backing_dir) goto failure; - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) goto failure; cmd_fd = open_commands_file(mount_dir); diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c index 3a72fa5d5e9a..545497685d14 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.c +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -41,12 +41,13 @@ int mount_fs(const char *mount_dir, const char *backing_dir, } int mount_fs_opt(const char *mount_dir, const char *backing_dir, - const char *opt) + const char *opt, bool remount) { static 
const char fs_name[] = INCFS_NAME; int result; - result = mount(backing_dir, mount_dir, fs_name, 0, opt); + result = mount(backing_dir, mount_dir, fs_name, + remount ? MS_REMOUNT : 0, opt); if (result != 0) perror("Error mounting fs."); return result; diff --git a/tools/testing/selftests/filesystems/incfs/utils.h b/tools/testing/selftests/filesystems/incfs/utils.h index 23c8a099662a..24b43287fcdd 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.h +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -23,7 +23,7 @@ int mount_fs(const char *mount_dir, const char *backing_dir, int read_timeout_ms); int mount_fs_opt(const char *mount_dir, const char *backing_dir, - const char *opt); + const char *opt, bool remount); int get_file_bmap(int cmd_fd, int ino, unsigned char *buf, int buf_size); -- GitLab From b8bcfe587e1724f14394a8cae68d7d9ad7ccb55e Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Tue, 10 Mar 2020 14:12:30 +0100 Subject: [PATCH 0724/1278] UPSTREAM: loop: Only change blocksize when needed. Return early in loop_set_block_size() if the requested block size is identical to the one we already have; this avoids expensive calls to freeze the block queue. 
Bug: 148607611 Reviewed-by: Christoph Hellwig Signed-off-by: Martijn Coenen Signed-off-by: Jens Axboe (cherry picked from commit 7e81f99afd91c937f0e66dc135e26c1c4f78b003) Change-Id: I61778680579dbfeeb193133527a3926d376e0bac --- drivers/block/loop.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ca912eecc74e..fb2708f03fc1 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1378,16 +1378,16 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL; - if (lo->lo_queue->limits.logical_block_size != arg) { - sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); - } + if (lo->lo_queue->limits.logical_block_size == arg) + return 0; + + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); blk_mq_freeze_queue(lo->lo_queue); /* kill_bdev should have truncated all the pages */ - if (lo->lo_queue->limits.logical_block_size != arg && - lo->lo_device->bd_inode->i_mapping->nrpages) { + if (lo->lo_device->bd_inode->i_mapping->nrpages) { err = -EAGAIN; pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", __func__, lo->lo_number, lo->lo_file_name, -- GitLab From bb8ba1da294318ef080cfedb3a632d0035bb0949 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Tue, 10 Mar 2020 14:06:54 +0100 Subject: [PATCH 0725/1278] UPSTREAM: loop: Only freeze block queue when needed. __loop_update_dio() can be called as a part of loop_set_fd(), when the block queue is not yet up and running; avoid freezing the block queue in that case, since that is an expensive operation. 
Bug: 148607611 Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Martijn Coenen Signed-off-by: Jens Axboe (cherry picked from commit 0fbcf57982346763ec636f176d5afaa367b5f71b) Change-Id: I17d8de6b6b54a667703d60ea1c62449bb14331da --- drivers/block/loop.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index fb2708f03fc1..39b119af65f7 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -211,7 +211,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) * LO_FLAGS_READ_ONLY, both are set from kernel, and losetup * will get updated by ioctl(LOOP_GET_STATUS) */ - blk_mq_freeze_queue(lo->lo_queue); + if (lo->lo_state == Lo_bound) + blk_mq_freeze_queue(lo->lo_queue); lo->use_dio = use_dio; if (use_dio) { queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); @@ -220,7 +221,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; } - blk_mq_unfreeze_queue(lo->lo_queue); + if (lo->lo_state == Lo_bound) + blk_mq_unfreeze_queue(lo->lo_queue); } static int -- GitLab From 64029e7ca4f4bf40bccdabdb29d324a81afb11d1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 18 Mar 2020 16:05:11 -0700 Subject: [PATCH 0726/1278] FROMLIST: kmod: make request_module() return an error when autoloading is disabled It's long been possible to disable kernel module autoloading completely (while still allowing manual module insertion) by setting /proc/sys/kernel/modprobe to the empty string. This can be preferable to setting it to a nonexistent file since it avoids the overhead of an attempted execve(), avoids potential deadlocks, and avoids the call to security_kernel_module_request() and thus on SELinux-based systems eliminates the need to write SELinux rules to dontaudit module_request. 
However, when module autoloading is disabled in this way, request_module() returns 0. This is broken because callers expect 0 to mean that the module was successfully loaded. Apparently this was never noticed because this method of disabling module autoloading isn't used much, and also most callers don't use the return value of request_module() since it's always necessary to check whether the module registered its functionality or not anyway. But improperly returning 0 can indeed confuse a few callers, for example get_fs_type() in fs/filesystems.c where it causes a WARNING to be hit: if (!fs && (request_module("fs-%.*s", len, name) == 0)) { fs = __get_fs_type(name, len); WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name); } This is easily reproduced with: echo > /proc/sys/kernel/modprobe mount -t NONEXISTENT none / It causes: request_module fs-NONEXISTENT succeeded, but still no fs? WARNING: CPU: 1 PID: 1106 at fs/filesystems.c:275 get_fs_type+0xd6/0xf0 [...] This should actually use pr_warn_once() rather than WARN_ONCE(), since it's also user-reachable if userspace immediately unloads the module. Regardless, request_module() should correctly return an error when it fails. So let's make it return -ENOENT, which matches the error when the modprobe binary doesn't exist. I've also sent patches to document and test this case. 
Acked-by: Luis Chamberlain Reviewed-by: Jessica Yu Reviewed-by: Kees Cook Cc: stable@vger.kernel.org Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: NeilBrown Link: https://lore.kernel.org/r/20200318230515.171692-2-ebiggers@kernel.org Bug: 151589316 Change-Id: I5e04f85e12a4f85da23e53bc11da1ade565abcd6 Signed-off-by: Eric Biggers --- kernel/kmod.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kmod.c b/kernel/kmod.c index bc6addd9152b..a2de58de6ab6 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -120,7 +120,7 @@ static int call_modprobe(char *module_name, int wait) * invoke it. * * If module auto-loading support is disabled then this function - * becomes a no-operation. + * simply returns -ENOENT. */ int __request_module(bool wait, const char *fmt, ...) { @@ -137,7 +137,7 @@ int __request_module(bool wait, const char *fmt, ...) WARN_ON_ONCE(wait && current_is_async()); if (!modprobe_path[0]) - return 0; + return -ENOENT; va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); -- GitLab From 285ad248f6744133988a915887f1ec35f980ee74 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Feb 2020 18:26:46 +0800 Subject: [PATCH 0727/1278] f2fs: fix to check i_compr_blocks correctly inode.i_blocks is counted in 512-byte sectors, so we need to convert it to a 4KB-sized block count before comparing it to i_compr_blocks. In addition, print a message when the sanity check on the inode compression configs fails.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 78c3f1d70f1d..a7c8a7347ff7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -291,13 +291,30 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) fi->i_flags & F2FS_COMPR_FL && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_log_cluster_size)) { - if (ri->i_compress_algorithm >= COMPRESS_MAX) + if (ri->i_compress_algorithm >= COMPRESS_MAX) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported " + "compress algorithm: %u, run fsck to fix", + __func__, inode->i_ino, + ri->i_compress_algorithm); return false; - if (le64_to_cpu(ri->i_compr_blocks) > inode->i_blocks) + } + if (le64_to_cpu(ri->i_compr_blocks) > + SECTOR_TO_BLOCK(inode->i_blocks)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has inconsistent " + "i_compr_blocks:%llu, i_blocks:%lu, run fsck to fix", + __func__, inode->i_ino, + le64_to_cpu(ri->i_compr_blocks), + SECTOR_TO_BLOCK(inode->i_blocks)); return false; + } if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE || - ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) + ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported " + "log cluster size: %u, run fsck to fix", + __func__, inode->i_ino, + ri->i_log_cluster_size); return false; + } } return true; -- GitLab From b3837cc810a4911be2faecc8058ad1fc6b40bc58 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Feb 2020 19:30:03 +0800 Subject: [PATCH 0728/1278] f2fs: cover last_disk_size update with spinlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change solves below hangtask issue: INFO: task kworker/u16:1:58 blocked for more than 122 seconds. Not tainted 5.6.0-rc2-00590-g9983bdae4974e #11 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
kworker/u16:1 D 0 58 2 0x00000000 Workqueue: writeback wb_workfn (flush-179:0) Backtrace: (__schedule) from [] (schedule+0x78/0xf4) (schedule) from [] (rwsem_down_write_slowpath+0x24c/0x4c0) (rwsem_down_write_slowpath) from [] (down_write+0x6c/0x70) (down_write) from [] (f2fs_write_single_data_page+0x608/0x7ac) (f2fs_write_single_data_page) from [] (f2fs_write_cache_pages+0x2b4/0x7c4) (f2fs_write_cache_pages) from [] (f2fs_write_data_pages+0x344/0x35c) (f2fs_write_data_pages) from [] (do_writepages+0x3c/0xd4) (do_writepages) from [] (__writeback_single_inode+0x44/0x454) (__writeback_single_inode) from [] (writeback_sb_inodes+0x204/0x4b0) (writeback_sb_inodes) from [] (__writeback_inodes_wb+0x50/0xe4) (__writeback_inodes_wb) from [] (wb_writeback+0x294/0x338) (wb_writeback) from [] (wb_workfn+0x35c/0x54c) (wb_workfn) from [] (process_one_work+0x214/0x544) (process_one_work) from [] (worker_thread+0x4c/0x574) (worker_thread) from [] (kthread+0x144/0x170) (kthread) from [] (ret_from_fork+0x14/0x2c) Reported-and-tested-by: Ondřej Jirman Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 4 ++-- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 5 +++-- fs/f2fs/file.c | 4 ++-- fs/f2fs/super.c | 1 + 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 4550d3531e2e..8672e00133ea 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -903,10 +903,10 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); - down_write(&fi->i_sem); + spin_lock(&fi->i_size_lock); if (fi->last_disk_size < psize) fi->last_disk_size = psize; - up_write(&fi->i_sem); + spin_unlock(&fi->i_size_lock); f2fs_put_rpages(cc); f2fs_destroy_compress_ctx(cc); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2dfeeda3fea9..eba1331fc8f7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2648,10 +2648,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, if (err) 
{ file_set_keep_isize(inode); } else { - down_write(&F2FS_I(inode)->i_sem); + spin_lock(&F2FS_I(inode)->i_size_lock); if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; - up_write(&F2FS_I(inode)->i_sem); + spin_unlock(&F2FS_I(inode)->i_size_lock); } done: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 89b9edc78176..04b19d43bce1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -694,6 +694,7 @@ struct f2fs_inode_info { struct task_struct *cp_task; /* separate cp/wb IO stats*/ nid_t i_xattr_nid; /* node id that contains xattrs */ loff_t last_disk_size; /* lastly written file size */ + spinlock_t i_size_lock; /* protect last_disk_size */ #ifdef CONFIG_QUOTA struct dquot *i_dquot[MAXQUOTAS]; @@ -2854,9 +2855,9 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) if (!f2fs_is_time_consistent(inode)) return false; - down_read(&F2FS_I(inode)->i_sem); + spin_lock(&F2FS_I(inode)->i_size_lock); ret = F2FS_I(inode)->last_disk_size == i_size_read(inode); - up_read(&F2FS_I(inode)->i_sem); + spin_unlock(&F2FS_I(inode)->i_size_lock); return ret; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b5e87d0403e8..7e47100ed073 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -931,10 +931,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - down_write(&F2FS_I(inode)->i_sem); + spin_lock(&F2FS_I(inode)->i_size_lock); inode->i_mtime = inode->i_ctime = current_time(inode); F2FS_I(inode)->last_disk_size = i_size_read(inode); - up_write(&F2FS_I(inode)->i_sem); + spin_unlock(&F2FS_I(inode)->i_size_lock); } __setattr_copy(inode, attr); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cdd5152cee89..013e7d096b3e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -960,6 +960,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); init_rwsem(&fi->i_sem); + spin_lock_init(&fi->i_size_lock); 
INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); INIT_LIST_HEAD(&fi->inmem_ilist); -- GitLab From 9da39983e06357a315a10649317b5c81302810d5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Feb 2020 19:30:04 +0800 Subject: [PATCH 0729/1278] f2fs: remove i_sem lock coverage in f2fs_setxattr() f2fs_inode.xattr_ver field was gone after commit d260081ccf37 ("f2fs: change recovery policy of xattr node block"), remove i_sem lock coverage in f2fs_setxattr() Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 296b3189448a..6794a13064be 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -758,12 +758,9 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - /* protect xattr_ver */ - down_write(&F2FS_I(inode)->i_sem); down_write(&F2FS_I(inode)->i_xattr_sem); err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); up_write(&F2FS_I(inode)->i_xattr_sem); - up_write(&F2FS_I(inode)->i_sem); f2fs_unlock_op(sbi); f2fs_update_time(sbi, REQ_TIME); -- GitLab From 5c8becaa65863358ad8f6901b381743b7853bbf5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Feb 2020 19:30:05 +0800 Subject: [PATCH 0730/1278] f2fs: fix inconsistent comments Lack of maintenance on comments may mislead developers, fix them. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 18 ++++-------------- fs/f2fs/data.c | 19 ++++++------------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 1 - fs/f2fs/gc.c | 5 ++++- fs/f2fs/inode.c | 2 +- fs/f2fs/namei.c | 2 +- fs/f2fs/node.c | 6 +----- fs/f2fs/shrinker.c | 2 +- fs/f2fs/super.c | 4 ++-- 10 files changed, 21 insertions(+), 40 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 485cae1ccc90..0d74d6d23f81 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -50,9 +50,6 @@ struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) return page; } -/* - * We guarantee no failure on the returned page. - */ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, bool is_meta) { @@ -206,7 +203,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, } /* - * Readahead CP/NAT/SIT/SSA pages + * Readahead CP/NAT/SIT/SSA/POR pages */ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync) @@ -898,7 +895,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) return -ENOMEM; /* * Finding out valid cp block involves read both - * sets( cp pack1 and cp pack 2) + * sets( cp pack 1 and cp pack 2) */ cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr); cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); @@ -1385,10 +1382,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Flush all the NAT/SIT pages */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - /* - * modify checkpoint - * version number is already updated - */ + /* start to update checkpoint, cp ver is already updated previously */ ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { @@ -1541,9 +1535,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return unlikely(f2fs_cp_error(sbi)) ? 
-EIO : 0; } -/* - * We guarantee that this checkpoint procedure will not fail. - */ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1611,7 +1602,6 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_flush_sit_entries(sbi, cpc); - /* unlock all the fs_lock[] in do_checkpoint() */ err = do_checkpoint(sbi, cpc); if (err) f2fs_release_discard_addrs(sbi); @@ -1624,7 +1614,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (cpc->reason & CP_RECOVERY) f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver); - /* do checkpoint periodically */ + /* update CP_TIME to trigger checkpoint periodically */ f2fs_update_time(sbi, CP_TIME); trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index eba1331fc8f7..bb8cd804101c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -365,9 +365,6 @@ static void f2fs_write_end_io(struct bio *bio) bio_put(bio); } -/* - * Return true, if pre_bio's bdev is same as its target device. - */ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio) { @@ -404,6 +401,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) return 0; } +/* + * Return true, if pre_bio's bdev is same as its target device. + */ static bool __same_bdev(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio) { @@ -411,9 +411,6 @@ static bool __same_bdev(struct f2fs_sb_info *sbi, return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno; } -/* - * Low-level block read/write IO operations. 
- */ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) { struct f2fs_sb_info *sbi = fio->sbi; @@ -1389,13 +1386,9 @@ void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) } /* - * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with - * f2fs_map_blocks structure. - * If original data blocks are allocated, then give them to blockdev. - * Otherwise, - * a. preallocate requested block addresses - * b. do not use extent cache for better performance - * c. give the block addresses to blockdev + * f2fs_map_blocks() tries to find or build mapping relationship which + * maps continuous logical blocks to physical blocks, and return such + * info via f2fs_map_blocks structure. */ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 04b19d43bce1..cfdb81aac921 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2387,9 +2387,9 @@ static inline block_t datablock_addr(struct inode *inode, raw_node = F2FS_NODE(node_page); - /* from GC path only */ if (is_inode) { if (!inode) + /* from GC path only */ base = offset_in_addr(&raw_node->i); else if (f2fs_has_extra_attr(inode)) base = get_extra_isize(inode); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7e47100ed073..d7463a8938c7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -111,7 +111,6 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) } } - /* fill the page */ f2fs_wait_on_page_writeback(page, DATA, false, true); /* wait for GCed page writeback via META_MAPPING */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 7c28b794e438..16780393023a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -192,7 +192,10 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->ofs_unit = sbi->segs_per_sec; } - /* we need to check every dirty segments in the FG_GC case */ + /* + * adjust candidates range, should select all dirty segments for + * foreground GC and urgent GC cases. 
+ */ if (gc_type != FG_GC && (sbi->gc_mode != GC_URGENT) && p->max_search > sbi->max_victim_search) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index a7c8a7347ff7..34b9e1fe1625 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -776,7 +776,7 @@ void f2fs_evict_inode(struct inode *inode) else f2fs_inode_synced(inode); - /* ino == 0, if f2fs_new_inode() was failed t*/ + /* for the case f2fs_new_inode() was failed, .i_ino is zero, skip it */ if (inode->i_ino) invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index efdab303d94a..5dbd23e26413 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -177,7 +177,7 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub) } /* - * Set multimedia files as cold files for hot/cold data separation + * Set file's temperature for hot/cold data separation */ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index fe63e74bd02a..7b66a87db607 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -510,9 +510,6 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) return nr - nr_shrink; } -/* - * This function always returns success - */ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) { @@ -714,8 +711,7 @@ static int get_node_path(struct inode *inode, long block, /* * Caller should call f2fs_put_dnode(dn). * Also, it should grab and release a rwsem by calling f2fs_lock_op() and - * f2fs_unlock_op() only if ro is not set RDONLY_NODE. - * In the case of RDONLY_NODE, we don't need to care about mutex. + * f2fs_unlock_op() only if mode is set with ALLOC_NODE. 
*/ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index a467aca29cfe..d66de5999a26 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -58,7 +58,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, /* count extent cache entries */ count += __count_extent_cache(sbi); - /* shrink clean nat cache entries */ + /* count clean nat cache entries */ count += __count_nat_entries(sbi); /* count free nids cache entries */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 013e7d096b3e..2771bae2a5de 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1666,7 +1666,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) out_unlock: up_write(&sbi->gc_lock); restore_flag: - sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return err; } @@ -3618,7 +3618,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) f2fs_err(sbi, "Cannot turn on quotas: error %d", err); } #endif - /* if there are nt orphan nodes free them */ + /* if there are any orphan inodes, free them */ err = f2fs_recover_orphan_inodes(sbi); if (err) goto free_meta; -- GitLab From c591051c764e939c59a377071d63431a9794ede9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 2 Mar 2020 17:34:27 +0800 Subject: [PATCH 0731/1278] f2fs: fix to avoid using uninitialized variable In f2fs_vm_page_mkwrite(), if inode is compress one, and current mmapped page locates in compressed cluster, we have to call f2fs_get_dnode_of_data() to get its physical block address before f2fs_wait_on_block_writeback(). 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d7463a8938c7..68718a588ed4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -105,10 +105,18 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) err = f2fs_get_block(&dn, page->index); f2fs_put_dnode(&dn); __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); - if (err) { - unlock_page(page); - goto out_sem; - } + } + +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (!need_alloc) { + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + f2fs_put_dnode(&dn); + } +#endif + if (err) { + unlock_page(page); + goto out_sem; } f2fs_wait_on_page_writeback(page, DATA, false, true); -- GitLab From 4c97ea65e3a6d3861467d398d11d2ed23b51e3db Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 28 Feb 2020 18:08:46 +0800 Subject: [PATCH 0732/1278] f2fs: fix to avoid use-after-free in f2fs_write_multi_pages() In a compressed cluster, if the physical block count is less than the logical page count, a race condition can cause a use-after-free issue, as described below: - f2fs_write_compressed_pages - fio.page = cic->rpages[0]; - f2fs_outplace_write_data - f2fs_compress_write_end_io - kfree(cic->rpages); - kfree(cic); - fio.page = cic->rpages[1]; f2fs_write_multi_pages+0xfd0/0x1a98 f2fs_write_data_pages+0x74c/0xb5c do_writepages+0x64/0x108 __writeback_single_inode+0xdc/0x4b8 writeback_sb_inodes+0x4d0/0xa68 __writeback_inodes_wb+0x88/0x178 wb_writeback+0x1f0/0x424 wb_workfn+0x2f4/0x574 process_one_work+0x210/0x48c worker_thread+0x2e8/0x44c kthread+0x110/0x120 ret_from_fork+0x10/0x18 Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 8672e00133ea..d8d9c4ede746 100644 ---
a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -853,7 +853,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - fio.page = cic->rpages[i]; + fio.page = cc->rpages[i]; fio.old_blkaddr = blkaddr; /* cluster header */ -- GitLab From 64b81bbb172c8e62b86eb31083b1db07241c0170 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 5 Mar 2020 15:20:26 -0800 Subject: [PATCH 0733/1278] f2fs: fix wrong check on F2FS_IOC_FSSETXATTR This fixes the incorrect failure when enabling project quota on casefold-enabled file. Cc: Daniel Rosenberg Cc: kernel-team@android.com Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 68718a588ed4..3f49693898ff 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1796,12 +1796,15 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) { struct f2fs_inode_info *fi = F2FS_I(inode); + u32 masked_flags = fi->i_flags & mask; + + f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask)); /* Is it quota file? 
Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) return -EPERM; - if ((iflags ^ fi->i_flags) & F2FS_CASEFOLD_FL) { + if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) { if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) return -EOPNOTSUPP; if (!f2fs_empty_dir(inode)) @@ -1815,9 +1818,9 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) return -EINVAL; } - if ((iflags ^ fi->i_flags) & F2FS_COMPR_FL) { + if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { if (S_ISREG(inode->i_mode) && - (fi->i_flags & F2FS_COMPR_FL || i_size_read(inode) || + (masked_flags & F2FS_COMPR_FL || i_size_read(inode) || F2FS_HAS_BLOCKS(inode))) return -EINVAL; if (iflags & F2FS_NOCOMP_FL) @@ -1834,8 +1837,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) set_compress_context(inode); } } - if ((iflags ^ fi->i_flags) & F2FS_NOCOMP_FL) { - if (fi->i_flags & F2FS_COMPR_FL) + if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) { + if (masked_flags & F2FS_COMPR_FL) return -EINVAL; } -- GitLab From 12f0e805e660b35d5c94074cbc6a9bf8d67cde26 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Mar 2020 20:50:07 +0800 Subject: [PATCH 0734/1278] f2fs: fix to account compressed inode correctly stat_inc_compr_inode() needs to check FI_COMPRESSED_FILE flag, so in f2fs_disable_compressed_file(), we should call stat_dec_compr_inode() before clearing FI_COMPRESSED_FILE flag. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cfdb81aac921..8b0bb4441c69 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3821,8 +3821,8 @@ static inline u64 f2fs_disable_compressed_file(struct inode *inode) return fi->i_compr_blocks; fi->i_flags &= ~F2FS_COMPR_FL; - clear_inode_flag(inode, FI_COMPRESSED_FILE); stat_dec_compr_inode(inode); + clear_inode_flag(inode, FI_COMPRESSED_FILE); return 0; } -- GitLab From aa117b000232c4b5142ac0e7e22540d3a309a55f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Mar 2020 20:50:08 +0800 Subject: [PATCH 0735/1278] f2fs: fix to check dirty pages during compressed inode conversion A compressed cluster can be generated during dirty data writeback, so if there are dirty pages on a compressed inode, converting the compressed inode to a non-compressed one must be disallowed. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8b0bb4441c69..fd73c67cb916 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3817,6 +3817,8 @@ static inline u64 f2fs_disable_compressed_file(struct inode *inode) if (!f2fs_compressed_file(inode)) return 0; + if (get_dirty_pages(inode)) + return 1; if (fi->i_compr_blocks) return fi->i_compr_blocks; -- GitLab From 32ed535f9d8b702c6860e059638dba6cbafbc2c9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Mar 2020 20:50:09 +0800 Subject: [PATCH 0736/1278] f2fs: allow to clear F2FS_COMPR_FL flag If a regular inode has no compressed cluster, allow using 'chattr -c' to remove its compress flag, converting it back to a non-compressed file.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3f49693898ff..8cedd97e5c64 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1819,10 +1819,10 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) } if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { - if (S_ISREG(inode->i_mode) && - (masked_flags & F2FS_COMPR_FL || i_size_read(inode) || - F2FS_HAS_BLOCKS(inode))) - return -EINVAL; + if (S_ISREG(inode->i_mode) && (masked_flags & F2FS_COMPR_FL)) { + if (f2fs_disable_compressed_file(inode)) + return -EINVAL; + } if (iflags & F2FS_NOCOMP_FL) return -EINVAL; if (iflags & F2FS_COMPR_FL) { -- GitLab From 92e8fe1dc03606b4213951690d91b4e9c8210a2d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 Mar 2020 10:33:53 +0100 Subject: [PATCH 0737/1278] f2fs: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). Signed-off-by: Takashi Iwai Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 227d3db5c989..a6e9e8d8bd4b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -109,47 +109,47 @@ static ssize_t features_show(struct f2fs_attr *a, return sprintf(buf, "0\n"); if (f2fs_sb_has_encrypt(sbi)) - len += snprintf(buf, PAGE_SIZE - len, "%s", + len += scnprintf(buf, PAGE_SIZE - len, "%s", "encryption"); if (f2fs_sb_has_blkzoned(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? 
", " : "", "blkzoned"); if (f2fs_sb_has_extra_attr(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "extra_attr"); if (f2fs_sb_has_project_quota(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "projquota"); if (f2fs_sb_has_inode_chksum(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_checksum"); if (f2fs_sb_has_flexible_inline_xattr(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "flexible_inline_xattr"); if (f2fs_sb_has_quota_ino(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "quota_ino"); if (f2fs_sb_has_inode_crtime(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_crtime"); if (f2fs_sb_has_lost_found(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "lost_found"); if (f2fs_sb_has_verity(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "verity"); if (f2fs_sb_has_sb_chksum(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "sb_checksum"); if (f2fs_sb_has_casefold(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "casefold"); if (f2fs_sb_has_compression(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? 
", " : "", "compression"); - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "pin_file"); - len += snprintf(buf + len, PAGE_SIZE - len, "\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -233,16 +233,16 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, int hot_count = sbi->raw_super->hot_ext_count; int len = 0, i; - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "cold file extension:\n"); for (i = 0; i < cold_count; i++) - len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", extlist[i]); - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "hot file extension:\n"); for (i = cold_count; i < cold_count + hot_count; i++) - len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", extlist[i]); return len; } -- GitLab From a092e482b89898806b5a6a462c12f0bb1be009fb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Feb 2020 19:08:16 -0800 Subject: [PATCH 0738/1278] f2fs: show mounted time Let's show mounted time. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 5 +++++ fs/f2fs/debug.c | 3 +++ fs/f2fs/segment.c | 2 +- fs/f2fs/segment.h | 7 ++++++- fs/f2fs/sysfs.c | 8 ++++++++ 5 files changed, 23 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index e0a8a2c2c0c1..3b4080fa15b8 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -318,3 +318,8 @@ Date: September 2019 Contact: "Hridya Valsaraju" Description: Average number of valid blocks. Available when CONFIG_F2FS_STAT_FS=y. 
+ +What: /sys/fs/f2fs//mounted_time_sec +Date: February 2020 +Contact: "Jaegeuk Kim" +Description: Show the mounted time in secs of this partition. diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 77a82f93b54c..05e9ad91167e 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -301,6 +301,9 @@ static int stat_show(struct seq_file *s, void *v) si->ssa_area_segs, si->main_area_segs); seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", si->overp_segs, si->rsvd_segs); + seq_printf(s, "Current Time Sec: %llu / Mounted Time Sec: %llu\n\n", + ktime_get_boottime_seconds(), + SIT_I(si->sbi)->mounted_time); if (test_opt(si->sbi, DISCARD)) seq_printf(s, "Utilization: %u%% (%u valid blocks, %u discard blocks)\n", si->utilization, si->valid_count, si->discard_blks); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dea5e265c8cc..43990d4eca5f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4070,7 +4070,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) sit_i->dirty_sentries = 0; sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); - sit_i->mounted_time = ktime_get_real_seconds(); + sit_i->mounted_time = ktime_get_boottime_seconds(); init_rwsem(&sit_i->sentry_lock); return 0; } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 459dc3901a57..c476e7ca3595 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -752,11 +752,16 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) #endif } +static inline time64_t ktime_get_boottime_seconds(void) +{ + return ktime_divns(ktime_get_boottime(), NSEC_PER_SEC); +} + static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi, bool base_time) { struct sit_info *sit_i = SIT_I(sbi); - time64_t diff, now = ktime_get_real_seconds(); + time64_t diff, now = ktime_get_boottime_seconds(); if (now >= sit_i->mounted_time) return sit_i->elapsed_time + now - sit_i->mounted_time; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 
a6e9e8d8bd4b..25d6fd82833a 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -185,6 +185,12 @@ static ssize_t encoding_show(struct f2fs_attr *a, return sprintf(buf, "(none)"); } +static ssize_t mounted_time_sec_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sprintf(buf, "%llu", SIT_I(sbi)->mounted_time); +} + #ifdef CONFIG_F2FS_STAT_FS static ssize_t moved_blocks_foreground_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) @@ -544,6 +550,7 @@ F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); F2FS_GENERAL_RO_ATTR(unusable); F2FS_GENERAL_RO_ATTR(encoding); +F2FS_GENERAL_RO_ATTR(mounted_time_sec); #ifdef CONFIG_F2FS_STAT_FS F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count); F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count); @@ -621,6 +628,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(reserved_blocks), ATTR_LIST(current_reserved_blocks), ATTR_LIST(encoding), + ATTR_LIST(mounted_time_sec), #ifdef CONFIG_F2FS_STAT_FS ATTR_LIST(cp_foreground_calls), ATTR_LIST(cp_background_calls), -- GitLab From 3e183428c5c048fc7241667428a5b1684a134bf6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:44:10 +0800 Subject: [PATCH 0739/1278] f2fs: clean up codes with {f2fs_,}data_blkaddr() - rename datablock_addr() to data_blkaddr(). - wrap data_blkaddr() with f2fs_data_blkaddr() to clean up parameters. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 7 +++---- fs/f2fs/data.c | 12 +++++------- fs/f2fs/f2fs.h | 7 ++++++- fs/f2fs/file.c | 15 +++++---------- fs/f2fs/gc.c | 2 +- fs/f2fs/node.c | 3 +-- fs/f2fs/recovery.c | 7 +++---- 7 files changed, 24 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d8d9c4ede746..6fbccc0fa9c8 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -562,7 +562,7 @@ static int f2fs_compressed_blocks(struct compress_ctx *cc) for (i = 1; i < cc->cluster_size; i++) { block_t blkaddr; - blkaddr = datablock_addr(dn.inode, + blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i); if (blkaddr != NULL_ADDR) ret++; @@ -802,7 +802,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, goto out_unlock_op; for (i = 0; i < cc->cluster_size; i++) { - if (datablock_addr(dn.inode, dn.node_page, + if (data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i) == NULL_ADDR) goto out_put_dnode; } @@ -851,8 +851,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, for (i = 0; i < cc->cluster_size; i++, dn.ofs_in_node++) { block_t blkaddr; - blkaddr = datablock_addr(dn.inode, dn.node_page, - dn.ofs_in_node); + blkaddr = f2fs_data_blkaddr(&dn); fio.page = cc->rpages[i]; fio.old_blkaddr = blkaddr; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index bb8cd804101c..52cd935488e0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1045,8 +1045,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); for (; count > 0; dn->ofs_in_node++) { - block_t blkaddr = datablock_addr(dn->inode, - dn->node_page, dn->ofs_in_node); + block_t blkaddr = f2fs_data_blkaddr(dn); if (blkaddr == NULL_ADDR) { dn->data_blkaddr = NEW_ADDR; __set_data_blkaddr(dn); @@ -1298,8 +1297,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) if (err) return err; - dn->data_blkaddr = 
datablock_addr(dn->inode, - dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = f2fs_data_blkaddr(dn); if (dn->data_blkaddr != NULL_ADDR) goto alloc; @@ -1461,7 +1459,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, end_offset = ADDRS_PER_PAGE(dn.node_page, inode); next_block: - blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + blkaddr = f2fs_data_blkaddr(&dn); if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { @@ -2061,7 +2059,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, for (i = 1; i < cc->cluster_size; i++) { block_t blkaddr; - blkaddr = datablock_addr(dn.inode, dn.node_page, + blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) @@ -2090,7 +2088,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, struct page *page = dic->cpages[i]; block_t blkaddr; - blkaddr = datablock_addr(dn.inode, dn.node_page, + blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i + 1); if (bio && !page_is_mergeable(sbi, bio, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fd73c67cb916..debf88009006 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2377,7 +2377,7 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node) } static inline int f2fs_has_extra_attr(struct inode *inode); -static inline block_t datablock_addr(struct inode *inode, +static inline block_t data_blkaddr(struct inode *inode, struct page *node_page, unsigned int offset) { struct f2fs_node *raw_node; @@ -2399,6 +2399,11 @@ static inline block_t datablock_addr(struct inode *inode, return le32_to_cpu(addr_array[base + offset]); } +static inline block_t f2fs_data_blkaddr(struct dnode_of_data *dn) +{ + return data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node); +} + static inline int f2fs_test_bit(unsigned int nr, char *addr) { int mask; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 
8cedd97e5c64..021d345ddd54 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -454,8 +454,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) data_ofs = (loff_t)pgofs << PAGE_SHIFT) { block_t blkaddr; - blkaddr = datablock_addr(dn.inode, - dn.node_page, dn.ofs_in_node); + blkaddr = f2fs_data_blkaddr(&dn); if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), @@ -1118,8 +1117,7 @@ static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) - dn.ofs_in_node, len); for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { - *blkaddr = datablock_addr(dn.inode, - dn.node_page, dn.ofs_in_node); + *blkaddr = f2fs_data_blkaddr(&dn); if (__is_valid_data_blkaddr(*blkaddr) && !f2fs_is_valid_blkaddr(sbi, *blkaddr, @@ -1208,8 +1206,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, ADDRS_PER_PAGE(dn.node_page, dst_inode) - dn.ofs_in_node, len - i); do { - dn.data_blkaddr = datablock_addr(dn.inode, - dn.node_page, dn.ofs_in_node); + dn.data_blkaddr = f2fs_data_blkaddr(&dn); f2fs_truncate_data_blocks_range(&dn, 1); if (do_replace[i]) { @@ -1385,8 +1382,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, int ret; for (; index < end; index++, dn->ofs_in_node++) { - if (datablock_addr(dn->inode, dn->node_page, - dn->ofs_in_node) == NULL_ADDR) + if (f2fs_data_blkaddr(dn) == NULL_ADDR) count++; } @@ -1397,8 +1393,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, dn->ofs_in_node = ofs_in_node; for (index = start; index < end; index++, dn->ofs_in_node++) { - dn->data_blkaddr = datablock_addr(dn->inode, - dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = f2fs_data_blkaddr(dn); /* * f2fs_reserve_new_blocks will not guarantee entire block * allocation. 
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 16780393023a..bc03deb04bb0 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -637,7 +637,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } *nofs = ofs_of_node(node_page); - source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node); + source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); f2fs_put_page(node_page, 1); if (source_blkaddr != blkaddr) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7b66a87db607..1e0a31af9248 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -803,8 +803,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) dn->nid = nids[level]; dn->ofs_in_node = offset[level]; dn->node_page = npage[level]; - dn->data_blkaddr = datablock_addr(dn->inode, - dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = f2fs_data_blkaddr(dn); return 0; release_pages: diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index f177e2749f19..00939ca2949d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -496,8 +496,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, return 0; truncate_out: - if (datablock_addr(tdn.inode, tdn.node_page, - tdn.ofs_in_node) == blkaddr) + if (f2fs_data_blkaddr(&tdn) == blkaddr) f2fs_truncate_data_blocks_range(&tdn, 1); if (dn->inode->i_ino == nid && !dn->inode_page_locked) unlock_page(dn->inode_page); @@ -560,8 +559,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, for (; start < end; start++, dn.ofs_in_node++) { block_t src, dest; - src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - dest = datablock_addr(dn.inode, page, dn.ofs_in_node); + src = f2fs_data_blkaddr(&dn); + dest = data_blkaddr(dn.inode, page, dn.ofs_in_node); if (__is_valid_data_blkaddr(src) && !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { -- GitLab From 4552aa82f95f1e03c70ceb4806272206b59ede64 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:44:11 +0800 Subject: [PATCH 
0740/1278] f2fs: clean up parameter of macro XATTR_SIZE() Just cleanup, no logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 10 ++++------ fs/f2fs/xattr.h | 3 ++- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 6794a13064be..d4800df6302d 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -312,12 +312,12 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, if (!xnid && !inline_size) return -ENODATA; - *base_size = XATTR_SIZE(xnid, inode) + XATTR_PADDING_SIZE; + *base_size = XATTR_SIZE(inode) + XATTR_PADDING_SIZE; txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS); if (!txattr_addr) return -ENOMEM; - last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(xnid, inode); + last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(inode); /* read from inline xattr */ if (inline_size) { @@ -539,7 +539,6 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { struct inode *inode = d_inode(dentry); - nid_t xnid = F2FS_I(inode)->i_xattr_nid; struct f2fs_xattr_entry *entry; void *base_addr, *last_base_addr; int error = 0; @@ -551,7 +550,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if (error) return error; - last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode); + last_base_addr = (void *)base_addr + XATTR_SIZE(inode); list_for_each_xattr(entry, base_addr) { const struct xattr_handler *handler = @@ -609,7 +608,6 @@ static int __f2fs_setxattr(struct inode *inode, int index, { struct f2fs_xattr_entry *here, *last; void *base_addr, *last_base_addr; - nid_t xnid = F2FS_I(inode)->i_xattr_nid; int found, newsize; size_t len; __u32 new_hsize; @@ -633,7 +631,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (error) return error; - last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode); + last_base_addr 
= (void *)base_addr + XATTR_SIZE(inode); /* find entry with wanted name. */ here = __find_xattr(base_addr, last_base_addr, index, len, name); diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index de0c600b9cab..574beea46494 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -73,7 +73,8 @@ struct f2fs_xattr_entry { entry = XATTR_NEXT_ENTRY(entry)) #define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) #define XATTR_PADDING_SIZE (sizeof(__u32)) -#define XATTR_SIZE(x,i) (((x) ? VALID_XATTR_BLOCK_SIZE : 0) + \ +#define XATTR_SIZE(i) ((F2FS_I(i)->i_xattr_nid ? \ + VALID_XATTR_BLOCK_SIZE : 0) + \ (inline_xattr_size(i))) #define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \ VALID_XATTR_BLOCK_SIZE) -- GitLab From 139d4f6df027a8119e91ba8f0d7d2496861abca3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:45:11 +0800 Subject: [PATCH 0741/1278] f2fs: fix to show norecovery mount option Previously, the 'norecovery' mount option was shown as 'disable_roll_forward'; fix this to show the original option name correctly.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index debf88009006..c7dd5ba32d2e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -101,6 +101,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define F2FS_MOUNT_RESERVE_ROOT 0x01000000 #define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000 +#define F2FS_MOUNT_NORECOVERY 0x04000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2771bae2a5de..8d3b06179351 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -446,7 +446,7 @@ static int parse_options(struct super_block *sb, char *options) break; case Opt_norecovery: /* this option mounts f2fs with ro */ - set_opt(sbi, DISABLE_ROLL_FORWARD); + set_opt(sbi, NORECOVERY); if (!f2fs_readonly(sb)) return -EINVAL; break; @@ -1455,6 +1455,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) } if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); + if (test_opt(sbi, NORECOVERY)) + seq_puts(seq, ",norecovery"); if (test_opt(sbi, DISCARD)) seq_puts(seq, ",discard"); else @@ -3627,7 +3629,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto reset_checkpoint; /* recover fsynced data */ - if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { + if (!test_opt(sbi, DISABLE_ROLL_FORWARD) && + !test_opt(sbi, NORECOVERY)) { /* * mount should be failed, when device has readonly mode, and * previous checkpoint was not done by clean system shutdown. 
-- GitLab From 8c021bf25a38df07bc358cefca1a869590dbdc28 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:44:12 +0800 Subject: [PATCH 0742/1278] f2fs: clean up lfs/adaptive mount option This patch removes F2FS_MOUNT_ADAPTIVE and F2FS_MOUNT_LFS mount options, and add F2FS_OPTION.fs_mode with below two status to indicate filesystem mode. enum { FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ FS_MODE_LFS, /* use lfs allocation only */ }; It can enhance code readability and fs mode's scalability. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++---- fs/f2fs/f2fs.h | 27 ++++++++++----------------- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 12 ++++++------ fs/f2fs/super.c | 16 ++++++++-------- 6 files changed, 30 insertions(+), 37 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 52cd935488e0..9e9d5247d6a0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -443,7 +443,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (type != DATA && type != NODE) goto submit_io; - if (test_opt(sbi, LFS) && current->plug) + if (f2fs_lfs_mode(sbi) && current->plug) blk_finish_plug(current->plug); if (F2FS_IO_ALIGNED(sbi)) @@ -1414,7 +1414,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, end = pgofs + maxblocks; if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { - if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) goto next_dnode; @@ -1469,7 +1469,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, if (__is_valid_data_blkaddr(blkaddr)) { /* use out-place-update for driect IO under LFS mode */ - if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) { err = __allocate_data_block(&dn, map->m_seg_type); if (err) @@ -2389,7 +2389,7 @@ bool f2fs_should_update_outplace(struct inode 
*inode, struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - if (test_opt(sbi, LFS)) + if (f2fs_lfs_mode(sbi)) return true; if (S_ISDIR(inode->i_mode)) return true; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c7dd5ba32d2e..da4b94b826d4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -92,8 +92,6 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_FORCE_FG_GC 0x00004000 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 -#define F2FS_MOUNT_ADAPTIVE 0x00020000 -#define F2FS_MOUNT_LFS 0x00040000 #define F2FS_MOUNT_USRQUOTA 0x00080000 #define F2FS_MOUNT_GRPQUOTA 0x00100000 #define F2FS_MOUNT_PRJQUOTA 0x00200000 @@ -140,6 +138,7 @@ struct f2fs_mount_info { int whint_mode; int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ + int fs_mode; /* fs mode: LFS or ADAPTIVE */ bool test_dummy_encryption; /* test dummy encryption */ block_t unusable_cap; /* Amount of space allowed to be * unusable when disabling checkpoint @@ -1171,6 +1170,11 @@ enum { GC_URGENT, }; +enum { + FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ + FS_MODE_LFS, /* use lfs allocation only */ +}; + enum { WHINT_MODE_OFF, /* not pass down write hints */ WHINT_MODE_USER, /* try to pass down hints given by users */ @@ -3907,20 +3911,9 @@ static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi) return false; } - -static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) +static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) { - clear_opt(sbi, ADAPTIVE); - clear_opt(sbi, LFS); - - switch (mt) { - case F2FS_MOUNT_ADAPTIVE: - set_opt(sbi, ADAPTIVE); - break; - case F2FS_MOUNT_LFS: - set_opt(sbi, LFS); - break; - } + return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } static inline bool f2fs_may_encrypt(struct inode *inode) @@ -3975,7 +3968,7 @@ static inline int allow_outplace_dio(struct inode *inode, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int rw = 
iov_iter_rw(iter); - return (test_opt(sbi, LFS) && (rw == WRITE) && + return (f2fs_lfs_mode(sbi) && (rw == WRITE) && !block_unaligned_IO(inode, iocb, iter)); } @@ -3997,7 +3990,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, */ if (f2fs_sb_has_blkzoned(sbi)) return true; - if (test_opt(sbi, LFS) && (rw == WRITE)) { + if (f2fs_lfs_mode(sbi) && (rw == WRITE)) { if (block_unaligned_IO(inode, iocb, iter)) return true; if (F2FS_IO_ALIGNED(sbi)) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 021d345ddd54..4f82a70d2b5f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1128,7 +1128,7 @@ static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { - if (test_opt(sbi, LFS)) { + if (f2fs_lfs_mode(sbi)) { f2fs_put_dnode(&dn); return -EOPNOTSUPP; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index bc03deb04bb0..95f40b7262be 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -765,7 +765,7 @@ static int move_data_block(struct inode *inode, block_t bidx, struct page *page, *mpage; block_t newaddr; int err = 0; - bool lfs_mode = test_opt(fio.sbi, LFS); + bool lfs_mode = f2fs_lfs_mode(fio.sbi); /* do not read out */ page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 43990d4eca5f..cbd55a96c3f0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -172,7 +172,7 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi) int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); - if (test_opt(sbi, LFS)) + if (f2fs_lfs_mode(sbi)) return false; if (sbi->gc_mode == GC_URGENT) return true; @@ -1939,7 +1939,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason & CP_DISCARD); - bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); + bool need_align = f2fs_lfs_mode(sbi) && 
__is_large_section(sbi); mutex_lock(&dirty_i->seglist_lock); @@ -1971,7 +1971,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, (end - 1) <= cpc->trim_end) continue; - if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) { + if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); continue; @@ -2829,7 +2829,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) struct discard_policy dpolicy; unsigned long long trimmed = 0; int err = 0; - bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); + bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi); if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; @@ -3192,7 +3192,7 @@ static void update_device_state(struct f2fs_io_info *fio) static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio); - bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA); + bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); if (keep_order) down_read(&fio->sbi->io_order_lock); @@ -4420,7 +4420,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; - if (!test_opt(sbi, LFS)) + if (!f2fs_lfs_mode(sbi)) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8d3b06179351..2377ee0eb904 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -600,10 +600,10 @@ static int parse_options(struct super_block *sb, char *options) kvfree(name); return -EINVAL; } - set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); + F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; } else if (strlen(name) == 3 && !strncmp(name, "lfs", 3)) { - set_opt_mode(sbi, 
F2FS_MOUNT_LFS); + F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; } else { kvfree(name); return -EINVAL; @@ -904,7 +904,7 @@ static int parse_options(struct super_block *sb, char *options) } #endif - if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { + if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) { f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO", F2FS_IO_SIZE_KB(sbi)); return -EINVAL; @@ -934,7 +934,7 @@ static int parse_options(struct super_block *sb, char *options) } } - if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) { + if (test_opt(sbi, DISABLE_CHECKPOINT) && f2fs_lfs_mode(sbi)) { f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n"); return -EINVAL; } @@ -1508,9 +1508,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",data_flush"); seq_puts(seq, ",mode="); - if (test_opt(sbi, ADAPTIVE)) + if (F2FS_OPTION(sbi).fs_mode == FS_MODE_ADAPTIVE) seq_puts(seq, "adaptive"); - else if (test_opt(sbi, LFS)) + else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS) seq_puts(seq, "lfs"); seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); if (test_opt(sbi, RESERVE_ROOT)) @@ -1597,9 +1597,9 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); if (f2fs_sb_has_blkzoned(sbi)) - set_opt_mode(sbi, F2FS_MOUNT_LFS); + F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; else - set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); + F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); -- GitLab From 6ea0030474263a125cc7fb08bdf984d71a2524dd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:44:13 +0800 Subject: [PATCH 0743/1278] f2fs: clean up bggc mount option There are three status for background gc: on, off and sync, it's a little bit confused to use test_opt(BG_GC) and test_opt(FORCE_FG_GC) combinations to indicate status of background gc. 
So let's remove F2FS_MOUNT_BG_GC and F2FS_MOUNT_FORCE_FG_GC mount options, and add F2FS_OPTION().bggc_mode with below three status to clean up codes and enhance bggc mode's scalability. enum { BGGC_MODE_ON, /* background gc is on */ BGGC_MODE_OFF, /* background gc is off */ BGGC_MODE_SYNC, /* * background gc is on, migrating blocks * like foreground gc */ }; Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 ++++++++++-- fs/f2fs/gc.c | 6 +++++- fs/f2fs/super.c | 29 +++++++++++++---------------- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index da4b94b826d4..2b3295fd34f7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -75,7 +75,6 @@ extern const char *f2fs_fault_name[FAULT_MAX]; /* * For mount options */ -#define F2FS_MOUNT_BG_GC 0x00000001 #define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 #define F2FS_MOUNT_DISCARD 0x00000004 #define F2FS_MOUNT_NOHEAP 0x00000008 @@ -89,7 +88,6 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_NOBARRIER 0x00000800 #define F2FS_MOUNT_FASTBOOT 0x00001000 #define F2FS_MOUNT_EXTENT_CACHE 0x00002000 -#define F2FS_MOUNT_FORCE_FG_GC 0x00004000 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 #define F2FS_MOUNT_USRQUOTA 0x00080000 @@ -139,6 +137,7 @@ struct f2fs_mount_info { int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ int fs_mode; /* fs mode: LFS or ADAPTIVE */ + int bggc_mode; /* bggc mode: off, on or sync */ bool test_dummy_encryption; /* test dummy encryption */ block_t unusable_cap; /* Amount of space allowed to be * unusable when disabling checkpoint @@ -1170,6 +1169,15 @@ enum { GC_URGENT, }; +enum { + BGGC_MODE_ON, /* background gc is on */ + BGGC_MODE_OFF, /* background gc is off */ + BGGC_MODE_SYNC, /* + * background gc is on, migrating blocks + * like foreground gc + */ +}; + enum { FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ FS_MODE_LFS, /* use lfs 
allocation only */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 95f40b7262be..5851500b0790 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -31,6 +31,8 @@ static int gc_thread_func(void *data) set_freezable(); do { + bool sync_mode; + wait_event_interruptible_timeout(*wq, kthread_should_stop() || freezing(current) || gc_th->gc_wake, @@ -101,8 +103,10 @@ static int gc_thread_func(void *data) do_gc: stat_inc_bggc_count(sbi->stat_info); + sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC; + /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO)) + if (f2fs_gc(sbi, sync_mode, true, NULL_SEGNO)) wait_ms = gc_th->no_gc_sleep_time; trace_f2fs_background_gc(sbi->sb, wait_ms, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2377ee0eb904..c2620c73fe41 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -427,14 +427,11 @@ static int parse_options(struct super_block *sb, char *options) if (!name) return -ENOMEM; if (strlen(name) == 2 && !strncmp(name, "on", 2)) { - set_opt(sbi, BG_GC); - clear_opt(sbi, FORCE_FG_GC); + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) { - clear_opt(sbi, BG_GC); - clear_opt(sbi, FORCE_FG_GC); + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) { - set_opt(sbi, BG_GC); - set_opt(sbi, FORCE_FG_GC); + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; } else { kvfree(name); return -EINVAL; @@ -1445,14 +1442,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); - if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) { - if (test_opt(sbi, FORCE_FG_GC)) - seq_printf(seq, ",background_gc=%s", "sync"); - else - seq_printf(seq, ",background_gc=%s", "on"); - } else { + if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) + seq_printf(seq, ",background_gc=%s", "sync"); + else if (F2FS_OPTION(sbi).bggc_mode == 
BGGC_MODE_ON) + seq_printf(seq, ",background_gc=%s", "on"); + else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) seq_printf(seq, ",background_gc=%s", "off"); - } + if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); if (test_opt(sbi, NORECOVERY)) @@ -1584,8 +1580,8 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; F2FS_OPTION(sbi).compress_ext_cnt = 0; + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; - set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); @@ -1791,7 +1787,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * or if background_gc = off is passed in mount * option. Also sync the filesystem. */ - if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { + if ((*flags & MS_RDONLY) || + F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) { if (sbi->gc_thread) { f2fs_stop_gc_thread(sbi); need_restart_gc = true; @@ -3685,7 +3682,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) { + if (F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = f2fs_start_gc_thread(sbi); if (err) -- GitLab From 649229ca4b63e4fe013c6784c62afd28561e0603 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 9 Feb 2020 13:27:09 -0800 Subject: [PATCH 0744/1278] f2fs: add migration count iff migration happens If first segment is empty and migration_granularity is 1, we can't move this at all. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5851500b0790..c69f77132854 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1240,12 +1240,12 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, segno, gc_type); stat_inc_seg_count(sbi, type, gc_type); + migrated++; freed: if (gc_type == FG_GC && get_valid_blocks(sbi, segno, false) == 0) seg_freed++; - migrated++; if (__is_large_section(sbi) && segno + 1 < end_segno) sbi->next_victim_seg[gc_type] = segno + 1; -- GitLab From 033c2a0b7cddd3cb85ef5a98046989032ba2bd4d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 9 Feb 2020 13:23:28 -0800 Subject: [PATCH 0745/1278] f2fs: skip GC when section is full This fixes skipping GC when segment is full in large section. Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c69f77132854..a87baa379678 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1025,8 +1025,8 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, * race condition along with SSR block allocation. */ if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) || - get_valid_blocks(sbi, segno, false) == - sbi->blocks_per_seg) + get_valid_blocks(sbi, segno, true) == + BLKS_PER_SEC(sbi)) return submitted; if (check_valid_map(sbi, segno, off) == 0) -- GitLab From ae3d1fa49d4e1c83bcf858b53116b43cf6b91f55 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 17 Feb 2020 17:45:44 +0800 Subject: [PATCH 0746/1278] f2fs: introduce DEFAULT_IO_TIMEOUT As Geert Uytterhoeven reported: for parameter HZ/50 in congestion_wait(BLK_RW_ASYNC, HZ/50); On some platforms, HZ can be less than 50, then unexpected 0 timeout jiffies will be set in congestion_wait(). 
This patch introduces a macro DEFAULT_IO_TIMEOUT that wraps a fixed value, msecs_to_jiffies(20), to be used instead of HZ/50 to avoid this issue. Quoted from Geert Uytterhoeven: "A timeout of HZ means 1 second. HZ/50 means 20 ms, but has the risk of being zero, if HZ < 50. If you want to use a timeout of 20 ms, you best use msecs_to_jiffies(20), as that takes care of the special cases, and never returns 0." Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/compress.c | 3 ++- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/gc.c | 3 ++- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 2 +- fs/f2fs/recovery.c | 5 +++-- fs/f2fs/segment.c | 10 ++++++---- fs/f2fs/super.c | 6 ++++-- 10 files changed, 25 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0d74d6d23f81..2a1a3e0b1b78 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1260,7 +1260,7 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) if (unlikely(f2fs_cp_error(sbi))) break; - io_schedule_timeout(HZ/50); + io_schedule_timeout(DEFAULT_IO_TIMEOUT); } finish_wait(&sbi->cp_wait, &wait); } diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 6fbccc0fa9c8..5551f13cc8c5 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -993,7 +993,8 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, } else if (ret == -EAGAIN) { ret = 0; cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); lock_page(cc->rpages[i]); clear_page_dirty_for_io(cc->rpages[i]); goto retry_write; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9e9d5247d6a0..d18a23294295 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2318,7 +2318,7 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio) /* flush pending IOs and wait for a while in the ENOMEM case */ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { f2fs_flush_merged_writes(fio->sbi); - congestion_wait(BLK_RW_ASYNC,
HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); gfp_flags |= __GFP_NOFAIL; goto retry_encrypt; } @@ -2909,7 +2909,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_ALL) { cond_resched(); congestion_wait(BLK_RW_ASYNC, - HZ/50); + DEFAULT_IO_TIMEOUT); goto retry_write; } goto next; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2b3295fd34f7..907e758a7800 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -556,6 +556,9 @@ enum { #define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */ +/* congestion wait timeout value, default: 20ms */ +#define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20)) + /* maximum retry quota flush count */ #define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a87baa379678..4b17c336c053 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -977,7 +977,8 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, if (err) { clear_cold_data(page); if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto retry; } if (is_dirty) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 34b9e1fe1625..2c3078f4381b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -535,7 +535,7 @@ struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino) inode = f2fs_iget(sb, ino); if (IS_ERR(inode)) { if (PTR_ERR(inode) == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto retry; } } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1e0a31af9248..d33382296352 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2599,7 +2599,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) retry: ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); if (!ipage) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto retry; } diff --git a/fs/f2fs/recovery.c 
b/fs/f2fs/recovery.c index 00939ca2949d..5288a6f71ca2 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -534,7 +534,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto retry_dn; } goto out; @@ -617,7 +617,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, err = check_index_in_prev_nodes(sbi, dest, &dn); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto retry_prev; } goto err; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cbd55a96c3f0..f0cf0383fd2e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -245,7 +245,8 @@ static int __revoke_inmem_pages(struct inode *inode, LOOKUP_NODE); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); cond_resched(); goto retry; } @@ -312,7 +313,7 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure) skip: iput(inode); } - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); cond_resched(); if (gc_failure) { if (++looped >= count) @@ -415,7 +416,8 @@ static int __f2fs_commit_inmem_pages(struct inode *inode) err = f2fs_do_write_data_page(&fio); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); cond_resched(); goto retry; } @@ -2800,7 +2802,7 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); trimmed += __wait_all_discard_cmd(sbi, NULL); - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto next; } skip: diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 
c2620c73fe41..db725c7f089c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1893,7 +1893,8 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS); if (IS_ERR(page)) { if (PTR_ERR(page) == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto repeat; } set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); @@ -1947,7 +1948,8 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, &page, NULL); if (unlikely(err)) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto retry; } set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); -- GitLab From 7e3f738ff6fda9ff8a1c21e0d70c90e75443ba0b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 17 Feb 2020 17:46:20 +0800 Subject: [PATCH 0747/1278] f2fs: add prefix for f2fs slab cache name In order to avoid polluting global slab cache namespace. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/node.c | 8 ++++---- fs/f2fs/segment.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d18a23294295..1cd2aec8bd14 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3847,7 +3847,7 @@ void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi) int __init f2fs_init_bio_entry_cache(void) { - bio_entry_slab = f2fs_kmem_cache_create("bio_entry_slab", + bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab", sizeof(struct bio_entry)); if (!bio_entry_slab) return -ENOMEM; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d33382296352..4dad40785da3 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -3190,22 +3190,22 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) int __init f2fs_create_node_manager_caches(void) { - nat_entry_slab = f2fs_kmem_cache_create("nat_entry", + nat_entry_slab = f2fs_kmem_cache_create("f2fs_nat_entry", sizeof(struct nat_entry)); if (!nat_entry_slab) goto fail; - free_nid_slab = f2fs_kmem_cache_create("free_nid", + free_nid_slab = f2fs_kmem_cache_create("f2fs_free_nid", sizeof(struct free_nid)); if (!free_nid_slab) goto destroy_nat_entry; - nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", + nat_entry_set_slab = f2fs_kmem_cache_create("f2fs_nat_entry_set", sizeof(struct nat_entry_set)); if (!nat_entry_set_slab) goto destroy_free_nid; - fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry", + fsync_node_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_node_entry", sizeof(struct fsync_node_entry)); if (!fsync_node_entry_slab) goto destroy_nat_entry_set; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f0cf0383fd2e..776636faba67 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4574,22 +4574,22 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) int __init f2fs_create_segment_manager_caches(void) { - discard_entry_slab = 
f2fs_kmem_cache_create("discard_entry", + discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry", sizeof(struct discard_entry)); if (!discard_entry_slab) goto fail; - discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd", + discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd", sizeof(struct discard_cmd)); if (!discard_cmd_slab) goto destroy_discard_entry; - sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", + sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set", sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) goto destroy_discard_cmd; - inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", + inmem_entry_slab = f2fs_kmem_cache_create("f2fs_inmem_page_entry", sizeof(struct inmem_pages)); if (!inmem_entry_slab) goto destroy_sit_entry_set; -- GitLab From 3d0d455670d914fa351e04787d700d93ca930efe Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Feb 2020 18:21:35 +0800 Subject: [PATCH 0748/1278] f2fs: fix to avoid triggering IO in write path If we are in write IO path, we need to avoid using GFP_KERNEL. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- fs/f2fs/data.c | 24 +++++++++++++----------- fs/f2fs/f2fs.h | 2 +- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 5551f13cc8c5..9aade96166af 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -661,7 +661,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc, struct bio *bio = NULL; ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size, - &last_block_in_bio, false); + &last_block_in_bio, false, true); f2fs_destroy_compress_ctx(cc); if (ret) goto release_pages; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1cd2aec8bd14..c2d4a2cf784c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -926,14 +926,15 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, unsigned nr_pages, unsigned op_flag, - pgoff_t first_idx) + pgoff_t first_idx, bool for_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; struct bio_post_read_ctx *ctx; unsigned int post_read_steps = 0; - bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); + bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), + for_write); if (!bio) return ERR_PTR(-ENOMEM); f2fs_target_device(sbi, blkaddr, bio); @@ -968,12 +969,12 @@ static void f2fs_release_read_bio(struct bio *bio) /* This can handle encryption stuffs */ static int f2fs_submit_page_read(struct inode *inode, struct page *page, - block_t blkaddr) + block_t blkaddr, bool for_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; - bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index); + bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index, for_write); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -1159,7 +1160,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, return page; } - err = 
f2fs_submit_page_read(inode, page, dn.data_blkaddr); + err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, for_write); if (err) goto put_err; return page; @@ -1972,7 +1973,8 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, } if (bio == NULL) { bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0, page->index); + is_readahead ? REQ_RAHEAD : 0, page->index, + false); if (IS_ERR(bio)) { ret = PTR_ERR(bio); bio = NULL; @@ -2007,7 +2009,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, #ifdef CONFIG_F2FS_FS_COMPRESSION int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead) + bool is_readahead, bool for_write) { struct dnode_of_data dn; struct inode *inode = cc->inode; @@ -2101,7 +2103,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, if (!bio) { bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, is_readahead ? 
REQ_RAHEAD : 0, - page->index); + page->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); bio = NULL; @@ -2202,7 +2204,7 @@ int f2fs_mpage_readpages(struct address_space *mapping, ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - is_readahead); + is_readahead, false); f2fs_destroy_compress_ctx(&cc); if (ret) goto set_error_page; @@ -2245,7 +2247,7 @@ int f2fs_mpage_readpages(struct address_space *mapping, ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - is_readahead); + is_readahead, false); f2fs_destroy_compress_ctx(&cc); } } @@ -3275,7 +3277,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, err = -EFSCORRUPTED; goto fail; } - err = f2fs_submit_page_read(inode, page, blkaddr); + err = f2fs_submit_page_read(inode, page, blkaddr, true); if (err) goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 907e758a7800..4338e69c10e5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3793,7 +3793,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead); + bool is_readahead, bool for_write); struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_free_dic(struct decompress_io_ctx *dic); void f2fs_decompress_end_io(struct page **rpages, -- GitLab From e92f66d6c2395b1629c61884cf3ecfff72d4b538 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 21 Feb 2020 18:09:21 +0800 Subject: [PATCH 0749/1278] f2fs: introduce F2FS_IOC_GET_COMPRESS_BLOCKS With this newly introduced interface, user can get block number compression saved in target inode. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4338e69c10e5..fcdc883cde78 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -427,6 +427,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) #define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) #define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) +#define F2FS_IOC_GET_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 17, __u64) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4f82a70d2b5f..9440fa332794 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3338,6 +3338,21 @@ static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) return fsverity_ioctl_measure(filp, (void __user *)arg); } +static int f2fs_get_compress_blocks(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u64 blocks; + + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + + if (!f2fs_compressed_file(inode)) + return -EINVAL; + + blocks = F2FS_I(inode)->i_compr_blocks; + return put_user(blocks, (u64 __user *)arg); +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -3412,6 +3427,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_enable_verity(filp, arg); case FS_IOC_MEASURE_VERITY: return f2fs_ioc_measure_verity(filp, arg); + case F2FS_IOC_GET_COMPRESS_BLOCKS: + return f2fs_get_compress_blocks(filp, arg); default: return -ENOTTY; } @@ -3566,6 +3583,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_RESIZE_FS: case 
FS_IOC_ENABLE_VERITY: case FS_IOC_MEASURE_VERITY: + case F2FS_IOC_GET_COMPRESS_BLOCKS: break; default: return -ENOIOCTLCMD; -- GitLab From 799f1afbcc6b784a3a0f94a318018a7ad3f4507f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Feb 2020 18:21:34 +0800 Subject: [PATCH 0750/1278] f2fs: avoid __GFP_NOFAIL in f2fs_bio_alloc __f2fs_bio_alloc() won't fail due to memory pool backend, remove unneeded __GFP_NOFAIL flag in __f2fs_bio_alloc(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++-------- fs/f2fs/f2fs.h | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c2d4a2cf784c..49041defca99 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -55,17 +55,13 @@ static inline struct bio *__f2fs_bio_alloc(gfp_t gfp_mask, return bio_alloc_bioset(gfp_mask, nr_iovecs, f2fs_bioset); } -struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool no_fail) +struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool noio) { - struct bio *bio; - - if (no_fail) { + if (noio) { /* No failure on bio allocation */ - bio = __f2fs_bio_alloc(GFP_NOIO, npages); - if (!bio) - bio = __f2fs_bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); - return bio; + return __f2fs_bio_alloc(GFP_NOIO, npages); } + if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO); return NULL; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fcdc883cde78..f7e339bbed6d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3338,7 +3338,7 @@ void f2fs_destroy_checkpoint_caches(void); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); -struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool no_fail); +struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool noio); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_bio(struct f2fs_sb_info *sbi, -- GitLab From 09891a6659ba50479794f23dc65653d37d4e8687 
Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 24 Feb 2020 19:20:15 +0800 Subject: [PATCH 0751/1278] f2fs: fix to show tracepoint correctly Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9440fa332794..fefcd7f2a023 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3456,8 +3456,10 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } - if (!f2fs_is_compress_backend_ready(inode)) - return -EOPNOTSUPP; + if (!f2fs_is_compress_backend_ready(inode)) { + ret = -EOPNOTSUPP; + goto out; + } if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock(inode)) { -- GitLab From 7cb6607655ab0cd15ba607822ac0773353fe5007 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 9 Feb 2020 13:28:45 -0800 Subject: [PATCH 0752/1278] f2fs: skip migration only when BG_GC is called FG_GC needs to move entire section more quickly. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 4b17c336c053..0fa91b657340 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1211,7 +1211,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, if (get_valid_blocks(sbi, segno, false) == 0) goto freed; - if (__is_large_section(sbi) && + if (gc_type == BG_GC && __is_large_section(sbi) && migrated >= sbi->migration_granularity) goto skip; if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) -- GitLab From 01d02b9acd69c8e3a5e19ef92d00967a329363a9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Feb 2020 18:17:10 +0800 Subject: [PATCH 0753/1278] f2fs: use kmem_cache pool during inline xattr lookups It's been observed that kzalloc() on lookup_all_xattrs() are called millions of times on Android, quickly becoming the top abuser of slub memory allocator. Use a dedicated kmem cache pool for xattr lookups to mitigate this. 
Signed-off-by: Park Ju Hyung Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/super.c | 10 ++++++++- fs/f2fs/xattr.c | 54 ++++++++++++++++++++++++++++++++++++++++++++----- fs/f2fs/xattr.h | 4 ++++ 4 files changed, 65 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f7e339bbed6d..ee06b69a3ea1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1485,6 +1485,9 @@ struct f2fs_sb_info { __u32 s_chksum_seed; struct workqueue_struct *post_read_wq; /* post read workqueue */ + + struct kmem_cache *inline_xattr_slab; /* inline xattr entry */ + unsigned int inline_xattr_slab_size; /* default inline xattr slab size */ }; struct f2fs_private_dio { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index db725c7f089c..28909e5d2aab 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1210,6 +1210,7 @@ static void f2fs_put_super(struct super_block *sb) kvfree(sbi->raw_super); destroy_device_list(sbi); + f2fs_destroy_xattr_caches(sbi); mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) @@ -3487,12 +3488,17 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) } } + /* init per sbi slab cache */ + err = f2fs_init_xattr_caches(sbi); + if (err) + goto free_io_dummy; + /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); if (IS_ERR(sbi->meta_inode)) { f2fs_err(sbi, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); - goto free_io_dummy; + goto free_xattr_cache; } err = f2fs_get_valid_checkpoint(sbi); @@ -3753,6 +3759,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); sbi->meta_inode = NULL; +free_xattr_cache: + f2fs_destroy_xattr_caches(sbi); free_io_dummy: mempool_destroy(sbi->write_io_dummy); free_percpu: diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index d4800df6302d..4f6582ef7ee3 100644 --- a/fs/f2fs/xattr.c +++ 
b/fs/f2fs/xattr.c @@ -23,6 +23,25 @@ #include "xattr.h" #include "segment.h" +static void *xattr_alloc(struct f2fs_sb_info *sbi, int size, bool *is_inline) +{ + if (likely(size == sbi->inline_xattr_slab_size)) { + *is_inline = true; + return kmem_cache_zalloc(sbi->inline_xattr_slab, GFP_NOFS); + } + *is_inline = false; + return f2fs_kzalloc(sbi, size, GFP_NOFS); +} + +static void xattr_free(struct f2fs_sb_info *sbi, void *xattr_addr, + bool is_inline) +{ + if (is_inline) + kmem_cache_free(sbi->inline_xattr_slab, xattr_addr); + else + kvfree(xattr_addr); +} + static int f2fs_xattr_generic_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) @@ -301,7 +320,8 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr) static int lookup_all_xattrs(struct inode *inode, struct page *ipage, unsigned int index, unsigned int len, const char *name, struct f2fs_xattr_entry **xe, - void **base_addr, int *base_size) + void **base_addr, int *base_size, + bool *is_inline) { void *cur_addr, *txattr_addr, *last_txattr_addr; void *last_addr = NULL; @@ -313,7 +333,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, return -ENODATA; *base_size = XATTR_SIZE(inode) + XATTR_PADDING_SIZE; - txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS); + txattr_addr = xattr_alloc(F2FS_I_SB(inode), *base_size, is_inline); if (!txattr_addr) return -ENOMEM; @@ -362,7 +382,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, *base_addr = txattr_addr; return 0; out: - kvfree(txattr_addr); + xattr_free(F2FS_I_SB(inode), txattr_addr, *is_inline); return err; } @@ -499,6 +519,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, unsigned int size, len; void *base_addr = NULL; int base_size; + bool is_inline; if (name == NULL) return -EINVAL; @@ -509,7 +530,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, 
down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, - &entry, &base_addr, &base_size); + &entry, &base_addr, &base_size, &is_inline); up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -532,7 +553,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, } error = size; out: - kvfree(base_addr); + xattr_free(F2FS_I_SB(inode), base_addr, is_inline); return error; } @@ -764,3 +785,26 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, f2fs_update_time(sbi, REQ_TIME); return err; } + +int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) +{ + dev_t dev = sbi->sb->s_bdev->bd_dev; + char slab_name[32]; + + sprintf(slab_name, "f2fs_xattr_entry-%u:%u", MAJOR(dev), MINOR(dev)); + + sbi->inline_xattr_slab_size = F2FS_OPTION(sbi).inline_xattr_size * + sizeof(__le32) + XATTR_PADDING_SIZE; + + sbi->inline_xattr_slab = f2fs_kmem_cache_create(slab_name, + sbi->inline_xattr_slab_size); + if (!sbi->inline_xattr_slab) + return -ENOMEM; + + return 0; +} + +void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) +{ + kmem_cache_destroy(sbi->inline_xattr_slab); +} diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 574beea46494..0153b4c9ef21 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -131,6 +131,8 @@ extern int f2fs_setxattr(struct inode *, int, const char *, extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t, struct page *); extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); +extern int f2fs_init_xattr_caches(struct f2fs_sb_info *); +extern void f2fs_destroy_xattr_caches(struct f2fs_sb_info *); #else #define f2fs_xattr_handlers NULL @@ -151,6 +153,8 @@ static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, { return -EOPNOTSUPP; } +static int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } +static void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } #endif #ifdef CONFIG_F2FS_FS_SECURITY -- GitLab From 
2874ae2ba39540bfa03719755e6f6e636c24b96a Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 3 Mar 2020 19:59:25 +0530 Subject: [PATCH 0754/1278] f2fs: Fix mount failure due to SPO after a successful online resize FS Even though online resize is successfully done, an SPO immediately after resize still causes the below error in the next mount. [ 11.294650] F2FS-fs (sda8): Wrong user_block_count: 2233856 [ 11.300272] F2FS-fs (sda8): Failed to get valid F2FS checkpoint This is because, after FS metadata is updated in update_fs_metadata(), if the SBI_IS_DIRTY flag is not set, then CP will not be done to reflect the new user_block_count. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 0fa91b657340..adb140cc9edc 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1578,11 +1578,17 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) goto out; } + mutex_lock(&sbi->cp_mutex); update_fs_metadata(sbi, -secs); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + set_sbi_flag(sbi, SBI_IS_DIRTY); + mutex_unlock(&sbi->cp_mutex); + err = f2fs_sync_fs(sbi->sb, 1); if (err) { + mutex_lock(&sbi->cp_mutex); update_fs_metadata(sbi, secs); + mutex_unlock(&sbi->cp_mutex); update_sb_metadata(sbi, secs); f2fs_commit_super(sbi, false); } -- GitLab From 27de311b33bf49112f1d53c3f5f9397852b3e269 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 3 Mar 2020 19:59:26 +0530 Subject: [PATCH 0755/1278] f2fs: Add a new CP flag to help fsck fix resize SPO issues Add and set a new CP flag CP_RESIZEFS_FLAG during online resize FS to help fsck fix the metadata mismatch that may happen due to SPO during resize, where SB got updated but CP data couldn't be written yet.
fsck errors - Info: CKPT version = 6ed7bccb Wrong user_block_count(2233856) [f2fs_do_mount:3365] Checkpoint is polluted Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++++-- include/linux/f2fs_fs.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2a1a3e0b1b78..e5669185a8bd 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1298,10 +1298,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || - is_sbi_flag_set(sbi, SBI_IS_RESIZEFS)) + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); + if (is_sbi_flag_set(sbi, SBI_IS_RESIZEFS)) + __set_ckpt_flags(ckpt, CP_RESIZEFS_FLAG); + else + __clear_ckpt_flags(ckpt, CP_RESIZEFS_FLAG); + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) __set_ckpt_flags(ckpt, CP_DISABLED_FLAG); else diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ac3f4888b3df..3c383ddd92dd 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -125,6 +125,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_RESIZEFS_FLAG 0x00004000 #define CP_DISABLED_QUICK_FLAG 0x00002000 #define CP_DISABLED_FLAG 0x00001000 #define CP_QUOTA_NEED_FSCK_FLAG 0x00000800 -- GitLab From 282cf1967137ccb05927324831c95714caab7972 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 3 Mar 2020 20:09:25 +0800 Subject: [PATCH 0756/1278] f2fs: fix to update f2fs_super_block fields under sb_lock Fields in struct f2fs_super_block should be updated under coverage of sb_lock, fix to adjust update_sb_metadata() for that rule. 
Fixes: 04f0b2eaa3b3 ("f2fs: ioctl for removing a range from F2FS") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index adb140cc9edc..9f6d2685ddda 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1442,12 +1442,19 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) { struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi); - int section_count = le32_to_cpu(raw_sb->section_count); - int segment_count = le32_to_cpu(raw_sb->segment_count); - int segment_count_main = le32_to_cpu(raw_sb->segment_count_main); - long long block_count = le64_to_cpu(raw_sb->block_count); + int section_count; + int segment_count; + int segment_count_main; + long long block_count; int segs = secs * sbi->segs_per_sec; + down_write(&sbi->sb_lock); + + section_count = le32_to_cpu(raw_sb->section_count); + segment_count = le32_to_cpu(raw_sb->segment_count); + segment_count_main = le32_to_cpu(raw_sb->segment_count_main); + block_count = le64_to_cpu(raw_sb->block_count); + raw_sb->section_count = cpu_to_le32(section_count + secs); raw_sb->segment_count = cpu_to_le32(segment_count + segs); raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); @@ -1461,6 +1468,8 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) raw_sb->devs[last_dev].total_segments = cpu_to_le32(dev_segs + segs); } + + up_write(&sbi->sb_lock); } static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) -- GitLab From e6e13f2c44d7c018126e9860cfc631f23de0b290 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Mon, 9 Mar 2020 13:10:08 -0500 Subject: [PATCH 0757/1278] f2fs: xattr.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. 
Silva Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 0153b4c9ef21..e471be77f8f0 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -49,7 +49,7 @@ struct f2fs_xattr_entry { __u8 e_name_index; __u8 e_name_len; __le16 e_value_size; /* size of attribute value */ - char e_name[0]; /* attribute name */ + char e_name[]; /* attribute name */ }; #define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr)) -- GitLab From c586bdff9a8da4c183842ce2db78e808847f73db Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 12 Mar 2020 10:45:29 +0800 Subject: [PATCH 0758/1278] f2fs: fix to account compressed blocks in f2fs_compressed_blocks() por_fsstress reports inconsistent status in orphan inode, the root cause of this is in f2fs_write_raw_pages() we decrease i_compr_blocks incorrectly due to wrong calculation in f2fs_compressed_blocks(). So this patch exposes below two functions based on __f2fs_cluster_blocks: - f2fs_compressed_blocks: get count of compressed blocks in compressed cluster - f2fs_cluster_blocks: get count of valid blocks (including reserved blocks) in compressed cluster. Then use f2fs_compressed_blocks() to get correct compressed blocks count in f2fs_write_raw_pages().
sanity_check_inode: inode (ino=ad80) hash inconsistent i_compr_blocks:2, i_blocks:1, run fsck to fix Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 9aade96166af..1101fce37e22 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -540,8 +540,7 @@ static bool __cluster_may_compress(struct compress_ctx *cc) return true; } -/* return # of compressed block addresses */ -static int f2fs_compressed_blocks(struct compress_ctx *cc) +static int __f2fs_cluster_blocks(struct compress_ctx *cc, bool compr) { struct dnode_of_data dn; int ret; @@ -564,8 +563,13 @@ static int f2fs_compressed_blocks(struct compress_ctx *cc) blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i); - if (blkaddr != NULL_ADDR) - ret++; + if (compr) { + if (__is_valid_data_blkaddr(blkaddr)) + ret++; + } else { + if (blkaddr != NULL_ADDR) + ret++; + } } } fail: @@ -573,6 +577,18 @@ static int f2fs_compressed_blocks(struct compress_ctx *cc) return ret; } +/* return # of compressed blocks in compressed cluster */ +static int f2fs_compressed_blocks(struct compress_ctx *cc) +{ + return __f2fs_cluster_blocks(cc, true); +} + +/* return # of valid blocks in compressed cluster */ +static int f2fs_cluster_blocks(struct compress_ctx *cc, bool compr) +{ + return __f2fs_cluster_blocks(cc, false); +} + int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) { struct compress_ctx cc = { @@ -582,7 +598,7 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size, }; - return f2fs_compressed_blocks(&cc); + return f2fs_cluster_blocks(&cc, false); } static bool cluster_may_compress(struct compress_ctx *cc) @@ -631,7 +647,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc, bool prealloc; retry: - ret = f2fs_compressed_blocks(cc); + ret = 
f2fs_cluster_blocks(cc, false); if (ret <= 0) return ret; -- GitLab From 8ae58b384a4e9044c03eac2bb75211d4c601a504 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 18 Mar 2020 19:40:45 +0800 Subject: [PATCH 0759/1278] f2fs: don't mark compressed inode dirty during f2fs_iget() - f2fs_iget - do_read_inode - set_inode_flag(, FI_COMPRESSED_FILE) - __mark_inode_dirty_flag(, true) It's unnecessary, so let's just mark compressed inode dirty while compressed inode conversion. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ee06b69a3ea1..8e2f1b6eca33 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2572,7 +2572,6 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_DATA_EXIST: case FI_INLINE_DOTS: case FI_PIN_FILE: - case FI_COMPRESSED_FILE: f2fs_mark_inode_dirty_sync(inode, true); } } @@ -3834,6 +3833,7 @@ static inline void set_compress_context(struct inode *inode) F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; set_inode_flag(inode, FI_COMPRESSED_FILE); stat_inc_compr_inode(inode); + f2fs_mark_inode_dirty_sync(inode, true); } static inline u64 f2fs_disable_compressed_file(struct inode *inode) @@ -3850,6 +3850,7 @@ static inline u64 f2fs_disable_compressed_file(struct inode *inode) fi->i_flags &= ~F2FS_COMPR_FL; stat_dec_compr_inode(inode); clear_inode_flag(inode, FI_COMPRESSED_FILE); + f2fs_mark_inode_dirty_sync(inode, true); return 0; } -- GitLab From 4aecef2da102a16a7014c6eac00be47fe8cd19a3 Mon Sep 17 00:00:00 2001 From: DongDongJu Date: Fri, 20 Mar 2020 15:01:32 +0900 Subject: [PATCH 0760/1278] f2fs: delete DIO read lock This lock can be a contention with multi 4k random read IO with single inode. 
example) fio --output=test --name=test --numjobs=60 --filename=/media/samsung960pro/file_test --rw=randread --bs=4k --direct=1 --time_based --runtime=7 --ioengine=libaio --iodepth=256 --group_reporting --size=10G This commit removes that possible lock contention. Signed-off-by: Dongjoo Seo Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 49041defca99..494c9801179a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3454,7 +3454,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, rw == WRITE ? get_data_block_dio_write : get_data_block_dio, NULL, f2fs_dio_submit_bio, - DIO_LOCKING | DIO_SKIP_HOLES); + rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES : + DIO_SKIP_HOLES); if (do_opu) up_read(&fi->i_gc_rwsem[READ]); -- GitLab From 11fa712e36822835127470d707b252115a95a52e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 20 Mar 2020 18:14:31 +0800 Subject: [PATCH 0761/1278] f2fs: fix potential deadlock on compressed quota file generic/232 reports below deadlock: fsstress D 0 96980 96969 0x00084000 Call Trace: schedule+0x4a/0xb0 io_schedule+0x12/0x40 __lock_page+0x127/0x1d0 pagecache_get_page+0x1d8/0x250 prepare_compress_overwrite+0xe0/0x490 [f2fs] f2fs_prepare_compress_overwrite+0x5d/0x80 [f2fs] f2fs_write_begin+0x833/0xb90 [f2fs] f2fs_quota_write+0x145/0x1e0 [f2fs] write_blk+0x36/0x80 [quota_tree] do_insert_tree+0x2ac/0x4a0 [quota_tree] do_insert_tree+0x26e/0x4a0 [quota_tree] qtree_write_dquot+0x70/0x190 [quota_tree] v2_write_dquot+0x43/0x90 [quota_v2] dquot_acquire+0x77/0x100 f2fs_dquot_acquire+0x2f/0x60 [f2fs] dqget+0x310/0x450 dquot_transfer+0xb2/0x120 f2fs_setattr+0x11a/0x4a0 [f2fs] notify_change+0x349/0x480 chown_common+0x168/0x1c0 do_fchownat+0xbc/0xf0 __x64_sys_lchown+0x21/0x30 do_syscall_64+0x5f/0x220 entry_SYSCALL_64_after_hwframe+0x44/0xa9 task PC stack pid father
kworker/u256:0 D 0 103444 2 0x80084000 Workqueue: writeback wb_workfn (flush-251:1) Call Trace: schedule+0x4a/0xb0 schedule_timeout+0x15e/0x2f0 io_schedule_timeout+0x19/0x40 congestion_wait+0x7e/0x120 f2fs_write_multi_pages+0x12a/0x840 [f2fs] f2fs_write_cache_pages+0x48f/0x790 [f2fs] f2fs_write_data_pages+0x2db/0x330 [f2fs] do_writepages+0x1a/0x60 __writeback_single_inode+0x3d/0x340 writeback_sb_inodes+0x225/0x4a0 wb_writeback+0xf7/0x320 wb_workfn+0xba/0x470 process_one_work+0x16c/0x3f0 worker_thread+0x4c/0x440 kthread+0xf8/0x130 ret_from_fork+0x35/0x40 fsstress D 0 5277 5266 0x00084000 Call Trace: schedule+0x4a/0xb0 rwsem_down_write_slowpath+0x29d/0x540 block_operations+0x105/0x360 [f2fs] f2fs_write_checkpoint+0x101/0x1010 [f2fs] f2fs_sync_fs+0xa8/0x130 [f2fs] f2fs_do_sync_file+0x1ad/0x890 [f2fs] do_fsync+0x38/0x60 __x64_sys_fdatasync+0x13/0x20 do_syscall_64+0x5f/0x220 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The root cause is that there is a potential deadlock between quota data update and writeback. Kworker Thread B Thread C - f2fs_write_cache_pages - lock whole cluster --- A - f2fs_write_multi_pages - f2fs_write_raw_pages - f2fs_write_single_data_page - f2fs_do_write_data_page - f2fs_setattr - f2fs_lock_op --- B - f2fs_write_checkpoint - block_operations - f2fs_lock_all --- B - dquot_transfer - f2fs_quota_write - f2fs_prepare_compress_overwrite - pagecache_get_page --- A - f2fs_trylock_op failed --- B - congestion_wait - goto rewrite To fix this issue, during quota file writeback, just redirty all pages left in the cluster rather than holding the locks of the pages in the cluster and looping to retry locking cp_rwsem.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 1101fce37e22..962ea091f51f 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1007,6 +1007,15 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, unlock_page(cc->rpages[i]); ret = 0; } else if (ret == -EAGAIN) { + /* + * for quota file, just redirty left pages to + * avoid deadlock caused by cluster update race + * from foreground operation. + */ + if (IS_NOQUOTA(cc->inode)) { + err = 0; + goto out_err; + } ret = 0; cond_resched(); congestion_wait(BLK_RW_ASYNC, @@ -1016,16 +1025,12 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, goto retry_write; } err = ret; - goto out_fail; + goto out_err; } *submitted += _submitted; } return 0; - -out_fail: - /* TODO: revoke partially updated block addresses */ - BUG_ON(compr_blocks); out_err: for (++i; i < cc->cluster_size; i++) { if (!cc->rpages[i]) -- GitLab From 1c4cc523ad062e3cf5086d8d00b82bd15052b040 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 20 Mar 2020 18:17:54 +0800 Subject: [PATCH 0762/1278] f2fs: don't change inode status under page lock In order to shrink page lock coverage. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 331c90556a0f..7c843196b9eb 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -850,12 +850,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, 0); set_page_dirty(page); - dir->i_ctime = dir->i_mtime = current_time(dir); - f2fs_mark_inode_dirty_sync(dir, false); - - if (inode) - f2fs_drop_nlink(dir, inode); - if (bit_pos == NR_DENTRY_IN_BLOCK && !f2fs_truncate_hole(dir, page->index, page->index + 1)) { f2fs_clear_radix_tree_dirty_tag(page); @@ -867,6 +861,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_remove_dirty_inode(dir); } f2fs_put_page(page, 1); + + dir->i_ctime = dir->i_mtime = current_time(dir); + f2fs_mark_inode_dirty_sync(dir, false); + + if (inode) + f2fs_drop_nlink(dir, inode); } bool f2fs_empty_dir(struct inode *dir) -- GitLab From 1c134dc62afb5895e659b7e64f96ea828c2213ec Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2020 19:57:57 +0800 Subject: [PATCH 0763/1278] f2fs: fix to avoid potential deadlock We should always check F2FS_I(inode)->cp_task condition prior to other conditions in __should_serialize_io() to avoid deadloop described in commit 040d2bb318d1 ("f2fs: fix to avoid deadloop if data_flush is on"), however we break this rule when we support compression, fix it.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 494c9801179a..c12b7212d251 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2963,15 +2963,17 @@ static int f2fs_write_cache_pages(struct address_space *mapping, static inline bool __should_serialize_io(struct inode *inode, struct writeback_control *wbc) { + /* to avoid deadlock in path of data flush */ + if (F2FS_I(inode)->cp_task) + return false; + if (!S_ISREG(inode->i_mode)) return false; - if (f2fs_compressed_file(inode)) - return true; if (IS_NOQUOTA(inode)) return false; - /* to avoid deadlock in path of data flush */ - if (F2FS_I(inode)->cp_task) - return false; + + if (f2fs_compressed_file(inode)) + return true; if (wbc->sync_mode != WB_SYNC_ALL) return true; if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) -- GitLab From 8de7832960bd0f025a8c71ecae014914fc530885 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 21 Mar 2020 20:19:33 +0800 Subject: [PATCH 0764/1278] f2fs: clean up f2fs_may_encrypt() Merge below two conditions into f2fs_may_encrypt() for cleanup - IS_ENCRYPTED() - DUMMY_ENCRYPTION_ENABLED() Checking the IS_ENCRYPTED(inode) condition in f2fs_init_inode_metadata() is enough since we have already set encrypt flag in f2fs_new_inode().
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 4 +--- fs/f2fs/f2fs.h | 13 +++++++++---- fs/f2fs/namei.c | 4 +--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 7c843196b9eb..227bf48abdfd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -471,7 +471,6 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, struct page *dpage) { struct page *page; - int dummy_encrypt = DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(dir)); int err; if (is_inode_flag_set(inode, FI_NEW_INODE)) { @@ -498,8 +497,7 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, if (err) goto put_error; - if ((IS_ENCRYPTED(dir) || dummy_encrypt) && - f2fs_may_encrypt(inode)) { + if (IS_ENCRYPTED(inode)) { err = fscrypt_inherit_context(dir, inode, page, false); if (err) goto put_error; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8e2f1b6eca33..c1d9381d15dd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3932,15 +3932,20 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } -static inline bool f2fs_may_encrypt(struct inode *inode) +static inline bool f2fs_may_encrypt(struct inode *dir, struct inode *inode) { #ifdef CONFIG_FS_ENCRYPTION + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); umode_t mode = inode->i_mode; - return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); -#else - return false; + /* + * If the directory encrypted or dummy encryption enabled, + * then we should encrypt the inode. 
+ */ + if (IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) + return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); #endif + return false; } static inline bool f2fs_may_compress(struct inode *inode) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 5dbd23e26413..c8056a03a95a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -75,9 +75,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) set_inode_flag(inode, FI_NEW_INODE); - /* If the directory encrypted, then we should encrypt the inode. */ - if ((IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) && - f2fs_may_encrypt(inode)) + if (f2fs_may_encrypt(dir, inode)) f2fs_set_encrypted_inode(inode); if (f2fs_sb_has_extra_attr(sbi)) { -- GitLab From e7259ee04c52579aed76276ceed3608f144d8acb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2020 19:58:00 +0800 Subject: [PATCH 0765/1278] f2fs: fix NULL pointer dereference in f2fs_write_begin() BUG: kernel NULL pointer dereference, address: 0000000000000000 RIP: 0010:f2fs_write_begin+0x823/0xb90 [f2fs] Call Trace: f2fs_quota_write+0x139/0x1d0 [f2fs] write_blk+0x36/0x80 [quota_tree] get_free_dqblk+0x42/0xa0 [quota_tree] do_insert_tree+0x235/0x4a0 [quota_tree] do_insert_tree+0x26e/0x4a0 [quota_tree] do_insert_tree+0x26e/0x4a0 [quota_tree] do_insert_tree+0x26e/0x4a0 [quota_tree] qtree_write_dquot+0x70/0x190 [quota_tree] v2_write_dquot+0x43/0x90 [quota_v2] dquot_acquire+0x77/0x100 f2fs_dquot_acquire+0x2f/0x60 [f2fs] dqget+0x310/0x450 dquot_transfer+0x7e/0x120 f2fs_setattr+0x11a/0x4a0 [f2fs] notify_change+0x349/0x480 chown_common+0x168/0x1c0 do_fchownat+0xbc/0xf0 __x64_sys_fchownat+0x20/0x30 do_syscall_64+0x5f/0x220 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Passing fsdata parameter to .write_{begin,end} in f2fs_quota_write(), so that if quota file is compressed one, we can avoid above NULL pointer dereference when updating quota content. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 28909e5d2aab..f0fa4d752497 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1937,6 +1937,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, int offset = off & (sb->s_blocksize - 1); size_t towrite = len; struct page *page; + void *fsdata = NULL; char *kaddr; int err = 0; int tocopy; @@ -1946,7 +1947,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, towrite); retry: err = a_ops->write_begin(NULL, mapping, off, tocopy, 0, - &page, NULL); + &page, &fsdata); if (unlikely(err)) { if (err == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, @@ -1963,7 +1964,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, flush_dcache_page(page); a_ops->write_end(NULL, mapping, off, tocopy, tocopy, - page, NULL); + page, fsdata); offset = 0; towrite -= tocopy; off += tocopy; -- GitLab From 990af856a220049833cb06df5c1abff4d736c113 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2020 19:57:58 +0800 Subject: [PATCH 0766/1278] f2fs: don't trigger data flush in foreground operation Data flush can generate heavy IO and cause long latency during flush, so it's not appropriate to trigger it in foreground operation. 
And also, we may face below potential deadlock during data flush: - f2fs_write_multi_pages - f2fs_write_raw_pages - f2fs_write_single_data_page - f2fs_balance_fs - f2fs_balance_fs_bg - f2fs_sync_dirty_inodes - filemap_fdatawrite -- stuck on flush same cluster Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/node.c | 2 +- fs/f2fs/segment.c | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c1d9381d15dd..a2afdb98296e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3235,7 +3235,7 @@ void f2fs_drop_inmem_pages(struct inode *inode); void f2fs_drop_inmem_page(struct inode *inode, struct page *page); int f2fs_commit_inmem_pages(struct inode *inode); void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); -void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); +void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg); int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi); int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9f6d2685ddda..a74a5c53e4fc 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -113,7 +113,7 @@ static int gc_thread_func(void *data) prefree_segments(sbi), free_segments(sbi)); /* balancing f2fs's metadata periodically */ - f2fs_balance_fs_bg(sbi); + f2fs_balance_fs_bg(sbi, true); next: sb_end_write(sbi->sb); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4dad40785da3..7ef52f5ed3e1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1976,7 +1976,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, goto skip_write; /* balancing f2fs's metadata in background */ - f2fs_balance_fs_bg(sbi); + f2fs_balance_fs_bg(sbi, true); /* collect a number of dirty node pages and write together */ if (wbc->sync_mode != WB_SYNC_ALL && diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 776636faba67..1065b19afe17 
100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -496,7 +496,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) /* balance_fs_bg is able to be pending */ if (need && excess_cached_nats(sbi)) - f2fs_balance_fs_bg(sbi); + f2fs_balance_fs_bg(sbi, false); if (!f2fs_is_checkpoint_ready(sbi)) return; @@ -511,7 +511,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) } } -void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) +void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) { if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return; @@ -540,7 +540,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) || f2fs_time_over(sbi, CP_TIME)) { - if (test_opt(sbi, DATA_FLUSH)) { + if (test_opt(sbi, DATA_FLUSH) && from_bg) { struct blk_plug plug; mutex_lock(&sbi->flush_lock); -- GitLab From 1279143275c86339d65b99c398aa4133614ca927 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 21 Mar 2020 20:23:27 +0800 Subject: [PATCH 0767/1278] f2fs: don't call fscrypt_get_encryption_info() explicitly in f2fs_tmpfile() In f2fs_tmpfile(), parent inode's encryption info is only used when inheriting encryption context to its child inode, however, we have already called fscrypt_get_encryption_info() in fscrypt_inherit_context() to get the encryption info, so just removing unneeded one in f2fs_tmpfile(). 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c8056a03a95a..e23c00bcb1b1 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -878,12 +878,6 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - if (IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) { - int err = fscrypt_get_encryption_info(dir); - if (err) - return err; - } - return __f2fs_tmpfile(dir, dentry, mode, NULL); } -- GitLab From 41075c0a58802342d2d60c4f3ff02d7b1d95e627 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 21 Mar 2020 20:24:11 +0800 Subject: [PATCH 0768/1278] f2fs: fix to clear PG_error if fsverity failed In f2fs_decompress_end_io(), we should clear PG_error flag before page unlock, otherwise reread will fail due to the flag as described in commit fb7d70db305a ("f2fs: clear PageError on the read path"). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 962ea091f51f..054fc6abdec4 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1192,15 +1192,17 @@ void f2fs_decompress_end_io(struct page **rpages, if (!rpage) continue; - if (err || PageError(rpage)) { - ClearPageUptodate(rpage); - ClearPageError(rpage); - } else { - if (!verity || fsverity_verify_page(rpage)) - SetPageUptodate(rpage); - else - SetPageError(rpage); + if (err || PageError(rpage)) + goto clear_uptodate; + + if (!verity || fsverity_verify_page(rpage)) { + SetPageUptodate(rpage); + goto unlock; } +clear_uptodate: + ClearPageUptodate(rpage); + ClearPageError(rpage); +unlock: unlock_page(rpage); } } -- GitLab From c0a747e9e05e53ce00d83dbf8bca14e41ca1286e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Mar 2020 17:43:04 +0800 Subject: [PATCH 0769/1278] f2fs: fix 
NULL pointer dereference in f2fs_verity_work() If both the compression and fsverity features are on, generic/572 will report the NULL pointer dereference bug below. BUG: kernel NULL pointer dereference, address: 0000000000000018 RIP: 0010:f2fs_verity_work+0x60/0x90 [f2fs] #PF: supervisor read access in kernel mode Workqueue: fsverity_read_queue f2fs_verity_work [f2fs] RIP: 0010:f2fs_verity_work+0x60/0x90 [f2fs] Call Trace: process_one_work+0x16c/0x3f0 worker_thread+0x4c/0x440 ? rescuer_thread+0x350/0x350 kthread+0xf8/0x130 ? kthread_unpark+0x70/0x70 ret_from_fork+0x35/0x40 There are two issues in f2fs_verity_work(): - it needs to traverse and verify all pages in the bio. - if pages in the bio belong to a non-compressed cluster, accessing the decompress IO context stored in page private will cause a NULL pointer dereference. Fix them. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 ++ fs/f2fs/data.c | 35 ++++++++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 054fc6abdec4..60a4de93551c 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -482,6 +482,8 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) out_vunmap_rbuf: vunmap(dic->rbuf); out_free_dic: + if (verity) + refcount_add(dic->nr_cpages - 1, &dic->ref); if (!verity) f2fs_decompress_end_io(dic->rpages, dic->cluster_size, ret, false); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c12b7212d251..0727f45d0d6e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -188,12 +188,37 @@ static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size) static void f2fs_verify_bio(struct bio *bio) { - struct page *page = bio->bi_io_vec[0].bv_page; - struct decompress_io_ctx *dic = - (struct decompress_io_ctx *)page_private(page); + struct bio_vec *bv; + int i; + + bio_for_each_segment_all(bv, bio, i) { + struct page *page = bv->bv_page; + struct decompress_io_ctx *dic; + + dic =
(struct decompress_io_ctx *)page_private(page); + + if (dic) { + if (refcount_dec_not_one(&dic->ref)) + continue; + f2fs_verify_pages(dic->rpages, + dic->cluster_size); + f2fs_free_dic(dic); + continue; + } + + if (bio->bi_status || PageError(page)) + goto clear_uptodate; - f2fs_verify_pages(dic->rpages, dic->cluster_size); - f2fs_free_dic(dic); + if (fsverity_verify_page(page)) { + SetPageUptodate(page); + goto unlock; + } +clear_uptodate: + ClearPageUptodate(page); + ClearPageError(page); +unlock: + unlock_page(page); + } } #endif -- GitLab From 6b16e791a35bebacd6cd9c401807acc7bf3615a9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Mar 2020 11:18:07 +0800 Subject: [PATCH 0770/1278] f2fs: fix potential .flags overflow on 32bit architecture f2fs_inode_info.flags is unsigned long variable, it has 32 bits in 32bit architecture, since we introduced FI_MMAP_FILE flag when we support data compression, we may access memory cross the border of .flags field, corrupting .i_sem field, result in below deadlock. To fix this issue, let's expand .flags as an array to grab enough space to store new flags. Call Trace: __schedule+0x8d0/0x13fc ? 
mark_held_locks+0xac/0x100 schedule+0xcc/0x260 rwsem_down_write_slowpath+0x3ab/0x65d down_write+0xc7/0xe0 f2fs_drop_nlink+0x3d/0x600 [f2fs] f2fs_delete_inline_entry+0x300/0x440 [f2fs] f2fs_delete_entry+0x3a1/0x7f0 [f2fs] f2fs_unlink+0x500/0x790 [f2fs] vfs_unlink+0x211/0x490 do_unlinkat+0x483/0x520 sys_unlink+0x4a/0x70 do_fast_syscall_32+0x12b/0x683 entry_SYSENTER_32+0xaa/0x102 Fixes: 4c8ff7095bef ("f2fs: support data compression") Tested-by: Ondrej Jirman Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 99 ++++++++++++++++++++++++------------------------- fs/f2fs/inode.c | 2 +- 2 files changed, 50 insertions(+), 51 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a2afdb98296e..6bf57a954964 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -676,6 +676,44 @@ enum { MAX_GC_FAILURE }; +/* used for f2fs_inode_info->flags */ +enum { + FI_NEW_INODE, /* indicate newly allocated inode */ + FI_DIRTY_INODE, /* indicate inode is dirty or not */ + FI_AUTO_RECOVER, /* indicate inode is recoverable */ + FI_DIRTY_DIR, /* indicate directory has dirty pages */ + FI_INC_LINK, /* need to increment i_nlink */ + FI_ACL_MODE, /* indicate acl mode */ + FI_NO_ALLOC, /* should not allocate any blocks */ + FI_FREE_NID, /* free allocated nide */ + FI_NO_EXTENT, /* not to use the extent cache */ + FI_INLINE_XATTR, /* used for inline xattr */ + FI_INLINE_DATA, /* used for inline data*/ + FI_INLINE_DENTRY, /* used for inline dentry */ + FI_APPEND_WRITE, /* inode has appended data */ + FI_UPDATE_WRITE, /* inode has in-place-update data */ + FI_NEED_IPU, /* used for ipu per file */ + FI_ATOMIC_FILE, /* indicate atomic file */ + FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */ + FI_VOLATILE_FILE, /* indicate volatile file */ + FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ + FI_DROP_CACHE, /* drop dirty page cache */ + FI_DATA_EXIST, /* indicate data exists */ + FI_INLINE_DOTS, /* indicate inline dot dentries */ + 
FI_DO_DEFRAG, /* indicate defragment is running */ + FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ + FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ + FI_HOT_DATA, /* indicate file is hot */ + FI_EXTRA_ATTR, /* indicate file has extra attribute */ + FI_PROJ_INHERIT, /* indicate file inherits projectid */ + FI_PIN_FILE, /* indicate file should not be gced */ + FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ + FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ + FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ + FI_MMAP_FILE, /* indicate file was mmapped */ + FI_MAX, /* max flag, never be used */ +}; + struct f2fs_inode_info { struct inode vfs_inode; /* serve a vfs inode */ unsigned long i_flags; /* keep an inode flags for ioctl */ @@ -688,7 +726,7 @@ struct f2fs_inode_info { umode_t i_acl_mode; /* keep file acl mode temporarily */ /* Use below internally in f2fs*/ - unsigned long flags; /* use to pass per-file flags */ + unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */ struct rw_semaphore i_sem; /* protect fi info */ atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ @@ -2521,43 +2559,6 @@ static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) return flags & F2FS_OTHER_FLMASK; } -/* used for f2fs_inode_info->flags */ -enum { - FI_NEW_INODE, /* indicate newly allocated inode */ - FI_DIRTY_INODE, /* indicate inode is dirty or not */ - FI_AUTO_RECOVER, /* indicate inode is recoverable */ - FI_DIRTY_DIR, /* indicate directory has dirty pages */ - FI_INC_LINK, /* need to increment i_nlink */ - FI_ACL_MODE, /* indicate acl mode */ - FI_NO_ALLOC, /* should not allocate any blocks */ - FI_FREE_NID, /* free allocated nide */ - FI_NO_EXTENT, /* not to use the extent cache */ - FI_INLINE_XATTR, /* used for inline xattr */ - FI_INLINE_DATA, /* used for inline data*/ - FI_INLINE_DENTRY, /* used for inline dentry */ - 
FI_APPEND_WRITE, /* inode has appended data */ - FI_UPDATE_WRITE, /* inode has in-place-update data */ - FI_NEED_IPU, /* used for ipu per file */ - FI_ATOMIC_FILE, /* indicate atomic file */ - FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */ - FI_VOLATILE_FILE, /* indicate volatile file */ - FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ - FI_DROP_CACHE, /* drop dirty page cache */ - FI_DATA_EXIST, /* indicate data exists */ - FI_INLINE_DOTS, /* indicate inline dot dentries */ - FI_DO_DEFRAG, /* indicate defragment is running */ - FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ - FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ - FI_HOT_DATA, /* indicate file is hot */ - FI_EXTRA_ATTR, /* indicate file has extra attribute */ - FI_PROJ_INHERIT, /* indicate file inherits projectid */ - FI_PIN_FILE, /* indicate file should not be gced */ - FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ - FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ - FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ - FI_MMAP_FILE, /* indicate file was mmapped */ -}; - static inline void __mark_inode_dirty_flag(struct inode *inode, int flag, bool set) { @@ -2578,20 +2579,18 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, static inline void set_inode_flag(struct inode *inode, int flag) { - if (!test_bit(flag, &F2FS_I(inode)->flags)) - set_bit(flag, &F2FS_I(inode)->flags); + test_and_set_bit(flag, F2FS_I(inode)->flags); __mark_inode_dirty_flag(inode, flag, true); } static inline int is_inode_flag_set(struct inode *inode, int flag) { - return test_bit(flag, &F2FS_I(inode)->flags); + return test_bit(flag, F2FS_I(inode)->flags); } static inline void clear_inode_flag(struct inode *inode, int flag) { - if (test_bit(flag, &F2FS_I(inode)->flags)) - clear_bit(flag, &F2FS_I(inode)->flags); + test_and_clear_bit(flag, F2FS_I(inode)->flags); __mark_inode_dirty_flag(inode, flag, false); } @@ 
-2682,19 +2681,19 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) struct f2fs_inode_info *fi = F2FS_I(inode); if (ri->i_inline & F2FS_INLINE_XATTR) - set_bit(FI_INLINE_XATTR, &fi->flags); + set_bit(FI_INLINE_XATTR, fi->flags); if (ri->i_inline & F2FS_INLINE_DATA) - set_bit(FI_INLINE_DATA, &fi->flags); + set_bit(FI_INLINE_DATA, fi->flags); if (ri->i_inline & F2FS_INLINE_DENTRY) - set_bit(FI_INLINE_DENTRY, &fi->flags); + set_bit(FI_INLINE_DENTRY, fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) - set_bit(FI_DATA_EXIST, &fi->flags); + set_bit(FI_DATA_EXIST, fi->flags); if (ri->i_inline & F2FS_INLINE_DOTS) - set_bit(FI_INLINE_DOTS, &fi->flags); + set_bit(FI_INLINE_DOTS, fi->flags); if (ri->i_inline & F2FS_EXTRA_ATTR) - set_bit(FI_EXTRA_ATTR, &fi->flags); + set_bit(FI_EXTRA_ATTR, fi->flags); if (ri->i_inline & F2FS_PIN_FILE) - set_bit(FI_PIN_FILE, &fi->flags); + set_bit(FI_PIN_FILE, fi->flags); } static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2c3078f4381b..be6ac33461d1 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -362,7 +362,7 @@ static int do_read_inode(struct inode *inode) fi->i_flags = le32_to_cpu(ri->i_flags); if (S_ISREG(inode->i_mode)) fi->i_flags &= ~F2FS_PROJINHERIT_FL; - fi->flags = 0; + bitmap_zero(fi->flags, FI_MAX); fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; -- GitLab From bd494cafc7edc13d0ac46a6f231cf7dbdd5ddf7e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 24 Mar 2020 14:20:57 +0800 Subject: [PATCH 0771/1278] f2fs: fix to avoid double unlock On image that has verity and compression feature, if compressed pages and non-compressed pages are mixed in one bio, we may double unlock non-compressed page in below flow: - f2fs_post_read_work - f2fs_decompress_work - f2fs_decompress_bio - __read_end_io - unlock_page - fsverity_enqueue_verify_work - f2fs_verity_work - 
f2fs_verify_bio - unlock_page So it should skip handling non-compressed page in f2fs_decompress_work() if verity is on. Besides, add missing dec_page_count() in f2fs_verify_bio(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0727f45d0d6e..88834278fa37 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -140,6 +140,8 @@ static void __read_end_io(struct bio *bio, bool compr, bool verity) f2fs_decompress_pages(bio, page, verity); continue; } + if (verity) + continue; #endif /* PG_error was set if any post_read step failed */ @@ -217,6 +219,7 @@ static void f2fs_verify_bio(struct bio *bio) ClearPageUptodate(page); ClearPageError(page); unlock: + dec_page_count(F2FS_P_SB(page), __read_io_type(page)); unlock_page(page); } } -- GitLab From 581eabd7d67317e0910aad84e464488e1d1025c4 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 28 Mar 2020 19:27:36 +0800 Subject: [PATCH 0772/1278] f2fs: xattr.h: Make stub helpers inline Fix gcc warnings: In file included from fs/f2fs/dir.c:15:0: fs/f2fs/xattr.h:157:13: warning: 'f2fs_destroy_xattr_caches' defined but not used [-Wunused-function] static void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } ^~~~~~~~~~~~~~~~~~~~~~~~~ fs/f2fs/xattr.h:156:12: warning: 'f2fs_init_xattr_caches' defined but not used [-Wunused-function] static int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } Reported-by: Hulk Robot Fixes: a999150f4fe3 ("f2fs: use kmem_cache pool during inline xattr lookups") Signed-off-by: YueHaibing Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index e471be77f8f0..938fcd20565d 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -153,8 +153,8 @@ static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, { return -EOPNOTSUPP; } -static int 
f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } -static void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } +static inline int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } +static inline void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } #endif #ifdef CONFIG_F2FS_FS_SECURITY -- GitLab From a77aca10eafc9ac736f8b4878c8cf57d4ecea64d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 30 Mar 2020 18:03:15 +0800 Subject: [PATCH 0773/1278] f2fs: fix to use f2fs_readpage_limit() in f2fs_read_multi_pages() Multipage read flow should consider fsverity, so it needs to use f2fs_readpage_limit() instead of i_size_read() to check EOF condition. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 88834278fa37..6ffa83937ca8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2049,7 +2049,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); - last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; + last_block_in_file = (f2fs_readpage_limit(inode) + + blocksize - 1) >> blkbits; /* get rid of pages beyond EOF */ for (i = 0; i < cc->cluster_size; i++) { -- GitLab From 11f45fe08e05f9eee102b6679c635de1d82d9fe3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 28 Mar 2020 17:33:23 +0800 Subject: [PATCH 0774/1278] f2fs: clean up {cic,dic}.ref handling {cic,dic}.ref should be initialized to number of compressed pages, let's initialize it directly rather than doing w/ f2fs_set_compressed_page(). 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 60a4de93551c..771ef2817809 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -57,7 +57,7 @@ bool f2fs_is_compressed_page(struct page *page) } static void f2fs_set_compressed_page(struct page *page, - struct inode *inode, pgoff_t index, void *data, refcount_t *r) + struct inode *inode, pgoff_t index, void *data) { SetPagePrivate(page); set_page_private(page, (unsigned long)data); @@ -65,8 +65,6 @@ static void f2fs_set_compressed_page(struct page *page, /* i_crypto_info and iv index */ page->index = index; page->mapping = inode->i_mapping; - if (r) - refcount_inc(r); } static void f2fs_put_compressed_page(struct page *page) @@ -483,7 +481,7 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) vunmap(dic->rbuf); out_free_dic: if (verity) - refcount_add(dic->nr_cpages - 1, &dic->ref); + refcount_set(&dic->ref, dic->nr_cpages); if (!verity) f2fs_decompress_end_io(dic->rpages, dic->cluster_size, ret, false); @@ -839,7 +837,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, cic->magic = F2FS_COMPRESSED_PAGE_MAGIC; cic->inode = inode; - refcount_set(&cic->ref, 1); + refcount_set(&cic->ref, cc->nr_cpages); cic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) << cc->log_cluster_size, GFP_NOFS); if (!cic->rpages) @@ -849,8 +847,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, for (i = 0; i < cc->nr_cpages; i++) { f2fs_set_compressed_page(cc->cpages[i], inode, - cc->rpages[i + 1]->index, - cic, i ? 
&cic->ref : NULL); + cc->rpages[i + 1]->index, cic); fio.compressed_page = cc->cpages[i]; if (fio.encrypted) { fio.page = cc->rpages[i + 1]; @@ -1100,7 +1097,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; dic->inode = cc->inode; - refcount_set(&dic->ref, 1); + refcount_set(&dic->ref, cc->nr_cpages); dic->cluster_idx = cc->cluster_idx; dic->cluster_size = cc->cluster_size; dic->log_cluster_size = cc->log_cluster_size; @@ -1124,8 +1121,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) goto out_free; f2fs_set_compressed_page(page, cc->inode, - start_idx + i + 1, - dic, i ? &dic->ref : NULL); + start_idx + i + 1, dic); dic->cpages[i] = page; } -- GitLab From 64fb0255d82972f7f5fc85b2a502c599703db88a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Mar 2020 20:50:05 +0800 Subject: [PATCH 0775/1278] f2fs: change default compression algorithm Use LZ4 as default compression algorithm, as compared to LZO, it shows almost the same compression ratio and much better decompression speed. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f0fa4d752497..3218f850231c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1578,7 +1578,7 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).test_dummy_encryption = false; F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); - F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; F2FS_OPTION(sbi).compress_ext_cnt = 0; F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; -- GitLab From 144dba624883437b88919db972e571cc0836300d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 3 Mar 2020 16:57:06 +0800 Subject: [PATCH 0776/1278] f2fs: compress: fix to call missing destroy_compress_ctx() Otherwise, it will cause memory leak. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 771ef2817809..4340b254256d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -401,6 +401,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc) cc->cpages[i] = NULL; } + cops->destroy_compress_ctx(cc); + cc->nr_cpages = nr_cpages; trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, -- GitLab From 0a19447934863596099b9414b00925b9faa0a373 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 3 Mar 2020 16:57:07 +0800 Subject: [PATCH 0777/1278] f2fs: compress: add .{init,destroy}_decompress_ctx callback Add below two callback interfaces in struct f2fs_compress_ops: int (*init_decompress_ctx)(struct decompress_io_ctx *dic); void (*destroy_decompress_ctx)(struct decompress_io_ctx *dic); Which will be used by zstd compress algorithm later. 
In addition, this patch adds callback function pointer check, so that specified algorithm can avoid defining unneeded functions. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 4340b254256d..4a946ba64912 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -25,6 +25,8 @@ struct f2fs_compress_ops { int (*init_compress_ctx)(struct compress_ctx *cc); void (*destroy_compress_ctx)(struct compress_ctx *cc); int (*compress_pages)(struct compress_ctx *cc); + int (*init_decompress_ctx)(struct decompress_io_ctx *dic); + void (*destroy_decompress_ctx)(struct decompress_io_ctx *dic); int (*decompress_pages)(struct decompress_io_ctx *dic); }; @@ -337,9 +339,11 @@ static int f2fs_compress_pages(struct compress_ctx *cc) trace_f2fs_compress_pages_start(cc->inode, cc->cluster_idx, cc->cluster_size, fi->i_compress_algorithm); - ret = cops->init_compress_ctx(cc); - if (ret) - goto out; + if (cops->init_compress_ctx) { + ret = cops->init_compress_ctx(cc); + if (ret) + goto out; + } max_len = COMPRESS_HEADER_SIZE + cc->clen; cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE); @@ -401,7 +405,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc) cc->cpages[i] = NULL; } - cops->destroy_compress_ctx(cc); + if (cops->destroy_compress_ctx) + cops->destroy_compress_ctx(cc); cc->nr_cpages = nr_cpages; @@ -421,7 +426,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc) kfree(cc->cpages); cc->cpages = NULL; destroy_compress_ctx: - cops->destroy_compress_ctx(cc); + if (cops->destroy_compress_ctx) + cops->destroy_compress_ctx(cc); out: trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, cc->clen, ret); @@ -455,10 +461,16 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) goto out_free_dic; } + if (cops->init_decompress_ctx) { + ret = cops->init_decompress_ctx(dic); + if (ret) + 
goto out_free_dic; + } + dic->rbuf = vmap(dic->tpages, dic->cluster_size, VM_MAP, PAGE_KERNEL); if (!dic->rbuf) { ret = -ENOMEM; - goto out_free_dic; + goto destroy_decompress_ctx; } dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO); @@ -481,6 +493,9 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) vunmap(dic->cbuf); out_vunmap_rbuf: vunmap(dic->rbuf); +destroy_decompress_ctx: + if (cops->destroy_decompress_ctx) + cops->destroy_decompress_ctx(dic); out_free_dic: if (verity) refcount_set(&dic->ref, dic->nr_cpages); -- GitLab From 4e224dfefbf215e5040c8088d24294f50fa389e8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 3 Mar 2020 17:46:02 +0800 Subject: [PATCH 0778/1278] f2fs: compress: support zstd compress algorithm Add zstd compress algorithm support, use "compress_algorithm=zstd" mountoption to enable it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 4 +- fs/f2fs/Kconfig | 9 ++ fs/f2fs/compress.c | 165 +++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 5 + fs/f2fs/super.c | 7 ++ include/trace/events/f2fs.h | 3 +- 6 files changed, 190 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index b4de21867b96..1ca9556ec849 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -235,8 +235,8 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "en hide up to all remaining free space. The actual space that would be unusable can be viewed at /sys/fs/f2fs//unusable This space is reclaimed once checkpoint=enable. -compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo" - and "lz4" algorithm. +compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo", + "lz4" and "zstd" algorithm. 
compress_log_size=%u Support configuring compress cluster size, the size will be 4KB * (1 << %u), 16KB is minimum size, also it's default size. diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 8f2019d17f34..1940a6574b66 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -130,3 +130,12 @@ config F2FS_FS_LZ4 default y help Support LZ4 compress algorithm, if unsure, say Y. + +config F2FS_FS_ZSTD + bool "ZSTD compression support" + depends on F2FS_FS_COMPRESSION + select ZSTD_COMPRESS + select ZSTD_DECOMPRESS + default y + help + Support ZSTD compress algorithm, if unsure, say Y. diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 4a946ba64912..a649467db6b4 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -296,6 +297,165 @@ static const struct f2fs_compress_ops f2fs_lz4_ops = { }; #endif +#ifdef CONFIG_F2FS_FS_ZSTD +#define F2FS_ZSTD_DEFAULT_CLEVEL 1 + +static int zstd_init_compress_ctx(struct compress_ctx *cc) +{ + ZSTD_parameters params; + ZSTD_CStream *stream; + void *workspace; + unsigned int workspace_size; + + params = ZSTD_getParams(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen, 0); + workspace_size = ZSTD_CStreamWorkspaceBound(params.cParams); + + workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode), + workspace_size, GFP_NOFS); + if (!workspace) + return -ENOMEM; + + stream = ZSTD_initCStream(params, 0, workspace, workspace_size); + if (!stream) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initCStream failed\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + __func__); + kvfree(workspace); + return -EIO; + } + + cc->private = workspace; + cc->private2 = stream; + + cc->clen = cc->rlen - PAGE_SIZE - COMPRESS_HEADER_SIZE; + return 0; +} + +static void zstd_destroy_compress_ctx(struct compress_ctx *cc) +{ + kvfree(cc->private); + cc->private = NULL; + cc->private2 = NULL; +} + +static int zstd_compress_pages(struct compress_ctx *cc) +{ + ZSTD_CStream *stream = 
cc->private2; + ZSTD_inBuffer inbuf; + ZSTD_outBuffer outbuf; + int src_size = cc->rlen; + int dst_size = src_size - PAGE_SIZE - COMPRESS_HEADER_SIZE; + int ret; + + inbuf.pos = 0; + inbuf.src = cc->rbuf; + inbuf.size = src_size; + + outbuf.pos = 0; + outbuf.dst = cc->cbuf->cdata; + outbuf.size = dst_size; + + ret = ZSTD_compressStream(stream, &outbuf, &inbuf); + if (ZSTD_isError(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + __func__, ZSTD_getErrorCode(ret)); + return -EIO; + } + + ret = ZSTD_endStream(stream, &outbuf); + if (ZSTD_isError(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_endStream returned %d\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + __func__, ZSTD_getErrorCode(ret)); + return -EIO; + } + + cc->clen = outbuf.pos; + return 0; +} + +static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) +{ + ZSTD_DStream *stream; + void *workspace; + unsigned int workspace_size; + + workspace_size = ZSTD_DStreamWorkspaceBound(MAX_COMPRESS_WINDOW_SIZE); + + workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode), + workspace_size, GFP_NOFS); + if (!workspace) + return -ENOMEM; + + stream = ZSTD_initDStream(MAX_COMPRESS_WINDOW_SIZE, + workspace, workspace_size); + if (!stream) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initDStream failed\n", + KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, + __func__); + kvfree(workspace); + return -EIO; + } + + dic->private = workspace; + dic->private2 = stream; + + return 0; +} + +static void zstd_destroy_decompress_ctx(struct decompress_io_ctx *dic) +{ + kvfree(dic->private); + dic->private = NULL; + dic->private2 = NULL; +} + +static int zstd_decompress_pages(struct decompress_io_ctx *dic) +{ + ZSTD_DStream *stream = dic->private2; + ZSTD_inBuffer inbuf; + ZSTD_outBuffer outbuf; + int ret; + + inbuf.pos = 0; + inbuf.src = dic->cbuf->cdata; + inbuf.size = dic->clen; + + outbuf.pos = 0; + outbuf.dst = dic->rbuf; + outbuf.size = 
dic->rlen; + + ret = ZSTD_decompressStream(stream, &outbuf, &inbuf); + if (ZSTD_isError(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n", + KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, + __func__, ZSTD_getErrorCode(ret)); + return -EIO; + } + + if (dic->rlen != outbuf.pos) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD invalid rlen:%zu, " + "expected:%lu\n", KERN_ERR, + F2FS_I_SB(dic->inode)->sb->s_id, + __func__, dic->rlen, + PAGE_SIZE << dic->log_cluster_size); + return -EIO; + } + + return 0; +} + +static const struct f2fs_compress_ops f2fs_zstd_ops = { + .init_compress_ctx = zstd_init_compress_ctx, + .destroy_compress_ctx = zstd_destroy_compress_ctx, + .compress_pages = zstd_compress_pages, + .init_decompress_ctx = zstd_init_decompress_ctx, + .destroy_decompress_ctx = zstd_destroy_decompress_ctx, + .decompress_pages = zstd_decompress_pages, +}; +#endif + static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { #ifdef CONFIG_F2FS_FS_LZO &f2fs_lzo_ops, @@ -307,6 +467,11 @@ static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { #else NULL, #endif +#ifdef CONFIG_F2FS_FS_ZSTD + &f2fs_zstd_ops, +#else + NULL, +#endif }; bool f2fs_is_compress_backend_ready(struct inode *inode) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6bf57a954964..479dd1f69203 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1265,6 +1265,7 @@ enum fsync_mode { enum compress_algorithm_type { COMPRESS_LZO, COMPRESS_LZ4, + COMPRESS_ZSTD, COMPRESS_MAX, }; @@ -1294,6 +1295,7 @@ struct compress_ctx { size_t rlen; /* valid data length in rbuf */ size_t clen; /* valid data length in cbuf */ void *private; /* payload buffer for specified compression algorithm */ + void *private2; /* extra payload buffer */ }; /* compress context for write IO path */ @@ -1323,11 +1325,14 @@ struct decompress_io_ctx { size_t clen; /* valid data length in cbuf */ refcount_t ref; /* referrence count of compressed page */ bool failed; /* indicate IO error 
during decompression */ + void *private; /* payload buffer for specified decompression algorithm */ + void *private2; /* extra payload buffer */ }; #define NULL_CLUSTER ((unsigned int)(~0)) #define MIN_COMPRESS_LOG_SIZE 2 #define MAX_COMPRESS_LOG_SIZE 8 +#define MAX_COMPRESS_WINDOW_SIZE ((PAGE_SIZE) << MAX_COMPRESS_LOG_SIZE) struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3218f850231c..1f69ae192c0a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -829,6 +829,10 @@ static int parse_options(struct super_block *sb, char *options) !strcmp(name, "lz4")) { F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; + } else if (strlen(name) == 4 && + !strcmp(name, "zstd")) { + F2FS_OPTION(sbi).compress_algorithm = + COMPRESS_ZSTD; } else { kfree(name); return -EINVAL; @@ -1427,6 +1431,9 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, case COMPRESS_LZ4: algtype = "lz4"; break; + case COMPRESS_ZSTD: + algtype = "zstd"; + break; } seq_printf(seq, ",compress_algorithm=%s", algtype); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index ed0d788495fc..a4253b003122 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -163,7 +163,8 @@ TRACE_DEFINE_ENUM(CP_PAUSE); #define show_compress_algorithm(type) \ __print_symbolic(type, \ { COMPRESS_LZO, "LZO" }, \ - { COMPRESS_LZ4, "LZ4" }) + { COMPRESS_LZ4, "LZ4" }, \ + { COMPRESS_ZSTD, "ZSTD" }) struct f2fs_sb_info; struct f2fs_io_info; -- GitLab From cea63cd68ae5a35f9f58f9d7672a85f7227bd9c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 25 Mar 2020 17:25:07 +0800 Subject: [PATCH 0779/1278] f2fs: clean up dic->tpages assignment Just cleanup, no logic change. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index a649467db6b4..2f5648abece8 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1313,20 +1313,16 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) goto out_free; for (i = 0; i < dic->cluster_size; i++) { - if (cc->rpages[i]) + if (cc->rpages[i]) { + dic->tpages[i] = cc->rpages[i]; continue; + } dic->tpages[i] = f2fs_grab_page(); if (!dic->tpages[i]) goto out_free; } - for (i = 0; i < dic->cluster_size; i++) { - if (dic->tpages[i]) - continue; - dic->tpages[i] = cc->rpages[i]; - } - return dic; out_free: -- GitLab From 4951eedff03316adc828f5e816d532767d92e4ce Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 25 Mar 2020 10:22:09 +0800 Subject: [PATCH 0780/1278] f2fs: show compression in statx fstest reports the message below when compression is on: generic/424 1s ... - output mismatch --- tests/generic/424.out +++ results/generic/424.out.bad @@ -1,2 +1,26 @@ QA output created by 424 +[!] Attribute compressed should be set +Failed +stat_test failed +[!] Attribute compressed should be set +Failed +stat_test failed We failed to set STATX_ATTR_COMPRESSED on compressed inodes in getattr(); fix it. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index fefcd7f2a023..55068d7bc0c8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -798,6 +798,8 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, } flags = fi->i_flags; + if (flags & F2FS_COMPR_FL) + stat->attributes |= STATX_ATTR_COMPRESSED; if (flags & F2FS_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; if (IS_ENCRYPTED(inode)) @@ -809,7 +811,8 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, if (IS_VERITY(inode)) stat->attributes |= STATX_ATTR_VERITY; - stat->attributes_mask |= (STATX_ATTR_APPEND | + stat->attributes_mask |= (STATX_ATTR_COMPRESSED | + STATX_ATTR_APPEND | STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP | -- GitLab From b2cdebbed3238d55a9215faaf0b56ab864d9eb68 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 26 Mar 2020 17:42:26 +0800 Subject: [PATCH 0781/1278] f2fs: fix to verify tpage before releasing in f2fs_free_dic() In the error path below, tpages[i] could be NULL; check it before releasing it. 
- f2fs_read_multi_pages - f2fs_alloc_dic - f2fs_free_dic Fixes: 61fbae2b2b12 ("f2fs: fix to avoid NULL pointer dereference") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 2f5648abece8..1bf22a430640 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1338,6 +1338,8 @@ void f2fs_free_dic(struct decompress_io_ctx *dic) for (i = 0; i < dic->cluster_size; i++) { if (dic->rpages[i]) continue; + if (!dic->tpages[i]) + continue; unlock_page(dic->tpages[i]); put_page(dic->tpages[i]); } -- GitLab From d00929bc1bab53f574b7cc6204de938f7448d2c1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 26 Mar 2020 17:43:56 +0800 Subject: [PATCH 0782/1278] f2fs: switch discard_policy.timeout to bool type While checking discard timeout, we use specified type UMOUNT_DISCARD_TIMEOUT, so just replace dpolicy.timeout with it, and switch dpolicy.timeout to bool type. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 479dd1f69203..3afa8b5091e8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -331,8 +331,8 @@ struct discard_policy { bool io_aware; /* issue discard in idle time */ bool sync; /* submit discard with REQ_SYNC flag */ bool ordered; /* issue discard by lba order */ + bool timeout; /* discard timeout for put_super */ unsigned int granularity; /* discard granularity */ - int timeout; /* discard timeout for put_super */ }; struct discard_cmd_control { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1065b19afe17..2158f27fc701 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1080,7 +1080,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; dpolicy->io_aware_gran = MAX_PLIST_NUM; - dpolicy->timeout = 0; + 
dpolicy->timeout = false; if (discard_type == DPOLICY_BG) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; @@ -1105,6 +1105,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->io_aware = false; /* we need to issue all to keep CP_TRIMMED_FLAG */ dpolicy->granularity = 1; + dpolicy->timeout = true; } } @@ -1473,12 +1474,12 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, int i, issued = 0; bool io_interrupted = false; - if (dpolicy->timeout != 0) - f2fs_update_time(sbi, dpolicy->timeout); + if (dpolicy->timeout) + f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT); for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { - if (dpolicy->timeout != 0 && - f2fs_time_over(sbi, dpolicy->timeout)) + if (dpolicy->timeout && + f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT)) break; if (i + 1 < dpolicy->granularity) @@ -1499,8 +1500,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); - if (dpolicy->timeout != 0 && - f2fs_time_over(sbi, dpolicy->timeout)) + if (dpolicy->timeout && + f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT)) break; if (dpolicy->io_aware && i < dpolicy->io_aware_gran && @@ -1679,7 +1680,6 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); - dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT; __issue_discard_cmd(sbi, &dpolicy); dropped = __drop_discard_cmd(sbi); -- GitLab From b74d1a0692215c9a0f7acb8bdc7bd792b5cdd62c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Mar 2020 18:29:00 +0800 Subject: [PATCH 0783/1278] f2fs: add missing CONFIG_F2FS_FS_COMPRESSION Compression sysfs node should not be shown if f2fs module disables compression feature. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 25d6fd82833a..58a213a878e5 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -580,7 +580,9 @@ F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY); #endif F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD); +#ifdef CONFIG_F2FS_FS_COMPRESSION F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION); +#endif #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -661,7 +663,9 @@ static struct attribute *f2fs_feat_attrs[] = { #endif ATTR_LIST(sb_checksum), ATTR_LIST(casefold), +#ifdef CONFIG_F2FS_FS_COMPRESSION ATTR_LIST(compression), +#endif NULL, }; -- GitLab From 5c7d7f55f76eba588a223c9feb2f0b63f0e1c2fb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Mar 2020 18:29:51 +0800 Subject: [PATCH 0784/1278] f2fs: fix to disable compression on directory It needs to call f2fs_disable_compressed_file() to disable compression on directory. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 10 ++++++---- fs/f2fs/file.c | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3afa8b5091e8..58b6341d4971 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3846,10 +3846,12 @@ static inline u64 f2fs_disable_compressed_file(struct inode *inode) if (!f2fs_compressed_file(inode)) return 0; - if (get_dirty_pages(inode)) - return 1; - if (fi->i_compr_blocks) - return fi->i_compr_blocks; + if (S_ISREG(inode->i_mode)) { + if (get_dirty_pages(inode)) + return 1; + if (fi->i_compr_blocks) + return fi->i_compr_blocks; + } fi->i_flags &= ~F2FS_COMPR_FL; stat_dec_compr_inode(inode); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 55068d7bc0c8..5083321e56c6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1817,7 +1817,7 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) } if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { - if (S_ISREG(inode->i_mode) && (masked_flags & F2FS_COMPR_FL)) { + if (masked_flags & F2FS_COMPR_FL) { if (f2fs_disable_compressed_file(inode)) return -EINVAL; } -- GitLab From 8724f837e9bb0ad5a361f965ebb25e4711b04a88 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Mar 2020 18:29:52 +0800 Subject: [PATCH 0785/1278] f2fs: keep inline_data when compression conversion We can keep compressed inode's data inline before inline conversion. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5083321e56c6..80750bab5fef 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1824,11 +1824,6 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) if (iflags & F2FS_NOCOMP_FL) return -EINVAL; if (iflags & F2FS_COMPR_FL) { - int err = f2fs_convert_inline_inode(inode); - - if (err) - return err; - if (!f2fs_may_compress(inode)) return -EINVAL; -- GitLab From 5578beabe36cc11d2668a0af668ed1ab49124e3a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 3 Apr 2020 12:06:10 -0700 Subject: [PATCH 0786/1278] ANDROID: block: make blk_crypto_start_using_mode() properly check for support If blk-crypto-fallback is needed but is disabled by kconfig, make blk_crypto_start_using_mode() return an error rather than succeeding. Use ENOPKG, which matches the error code used by fscrypt when crypto API support is missing with fs-layer encryption. Also, if blk-crypto-fallback is needed but the algorithm is missing from the kernel's crypto API, change the error code from ENOENT to ENOPKG. This is needed for VtsKernelEncryptionTest to pass on some devices. Bug: 137270441 Bug: 151100202 Test: 'atest vts_kernel_encryption_test' on Pixel 4 with the inline crypto patches backported, and also on Cuttlefish. 
Change-Id: Iedf00ca8e48c74a5d4c40b12712f38738a04ef11 Signed-off-by: Eric Biggers --- block/blk-crypto-fallback.c | 36 +++++++++++------------------------- block/blk-crypto-internal.h | 9 +++++++++ block/blk-crypto.c | 25 +++++++++++++++++++++++++ include/linux/blk-crypto.h | 16 ++++------------ 4 files changed, 49 insertions(+), 37 deletions(-) diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index 195b04b5df0d..ba452cbafd44 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -487,21 +487,13 @@ bool blk_crypto_queue_decrypt_bio(struct bio *bio) return false; } -/** - * blk_crypto_start_using_mode() - Start using a crypto algorithm on a device - * @mode_num: the blk_crypto_mode we want to allocate ciphers for. - * @data_unit_size: the data unit size that will be used - * @q: the request queue for the device - * - * Upper layers must call this function to ensure that a the crypto API fallback - * has transforms for this algorithm, if they become necessary. - * - * Return: 0 on success and -err on error. +/* + * Prepare blk-crypto-fallback for the specified crypto mode. + * Returns -ENOPKG if the needed crypto API support is missing. */ -int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, - unsigned int data_unit_size, - struct request_queue *q) +int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num) { + const char *cipher_str = blk_crypto_modes[mode_num].cipher_str; struct blk_crypto_keyslot *slotp; unsigned int i; int err = 0; @@ -514,25 +506,20 @@ int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, if (likely(smp_load_acquire(&tfms_inited[mode_num]))) return 0; - /* - * If the keyslot manager of the request queue supports this - * crypto mode, then we don't need to allocate this mode. 
- */ - if (keyslot_manager_crypto_mode_supported(q->ksm, mode_num, - data_unit_size)) - return 0; - mutex_lock(&tfms_init_lock); if (likely(tfms_inited[mode_num])) goto out; for (i = 0; i < blk_crypto_num_keyslots; i++) { slotp = &blk_crypto_keyslots[i]; - slotp->tfms[mode_num] = crypto_alloc_skcipher( - blk_crypto_modes[mode_num].cipher_str, - 0, 0); + slotp->tfms[mode_num] = crypto_alloc_skcipher(cipher_str, 0, 0); if (IS_ERR(slotp->tfms[mode_num])) { err = PTR_ERR(slotp->tfms[mode_num]); + if (err == -ENOENT) { + pr_warn_once("Missing crypto API support for \"%s\"\n", + cipher_str); + err = -ENOPKG; + } slotp->tfms[mode_num] = NULL; goto out_free_tfms; } @@ -558,7 +545,6 @@ int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, mutex_unlock(&tfms_init_lock); return err; } -EXPORT_SYMBOL_GPL(blk_crypto_start_using_mode); int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key) { diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h index 40d826b743da..4da998c803f2 100644 --- a/block/blk-crypto-internal.h +++ b/block/blk-crypto-internal.h @@ -19,6 +19,8 @@ extern const struct blk_crypto_mode blk_crypto_modes[]; #ifdef CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK +int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num); + int blk_crypto_fallback_submit_bio(struct bio **bio_ptr); bool blk_crypto_queue_decrypt_bio(struct bio *bio); @@ -29,6 +31,13 @@ bool bio_crypt_fallback_crypted(const struct bio_crypt_ctx *bc); #else /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */ +static inline int +blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num) +{ + pr_warn_once("crypto API fallback is disabled\n"); + return -ENOPKG; +} + static inline bool bio_crypt_fallback_crypted(const struct bio_crypt_ctx *bc) { return false; diff --git a/block/blk-crypto.c b/block/blk-crypto.c index 88df1c0e7e5f..7bf2ff86d277 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -232,6 +232,31 @@ int blk_crypto_init_key(struct 
blk_crypto_key *blk_key, } EXPORT_SYMBOL_GPL(blk_crypto_init_key); +/** + * blk_crypto_start_using_mode() - Start using blk-crypto on a device + * @crypto_mode: the crypto mode that will be used + * @data_unit_size: the data unit size that will be used + * @q: the request queue for the device + * + * Upper layers must call this function to ensure that either the hardware + * supports the needed crypto settings, or the crypto API fallback has + * transforms for the needed mode allocated and ready to go. + * + * Return: 0 on success; -ENOPKG if the hardware doesn't support the crypto + * settings and blk-crypto-fallback is either disabled or the needed + * algorithm is disabled in the crypto API; or another -errno code. + */ +int blk_crypto_start_using_mode(enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size, + struct request_queue *q) +{ + if (keyslot_manager_crypto_mode_supported(q->ksm, crypto_mode, + data_unit_size)) + return 0; + return blk_crypto_fallback_start_using_mode(crypto_mode); +} +EXPORT_SYMBOL_GPL(blk_crypto_start_using_mode); + /** * blk_crypto_evict_key() - Evict a key from any inline encryption hardware * it may have been programmed into diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 913b367d42bd..3b6cb9f7b888 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -22,6 +22,10 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, enum blk_crypto_mode_num crypto_mode, unsigned int data_unit_size); +int blk_crypto_start_using_mode(enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size, + struct request_queue *q); + int blk_crypto_evict_key(struct request_queue *q, const struct blk_crypto_key *key); @@ -41,22 +45,10 @@ static inline bool blk_crypto_endio(struct bio *bio) #ifdef CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK -int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, - unsigned int data_unit_size, - struct request_queue *q); - int 
blk_crypto_fallback_init(void); #else /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */ -static inline int -blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, - unsigned int data_unit_size, - struct request_queue *q) -{ - return 0; -} - static inline int blk_crypto_fallback_init(void) { return 0; -- GitLab From a14fa7bc5f5a9a02f9ea201c3792837240fea4ec Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 3 Apr 2020 12:06:11 -0700 Subject: [PATCH 0787/1278] ANDROID: block: require drivers to declare supported crypto key type(s) We need a way to tell which type of keys the inline crypto hardware supports (standard, wrapped, or both), so that fallbacks can be used when needed (either blk-crypto-fallback, or fscrypt fs-layer crypto). We can't simply assume that keyslot_mgmt_ll_ops::derive_raw_secret == NULL means only standard keys are supported and that keyslot_mgmt_ll_ops::derive_raw_secret != NULL means that only wrapped keys are supported, because device-mapper devices always implement this method. Also, hardware might support both types of keys. Therefore, add a field keyslot_manager::features which contains a bitmask of flags which indicate the supported types of keys. Drivers will need to fill this in. This patch makes the UFS standard crypto code set BLK_CRYPTO_FEATURE_STANDARD_KEYS, but UFS variant drivers may need to set BLK_CRYPTO_FEATURE_WRAPPED_KEYS instead. Then, make keyslot_manager_crypto_mode_supported() take the key type into account. Bug: 137270441 Bug: 151100202 Test: 'atest vts_kernel_encryption_test' on Pixel 4 with the inline crypto patches backported, and also on Cuttlefish. 
Change-Id: Ied846c2767c1fd2f438792dcfd3649157e68b005 Signed-off-by: Eric Biggers --- block/blk-crypto-fallback.c | 8 +++++--- block/blk-crypto.c | 15 ++++++++++++--- block/keyslot-manager.c | 30 ++++++++++++++++++++++++------ drivers/md/dm-default-key.c | 1 + drivers/md/dm.c | 9 +++++++-- drivers/scsi/ufs/ufshcd-crypto.c | 4 +++- fs/crypto/inline_crypt.c | 1 + include/linux/blk-crypto.h | 1 + include/linux/keyslot-manager.h | 14 +++++++++++++- 9 files changed, 67 insertions(+), 16 deletions(-) diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index ba452cbafd44..ad83e1077ba3 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -600,9 +600,11 @@ int __init blk_crypto_fallback_init(void) crypto_mode_supported[i] = 0xFFFFFFFF; crypto_mode_supported[BLK_ENCRYPTION_MODE_INVALID] = 0; - blk_crypto_ksm = keyslot_manager_create(NULL, blk_crypto_num_keyslots, - &blk_crypto_ksm_ll_ops, - crypto_mode_supported, NULL); + blk_crypto_ksm = keyslot_manager_create( + NULL, blk_crypto_num_keyslots, + &blk_crypto_ksm_ll_ops, + BLK_CRYPTO_FEATURE_STANDARD_KEYS, + crypto_mode_supported, NULL); if (!blk_crypto_ksm) return -ENOMEM; diff --git a/block/blk-crypto.c b/block/blk-crypto.c index 7bf2ff86d277..f56bbec1132f 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -109,7 +109,8 @@ int blk_crypto_submit_bio(struct bio **bio_ptr) /* Get device keyslot if supported */ if (keyslot_manager_crypto_mode_supported(q->ksm, bc->bc_key->crypto_mode, - bc->bc_key->data_unit_size)) { + bc->bc_key->data_unit_size, + bc->bc_key->is_hw_wrapped)) { err = bio_crypt_ctx_acquire_keyslot(bc, q->ksm); if (!err) return 0; @@ -236,6 +237,7 @@ EXPORT_SYMBOL_GPL(blk_crypto_init_key); * blk_crypto_start_using_mode() - Start using blk-crypto on a device * @crypto_mode: the crypto mode that will be used * @data_unit_size: the data unit size that will be used + * @is_hw_wrapped_key: whether the key will be hardware-wrapped * @q: the request queue for the 
device * * Upper layers must call this function to ensure that either the hardware @@ -248,11 +250,17 @@ EXPORT_SYMBOL_GPL(blk_crypto_init_key); */ int blk_crypto_start_using_mode(enum blk_crypto_mode_num crypto_mode, unsigned int data_unit_size, + bool is_hw_wrapped_key, struct request_queue *q) { if (keyslot_manager_crypto_mode_supported(q->ksm, crypto_mode, - data_unit_size)) + data_unit_size, + is_hw_wrapped_key)) return 0; + if (is_hw_wrapped_key) { + pr_warn_once("hardware doesn't support wrapped keys\n"); + return -EOPNOTSUPP; + } return blk_crypto_fallback_start_using_mode(crypto_mode); } EXPORT_SYMBOL_GPL(blk_crypto_start_using_mode); @@ -277,7 +285,8 @@ int blk_crypto_evict_key(struct request_queue *q, { if (q->ksm && keyslot_manager_crypto_mode_supported(q->ksm, key->crypto_mode, - key->data_unit_size)) + key->data_unit_size, + key->is_hw_wrapped)) return keyslot_manager_evict_key(q->ksm, key); return blk_crypto_fallback_evict_key(key); diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c index 0b6dd460645e..fe7dff3cae79 100644 --- a/block/keyslot-manager.c +++ b/block/keyslot-manager.c @@ -44,6 +44,7 @@ struct keyslot { struct keyslot_manager { unsigned int num_slots; struct keyslot_mgmt_ll_ops ksm_ll_ops; + unsigned int features; unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX]; void *ll_priv_data; @@ -136,6 +137,8 @@ static inline void keyslot_manager_hw_exit(struct keyslot_manager *ksm) * @ksm_ll_ops: The struct keyslot_mgmt_ll_ops for the device that this keyslot * manager will use to perform operations like programming and * evicting keys. + * @features: The supported features as a bitmask of BLK_CRYPTO_FEATURE_* flags. + * Most drivers should set BLK_CRYPTO_FEATURE_STANDARD_KEYS here. * @crypto_mode_supported: Array of size BLK_ENCRYPTION_MODE_MAX of * bitmasks that represents whether a crypto mode * and data unit size are supported. 
The i'th bit @@ -155,6 +158,7 @@ struct keyslot_manager *keyslot_manager_create( struct device *dev, unsigned int num_slots, const struct keyslot_mgmt_ll_ops *ksm_ll_ops, + unsigned int features, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data) { @@ -176,6 +180,7 @@ struct keyslot_manager *keyslot_manager_create( ksm->num_slots = num_slots; ksm->ksm_ll_ops = *ksm_ll_ops; + ksm->features = features; memcpy(ksm->crypto_mode_supported, crypto_mode_supported, sizeof(ksm->crypto_mode_supported)); ksm->ll_priv_data = ll_priv_data; @@ -382,23 +387,24 @@ void keyslot_manager_put_slot(struct keyslot_manager *ksm, unsigned int slot) } /** - * keyslot_manager_crypto_mode_supported() - Find out if a crypto_mode/data - * unit size combination is supported - * by a ksm. + * keyslot_manager_crypto_mode_supported() - Find out if a crypto_mode / + * data unit size / is_hw_wrapped_key + * combination is supported by a ksm. * @ksm: The keyslot manager to check * @crypto_mode: The crypto mode to check for. * @data_unit_size: The data_unit_size for the mode. + * @is_hw_wrapped_key: Whether a hardware-wrapped key will be used. * * Calls and returns the result of the crypto_mode_supported function specified * by the ksm. * * Context: Process context. - * Return: Whether or not this ksm supports the specified crypto_mode/ - * data_unit_size combo. + * Return: Whether or not this ksm supports the specified crypto settings. 
*/ bool keyslot_manager_crypto_mode_supported(struct keyslot_manager *ksm, enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size) + unsigned int data_unit_size, + bool is_hw_wrapped_key) { if (!ksm) return false; @@ -406,6 +412,13 @@ bool keyslot_manager_crypto_mode_supported(struct keyslot_manager *ksm, return false; if (WARN_ON(!is_power_of_2(data_unit_size))) return false; + if (is_hw_wrapped_key) { + if (!(ksm->features & BLK_CRYPTO_FEATURE_WRAPPED_KEYS)) + return false; + } else { + if (!(ksm->features & BLK_CRYPTO_FEATURE_STANDARD_KEYS)) + return false; + } return ksm->crypto_mode_supported[crypto_mode] & data_unit_size; } @@ -521,6 +534,7 @@ EXPORT_SYMBOL_GPL(keyslot_manager_destroy); * keyslot_manager_create_passthrough() - Create a passthrough keyslot manager * @dev: Device for runtime power management (NULL if none) * @ksm_ll_ops: The struct keyslot_mgmt_ll_ops + * @features: Bitmask of BLK_CRYPTO_FEATURE_* flags * @crypto_mode_supported: Bitmasks for supported encryption modes * @ll_priv_data: Private data passed as is to the functions in ksm_ll_ops. 
* @@ -538,6 +552,7 @@ EXPORT_SYMBOL_GPL(keyslot_manager_destroy); struct keyslot_manager *keyslot_manager_create_passthrough( struct device *dev, const struct keyslot_mgmt_ll_ops *ksm_ll_ops, + unsigned int features, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data) { @@ -548,6 +563,7 @@ struct keyslot_manager *keyslot_manager_create_passthrough( return NULL; ksm->ksm_ll_ops = *ksm_ll_ops; + ksm->features = features; memcpy(ksm->crypto_mode_supported, crypto_mode_supported, sizeof(ksm->crypto_mode_supported)); ksm->ll_priv_data = ll_priv_data; @@ -576,11 +592,13 @@ void keyslot_manager_intersect_modes(struct keyslot_manager *parent, if (child) { unsigned int i; + parent->features &= child->features; for (i = 0; i < ARRAY_SIZE(child->crypto_mode_supported); i++) { parent->crypto_mode_supported[i] &= child->crypto_mode_supported[i]; } } else { + parent->features = 0; memset(parent->crypto_mode_supported, 0, sizeof(parent->crypto_mode_supported)); } diff --git a/drivers/md/dm-default-key.c b/drivers/md/dm-default-key.c index b3da6afa6927..3d0bd0645f7a 100644 --- a/drivers/md/dm-default-key.c +++ b/drivers/md/dm-default-key.c @@ -241,6 +241,7 @@ static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) } err = blk_crypto_start_using_mode(cipher->mode_num, dkc->sector_size, + dkc->is_hw_wrapped, dkc->dev->bdev->bd_queue); if (err) { ti->error = "Error starting to use blk-crypto"; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c41c2eddba31..e4f0b67cd015 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2165,16 +2165,21 @@ static struct keyslot_mgmt_ll_ops dm_ksm_ll_ops = { static int dm_init_inline_encryption(struct mapped_device *md) { + unsigned int features; unsigned int mode_masks[BLK_ENCRYPTION_MODE_MAX]; /* - * Start out with all crypto mode support bits set. Any unsupported - * bits will be cleared later when calculating the device restrictions. 
+ * Initially declare support for all crypto settings. Anything + * unsupported by a child device will be removed later when calculating + * the device restrictions. */ + features = BLK_CRYPTO_FEATURE_STANDARD_KEYS | + BLK_CRYPTO_FEATURE_WRAPPED_KEYS; memset(mode_masks, 0xFF, sizeof(mode_masks)); md->queue->ksm = keyslot_manager_create_passthrough(NULL, &dm_ksm_ll_ops, + features, mode_masks, md); if (!md->queue->ksm) return -ENOMEM; diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c index 6999970235b3..d62ab7a9faff 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.c +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -336,7 +336,9 @@ int ufshcd_hba_init_crypto_spec(struct ufs_hba *hba, ufshcd_clear_all_keyslots(hba); hba->ksm = keyslot_manager_create(hba->dev, ufshcd_num_keyslots(hba), - ksm_ops, crypto_modes_supported, hba); + ksm_ops, + BLK_CRYPTO_FEATURE_STANDARD_KEYS, + crypto_modes_supported, hba); if (!hba->ksm) { err = -ENOMEM; diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index cd901697d62a..0d639b8e8dda 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -104,6 +104,7 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, queue_refs++; err = blk_crypto_start_using_mode(crypto_mode, sb->s_blocksize, + is_hw_wrapped, blk_key->devs[i]); if (err) { fscrypt_err(inode, diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 3b6cb9f7b888..7dc478a8c3ed 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -24,6 +24,7 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, int blk_crypto_start_using_mode(enum blk_crypto_mode_num crypto_mode, unsigned int data_unit_size, + bool is_hw_wrapped_key, struct request_queue *q); int blk_crypto_evict_key(struct request_queue *q, diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h index 2f4aac2851bf..cd65bea927db 100644 --- a/include/linux/keyslot-manager.h +++ 
b/include/linux/keyslot-manager.h @@ -8,6 +8,15 @@ #include +/* Inline crypto feature bits. Must set at least one. */ +enum { + /* Support for standard software-specified keys */ + BLK_CRYPTO_FEATURE_STANDARD_KEYS = BIT(0), + + /* Support for hardware-wrapped keys */ + BLK_CRYPTO_FEATURE_WRAPPED_KEYS = BIT(1), +}; + #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct keyslot_manager; @@ -45,6 +54,7 @@ struct keyslot_manager *keyslot_manager_create( struct device *dev, unsigned int num_slots, const struct keyslot_mgmt_ll_ops *ksm_ops, + unsigned int features, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data); @@ -57,7 +67,8 @@ void keyslot_manager_put_slot(struct keyslot_manager *ksm, unsigned int slot); bool keyslot_manager_crypto_mode_supported(struct keyslot_manager *ksm, enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size); + unsigned int data_unit_size, + bool is_hw_wrapped_key); int keyslot_manager_evict_key(struct keyslot_manager *ksm, const struct blk_crypto_key *key); @@ -71,6 +82,7 @@ void keyslot_manager_destroy(struct keyslot_manager *ksm); struct keyslot_manager *keyslot_manager_create_passthrough( struct device *dev, const struct keyslot_mgmt_ll_ops *ksm_ops, + unsigned int features, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data); -- GitLab From 4efb7e218a4a6c50741e5d0a101d4503274370c5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 3 Apr 2020 12:06:11 -0700 Subject: [PATCH 0788/1278] ANDROID: fscrypt: fall back to filesystem-layer crypto when needed When the filesystem is mounted with '-o inlinecrypt', make fscrypt fall back to filesystem-layer crypto when inline crypto won't work, e.g. due to the hardware not supporting the encryption algorithm. When blk-crypto-fallback is disabled, this fixes '-o inlinecrypt' to not break any fscrypt policies that would otherwise work. This is needed for VtsKernelEncryptionTest to pass on some devices. 
Bug: 137270441 Bug: 151100202 Test: 'atest vts_kernel_encryption_test' on Pixel 4 with the inline crypto patches backported, and also on Cuttlefish. Change-Id: I3e730df4608efb12d7126d1a85faddcccb566764 Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 7 ++-- fs/crypto/inline_crypt.c | 70 ++++++++++++++++++++++++++++++------- fs/crypto/keysetup.c | 10 ++++-- 3 files changed, 71 insertions(+), 16 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index f262f823fd58..0753681cb156 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -326,7 +326,8 @@ extern void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf); /* inline_crypt.c */ #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT -extern void fscrypt_select_encryption_impl(struct fscrypt_info *ci); +extern int fscrypt_select_encryption_impl(struct fscrypt_info *ci, + bool is_hw_wrapped_key); static inline bool fscrypt_using_inline_encryption(const struct fscrypt_info *ci) @@ -370,8 +371,10 @@ fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ -static inline void fscrypt_select_encryption_impl(struct fscrypt_info *ci) +static inline int fscrypt_select_encryption_impl(struct fscrypt_info *ci, + bool is_hw_wrapped_key) { + return 0; } static inline bool fscrypt_using_inline_encryption( diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 0d639b8e8dda..e1bbaeff1c43 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -26,26 +26,76 @@ struct fscrypt_blk_crypto_key { struct request_queue *devs[]; }; +static int fscrypt_get_num_devices(struct super_block *sb) +{ + if (sb->s_cop->get_num_devices) + return sb->s_cop->get_num_devices(sb); + return 1; +} + +static void fscrypt_get_devices(struct super_block *sb, int num_devs, + struct request_queue **devs) +{ + if (num_devs == 1) + devs[0] = bdev_get_queue(sb->s_bdev); + else + sb->s_cop->get_devices(sb, devs); +} + /* Enable 
inline encryption for this file if supported. */ -void fscrypt_select_encryption_impl(struct fscrypt_info *ci) +int fscrypt_select_encryption_impl(struct fscrypt_info *ci, + bool is_hw_wrapped_key) { const struct inode *inode = ci->ci_inode; struct super_block *sb = inode->i_sb; + enum blk_crypto_mode_num crypto_mode = ci->ci_mode->blk_crypto_mode; + struct request_queue **devs; + int num_devs; + int i; /* The file must need contents encryption, not filenames encryption */ if (!S_ISREG(inode->i_mode)) - return; + return 0; /* blk-crypto must implement the needed encryption algorithm */ - if (ci->ci_mode->blk_crypto_mode == BLK_ENCRYPTION_MODE_INVALID) - return; + if (crypto_mode == BLK_ENCRYPTION_MODE_INVALID) + return 0; /* The filesystem must be mounted with -o inlinecrypt */ if (!sb->s_cop->inline_crypt_enabled || !sb->s_cop->inline_crypt_enabled(sb)) - return; + return 0; + + /* + * The needed encryption settings must be supported either by + * blk-crypto-fallback, or by hardware on all the filesystem's devices. 
+ */ + + if (IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) && + !is_hw_wrapped_key) { + ci->ci_inlinecrypt = true; + return 0; + } + + num_devs = fscrypt_get_num_devices(sb); + devs = kmalloc_array(num_devs, sizeof(*devs), GFP_NOFS); + if (!devs) + return -ENOMEM; + + fscrypt_get_devices(sb, num_devs, devs); + + for (i = 0; i < num_devs; i++) { + if (!keyslot_manager_crypto_mode_supported(devs[i]->ksm, + crypto_mode, + sb->s_blocksize, + is_hw_wrapped_key)) + goto out_free_devs; + } ci->ci_inlinecrypt = true; +out_free_devs: + kfree(devs); + return 0; } int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, @@ -57,14 +107,13 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const struct inode *inode = ci->ci_inode; struct super_block *sb = inode->i_sb; enum blk_crypto_mode_num crypto_mode = ci->ci_mode->blk_crypto_mode; - int num_devs = 1; + int num_devs; int queue_refs = 0; struct fscrypt_blk_crypto_key *blk_key; int err; int i; - if (sb->s_cop->get_num_devices) - num_devs = sb->s_cop->get_num_devices(sb); + num_devs = fscrypt_get_num_devices(sb); if (WARN_ON(num_devs < 1)) return -EINVAL; @@ -73,10 +122,7 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, return -ENOMEM; blk_key->num_devs = num_devs; - if (num_devs == 1) - blk_key->devs[0] = bdev_get_queue(sb->s_bdev); - else - sb->s_cop->get_devices(sb, blk_key->devs); + fscrypt_get_devices(sb, num_devs, blk_key->devs); BUILD_BUG_ON(FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE > BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE); diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index e52bbbe68dc8..6b98ca38fc44 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -328,8 +328,6 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, struct fscrypt_key_specifier mk_spec; int err; - fscrypt_select_encryption_impl(ci); - switch (ci->ci_policy.version) { case FSCRYPT_POLICY_V1: mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR; @@ -354,6 
+352,10 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, ci->ci_policy.version != FSCRYPT_POLICY_V1) return PTR_ERR(key); + err = fscrypt_select_encryption_impl(ci, false); + if (err) + return err; + /* * As a legacy fallback for v1 policies, search for the key in * the current task's subscribed keyrings too. Don't move this @@ -388,6 +390,10 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, goto out_release_key; } + err = fscrypt_select_encryption_impl(ci, mk->mk_secret.is_hw_wrapped); + if (err) + goto out_release_key; + switch (ci->ci_policy.version) { case FSCRYPT_POLICY_V1: err = fscrypt_setup_v1_file_key(ci, mk->mk_secret.raw); -- GitLab From 2d578d836a8c886871add1943d6f824b2365e497 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Apr 2020 01:18:37 -0700 Subject: [PATCH 0789/1278] BACKPORT: FROMGIT: kbuild: mkcompile_h: Include $LD version in /proc/version When doing Clang builds of the kernel, it is possible to link with either ld.bfd (binutils) or ld.lld (LLVM), but it is not possible to discover this from a running kernel. Add the "$LD -v" output to /proc/version. Signed-off-by: Kees Cook Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Reviewed-by: Fangrui Song Reviewed-by: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: Masahiro Yamada Bug: 153484457 (cherry picked from commit 6f04f056df3c https://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git for-next) [nd: commit 4b950bb9ac0c ("Kbuild: Handle PREEMPT_RT for version string and magic") missing in 4.14, first landed in 5.4-rc1. commit b79c6aa6a1f1 ("kbuild: remove unnecessary in-subshell execution") missing in 4.14, first landed in 5.1-rc1. 
] Change-Id: Ifa5a98fe159392862e8d07a733c0f141fa9c7715 Signed-off-by: Nick Desaulniers --- init/Makefile | 5 +++-- scripts/mkcompile_h | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/init/Makefile b/init/Makefile index a04f1c18b58c..edb39edd0241 100644 --- a/init/Makefile +++ b/init/Makefile @@ -32,5 +32,6 @@ $(obj)/version.o: include/generated/compile.h silent_chk_compile.h = : include/generated/compile.h: FORCE @$($(quiet)chk_compile.h) - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" \ + "$(CC) $(KBUILD_CFLAGS)" "$(LD)" diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h index 959199c3147e..29a1589e2f0b 100755 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h @@ -6,6 +6,7 @@ ARCH=$2 SMP=$3 PREEMPT=$4 CC=$5 +LD=$6 vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; } @@ -77,7 +78,10 @@ UTS_TRUNCATE="cut -b -$UTS_LEN" echo \#define LINUX_COMPILE_BY \"`echo $LINUX_COMPILE_BY | $UTS_TRUNCATE`\" echo \#define LINUX_COMPILE_HOST \"`echo $LINUX_COMPILE_HOST | $UTS_TRUNCATE`\" - echo \#define LINUX_COMPILER \"`$CC -v 2>&1 | grep ' version ' | sed 's/[[:space:]]*$//'`\" + CC_VERSION=$($CC -v 2>&1 | grep ' version ' | sed 's/[[:space:]]*$//') + LD_VERSION=$($LD -v | head -n1 | sed 's/(compatible with [^)]*)//' \ + | sed 's/[[:space:]]*$//') + printf '#define LINUX_COMPILER "%s"\n' "$CC_VERSION, $LD_VERSION" ) > .tmpcompile # Only replace the real compile.h if the new one is different, -- GitLab From 21889f3256ab0818940aaed4b6b983caaff78b9b Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Wed, 8 Jan 2020 14:23:11 -0600 Subject: [PATCH 0790/1278] UPSTREAM: power: supply: core: Update sysfs-class-power ABI document Add the "Over Current" string to /sys/class/power_supply//health description. 
Bug: 149071038 Test: Builds Fixes: e3e83cc601e57 ("power: supply: core: Add POWER_SUPPLY_HEALTH_OVERCURRENT constant") Signed-off-by: Dan Murphy Signed-off-by: Sebastian Reichel (cherry picked from commit 333853be564526ecd74e9cbdf3e28650a47ad85d) Signed-off-by: Sandeep Patil Change-Id: Icd423387bde55285bd18d6871bba6f37efd2e034 --- Documentation/ABI/testing/sysfs-class-power | 456 ++++++++++++++++++++ 1 file changed, 456 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power index f85ce9e327b9..3bddc0bb7290 100644 --- a/Documentation/ABI/testing/sysfs-class-power +++ b/Documentation/ABI/testing/sysfs-class-power @@ -1,3 +1,459 @@ +===== General Properties ===== + +What: /sys/class/power_supply//manufacturer +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the name of the device manufacturer. + + Access: Read + Valid values: Represented as string + +What: /sys/class/power_supply//model_name +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the name of the device model. + + Access: Read + Valid values: Represented as string + +What: /sys/class/power_supply//serial_number +Date: January 2008 +Contact: linux-pm@vger.kernel.org +Description: + Reports the serial number of the device. + + Access: Read + Valid values: Represented as string + +What: /sys/class/power_supply//type +Date: May 2010 +Contact: linux-pm@vger.kernel.org +Description: + Describes the main type of the supply. + + Access: Read + Valid values: "Battery", "UPS", "Mains", "USB" + +===== Battery Properties ===== + +What: /sys/class/power_supply//capacity +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Fine grain representation of battery capacity. 
+ Access: Read + Valid values: 0 - 100 (percent) + +What: /sys/class/power_supply//capacity_alert_max +Date: July 2012 +Contact: linux-pm@vger.kernel.org +Description: + Maximum battery capacity trip-wire value where the supply will + notify user-space of the event. This is normally used for the + battery discharging scenario where user-space needs to know the + battery has dropped to an upper level so it can take + appropriate action (e.g. warning user that battery level is + low). + + Access: Read, Write + Valid values: 0 - 100 (percent) + +What: /sys/class/power_supply//capacity_alert_min +Date: July 2012 +Contact: linux-pm@vger.kernel.org +Description: + Minimum battery capacity trip-wire value where the supply will + notify user-space of the event. This is normally used for the + battery discharging scenario where user-space needs to know the + battery has dropped to a lower level so it can take + appropriate action (e.g. warning user that battery level is + critically low). + + Access: Read, Write + Valid values: 0 - 100 (percent) + +What: /sys/class/power_supply//capacity_level +Date: June 2009 +Contact: linux-pm@vger.kernel.org +Description: + Coarse representation of battery capacity. + + Access: Read + Valid values: "Unknown", "Critical", "Low", "Normal", "High", + "Full" + +What: /sys/class/power_supply//current_avg +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports an average IBAT current reading for the battery, over a + fixed period. Normally devices will provide a fixed interval in + which they average readings to smooth out the reported value. + + Access: Read + Valid values: Represented in microamps + +What: /sys/class/power_supply//current_max +Date: October 2010 +Contact: linux-pm@vger.kernel.org +Description: + Reports the maximum IBAT current allowed into the battery. 
+ + Access: Read + Valid values: Represented in microamps + +What: /sys/class/power_supply//current_now +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports an instant, single IBAT current reading for the battery. + This value is not averaged/smoothed. + + Access: Read + Valid values: Represented in microamps + +What: /sys/class/power_supply//charge_type +Date: July 2009 +Contact: linux-pm@vger.kernel.org +Description: + Represents the type of charging currently being applied to the + battery. + + Access: Read + Valid values: "Unknown", "N/A", "Trickle", "Fast" + +What: /sys/class/power_supply//charge_term_current +Date: July 2014 +Contact: linux-pm@vger.kernel.org +Description: + Reports the charging current value which is used to determine + when the battery is considered full and charging should end. + + Access: Read + Valid values: Represented in microamps + +What: /sys/class/power_supply//health +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the health of the battery or battery side of charger + functionality. + + Access: Read + Valid values: "Unknown", "Good", "Overheat", "Dead", + "Over voltage", "Unspecified failure", "Cold", + "Watchdog timer expire", "Safety timer expire", + "Over current" + +What: /sys/class/power_supply//precharge_current +Date: June 2017 +Contact: linux-pm@vger.kernel.org +Description: + Reports the charging current applied during pre-charging phase + for a battery charge cycle. + + Access: Read + Valid values: Represented in microamps + +What: /sys/class/power_supply//present +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports whether a battery is present or not in the system. + + Access: Read + Valid values: + 0: Absent + 1: Present + +What: /sys/class/power_supply//status +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Represents the charging status of the battery. 
Normally this + is read-only reporting although for some supplies this can be + used to enable/disable charging to the battery. + + Access: Read, Write + Valid values: "Unknown", "Charging", "Discharging", + "Not charging", "Full" + +What: /sys/class/power_supply//technology +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Describes the battery technology supported by the supply. + + Access: Read + Valid values: "Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe", + "NiCd", "LiMn" + +What: /sys/class/power_supply//temp +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the current TBAT battery temperature reading. + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply//temp_alert_max +Date: July 2012 +Contact: linux-pm@vger.kernel.org +Description: + Maximum TBAT temperature trip-wire value where the supply will + notify user-space of the event. This is normally used for the + battery charging scenario where user-space needs to know the + battery temperature has crossed an upper threshold so it can + take appropriate action (e.g. warning user that battery level is + critically high, and charging has stopped). + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply//temp_alert_min +Date: July 2012 +Contact: linux-pm@vger.kernel.org +Description: + Minimum TBAT temperature trip-wire value where the supply will + notify user-space of the event. This is normally used for the + battery charging scenario where user-space needs to know the + battery temperature has crossed a lower threshold so it can take + appropriate action (e.g. warning user that battery level is + high, and charging current has been reduced accordingly to + remedy the situation). 
+ + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply//temp_max +Date: July 2014 +Contact: linux-pm@vger.kernel.org +Description: + Reports the maximum allowed TBAT battery temperature for + charging. + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply//temp_min +Date: July 2014 +Contact: linux-pm@vger.kernel.org +Description: + Reports the minimum allowed TBAT battery temperature for + charging. + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply//voltage_avg, +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports an average VBAT voltage reading for the battery, over a + fixed period. Normally devices will provide a fixed interval in + which they average readings to smooth out the reported value. + + Access: Read + Valid values: Represented in microvolts + +What: /sys/class/power_supply//voltage_max, +Date: January 2008 +Contact: linux-pm@vger.kernel.org +Description: + Reports the maximum safe VBAT voltage permitted for the battery, + during charging. + + Access: Read + Valid values: Represented in microvolts + +What: /sys/class/power_supply//voltage_min, +Date: January 2008 +Contact: linux-pm@vger.kernel.org +Description: + Reports the minimum safe VBAT voltage permitted for the battery, + during discharging. + + Access: Read + Valid values: Represented in microvolts + +What: /sys/class/power_supply//voltage_now, +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports an instant, single VBAT voltage reading for the battery. + This value is not averaged/smoothed. + + Access: Read + Valid values: Represented in microvolts + +===== USB Properties ===== + +What: /sys/class/power_supply//current_avg +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports an average IBUS current reading over a fixed period. 
+ Normally devices will provide a fixed interval in which they + average readings to smooth out the reported value. + + Access: Read + Valid values: Represented in microamps + + +What: /sys/class/power_supply//current_max +Date: October 2010 +Contact: linux-pm@vger.kernel.org +Description: + Reports the maximum IBUS current the supply can support. + + Access: Read + Valid values: Represented in microamps + +What: /sys/class/power_supply//current_now +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the IBUS current supplied now. This value is generally + read-only reporting, unless the 'online' state of the supply + is set to be programmable, in which case this value can be set + within the reported min/max range. + + Access: Read, Write + Valid values: Represented in microamps + +What: /sys/class/power_supply//input_current_limit +Date: July 2014 +Contact: linux-pm@vger.kernel.org +Description: + Details the incoming IBUS current limit currently set in the + supply. Normally this is configured based on the type of + connection made (e.g. A configured SDP should output a maximum + of 500mA so the input current limit is set to the same value). + + Access: Read, Write + Valid values: Represented in microamps + +What: /sys/class/power_supply//online, +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Indicates if VBUS is present for the supply. When the supply is + online, and the supply allows it, then it's possible to switch + between online states (e.g. Fixed -> Programmable for a PD_PPS + USB supply so voltage and current can be controlled). + + Access: Read, Write + Valid values: + 0: Offline + 1: Online Fixed - Fixed Voltage Supply + 2: Online Programmable - Programmable Voltage Supply + +What: /sys/class/power_supply//temp +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the current supply temperature reading. 
This would + normally be the internal temperature of the device itself (e.g. + TJUNC temperature of an IC) + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply//temp_alert_max +Date: July 2012 +Contact: linux-pm@vger.kernel.org +Description: + Maximum supply temperature trip-wire value where the supply will + notify user-space of the event. This is normally used for the + charging scenario where user-space needs to know the supply + temperature has crossed an upper threshold so it can take + appropriate action (e.g. warning user that the supply + temperature is critically high, and charging has stopped to + remedy the situation). + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply//temp_alert_min +Date: July 2012 +Contact: linux-pm@vger.kernel.org +Description: + Minimum supply temperature trip-wire value where the supply will + notify user-space of the event. This is normally used for the + charging scenario where user-space needs to know the supply + temperature has crossed a lower threshold so it can take + appropriate action (e.g. warning user that the supply + temperature is high, and charging current has been reduced + accordingly to remedy the situation). + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply//temp_max +Date: July 2014 +Contact: linux-pm@vger.kernel.org +Description: + Reports the maximum allowed supply temperature for operation. + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply//temp_min +Date: July 2014 +Contact: linux-pm@vger.kernel.org +Description: + Reports the minimum allowed supply temperature for operation. 
+ + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply//usb_type +Date: March 2018 +Contact: linux-pm@vger.kernel.org +Description: + Reports what type of USB connection is currently active for + the supply, for example it can show if USB-PD capable source + is attached. + + Access: Read-Only + Valid values: "Unknown", "SDP", "DCP", "CDP", "ACA", "C", "PD", + "PD_DRP", "PD_PPS", "BrickID" + +What: /sys/class/power_supply//voltage_max +Date: January 2008 +Contact: linux-pm@vger.kernel.org +Description: + Reports the maximum VBUS voltage the supply can support. + + Access: Read + Valid values: Represented in microvolts + +What: /sys/class/power_supply//voltage_min +Date: January 2008 +Contact: linux-pm@vger.kernel.org +Description: + Reports the minimum VBUS voltage the supply can support. + + Access: Read + Valid values: Represented in microvolts + +What: /sys/class/power_supply//voltage_now +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the VBUS voltage supplied now. This value is generally + read-only reporting, unless the 'online' state of the supply + is set to be programmable, in which case this value can be set + within the reported min/max range. + + Access: Read, Write + Valid values: Represented in microvolts + +===== Device Specific Properties ===== + What: /sys/class/power/ds2760-battery.*/charge_now Date: May 2010 KernelVersion: 2.6.35 -- GitLab From 3463750c5ba30e0fd1d2f10805568e569a6cb244 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Thu, 16 Jan 2020 11:50:37 -0600 Subject: [PATCH 0791/1278] FROMLIST: power_supply: Add additional health properties to the header Add HEALTH_WARM, HEALTH_COOL and HEALTH_HOT to the health enum. 
Bug: 149071038 Test: Builds Link: https://lore.kernel.org/linux-pm/20200116175039.1317-3-dmurphy@ti.com/ Signed-off-by: Dan Murphy Tested-by: Guru Das Srinagesh Signed-off-by: Sandeep Patil Change-Id: I5a99577e8c8c38c2ea10a339223c177d18c93d37 --- Documentation/ABI/testing/sysfs-class-power | 2 +- drivers/power/supply/power_supply_sysfs.c | 2 +- include/linux/power_supply.h | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power index 3bddc0bb7290..c1075ecfdb4b 100644 --- a/Documentation/ABI/testing/sysfs-class-power +++ b/Documentation/ABI/testing/sysfs-class-power @@ -145,7 +145,7 @@ Description: Valid values: "Unknown", "Good", "Overheat", "Dead", "Over voltage", "Unspecified failure", "Cold", "Watchdog timer expire", "Safety timer expire", - "Over current" + "Over current", "Warm", "Cool", "Hot" What: /sys/class/power_supply//precharge_current Date: June 2017 diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index 2bbda7f7db04..641b301ce3f5 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -57,7 +57,7 @@ static const char * const power_supply_charge_type_text[] = { static const char * const power_supply_health_text[] = { "Unknown", "Good", "Overheat", "Dead", "Over voltage", "Unspecified failure", "Cold", "Watchdog timer expire", - "Safety timer expire" + "Safety timer expire", "Over current", "Warm", "Cool", "Hot" }; static const char * const power_supply_technology_text[] = { diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index c25e166ea7d0..dcc84653ecfd 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -58,6 +58,10 @@ enum { POWER_SUPPLY_HEALTH_COLD, POWER_SUPPLY_HEALTH_WATCHDOG_TIMER_EXPIRE, POWER_SUPPLY_HEALTH_SAFETY_TIMER_EXPIRE, + POWER_SUPPLY_HEALTH_OVERCURRENT, + POWER_SUPPLY_HEALTH_WARM, + 
POWER_SUPPLY_HEALTH_COOL, + POWER_SUPPLY_HEALTH_HOT, }; enum { -- GitLab From a10564b7495e8634ae2e1201da096df52da08bbf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 14 Mar 2020 13:50:49 -0700 Subject: [PATCH 0792/1278] fscrypt: add FS_IOC_GET_ENCRYPTION_NONCE ioctl Add an ioctl FS_IOC_GET_ENCRYPTION_NONCE which retrieves the nonce from an encrypted file or directory. The nonce is the 16-byte random value stored in the inode's encryption xattr. It is normally used together with the master key to derive the inode's actual encryption key. The nonces are needed by automated tests that verify the correctness of the ciphertext on-disk. Except for the IV_INO_LBLK_64 case, there's no way to replicate a file's ciphertext without knowing that file's nonce. The nonces aren't secret, and the existing ciphertext verification tests in xfstests retrieve them from disk using debugfs or dump.f2fs. But in environments that lack these debugging tools, getting the nonces by manually parsing the filesystem structure would be very hard. To make this important type of testing much easier, let's just add an ioctl that retrieves the nonce. Link: https://lore.kernel.org/r/20200314205052.93294-2-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 11 +++++++++++ fs/crypto/fscrypt_private.h | 20 ++++++++++++++++++++ fs/crypto/keysetup.c | 16 ++-------------- fs/crypto/policy.c | 21 ++++++++++++++++++++- include/linux/fscrypt.h | 6 ++++++ include/uapi/linux/fscrypt.h | 1 + 6 files changed, 60 insertions(+), 15 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index fbcd185d15da..dc444b8d3704 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -633,6 +633,17 @@ from a passphrase or other low-entropy user credential. FS_IOC_GET_ENCRYPTION_PWSALT is deprecated. 
Instead, prefer to generate and manage any needed salt(s) in userspace. +Getting a file's encryption nonce +--------------------------------- + +Since Linux v5.7, the ioctl FS_IOC_GET_ENCRYPTION_NONCE is supported. +On encrypted files and directories it gets the inode's 16-byte nonce. +On unencrypted files and directories, it fails with ENODATA. + +This ioctl can be useful for automated tests which verify that the +encryption is being done correctly. It is not needed for normal use +of fscrypt. + Adding keys ----------- diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 9aae851409e5..dbced2937ec8 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -76,6 +76,26 @@ static inline int fscrypt_context_size(const union fscrypt_context *ctx) return 0; } +/* Check whether an fscrypt_context has a recognized version number and size */ +static inline bool fscrypt_context_is_valid(const union fscrypt_context *ctx, + int ctx_size) +{ + return ctx_size >= 1 && ctx_size == fscrypt_context_size(ctx); +} + +/* Retrieve the context's nonce, assuming the context was already validated */ +static inline const u8 *fscrypt_context_nonce(const union fscrypt_context *ctx) +{ + switch (ctx->version) { + case FSCRYPT_CONTEXT_V1: + return ctx->v1.nonce; + case FSCRYPT_CONTEXT_V2: + return ctx->v2.nonce; + } + WARN_ON(1); + return NULL; +} + #undef fscrypt_policy union fscrypt_policy { u8 version; diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 84039a163585..c0b30ea18f11 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -425,20 +425,8 @@ int fscrypt_get_encryption_info(struct inode *inode) goto out; } - switch (ctx.version) { - case FSCRYPT_CONTEXT_V1: - memcpy(crypt_info->ci_nonce, ctx.v1.nonce, - FS_KEY_DERIVATION_NONCE_SIZE); - break; - case FSCRYPT_CONTEXT_V2: - memcpy(crypt_info->ci_nonce, ctx.v2.nonce, - FS_KEY_DERIVATION_NONCE_SIZE); - break; - default: - WARN_ON(1); - res = -EINVAL; - goto out; - } + 
memcpy(crypt_info->ci_nonce, fscrypt_context_nonce(&ctx), + FS_KEY_DERIVATION_NONCE_SIZE); if (!fscrypt_supported_policy(&crypt_info->ci_policy, inode)) { res = -EINVAL; diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index cf2a9d26ef7d..10ccf945020c 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -258,7 +258,7 @@ int fscrypt_policy_from_context(union fscrypt_policy *policy_u, { memset(policy_u, 0, sizeof(*policy_u)); - if (ctx_size <= 0 || ctx_size != fscrypt_context_size(ctx_u)) + if (!fscrypt_context_is_valid(ctx_u, ctx_size)) return -EINVAL; switch (ctx_u->version) { @@ -481,6 +481,25 @@ int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *uarg) } EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_policy_ex); +/* FS_IOC_GET_ENCRYPTION_NONCE: retrieve file's encryption nonce for testing */ +int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg) +{ + struct inode *inode = file_inode(filp); + union fscrypt_context ctx; + int ret; + + ret = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); + if (ret < 0) + return ret; + if (!fscrypt_context_is_valid(&ctx, ret)) + return -EINVAL; + if (copy_to_user(arg, fscrypt_context_nonce(&ctx), + FS_KEY_DERIVATION_NONCE_SIZE)) + return -EFAULT; + return 0; +} +EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_nonce); + /** * fscrypt_has_permitted_context() - is a file's encryption policy permitted * within its directory? 
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index fd1bc965a3e3..a2314004416e 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -139,6 +139,7 @@ extern void fscrypt_free_bounce_page(struct page *bounce_page); extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); extern int fscrypt_ioctl_get_policy(struct file *, void __user *); extern int fscrypt_ioctl_get_policy_ex(struct file *, void __user *); +extern int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg); extern int fscrypt_has_permitted_context(struct inode *, struct inode *); extern int fscrypt_inherit_context(struct inode *, struct inode *, void *, bool); @@ -295,6 +296,11 @@ static inline int fscrypt_ioctl_get_policy_ex(struct file *filp, return -EOPNOTSUPP; } +static inline int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg) +{ + return -EOPNOTSUPP; +} + static inline int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) { diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index 0d8a6f47711c..a10e3cdc2839 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -163,6 +163,7 @@ struct fscrypt_get_key_status_arg { #define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg) #define FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS _IOWR('f', 25, struct fscrypt_remove_key_arg) #define FS_IOC_GET_ENCRYPTION_KEY_STATUS _IOWR('f', 26, struct fscrypt_get_key_status_arg) +#define FS_IOC_GET_ENCRYPTION_NONCE _IOR('f', 27, __u8[16]) /**********************************************************************/ -- GitLab From b56fd485f2217d21c6d7ca705ff8bf02a4059f7c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 14 Mar 2020 13:50:50 -0700 Subject: [PATCH 0793/1278] ext4: wire up FS_IOC_GET_ENCRYPTION_NONCE This new ioctl retrieves a file's encryption nonce, which is useful for testing. See the corresponding fs/crypto/ patch for more details. 
Link: https://lore.kernel.org/r/20200314205052.93294-3-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- fs/ext4/ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index faeeb4ec1e4c..09d1d92628cb 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1034,6 +1034,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EOPNOTSUPP; return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); + case FS_IOC_GET_ENCRYPTION_NONCE: + if (!ext4_has_feature_encrypt(sb)) + return -EOPNOTSUPP; + return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); + case EXT4_IOC_FSGETXATTR: { struct fsxattr fa; @@ -1174,6 +1179,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_REMOVE_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: case FS_IOC_GET_ENCRYPTION_KEY_STATUS: + case FS_IOC_GET_ENCRYPTION_NONCE: case EXT4_IOC_SHUTDOWN: case FS_IOC_GETFSMAP: case FS_IOC_ENABLE_VERITY: -- GitLab From 9e24ce3fa7089baf43f3ca21353caa370a505ee3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 14 Mar 2020 13:50:51 -0700 Subject: [PATCH 0794/1278] f2fs: wire up FS_IOC_GET_ENCRYPTION_NONCE This new ioctl retrieves a file's encryption nonce, which is useful for testing. See the corresponding fs/crypto/ patch for more details. 
Link: https://lore.kernel.org/r/20200314205052.93294-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/f2fs/file.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 80750bab5fef..509ef157105c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2428,6 +2428,14 @@ static int f2fs_ioc_get_encryption_key_status(struct file *filp, return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); } +static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg) +{ + if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) + return -EOPNOTSUPP; + + return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); +} + static int f2fs_ioc_gc(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -3395,6 +3403,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_remove_encryption_key_all_users(filp, arg); case FS_IOC_GET_ENCRYPTION_KEY_STATUS: return f2fs_ioc_get_encryption_key_status(filp, arg); + case FS_IOC_GET_ENCRYPTION_NONCE: + return f2fs_ioc_get_encryption_nonce(filp, arg); case F2FS_IOC_GARBAGE_COLLECT: return f2fs_ioc_gc(filp, arg); case F2FS_IOC_GARBAGE_COLLECT_RANGE: @@ -3568,6 +3578,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_REMOVE_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: case FS_IOC_GET_ENCRYPTION_KEY_STATUS: + case FS_IOC_GET_ENCRYPTION_NONCE: case F2FS_IOC_GARBAGE_COLLECT: case F2FS_IOC_GARBAGE_COLLECT_RANGE: case F2FS_IOC_WRITE_CHECKPOINT: -- GitLab From 3dcb4bdce902a2403a8917e81034297a23a0b5cd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 14 Mar 2020 13:50:52 -0700 Subject: [PATCH 0795/1278] ubifs: wire up FS_IOC_GET_ENCRYPTION_NONCE This new ioctl retrieves a file's encryption nonce, which is useful for testing. See the corresponding fs/crypto/ patch for more details. 
Link: https://lore.kernel.org/r/20200314205052.93294-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/ubifs/ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 71c344001574..236fe94229a3 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -220,6 +220,9 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_GET_ENCRYPTION_KEY_STATUS: return fscrypt_ioctl_get_key_status(file, (void __user *)arg); + case FS_IOC_GET_ENCRYPTION_NONCE: + return fscrypt_ioctl_get_nonce(file, (void __user *)arg); + default: return -ENOTTY; } @@ -242,6 +245,7 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_REMOVE_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: case FS_IOC_GET_ENCRYPTION_KEY_STATUS: + case FS_IOC_GET_ENCRYPTION_NONCE: break; default: return -ENOIOCTLCMD; -- GitLab From 8c29afa6013897f201e7188d27eb2c60564070f9 Mon Sep 17 00:00:00 2001 From: Kelly Rossmoyer Date: Tue, 7 Apr 2020 12:25:33 -0700 Subject: [PATCH 0796/1278] ANDROID: power: wakeup_reason: wake reason enhancements These changes build upon the existing Android kernel wakeup reason code to: * improve the positioning of suspend abort logging calls in suspend flow * add logging of abnormal wakeup reasons like unexpected HW IRQs and IRQs configured as both wake-enabled and no-suspend * add support for capturing deferred-processing threaded nested IRQs as wakeup reasons rather than their synchronously-processed parents Bug: 150970830 Bug: 140217217 Signed-off-by: Kelly Rossmoyer Change-Id: I903b811a0fe11a605a25815c3a341668a23de700 --- drivers/base/power/main.c | 24 +- drivers/base/power/wakeup.c | 23 +- drivers/irqchip/irq-gic-v3.c | 4 + include/linux/wakeup_reason.h | 11 +- kernel/irq/chip.c | 17 +- kernel/power/process.c | 9 - kernel/power/suspend.c | 20 +- kernel/power/wakeup_reason.c | 417 ++++++++++++++++++++++++---------- 8 files changed, 374 insertions(+), 151 
deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index df97fb7cbfb5..f3b6afaf09c7 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1173,10 +1173,13 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a } error = dpm_run_callback(callback, dev, state, info); - if (!error) + if (!error) { dev->power.is_noirq_suspended = true; - else + } else { async_error = error; + log_suspend_abort_reason("Callback failed on %s in %pS returned %d", + dev_name(dev), callback, error); + } Complete: complete_all(&dev->power.completion); @@ -1334,10 +1337,13 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as } error = dpm_run_callback(callback, dev, state, info); - if (!error) + if (!error) { dev->power.is_late_suspended = true; - else + } else { async_error = error; + log_suspend_abort_reason("Callback failed on %s in %pS returned %d", + dev_name(dev), callback, error); + } Complete: TRACE_SUSPEND(error); @@ -1495,7 +1501,6 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_callback_t callback = NULL; const char *info = NULL; int error = 0; - char suspend_abort[MAX_SUSPEND_ABORT_LEN]; DECLARE_DPM_WATCHDOG_ON_STACK(wd); TRACE_DEVICE(dev); @@ -1518,9 +1523,6 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_wakeup_event(dev, 0); if (pm_wakeup_pending()) { - pm_get_active_wakeup_sources(suspend_abort, - MAX_SUSPEND_ABORT_LEN); - log_suspend_abort_reason(suspend_abort); dev->power.direct_complete = false; async_error = -EBUSY; goto Complete; @@ -1599,7 +1601,6 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) dev->power.is_suspended = true; if (parent) { spin_lock_irq(&parent->power.lock); - dev->parent->power.direct_complete = false; if (dev->power.wakeup_path && !dev->parent->power.ignore_children) @@ -1608,6 +1609,9 @@ static int __device_suspend(struct device *dev, 
pm_message_t state, bool async) spin_unlock_irq(&parent->power.lock); } dpm_clear_suppliers_direct_complete(dev); + } else { + log_suspend_abort_reason("Callback failed on %s in %pS returned %d", + dev_name(dev), callback, error); } device_unlock(dev); @@ -1817,6 +1821,8 @@ int dpm_prepare(pm_message_t state) printk(KERN_INFO "PM: Device %s not prepared " "for power transition: code %d\n", dev_name(dev), error); + log_suspend_abort_reason("Device %s not prepared for power transition: code %d", + dev_name(dev), error); dpm_save_failed_dev(dev_name(dev)); put_device(dev); break; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 6f7e8f19c0a2..cf5ac5668ea6 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -15,7 +15,9 @@ #include #include #include -#include +#include +#include +#include #include #include "power.h" @@ -925,6 +927,7 @@ bool pm_wakeup_pending(void) { unsigned long flags; bool ret = false; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; spin_lock_irqsave(&events_lock, flags); if (events_check_enabled) { @@ -937,8 +940,10 @@ bool pm_wakeup_pending(void) spin_unlock_irqrestore(&events_lock, flags); if (ret) { - pr_info("PM: Wakeup pending, aborting suspend\n"); - pm_print_active_wakeup_sources(); + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); + pr_info("PM: %s\n", suspend_abort); } return ret || atomic_read(&pm_abort_suspend) > 0; @@ -966,6 +971,18 @@ void pm_wakeup_clear(bool reset) void pm_system_irq_wakeup(unsigned int irq_number) { if (pm_wakeup_irq == 0) { + struct irq_desc *desc; + const char *name = "null"; + + desc = irq_to_desc(irq_number); + if (desc == NULL) + name = "stray irq"; + else if (desc->action && desc->action->name) + name = desc->action->name; + + log_irq_wakeup_reason(irq_number); + pr_warn("%s: %d triggered %s\n", __func__, irq_number, name); + pm_wakeup_irq = irq_number; pm_system_wakeup(); } diff --git 
a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 7d34ab83bb7f..483f8ac27eea 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -28,6 +28,8 @@ #include #include #include +#include + #include #include @@ -359,6 +361,8 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs err = handle_domain_irq(gic_data.domain, irqnr, regs); if (err) { WARN_ONCE(true, "Unexpected interrupt received!\n"); + log_abnormal_wakeup_reason( + "unexpected HW IRQ %u", irqnr); if (static_key_true(&supports_deactivate)) { if (irqnr < 8192) gic_write_dir(irqnr); diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h index d84d8c301546..54f5caaa5cde 100644 --- a/include/linux/wakeup_reason.h +++ b/include/linux/wakeup_reason.h @@ -20,13 +20,18 @@ #define MAX_SUSPEND_ABORT_LEN 256 -void log_wakeup_reason(int irq); -int check_wakeup_reason(int irq); - #ifdef CONFIG_SUSPEND +void log_irq_wakeup_reason(int irq); +void log_threaded_irq_wakeup_reason(int irq, int parent_irq); void log_suspend_abort_reason(const char *fmt, ...); +void log_abnormal_wakeup_reason(const char *fmt, ...); +void clear_wakeup_reasons(void); #else +static inline void log_irq_wakeup_reason(int irq) { } +static inline void log_threaded_irq_wakeup_reason(int irq, int parent_irq) { } static inline void log_suspend_abort_reason(const char *fmt, ...) { } +static inline void log_abnormal_wakeup_reason(const char *fmt, ...) { } +static inline void clear_wakeup_reasons(void) { } #endif #endif /* _LINUX_WAKEUP_REASON_H */ diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 317fc759de76..9da08b53d06a 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -480,8 +481,22 @@ static bool irq_may_run(struct irq_desc *desc) * If the interrupt is not in progress and is not an armed * wakeup interrupt, proceed. 
*/ - if (!irqd_has_set(&desc->irq_data, mask)) + if (!irqd_has_set(&desc->irq_data, mask)) { +#ifdef CONFIG_PM_SLEEP + if (unlikely(desc->no_suspend_depth && + irqd_is_wakeup_set(&desc->irq_data))) { + unsigned int irq = irq_desc_get_irq(desc); + const char *name = "(unnamed)"; + + if (desc->action && desc->action->name) + name = desc->action->name; + + log_abnormal_wakeup_reason("misconfigured IRQ %u %s", + irq, name); + } +#endif return true; + } /* * If the interrupt is an armed wakeup source, mark it pending diff --git a/kernel/power/process.c b/kernel/power/process.c index c366e3d34a07..d76e61606f51 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -22,7 +22,6 @@ #include #include #include -#include /* * Timeout for stopping processes @@ -39,9 +38,6 @@ static int try_to_freeze_tasks(bool user_only) unsigned int elapsed_msecs; bool wakeup = false; int sleep_usecs = USEC_PER_MSEC; -#ifdef CONFIG_PM_SLEEP - char suspend_abort[MAX_SUSPEND_ABORT_LEN]; -#endif start = ktime_get_boottime(); @@ -71,11 +67,6 @@ static int try_to_freeze_tasks(bool user_only) break; if (pm_wakeup_pending()) { -#ifdef CONFIG_PM_SLEEP - pm_get_active_wakeup_sources(suspend_abort, - MAX_SUSPEND_ABORT_LEN); - log_suspend_abort_reason(suspend_abort); -#endif wakeup = true; break; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 053b3014a220..dad1b8127560 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -148,6 +148,7 @@ static void s2idle_loop(void) break; pm_wakeup_clear(false); + clear_wakeup_reasons(); } pm_pr_dbg("resume from suspend-to-idle\n"); @@ -361,6 +362,7 @@ static int suspend_prepare(suspend_state_t state) if (!error) return 0; + log_suspend_abort_reason("One or more tasks refusing to freeze"); suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); Finish: @@ -390,7 +392,6 @@ void __weak arch_suspend_enable_irqs(void) */ static int suspend_enter(suspend_state_t state, bool *wakeup) { - char 
suspend_abort[MAX_SUSPEND_ABORT_LEN]; int error, last_dev; error = platform_suspend_prepare(state); @@ -402,8 +403,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; last_dev %= REC_FAILED_NUM; pr_err("late suspend of devices failed\n"); - log_suspend_abort_reason("%s device failed to power down", - suspend_stats.failed_devs[last_dev]); + log_suspend_abort_reason("late suspend of %s device failed", + suspend_stats.failed_devs[last_dev]); goto Platform_finish; } error = platform_suspend_prepare_late(state); @@ -421,7 +422,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) last_dev %= REC_FAILED_NUM; pr_err("noirq suspend of devices failed\n"); log_suspend_abort_reason("noirq suspend of %s device failed", - suspend_stats.failed_devs[last_dev]); + suspend_stats.failed_devs[last_dev]); goto Platform_early_resume; } error = platform_suspend_prepare_noirq(state); @@ -450,9 +451,6 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) trace_suspend_resume(TPS("machine_suspend"), state, false); } else if (*wakeup) { - pm_get_active_wakeup_sources(suspend_abort, - MAX_SUSPEND_ABORT_LEN); - log_suspend_abort_reason(suspend_abort); error = -EBUSY; } syscore_resume(); @@ -485,7 +483,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) */ int suspend_devices_and_enter(suspend_state_t state) { - int error, last_dev; + int error; bool wakeup = false; if (!sleep_state_supported(state)) @@ -501,11 +499,9 @@ int suspend_devices_and_enter(suspend_state_t state) suspend_test_start(); error = dpm_suspend_start(PMSG_SUSPEND); if (error) { - last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; - last_dev %= REC_FAILED_NUM; pr_err("Some devices failed to suspend, or early wake event detected\n"); - log_suspend_abort_reason("%s device failed to suspend, or early wake event detected", - suspend_stats.failed_devs[last_dev]); + log_suspend_abort_reason( + "Some 
devices failed to suspend, or early wake event detected"); goto Recover_platform; } suspend_test_finish("suspend devices"); diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 252611fad2fe..503a71fc49fc 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -4,7 +4,7 @@ * Logs the reasons which caused the kernel to resume from * the suspend mode. * - * Copyright (C) 2014 Google, Inc. + * Copyright (C) 2020 Google, Inc. * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and * may be copied, distributed, and modified under those terms. @@ -26,70 +26,312 @@ #include #include #include +#include +/* + * struct wakeup_irq_node - stores data and relationships for IRQs logged as + * either base or nested wakeup reasons during suspend/resume flow. + * @siblings - for membership on leaf or parent IRQ lists + * @irq - the IRQ number + * @irq_name - the name associated with the IRQ, or a default if none + */ +struct wakeup_irq_node { + struct list_head siblings; + int irq; + const char *irq_name; +}; + +static DEFINE_SPINLOCK(wakeup_reason_lock); + +static LIST_HEAD(leaf_irqs); /* kept in ascending IRQ sorted order */ +static LIST_HEAD(parent_irqs); /* unordered */ -#define MAX_WAKEUP_REASON_IRQS 32 -static int irq_list[MAX_WAKEUP_REASON_IRQS]; -static int irqcount; +static struct kmem_cache *wakeup_irq_nodes_cache; + +static const char *default_irq_name = "(unnamed)"; + +static struct kobject *kobj; + +static bool capture_reasons; static bool suspend_abort; -static char abort_reason[MAX_SUSPEND_ABORT_LEN]; -static struct kobject *wakeup_reason; -static DEFINE_SPINLOCK(resume_reason_lock); +static bool abnormal_wake; +static char non_irq_wake_reason[MAX_SUSPEND_ABORT_LEN]; static ktime_t last_monotime; /* monotonic time before last suspend */ static ktime_t curr_monotime; /* monotonic time after last suspend */ static ktime_t last_stime; /* 
monotonic boottime offset before last suspend */ static ktime_t curr_stime; /* monotonic boottime offset after last suspend */ -static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) +static void init_node(struct wakeup_irq_node *p, int irq) { - int irq_no, buf_offset = 0; struct irq_desc *desc; - spin_lock(&resume_reason_lock); - if (suspend_abort) { - buf_offset = sprintf(buf, "Abort: %s", abort_reason); - } else { - for (irq_no = 0; irq_no < irqcount; irq_no++) { - desc = irq_to_desc(irq_list[irq_no]); - if (desc && desc->action && desc->action->name) - buf_offset += sprintf(buf + buf_offset, "%d %s\n", - irq_list[irq_no], desc->action->name); + + INIT_LIST_HEAD(&p->siblings); + + p->irq = irq; + desc = irq_to_desc(irq); + if (desc && desc->action && desc->action->name) + p->irq_name = desc->action->name; + else + p->irq_name = default_irq_name; +} + +static struct wakeup_irq_node *create_node(int irq) +{ + struct wakeup_irq_node *result; + + result = kmem_cache_alloc(wakeup_irq_nodes_cache, GFP_ATOMIC); + if (unlikely(!result)) + pr_warn("Failed to log wakeup IRQ %d\n", irq); + else + init_node(result, irq); + + return result; +} + +static void delete_list(struct list_head *head) +{ + struct wakeup_irq_node *n; + + while (!list_empty(head)) { + n = list_first_entry(head, struct wakeup_irq_node, siblings); + list_del(&n->siblings); + kmem_cache_free(wakeup_irq_nodes_cache, n); + } +} + +static bool add_sibling_node_sorted(struct list_head *head, int irq) +{ + struct wakeup_irq_node *n; + struct list_head *predecessor = head; + + if (unlikely(WARN_ON(!head))) + return NULL; + + if (!list_empty(head)) + list_for_each_entry(n, head, siblings) { + if (n->irq < irq) + predecessor = &n->siblings; + else if (n->irq == irq) + return true; else - buf_offset += sprintf(buf + buf_offset, "%d\n", - irq_list[irq_no]); + break; + } + + n = create_node(irq); + if (n) { + list_add(&n->siblings, predecessor); + return true; + } + + 
return false; +} + +static struct wakeup_irq_node *find_node_in_list(struct list_head *head, + int irq) +{ + struct wakeup_irq_node *n; + + if (unlikely(WARN_ON(!head))) + return NULL; + + list_for_each_entry(n, head, siblings) + if (n->irq == irq) + return n; + + return NULL; +} + +void log_irq_wakeup_reason(int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&wakeup_reason_lock, flags); + + if (!capture_reasons) { + spin_unlock_irqrestore(&wakeup_reason_lock, flags); + return; + } + + if (find_node_in_list(&parent_irqs, irq) == NULL) + add_sibling_node_sorted(&leaf_irqs, irq); + + spin_unlock_irqrestore(&wakeup_reason_lock, flags); +} + +void log_threaded_irq_wakeup_reason(int irq, int parent_irq) +{ + struct wakeup_irq_node *parent; + unsigned long flags; + + /* + * Intentionally unsynchronized. Calls that come in after we have + * resumed should have a fast exit path since there's no work to be + * done, and any coherence issue that could cause a wrong value here is + * both highly improbable - given the set/clear timing - and very low + * impact (parent IRQ gets logged instead of the specific child).
+ */ + if (!capture_reasons) + return; + + spin_lock_irqsave(&wakeup_reason_lock, flags); + + if (!capture_reasons || (find_node_in_list(&leaf_irqs, irq) != NULL)) { + spin_unlock_irqrestore(&wakeup_reason_lock, flags); + return; + } + + parent = find_node_in_list(&parent_irqs, parent_irq); + if (parent != NULL) + add_sibling_node_sorted(&leaf_irqs, irq); + else { + parent = find_node_in_list(&leaf_irqs, parent_irq); + if (parent != NULL) { + list_del_init(&parent->siblings); + list_add_tail(&parent->siblings, &parent_irqs); + add_sibling_node_sorted(&leaf_irqs, irq); } } - spin_unlock(&resume_reason_lock); + + spin_unlock_irqrestore(&wakeup_reason_lock, flags); +} + +void __log_abort_or_abnormal_wake(bool abort, const char *fmt, va_list args) +{ + unsigned long flags; + + spin_lock_irqsave(&wakeup_reason_lock, flags); + + /* Suspend abort or abnormal wake reason has already been logged. */ + if (suspend_abort || abnormal_wake) { + spin_unlock_irqrestore(&wakeup_reason_lock, flags); + return; + } + + suspend_abort = abort; + abnormal_wake = !abort; + vsnprintf(non_irq_wake_reason, MAX_SUSPEND_ABORT_LEN, fmt, args); + + spin_unlock_irqrestore(&wakeup_reason_lock, flags); +} + +void log_suspend_abort_reason(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + __log_abort_or_abnormal_wake(true, fmt, args); + va_end(args); +} + +void log_abnormal_wakeup_reason(const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + __log_abort_or_abnormal_wake(false, fmt, args); + va_end(args); +} + +void clear_wakeup_reasons(void) +{ + unsigned long flags; + + spin_lock_irqsave(&wakeup_reason_lock, flags); + + delete_list(&leaf_irqs); + delete_list(&parent_irqs); + suspend_abort = false; + abnormal_wake = false; + capture_reasons = true; + + spin_unlock_irqrestore(&wakeup_reason_lock, flags); +} + +static void print_wakeup_sources(void) +{ + struct wakeup_irq_node *n; + unsigned long flags; + + spin_lock_irqsave(&wakeup_reason_lock, flags); + + capture_reasons = false; + + if (suspend_abort) { + pr_info("Abort: %s\n", non_irq_wake_reason); + spin_unlock_irqrestore(&wakeup_reason_lock, flags); + return; + } + + if (!list_empty(&leaf_irqs)) + list_for_each_entry(n, &leaf_irqs, siblings) + pr_info("Resume caused by IRQ %d, %s\n", n->irq, + n->irq_name); + else if (abnormal_wake) + pr_info("Resume caused by %s\n", non_irq_wake_reason); + else + pr_info("Resume cause unknown\n"); + + spin_unlock_irqrestore(&wakeup_reason_lock, flags); +} + +static ssize_t last_resume_reason_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + ssize_t buf_offset = 0; + struct wakeup_irq_node *n; + unsigned long flags; + + spin_lock_irqsave(&wakeup_reason_lock, flags); + + if (suspend_abort) { + buf_offset = scnprintf(buf, PAGE_SIZE, "Abort: %s", + non_irq_wake_reason); + spin_unlock_irqrestore(&wakeup_reason_lock, flags); + return buf_offset; + } + + if (!list_empty(&leaf_irqs)) + list_for_each_entry(n, &leaf_irqs, siblings) + buf_offset += scnprintf(buf + buf_offset, + PAGE_SIZE - buf_offset, + "%d %s\n", n->irq, n->irq_name); + else if (abnormal_wake) + buf_offset = scnprintf(buf, PAGE_SIZE, "-1 %s", + non_irq_wake_reason); + + spin_unlock_irqrestore(&wakeup_reason_lock, flags); + return buf_offset; } static ssize_t last_suspend_time_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - struct timespec sleep_time; - struct 
timespec total_time; - struct timespec suspend_resume_time; + struct timespec64 sleep_time; + struct timespec64 total_time; + struct timespec64 suspend_resume_time; /* * total_time is calculated from monotonic bootoffsets because * unlike CLOCK_MONOTONIC it include the time spent in suspend state. */ - total_time = ktime_to_timespec(ktime_sub(curr_stime, last_stime)); + total_time = ktime_to_timespec64(ktime_sub(curr_stime, last_stime)); /* * suspend_resume_time is calculated as monotonic (CLOCK_MONOTONIC) * time interval before entering suspend and post suspend. */ - suspend_resume_time = ktime_to_timespec(ktime_sub(curr_monotime, last_monotime)); + suspend_resume_time = + ktime_to_timespec64(ktime_sub(curr_monotime, last_monotime)); /* sleep_time = total_time - suspend_resume_time */ - sleep_time = timespec_sub(total_time, suspend_resume_time); + sleep_time = timespec64_sub(total_time, suspend_resume_time); /* Export suspend_resume_time and sleep_time in pair here. */ - return sprintf(buf, "%lu.%09lu %lu.%09lu\n", - suspend_resume_time.tv_sec, suspend_resume_time.tv_nsec, - sleep_time.tv_sec, sleep_time.tv_nsec); + return sprintf(buf, "%llu.%09lu %llu.%09lu\n", + suspend_resume_time.tv_sec, suspend_resume_time.tv_nsec, + sleep_time.tv_sec, sleep_time.tv_nsec); } static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason); @@ -104,86 +346,24 @@ static struct attribute_group attr_group = { .attrs = attrs, }; -/* - * logs all the wake up reasons to the kernel - * stores the irqs to expose them to the userspace via sysfs - */ -void log_wakeup_reason(int irq) -{ - struct irq_desc *desc; - desc = irq_to_desc(irq); - if (desc && desc->action && desc->action->name) - printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq, - desc->action->name); - else - printk(KERN_INFO "Resume caused by IRQ %d\n", irq); - - spin_lock(&resume_reason_lock); - if (irqcount == MAX_WAKEUP_REASON_IRQS) { - spin_unlock(&resume_reason_lock); - printk(KERN_WARNING "Resume caused by 
more than %d IRQs\n", - MAX_WAKEUP_REASON_IRQS); - return; - } - - irq_list[irqcount++] = irq; - spin_unlock(&resume_reason_lock); -} - -int check_wakeup_reason(int irq) -{ - int irq_no; - int ret = false; - - spin_lock(&resume_reason_lock); - for (irq_no = 0; irq_no < irqcount; irq_no++) - if (irq_list[irq_no] == irq) { - ret = true; - break; - } - spin_unlock(&resume_reason_lock); - return ret; -} - -void log_suspend_abort_reason(const char *fmt, ...) -{ - va_list args; - - spin_lock(&resume_reason_lock); - - //Suspend abort reason has already been logged. - if (suspend_abort) { - spin_unlock(&resume_reason_lock); - return; - } - - suspend_abort = true; - va_start(args, fmt); - vsnprintf(abort_reason, MAX_SUSPEND_ABORT_LEN, fmt, args); - va_end(args); - spin_unlock(&resume_reason_lock); -} - /* Detects a suspend and clears all the previous wake up reasons*/ static int wakeup_reason_pm_event(struct notifier_block *notifier, unsigned long pm_event, void *unused) { switch (pm_event) { case PM_SUSPEND_PREPARE: - spin_lock(&resume_reason_lock); - irqcount = 0; - suspend_abort = false; - spin_unlock(&resume_reason_lock); /* monotonic time since boot */ last_monotime = ktime_get(); /* monotonic time since boot including the time spent in suspend */ last_stime = ktime_get_boottime(); + clear_wakeup_reasons(); break; case PM_POST_SUSPEND: /* monotonic time since boot */ curr_monotime = ktime_get(); /* monotonic time since boot including the time spent in suspend */ curr_stime = ktime_get_boottime(); + print_wakeup_sources(); break; default: break; @@ -195,31 +375,40 @@ static struct notifier_block wakeup_reason_pm_notifier_block = { .notifier_call = wakeup_reason_pm_event, }; -/* Initializes the sysfs parameter - * registers the pm_event notifier - */ int __init wakeup_reason_init(void) { - int retval; - - retval = register_pm_notifier(&wakeup_reason_pm_notifier_block); - if (retval) - printk(KERN_WARNING "[%s] failed to register PM notifier %d\n", - __func__, retval); + 
if (register_pm_notifier(&wakeup_reason_pm_notifier_block)) { + pr_warn("[%s] failed to register PM notifier\n", __func__); + goto fail; + } - wakeup_reason = kobject_create_and_add("wakeup_reasons", kernel_kobj); - if (!wakeup_reason) { - printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n", - __func__); - return 1; + kobj = kobject_create_and_add("wakeup_reasons", kernel_kobj); + if (!kobj) { + pr_warn("[%s] failed to create a sysfs kobject\n", __func__); + goto fail_unregister_pm_notifier; } - retval = sysfs_create_group(wakeup_reason, &attr_group); - if (retval) { - kobject_put(wakeup_reason); - printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n", - __func__, retval); + + if (sysfs_create_group(kobj, &attr_group)) { + pr_warn("[%s] failed to create a sysfs group\n", __func__); + goto fail_kobject_put; } + + wakeup_irq_nodes_cache = + kmem_cache_create("wakeup_irq_node_cache", + sizeof(struct wakeup_irq_node), 0, 0, NULL); + if (!wakeup_irq_nodes_cache) + goto fail_remove_group; + return 0; + +fail_remove_group: + sysfs_remove_group(kobj, &attr_group); +fail_kobject_put: + kobject_put(kobj); +fail_unregister_pm_notifier: + unregister_pm_notifier(&wakeup_reason_pm_notifier_block); +fail: + return 1; } late_initcall(wakeup_reason_init); -- GitLab From 545d7421fb06364bd17305c5fb9565e7f1e7995e Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 25 Mar 2020 18:01:00 -0400 Subject: [PATCH 0797/1278] ipv4: fix a RCU-list lock in fib_triestat_seq_show [ Upstream commit fbe4e0c1b298b4665ee6915266c9d6c5b934ef4a ] fib_triestat_seq_show() calls hlist_for_each_entry_rcu(tb, head, tb_hlist) without rcu_read_lock() will trigger a warning, net/ipv4/fib_trie.c:2579 RCU-list traversed in non-reader section!! 
other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by proc01/115277: #0: c0000014507acf00 (&p->lock){+.+.}-{3:3}, at: seq_read+0x58/0x670 Call Trace: dump_stack+0xf4/0x164 (unreliable) lockdep_rcu_suspicious+0x140/0x164 fib_triestat_seq_show+0x750/0x880 seq_read+0x1a0/0x670 proc_reg_read+0x10c/0x1b0 __vfs_read+0x3c/0x70 vfs_read+0xac/0x170 ksys_read+0x7c/0x140 system_call+0x5c/0x68 Fix it by adding a pair of rcu_read_lock/unlock() and use cond_resched_rcu() to avoid the situation where walking of a large number of items may prevent scheduling for a long time. Signed-off-by: Qian Cai Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_trie.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index bb847d280778..3f9509679f0e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2319,6 +2319,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) " %zd bytes, size of tnode: %zd bytes.\n", LEAF_SIZE, TNODE_SIZE(0)); + rcu_read_lock(); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct fib_table *tb; @@ -2338,7 +2339,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) trie_show_usage(seq, t->stats); #endif } + cond_resched_rcu(); } + rcu_read_unlock(); return 0; } -- GitLab From b57327db68d3bbf2e0eae2a3398c5adc14237550 Mon Sep 17 00:00:00 2001 From: William Dauchy Date: Fri, 27 Mar 2020 19:56:39 +0100 Subject: [PATCH 0798/1278] net, ip_tunnel: fix interface lookup with no key [ Upstream commit 25629fdaff2ff509dd0b3f5ff93d70a75e79e0a1 ] when creating a new ipip interface with no local/remote configuration, the lookup is done with TUNNEL_NO_KEY flag, making it impossible to match the new interface (only possible match being fallback or metadata case interface); e.g: `ip link add tunl1 type ipip dev eth0` To fix this case, adding a flag check
before the key comparison so we permit to match an interface with no local/remote config; it also avoids breaking possible userland tools relying on TUNNEL_NO_KEY flag and uninitialised key. context being on my side, I'm creating an extra ipip interface attached to the physical one, and moving it to a dedicated namespace. Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Signed-off-by: William Dauchy Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 404dc765f2bf..f6793017a20d 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -155,11 +155,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, cand = t; } - if (flags & TUNNEL_NO_KEY) - goto skip_key_lookup; - hlist_for_each_entry_rcu(t, head, hash_node) { - if (t->parms.i_key != key || + if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) || t->parms.iph.saddr != 0 || t->parms.iph.daddr != 0 || !(t->dev->flags & IFF_UP)) @@ -171,7 +168,6 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, cand = t; } -skip_key_lookup: if (cand) return cand; -- GitLab From 968f831d9056bcb3fcca031c11d39f4853908307 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Fri, 27 Mar 2020 11:07:51 +0800 Subject: [PATCH 0799/1278] sctp: fix refcount bug in sctp_wfree [ Upstream commit 5c3e82fe159622e46e91458c1a6509c321a62820 ] We should iterate over the datamsgs to move all chunks(skbs) to newsk. 
The following case causes the bug: for the trouble SKB, it was in outq->transmitted list sctp_outq_sack sctp_check_transmitted SKB was moved to outq->sacked list then throw away the sack queue SKB was deleted from outq->sacked (but it was held by datamsg at sctp_datamsg_to_asoc So, sctp_wfree was not called here) then migrate happened sctp_for_each_tx_datachunk( sctp_clear_owner_w); sctp_assoc_migrate(); sctp_for_each_tx_datachunk( sctp_set_owner_w); SKB was not in the outq, and was not changed to newsk finally __sctp_outq_teardown sctp_chunk_put (for another skb) sctp_datamsg_put __kfree_skb(msg->frag_list) sctp_wfree (for SKB) SKB->sk was still oldsk (skb->sk != asoc->base.sk). Reported-and-tested-by: syzbot+cea71eec5d6de256d54d@syzkaller.appspotmail.com Signed-off-by: Qiujun Huang Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 09cda66d0567..442780515760 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -175,29 +175,44 @@ static void sctp_clear_owner_w(struct sctp_chunk *chunk) skb_orphan(chunk->skb); } +#define traverse_and_process() \ +do { \ + msg = chunk->msg; \ + if (msg == prev_msg) \ + continue; \ + list_for_each_entry(c, &msg->chunks, frag_list) { \ + if ((clear && asoc->base.sk == c->skb->sk) || \ + (!clear && asoc->base.sk != c->skb->sk)) \ + cb(c); \ + } \ + prev_msg = msg; \ +} while (0) + static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, + bool clear, void (*cb)(struct sctp_chunk *)) { + struct sctp_datamsg *msg, *prev_msg = NULL; struct sctp_outq *q = &asoc->outqueue; + struct sctp_chunk *chunk, *c; struct sctp_transport *t; - struct sctp_chunk *chunk; list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) list_for_each_entry(chunk, &t->transmitted, transmitted_list) - cb(chunk); +
traverse_and_process(); list_for_each_entry(chunk, &q->retransmit, transmitted_list) - cb(chunk); + traverse_and_process(); list_for_each_entry(chunk, &q->sacked, transmitted_list) - cb(chunk); + traverse_and_process(); list_for_each_entry(chunk, &q->abandoned, transmitted_list) - cb(chunk); + traverse_and_process(); list_for_each_entry(chunk, &q->out_chunk_list, list) - cb(chunk); + traverse_and_process(); } /* Verify that this is a valid address. */ @@ -8280,9 +8295,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * paths won't try to lock it and then oldsk. */ lock_sock_nested(newsk, SINGLE_DEPTH_NESTING); - sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w); + sctp_for_each_tx_datachunk(assoc, true, sctp_clear_owner_w); sctp_assoc_migrate(assoc, newsk); - sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w); + sctp_for_each_tx_datachunk(assoc, false, sctp_set_owner_w); /* If the association on the newsk is already closed before accept() * is called, set RCV_SHUTDOWN flag. -- GitLab From cfc370cb72e6d29e63b6b63c0a4ec015bbd49a0e Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 26 Mar 2020 20:47:46 -0300 Subject: [PATCH 0800/1278] sctp: fix possibly using a bad saddr with a given dst [ Upstream commit 582eea230536a6f104097dd46205822005d5fe3a ] Under certain circumstances, depending on the order of addresses on the interfaces, it could be that sctp_v[46]_get_dst() would return a dst with a mismatched struct flowi. For example, if when walking through the bind addresses and the first one is not a match, it saves the dst as a fallback (added in 410f03831c07), but not the flowi. Then if the next one is also not a match, the previous dst will be returned but with the flowi information for the 2nd address, which is wrong. The fix is to use a locally stored flowi that can be used for such attempts, and copy it to the parameter only in case it is a possible match, together with the corresponding dst entry. 
The patch updates IPv6 code mostly just to be in sync. Even though the issue is also present there, it fallback is not expected to work with IPv6. Fixes: 410f03831c07 ("sctp: add routing output fallback") Reported-by: Jin Meng Signed-off-by: Marcelo Ricardo Leitner Tested-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/ipv6.c | 20 ++++++++++++++------ net/sctp/protocol.c | 28 +++++++++++++++++++--------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 53a66ee1331f..18efb8cc4693 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -235,7 +235,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, { struct sctp_association *asoc = t->asoc; struct dst_entry *dst = NULL; - struct flowi6 *fl6 = &fl->u.ip6; + struct flowi _fl; + struct flowi6 *fl6 = &_fl.u.ip6; struct sctp_bind_addr *bp; struct ipv6_pinfo *np = inet6_sk(sk); struct sctp_sockaddr_entry *laddr; @@ -245,7 +246,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, enum sctp_scope scope; __u8 matchlen = 0; - memset(fl6, 0, sizeof(struct flowi6)); + memset(&_fl, 0, sizeof(_fl)); fl6->daddr = daddr->v6.sin6_addr; fl6->fl6_dport = daddr->v6.sin6_port; fl6->flowi6_proto = IPPROTO_SCTP; @@ -271,8 +272,11 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, fl6, final_p); - if (!asoc || saddr) + if (!asoc || saddr) { + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); goto out; + } bp = &asoc->base.bind_addr; scope = sctp_scope(daddr); @@ -295,6 +299,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if ((laddr->a.sa.sa_family == AF_INET6) && (sctp_v6_cmp_addr(&dst_saddr, &laddr->a))) { rcu_read_unlock(); + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); goto out; } } @@ -333,6 +339,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr 
*saddr, if (!IS_ERR_OR_NULL(dst)) dst_release(dst); dst = bdst; + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); break; } @@ -346,6 +354,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, dst_release(dst); dst = bdst; matchlen = bmatchlen; + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); } rcu_read_unlock(); @@ -354,14 +364,12 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct rt6_info *rt; rt = (struct rt6_info *)dst; - t->dst = dst; t->dst_cookie = rt6_get_cookie(rt); pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n", &rt->rt6i_dst.addr, rt->rt6i_dst.plen, - &fl6->saddr); + &fl->u.ip6.saddr); } else { t->dst = NULL; - pr_debug("no route\n"); } } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index bf39f317953a..785456df7505 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -435,14 +435,15 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, { struct sctp_association *asoc = t->asoc; struct rtable *rt; - struct flowi4 *fl4 = &fl->u.ip4; + struct flowi _fl; + struct flowi4 *fl4 = &_fl.u.ip4; struct sctp_bind_addr *bp; struct sctp_sockaddr_entry *laddr; struct dst_entry *dst = NULL; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; - memset(fl4, 0x0, sizeof(struct flowi4)); + memset(&_fl, 0x0, sizeof(_fl)); fl4->daddr = daddr->v4.sin_addr.s_addr; fl4->fl4_dport = daddr->v4.sin_port; fl4->flowi4_proto = IPPROTO_SCTP; @@ -460,8 +461,11 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, &fl4->saddr); rt = ip_route_output_key(sock_net(sk), fl4); - if (!IS_ERR(rt)) + if (!IS_ERR(rt)) { dst = &rt->dst; + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); + } /* If there is no association or if a source address is passed, no * more validation is required. 
@@ -524,27 +528,33 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, odev = __ip_dev_find(sock_net(sk), laddr->a.v4.sin_addr.s_addr, false); if (!odev || odev->ifindex != fl4->flowi4_oif) { - if (!dst) + if (!dst) { dst = &rt->dst; - else + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); + } else { dst_release(&rt->dst); + } continue; } dst_release(dst); dst = &rt->dst; + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); break; } out_unlock: rcu_read_unlock(); out: - t->dst = dst; - if (dst) + if (dst) { pr_debug("rt_dst:%pI4, rt_src:%pI4\n", - &fl4->daddr, &fl4->saddr); - else + &fl->u.ip4.daddr, &fl->u.ip4.saddr); + } else { + t->dst = NULL; pr_debug("no route\n"); + } } /* For v4, the source address is cached in the route entry(dst). So no need -- GitLab From f8c1e65e1057a3e7f35ff866492f5e6a9f67913e Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Fri, 13 Mar 2020 09:41:52 +0100 Subject: [PATCH 0801/1278] drm/bochs: downgrade pci_request_region failure from error to warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8c34cd1a7f089dc03933289c5d4a4d1489549828 ] Shutdown of firmware framebuffer has a bunch of problems. Because of this the framebuffer region might still be reserved even after drm_fb_helper_remove_conflicting_pci_framebuffers() returned. Don't consider pci_request_region() failure for the framebuffer region as fatal error to workaround this issue. 
Reported-by: Marek Marczykowski-Górecki Signed-off-by: Gerd Hoffmann Acked-by: Sam Ravnborg Link: http://patchwork.freedesktop.org/patch/msgid/20200313084152.2734-1-kraxel@redhat.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bochs/bochs_hw.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/bochs/bochs_hw.c b/drivers/gpu/drm/bochs/bochs_hw.c index a39b0343c197..401c218567af 100644 --- a/drivers/gpu/drm/bochs/bochs_hw.c +++ b/drivers/gpu/drm/bochs/bochs_hw.c @@ -97,10 +97,8 @@ int bochs_hw_init(struct drm_device *dev, uint32_t flags) size = min(size, mem); } - if (pci_request_region(pdev, 0, "bochs-drm") != 0) { - DRM_ERROR("Cannot request framebuffer\n"); - return -EBUSY; - } + if (pci_request_region(pdev, 0, "bochs-drm") != 0) + DRM_WARN("Cannot request framebuffer, boot fb still active?\n"); bochs->fb_map = ioremap(addr, size); if (bochs->fb_map == NULL) { -- GitLab From 389f04117453a5fd65d541d995cc9d55a62a42be Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 16 Mar 2020 14:25:19 +0300 Subject: [PATCH 0802/1278] initramfs: restore default compression behavior [ Upstream commit 785d74ec3bbf26ac7f6e92e6e96a259aec0f107a ] Even though INITRAMFS_SOURCE kconfig option isn't set in most of defconfigs it is used (set) extensively by various build systems. Commit f26661e12765 ("initramfs: make initramfs compression choice non-optional") has changed default compression mode. Previously we compress initramfs using available compression algorithm. Now we don't use any compression at all by default. It significantly increases the image size in case of build system chooses embedded initramfs. Initially I faced with this issue while using buildroot. As of today it's not possible to set preferred compression mode in target defconfig as this option depends on INITRAMFS_SOURCE being set. Modification of all build systems either doesn't look like good option. 
Let's instead rewrite initramfs compression mode choices list the way that "INITRAMFS_COMPRESSION_NONE" will be the last option in the list. In that case it will be chosen only if all other options (which implements any compression) are not available. Signed-off-by: Eugeniy Paltsev Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- usr/Kconfig | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/usr/Kconfig b/usr/Kconfig index 43658b8a975e..8b4826de1189 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -131,17 +131,6 @@ choice If in doubt, select 'None' -config INITRAMFS_COMPRESSION_NONE - bool "None" - help - Do not compress the built-in initramfs at all. This may sound wasteful - in space, but, you should be aware that the built-in initramfs will be - compressed at a later stage anyways along with the rest of the kernel, - on those architectures that support this. However, not compressing the - initramfs may lead to slightly higher memory consumption during a - short time at boot, while both the cpio image and the unpacked - filesystem image will be present in memory simultaneously - config INITRAMFS_COMPRESSION_GZIP bool "Gzip" depends on RD_GZIP @@ -214,6 +203,17 @@ config INITRAMFS_COMPRESSION_LZ4 If you choose this, keep in mind that most distros don't provide lz4 by default which could cause a build failure. +config INITRAMFS_COMPRESSION_NONE + bool "None" + help + Do not compress the built-in initramfs at all. This may sound wasteful + in space, but, you should be aware that the built-in initramfs will be + compressed at a later stage anyways along with the rest of the kernel, + on those architectures that support this. 
However, not compressing the + initramfs may lead to slightly higher memory consumption during a + short time at boot, while both the cpio image and the unpacked + filesystem image will be present in memory simultaneously + endchoice config INITRAMFS_COMPRESSION -- GitLab From 3069eab7eeb3da3f42e04ed5422a46bbad3f8c67 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 19 Mar 2020 18:33:12 -0400 Subject: [PATCH 0803/1278] tools/power turbostat: Fix gcc build warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d8d005ba6afa502ca37ced5782f672c4d2fc1515 ] Warning: ‘__builtin_strncpy’ specified bound 20 equals destination size [-Wstringop-truncation] reduce param to strncpy, to guarantee that a null byte is always copied into destination buffer. Signed-off-by: Len Brown Signed-off-by: Sasha Levin --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 19e345cf8193..0692f2efc25e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4650,9 +4650,9 @@ int add_counter(unsigned int msr_num, char *path, char *name, } msrp->msr_num = msr_num; - strncpy(msrp->name, name, NAME_BYTES); + strncpy(msrp->name, name, NAME_BYTES - 1); if (path) - strncpy(msrp->path, path, PATH_BYTES); + strncpy(msrp->path, path, PATH_BYTES - 1); msrp->width = width; msrp->type = type; msrp->format = format; -- GitLab From c0eab61c136f671bccdf7530dd86cdb66e4ae7ba Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 5 Jul 2019 19:17:23 +0200 Subject: [PATCH 0804/1278] drm/etnaviv: replace MMU flush marker with flush sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4900dda90af2cb13bc1d4c12ce94b98acc8fe64e upstream. 
If a MMU is shared between multiple GPUs, all of them need to flush their TLBs, so a single marker that gets reset on the first flush won't do. Replace the flush marker with a sequence number, so that it's possible to check if the TLB is in sync with the current page table state for each GPU. Signed-off-by: Lucas Stach Reviewed-by: Philipp Zabel Reviewed-by: Guido Günther Signed-off-by: Robert Beckett Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/etnaviv/etnaviv_buffer.c | 10 ++++++---- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 1 + drivers/gpu/drm/etnaviv/etnaviv_mmu.c | 8 ++++---- drivers/gpu/drm/etnaviv/etnaviv_mmu.h | 2 +- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c index ed9588f36bc9..5fc1b41cb6c5 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c @@ -258,6 +258,8 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, unsigned int waitlink_offset = buffer->user_size - 16; u32 return_target, return_dwords; u32 link_target, link_dwords; + unsigned int new_flush_seq = READ_ONCE(gpu->mmu->flush_seq); + bool need_flush = gpu->flush_seq != new_flush_seq; if (drm_debug & DRM_UT_DRIVER) etnaviv_buffer_dump(gpu, buffer, 0, 0x50); @@ -270,14 +272,14 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, * need to append a mmu flush load state, followed by a new * link to this buffer - a total of four additional words. 
*/ - if (gpu->mmu->need_flush || gpu->switch_context) { + if (need_flush || gpu->switch_context) { u32 target, extra_dwords; /* link command */ extra_dwords = 1; /* flush command */ - if (gpu->mmu->need_flush) { + if (need_flush) { if (gpu->mmu->version == ETNAVIV_IOMMU_V1) extra_dwords += 1; else @@ -290,7 +292,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords); - if (gpu->mmu->need_flush) { + if (need_flush) { /* Add the MMU flush */ if (gpu->mmu->version == ETNAVIV_IOMMU_V1) { CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_MMU, @@ -310,7 +312,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, SYNC_RECIPIENT_PE); } - gpu->mmu->need_flush = false; + gpu->flush_seq = new_flush_seq; } if (gpu->switch_context) { diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index a1562f89c3d7..1f8c8e4328e4 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1353,7 +1353,7 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, gpu->active_fence = submit->fence->seqno; if (gpu->lastctx != cmdbuf->ctx) { - gpu->mmu->need_flush = true; + gpu->mmu->flush_seq++; gpu->switch_context = true; gpu->lastctx = cmdbuf->ctx; } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 689cb8f3680c..62b2877d090b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -138,6 +138,7 @@ struct etnaviv_gpu { struct etnaviv_iommu *mmu; struct etnaviv_cmdbuf_suballoc *cmdbuf_suballoc; + unsigned int flush_seq; /* Power Control: */ struct clk *clk_bus; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index f103e787de94..0e23a0542f0a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -132,7 +132,7 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu 
*mmu, */ if (mmu->last_iova) { mmu->last_iova = 0; - mmu->need_flush = true; + mmu->flush_seq++; continue; } @@ -246,7 +246,7 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu, } list_add_tail(&mapping->mmu_node, &mmu->mappings); - mmu->need_flush = true; + mmu->flush_seq++; mutex_unlock(&mmu->lock); return ret; @@ -264,7 +264,7 @@ void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu, etnaviv_iommu_remove_mapping(mmu, mapping); list_del(&mapping->mmu_node); - mmu->need_flush = true; + mmu->flush_seq++; mutex_unlock(&mmu->lock); } @@ -346,7 +346,7 @@ int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr, return ret; } mmu->last_iova = vram_node->start + size; - gpu->mmu->need_flush = true; + mmu->flush_seq++; mutex_unlock(&mmu->lock); *iova = (u32)vram_node->start; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h index 54be289e5981..ccb6ad3582b8 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h @@ -44,7 +44,7 @@ struct etnaviv_iommu { struct list_head mappings; struct drm_mm mm; u32 last_iova; - bool need_flush; + unsigned int flush_seq; }; struct etnaviv_gem_object; -- GitLab From 574eb136ec7f315c3ef2ca68fa9b3e16c56baa24 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Tue, 21 Aug 2018 15:15:04 +0800 Subject: [PATCH 0805/1278] blk-mq: sync the update nr_hw_queues with blk_mq_queue_tag_busy_iter commit f5bbbbe4d63577026f908a809f22f5fd5a90ea1f upstream. For blk-mq, part_in_flight/rw will invoke blk_mq_in_flight/rw to account the inflight requests. It will access the queue_hw_ctx and nr_hw_queues w/o any protection. When updating nr_hw_queues and blk_mq_in_flight/rw occur concurrently, panic comes up. Before update nr_hw_queues, the q will be frozen. So we could use q_usage_counter to avoid the race. percpu_ref_is_zero is used here so that we will not miss any in-flight request. 
The access to nr_hw_queues and queue_hw_ctx in blk_mq_queue_tag_busy_iter are under rcu critical section, __blk_mq_update_nr_hw_queues could use synchronize_rcu to ensure the zeroed q_usage_counter to be globally visible. Signed-off-by: Jianchao Wang Reviewed-by: Ming Lei Signed-off-by: Jens Axboe Cc: Giuliano Procida Signed-off-by: Greg Kroah-Hartman --- block/blk-mq-tag.c | 14 +++++++++++++- block/blk-mq.c | 4 ++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 3d2ab65d2dd1..4c623ba0af86 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -334,6 +334,18 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, struct blk_mq_hw_ctx *hctx; int i; + /* + * __blk_mq_update_nr_hw_queues will update the nr_hw_queues and + * queue_hw_ctx after freeze the queue. So we could use q_usage_counter + * to avoid race with it. __blk_mq_update_nr_hw_queues will users + * synchronize_rcu to ensure all of the users go out of the critical + * section below and see zeroed q_usage_counter. + */ + rcu_read_lock(); + if (percpu_ref_is_zero(&q->q_usage_counter)) { + rcu_read_unlock(); + return; + } queue_for_each_hw_ctx(q, hctx, i) { struct blk_mq_tags *tags = hctx->tags; @@ -349,7 +361,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, bt_for_each(hctx, &tags->breserved_tags, fn, priv, true); bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false); } - + rcu_read_unlock(); } static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, diff --git a/block/blk-mq.c b/block/blk-mq.c index eac444804736..9d53f476c517 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2748,6 +2748,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_unfreeze_queue(q); + /* + * Sync with blk_mq_queue_tag_busy_iter. 
+ */ + synchronize_rcu(); } void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) -- GitLab From f46117cb82fd863d900a9d6be8b00ef71f862f04 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 25 Sep 2018 10:36:20 -0600 Subject: [PATCH 0806/1278] blk-mq: Allow blocking queue tag iter callbacks commit 530ca2c9bd6949c72c9b5cfc330cb3dbccaa3f5b upstream. A recent commit runs tag iterator callbacks under the rcu read lock, but existing callbacks do not satisfy the non-blocking requirement. The commit intended to prevent an iterator from accessing a queue that's being modified. This patch fixes the original issue by taking a queue reference instead of reading it, which allows callbacks to make blocking calls. Fixes: f5bbbbe4d6357 ("blk-mq: sync the update nr_hw_queues with blk_mq_queue_tag_busy_iter") Acked-by: Jianchao Wang Signed-off-by: Keith Busch Signed-off-by: Jens Axboe Signed-off-by: Giuliano Procida Signed-off-by: Greg Kroah-Hartman --- block/blk-mq-tag.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 4c623ba0af86..e4b3eeaffc82 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -336,16 +336,11 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, /* * __blk_mq_update_nr_hw_queues will update the nr_hw_queues and - * queue_hw_ctx after freeze the queue. So we could use q_usage_counter - * to avoid race with it. __blk_mq_update_nr_hw_queues will users - * synchronize_rcu to ensure all of the users go out of the critical - * section below and see zeroed q_usage_counter. + * queue_hw_ctx after freeze the queue, so we use q_usage_counter + * to avoid race with it. 
*/ - rcu_read_lock(); - if (percpu_ref_is_zero(&q->q_usage_counter)) { - rcu_read_unlock(); + if (!percpu_ref_tryget(&q->q_usage_counter)) return; - } queue_for_each_hw_ctx(q, hctx, i) { struct blk_mq_tags *tags = hctx->tags; @@ -361,7 +356,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, bt_for_each(hctx, &tags->breserved_tags, fn, priv, true); bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false); } - rcu_read_unlock(); + blk_queue_exit(q); } static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, -- GitLab From cbd8aa4c013c0352c363cc612c81c5bd628cdf68 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 17 Mar 2020 15:31:57 +0530 Subject: [PATCH 0807/1278] misc: pci_endpoint_test: Fix to support > 10 pci-endpoint-test devices commit 6b443e5c80b67a7b8a85b33d052d655ef9064e90 upstream. Adding more than 10 pci-endpoint-test devices results in "kobject_add_internal failed for pci-endpoint-test.1 with -EEXIST, don't try to register things with the same name in the same directory". This is because commit 2c156ac71c6b ("misc: Add host side PCI driver for PCI test function device") limited the length of the "name" to 20 characters. Change the length of the name to 24 in order to support upto 10000 pci-endpoint-test devices. 
Fixes: 2c156ac71c6b ("misc: Add host side PCI driver for PCI test function device") Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Greg Kroah-Hartman --- drivers/misc/pci_endpoint_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 230f1e8538dc..953af49dd38a 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -466,7 +466,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, int err; int irq = 0; int id; - char name[20]; + char name[24]; enum pci_barno bar; void __iomem *base; struct device *dev = &pdev->dev; -- GitLab From 9469e982d8cecad859009e0d3471032d1a9f774c Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Tue, 24 Mar 2020 05:22:13 +0100 Subject: [PATCH 0808/1278] coresight: do not use the BIT() macro in the UAPI header commit 9b6eaaf3db5e5888df7bca7fed7752a90f7fd871 upstream. The BIT() macro definition is not available for the UAPI headers (moreover, it can be defined differently in the user space); replace its usage with the _BITUL() macro that is defined in <linux/const.h>.
Fixes: 237483aa5cf4 ("coresight: stm: adding driver for CoreSight STM component") Signed-off-by: Eugene Syromiatnikov Cc: stable Reviewed-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200324042213.GA10452@asgard.redhat.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/coresight-stm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/coresight-stm.h b/include/uapi/linux/coresight-stm.h index aac550a52f80..8847dbf24151 100644 --- a/include/uapi/linux/coresight-stm.h +++ b/include/uapi/linux/coresight-stm.h @@ -2,8 +2,10 @@ #ifndef __UAPI_CORESIGHT_STM_H_ #define __UAPI_CORESIGHT_STM_H_ -#define STM_FLAG_TIMESTAMPED BIT(3) -#define STM_FLAG_GUARANTEED BIT(7) +#include + +#define STM_FLAG_TIMESTAMPED _BITUL(3) +#define STM_FLAG_GUARANTEED _BITUL(7) /* * The CoreSight STM supports guaranteed and invariant timing -- GitLab From 2d8260be1c2cc83a0e43b0304db490421a531d9a Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Tue, 3 Dec 2019 14:31:11 -0500 Subject: [PATCH 0809/1278] padata: always acquire cpu_hotplug_lock before pinst->lock commit 38228e8848cd7dd86ccb90406af32de0cad24be3 upstream. lockdep complains when padata's paths to update cpumasks via CPU hotplug and sysfs are both taken: # echo 0 > /sys/devices/system/cpu/cpu1/online # echo ff > /sys/kernel/pcrypt/pencrypt/parallel_cpumask ====================================================== WARNING: possible circular locking dependency detected 5.4.0-rc8-padata-cpuhp-v3+ #1 Not tainted ------------------------------------------------------ bash/205 is trying to acquire lock: ffffffff8286bcd0 (cpu_hotplug_lock.rw_sem){++++}, at: padata_set_cpumask+0x2b/0x120 but task is already holding lock: ffff8880001abfa0 (&pinst->lock){+.+.}, at: padata_set_cpumask+0x26/0x120 which lock already depends on the new lock. padata doesn't take cpu_hotplug_lock and pinst->lock in a consistent order. Which should be first? 
CPU hotplug calls into padata with cpu_hotplug_lock already held, so it should have priority. Fixes: 6751fb3c0e0c ("padata: Use get_online_cpus/put_online_cpus") Signed-off-by: Daniel Jordan Cc: Eric Biggers Cc: Herbert Xu Cc: Steffen Klassert Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- kernel/padata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index 528a251217df..a71620d2b8ba 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -605,8 +605,8 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, struct cpumask *serial_mask, *parallel_mask; int err = -EINVAL; - mutex_lock(&pinst->lock); get_online_cpus(); + mutex_lock(&pinst->lock); switch (cpumask_type) { case PADATA_CPU_PARALLEL: @@ -624,8 +624,8 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask); out: - put_online_cpus(); mutex_unlock(&pinst->lock); + put_online_cpus(); return err; } -- GitLab From c51609ac4c48d31a8881af669f1192d2ee405928 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 1 Apr 2020 21:10:58 -0700 Subject: [PATCH 0810/1278] mm: mempolicy: require at least one nodeid for MPOL_PREFERRED commit aa9f7d5172fac9bf1f09e678c35e287a40a7b7dd upstream. Using an empty (malformed) nodelist that is not caught during mount option parsing leads to a stack-out-of-bounds access. The option string that was used was: "mpol=prefer:,". However, MPOL_PREFERRED requires a single node number, which is not being provided here. Add a check that 'nodes' is not empty after parsing for MPOL_PREFERRED's nodeid. 
Fixes: 095f1fc4ebf3 ("mempolicy: rework shmem mpol parsing and display") Reported-by: Entropy Moe <3ntr0py1337@gmail.com> Reported-by: syzbot+b055b1a6b2b958707a21@syzkaller.appspotmail.com Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Tested-by: syzbot+b055b1a6b2b958707a21@syzkaller.appspotmail.com Cc: Lee Schermerhorn Link: http://lkml.kernel.org/r/89526377-7eb6-b662-e1d8-4430928abde9@infradead.org Signed-off-by: Linus Torvalds Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 1b34f2e35951..d857e4770cc8 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2748,7 +2748,9 @@ int mpol_parse_str(char *str, struct mempolicy **mpol) switch (mode) { case MPOL_PREFERRED: /* - * Insist on a nodelist of one node only + * Insist on a nodelist of one node only, although later + * we use first_node(nodes) to grab a single node, so here + * nodelist (or nodes) cannot be empty. */ if (nodelist) { char *rest = nodelist; @@ -2756,6 +2758,8 @@ int mpol_parse_str(char *str, struct mempolicy **mpol) rest++; if (*rest) goto out; + if (nodes_empty(nodes)) + goto out; } break; case MPOL_INTERLEAVE: -- GitLab From 75757197572160c0ca878ac77016b70cdc6f2086 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 30 Mar 2020 11:22:19 -0400 Subject: [PATCH 0811/1278] ipv6: don't auto-add link-local address to lag ports [ Upstream commit 744fdc8233f6aa9582ce08a51ca06e59796a3196 ] Bonding slave and team port devices should not have link-local addresses automatically added to them, as it can interfere with openvswitch being able to properly add tc ingress. 
Basic reproducer, courtesy of Marcelo: $ ip link add name bond0 type bond $ ip link set dev ens2f0np0 master bond0 $ ip link set dev ens2f1np2 master bond0 $ ip link set dev bond0 up $ ip a s 1: lo: mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 inet 127.0.0.1/8 scope host lo valid_lft forever preferred_lft forever inet6 ::1/128 scope host valid_lft forever preferred_lft forever 2: ens2f0np0: mtu 1500 qdisc mq master bond0 state UP group default qlen 1000 link/ether 00:0f:53:2f:ea:40 brd ff:ff:ff:ff:ff:ff 5: ens2f1np2: mtu 1500 qdisc mq master bond0 state DOWN group default qlen 1000 link/ether 00:0f:53:2f:ea:40 brd ff:ff:ff:ff:ff:ff 11: bond0: mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether 00:0f:53:2f:ea:40 brd ff:ff:ff:ff:ff:ff inet6 fe80::20f:53ff:fe2f:ea40/64 scope link valid_lft forever preferred_lft forever (above trimmed to relevant entries, obviously) $ sysctl net.ipv6.conf.ens2f0np0.addr_gen_mode=0 net.ipv6.conf.ens2f0np0.addr_gen_mode = 0 $ sysctl net.ipv6.conf.ens2f1np2.addr_gen_mode=0 net.ipv6.conf.ens2f1np2.addr_gen_mode = 0 $ ip a l ens2f0np0 2: ens2f0np0: mtu 1500 qdisc mq master bond0 state UP group default qlen 1000 link/ether 00:0f:53:2f:ea:40 brd ff:ff:ff:ff:ff:ff inet6 fe80::20f:53ff:fe2f:ea40/64 scope link tentative valid_lft forever preferred_lft forever $ ip a l ens2f1np2 5: ens2f1np2: mtu 1500 qdisc mq master bond0 state DOWN group default qlen 1000 link/ether 00:0f:53:2f:ea:40 brd ff:ff:ff:ff:ff:ff inet6 fe80::20f:53ff:fe2f:ea40/64 scope link tentative valid_lft forever preferred_lft forever Looks like addrconf_sysctl_addr_gen_mode() bypasses the original "is this a slave interface?" check added by commit c2edacf80e15, and results in an address getting added, while w/the proposed patch added, no address gets added. 
This simply adds the same gating check to another code path, and thus should prevent the same devices from erroneously obtaining an ipv6 link-local address. Fixes: d35a00b8e33d ("net/ipv6: allow sysctl to change link-local address generation mode") Reported-by: Moshe Levi CC: Stephen Hemminger CC: Marcelo Ricardo Leitner CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 092e72d6a61d..e58fdefcd2ca 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3175,6 +3175,10 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) if (netif_is_l3_master(idev->dev)) return; + /* no link local addresses on devices flagged as slaves */ + if (idev->dev->flags & IFF_SLAVE) + return; + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); switch (idev->cnf.addr_gen_mode) { -- GitLab From 697c52f593d55c969ae80cf741ff08364087d410 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 5 Apr 2020 13:00:30 -0700 Subject: [PATCH 0812/1278] net: dsa: bcm_sf2: Ensure correct sub-node is parsed [ Upstream commit afa3b592953bfaecfb4f2f335ec5f935cff56804 ] When the bcm_sf2 was converted into a proper platform device driver and used the new dsa_register_switch() interface, we would still be parsing the legacy DSA node that contained all the port information since the platform firmware has intentionally maintained backward and forward compatibility to client programs. Ensure that we do parse the correct node, which is "ports" per the revised DSA binding. Fixes: d9338023fb8e ("net: dsa: bcm_sf2: Make it a real platform device driver") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 6bca42e34a53..b40ebc27e1ec 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1112,6 +1112,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) const struct bcm_sf2_of_data *data; struct b53_platform_data *pdata; struct dsa_switch_ops *ops; + struct device_node *ports; struct bcm_sf2_priv *priv; struct b53_device *dev; struct dsa_switch *ds; @@ -1174,7 +1175,11 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) */ set_bit(0, priv->cfp.used); - bcm_sf2_identify_ports(priv, dn->child); + ports = of_find_node_by_name(dn, "ports"); + if (ports) { + bcm_sf2_identify_ports(priv, ports); + of_node_put(ports); + } priv->irq0 = irq_of_parse_and_map(dn, 0); priv->irq1 = irq_of_parse_and_map(dn, 1); -- GitLab From 575698a583abe5778506ec13ccbca17fdf3a6a65 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 3 Apr 2020 09:53:25 +0200 Subject: [PATCH 0813/1278] net: phy: micrel: kszphy_resume(): add delay after genphy_resume() before accessing PHY registers [ Upstream commit 6110dff776f7fa65c35850ef65b41d3b39e2fac2 ] After the power-down bit is cleared, the chip internally triggers a global reset. According to the KSZ9031 documentation, we have to wait at least 1ms for the reset to finish. If the chip is accessed during reset, read will return 0xffff, while write will be ignored. Depending on the system performance and MDIO bus speed, we may or may not run in to this issue. This bug was discovered on an iMX6QP system with KSZ9031 PHY and attached PHY interrupt line. If IRQ was used, the link status update was lost. In polling mode, the link status update was always correct. 
The investigation showed, that during a read-modify-write access, the read returned 0xffff (while the chip was still in reset) and corresponding write hit the chip _after_ reset and triggered (due to the 0xffff) another reset in an undocumented bit (register 0x1f, bit 1), resulting in the next write being lost due to the new reset cycle. This patch fixes the issue by adding a 1...2 ms sleep after the genphy_resume(). Fixes: 836384d2501d ("net: phy: micrel: Add specific suspend") Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/micrel.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index eb85cf4a381a..5be7fc354e33 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -29,6 +29,7 @@ #include #include #include +#include /* Operation Mode Strap Override */ #define MII_KSZPHY_OMSO 0x16 @@ -727,6 +728,12 @@ static int kszphy_resume(struct phy_device *phydev) genphy_resume(phydev); + /* After switching from power-down to normal mode, an internal global + * reset is automatically generated. Wait a minimum of 1 ms before + * read/write access to the PHY registers. 
+ */ + usleep_range(1000, 2000); + ret = kszphy_config_reset(phydev); if (ret) return ret; -- GitLab From ca66bbed742de34889e4fbdc273f7fcbc855dc2a Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 3 Apr 2020 10:23:29 +0800 Subject: [PATCH 0814/1278] net: stmmac: dwmac1000: fix out-of-bounds mac address reg setting [ Upstream commit 3e1221acf6a8f8595b5ce354bab4327a69d54d18 ] Commit 9463c4455900 ("net: stmmac: dwmac1000: Clear unused address entries") cleared the unused mac address entries, but introduced an out-of bounds mac address register programming bug -- After setting the secondary unicast mac addresses, the "reg" value has reached netdev_uc_count() + 1, thus we should only clear address entries if (addr < perfect_addr_number) Fixes: 9463c4455900 ("net: stmmac: dwmac1000: Clear unused address entries") Signed-off-by: Jisheng Zhang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 08dd6a06ac58..f76d4a7281af 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -218,7 +218,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, reg++; } - while (reg <= perfect_addr_number) { + while (reg < perfect_addr_number) { writel(0, ioaddr + GMAC_ADDR_HIGH(reg)); writel(0, ioaddr + GMAC_ADDR_LOW(reg)); reg++; -- GitLab From d0a7c3373404bd931565f361802d320462fbe9f9 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Wed, 1 Apr 2020 12:06:39 +0200 Subject: [PATCH 0815/1278] slcan: Don't transmit uninitialized stack data in padding [ Upstream commit b9258a2cece4ec1f020715fe3554bc2e360f6264 ] struct can_frame contains some padding which is not explicitly zeroed in slc_bump. 
This uninitialized data will then be transmitted if the stack initialization hardening feature is not enabled (CONFIG_INIT_STACK_ALL). This commit just zeroes the whole struct including the padding. Signed-off-by: Richard Palethorpe Fixes: a1044e36e457 ("can: add slcan driver for serial/USB-serial CAN adapters") Reviewed-by: Kees Cook Cc: linux-can@vger.kernel.org Cc: netdev@vger.kernel.org Cc: security@kernel.org Cc: wg@grandegger.com Cc: mkl@pengutronix.de Cc: davem@davemloft.net Acked-by: Marc Kleine-Budde Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/slcan.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 7c37b96ff22a..c5a616395c49 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -147,7 +147,7 @@ static void slc_bump(struct slcan *sl) u32 tmpid; char *cmd = sl->rbuff; - cf.can_id = 0; + memset(&cf, 0, sizeof(cf)); switch (*cmd) { case 'r': @@ -186,8 +186,6 @@ static void slc_bump(struct slcan *sl) else return; - *(u64 *) (&cf.data) = 0; /* clear payload */ - /* RTR frames may have a dlc > 0 but they never have any data bytes */ if (!(cf.can_id & CAN_RTR_FLAG)) { for (i = 0; i < cf.can_dlc; i++) { -- GitLab From 982a76b0f0e1d0aa10af57bc9bcf0e578feea1a8 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 5 Apr 2020 09:50:22 +0300 Subject: [PATCH 0816/1278] mlxsw: spectrum_flower: Do not stop at FLOW_ACTION_VLAN_MANGLE [ Upstream commit ccfc569347f870830e7c7cf854679a06cf9c45b5 ] The handler for FLOW_ACTION_VLAN_MANGLE ends by returning whatever the lower-level function that it calls returns. If there are more actions lined up after this action, those are never offloaded. Fix by only bailing out when the called function returns an error. Fixes: a150201a70da ("mlxsw: spectrum: Add support for vlan modify TC action") Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 8aace9a06a5d..ea4704726505 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -112,9 +112,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, u8 prio = tcf_vlan_push_prio(a); u16 vid = tcf_vlan_push_vid(a); - return mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei, - action, vid, - proto, prio); + err = mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei, + action, vid, + proto, prio); + if (err) + return err; } else { dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n"); return -EOPNOTSUPP; -- GitLab From 1335dd6942feb749e3ccbe1a4493458229f77a6c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 21 Feb 2020 21:10:37 +0100 Subject: [PATCH 0817/1278] random: always use batched entropy for get_random_u{32,64} commit 69efea712f5b0489e67d07565aad5c94e09a3e52 upstream. It turns out that RDRAND is pretty slow. Comparing these two constructions: for (i = 0; i < CHACHA_BLOCK_SIZE; i += sizeof(ret)) arch_get_random_long(&ret); and long buf[CHACHA_BLOCK_SIZE / sizeof(long)]; extract_crng((u8 *)buf); it amortizes out to 352 cycles per long for the top one and 107 cycles per long for the bottom one, on Coffee Lake Refresh, Intel Core i9-9880H. And importantly, the top one has the drawback of not benefiting from the real rng, whereas the bottom one has all the nice benefits of using our own chacha rng. As get_random_u{32,64} gets used in more places (perhaps beyond what it was originally intended for when it was introduced as get_random_{int,long} back in the md5 monstrosity era), it seems like it might be a good thing to strengthen its posture a tiny bit. 
Doing this should only be stronger and not any weaker because that pool is already initialized with a bunch of rdrand data (when available). This way, we get the benefits of the hardware rng as well as our own rng. Another benefit of this is that we no longer hit pitfalls of the recent stream of AMD bugs in RDRAND. One often used code pattern for various things is: do { val = get_random_u32(); } while (hash_table_contains_key(val)); That recent AMD bug rendered that pattern useless, whereas we're really very certain that chacha20 output will give pretty distributed numbers, no matter what. So, this simplification seems better both from a security perspective and from a performance perspective. Signed-off-by: Jason A. Donenfeld Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20200221201037.30231-1-Jason@zx2c4.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index e6efa07e9f9e..868d2620b7ac 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2193,11 +2193,11 @@ struct batched_entropy { /* * Get a random word for internal kernel use only. The quality of the random - * number is either as good as RDRAND or as good as /dev/urandom, with the - * goal of being quite fast and not depleting entropy. In order to ensure + * number is good as /dev/urandom, but there is no backtrack protection, with + * the goal of being quite fast and not depleting entropy. In order to ensure * that the randomness provided by this function is okay, the function - * wait_for_random_bytes() should be called and return 0 at least once - * at any point prior. + * wait_for_random_bytes() should be called and return 0 at least once at any + * point prior. 
*/ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), @@ -2210,15 +2210,6 @@ u64 get_random_u64(void) struct batched_entropy *batch; static void *previous; -#if BITS_PER_LONG == 64 - if (arch_get_random_long((unsigned long *)&ret)) - return ret; -#else - if (arch_get_random_long((unsigned long *)&ret) && - arch_get_random_long((unsigned long *)&ret + 1)) - return ret; -#endif - warn_unseeded_randomness(&previous); batch = raw_cpu_ptr(&batched_entropy_u64); @@ -2243,9 +2234,6 @@ u32 get_random_u32(void) struct batched_entropy *batch; static void *previous; - if (arch_get_random_int(&ret)) - return ret; - warn_unseeded_randomness(&previous); batch = raw_cpu_ptr(&batched_entropy_u32); -- GitLab From 316b572b3dece07eb277a9523dac241f160c5ea9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 1 Apr 2020 21:02:25 -0700 Subject: [PATCH 0818/1278] tools/accounting/getdelays.c: fix netlink attribute length commit 4054ab64e29bb05b3dfe758fff3c38a74ba753bb upstream. A recent change to the netlink code: 6e237d099fac ("netlink: Relax attr validation for fixed length types") logs a warning when programs send messages with invalid attributes (e.g., wrong length for a u32). Yafang reported this error message for tools/accounting/getdelays.c. send_cmd() is wrongly adding 1 to the attribute length. As noted in include/uapi/linux/netlink.h nla_len should be NLA_HDRLEN + payload length, so drop the +1. 
Fixes: 9e06d3f9f6b1 ("per task delay accounting taskstats interface: documentation fix") Reported-by: Yafang Shao Signed-off-by: David Ahern Signed-off-by: Andrew Morton Tested-by: Yafang Shao Cc: Johannes Berg Cc: Shailabh Nagar Cc: Link: http://lkml.kernel.org/r/20200327173111.63922-1-dsahern@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- tools/accounting/getdelays.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 9f420d98b5fb..6bf6a204341e 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -136,7 +136,7 @@ static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, msg.g.version = 0x1; na = (struct nlattr *) GENLMSG_DATA(&msg); na->nla_type = nla_type; - na->nla_len = nla_len + 1 + NLA_HDRLEN; + na->nla_len = nla_len + NLA_HDRLEN; memcpy(NLA_DATA(na), nla_data, nla_len); msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); -- GitLab From bbe3eda9703b0a9607add7648263bd68cd16bcfa Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Thu, 5 Mar 2020 21:58:20 +0100 Subject: [PATCH 0819/1278] hwrng: imx-rngc - fix an error path commit 47a1f8e8b3637ff5f7806587883d7d94068d9ee8 upstream. Make sure that the rngc interrupt is masked if the rngc self test fails. Self test failure means that probe fails as well. Interrupts should be masked in this case, regardless of the error. 
Cc: stable@vger.kernel.org Fixes: 1d5449445bd0 ("hwrng: mx-rngc - add a driver for Freescale RNGC") Reviewed-by: PrasannaKumar Muralidharan Signed-off-by: Martin Kaiser Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/imx-rngc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index 88db42d30760..48194d1a6076 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -110,8 +110,10 @@ static int imx_rngc_self_test(struct imx_rngc *rngc) return -ETIMEDOUT; } - if (rngc->err_reg != 0) + if (rngc->err_reg != 0) { + imx_rngc_irq_mask_clear(rngc); return -EIO; + } return 0; } -- GitLab From 3c97c943d3b784058cbd17ed49a1f863e925bfee Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 6 Mar 2020 23:29:27 +0100 Subject: [PATCH 0820/1278] ASoC: jz4740-i2s: Fix divider written at incorrect offset in register commit 9401d5aa328e64617d87abd59af1c91cace4c3e4 upstream. The 4-bit divider value was written at offset 8, while the jz4740 programming manual locates it at offset 0. 
Fixes: 26b0aad80a86 ("ASoC: jz4740: Add dynamic sampling rate support to jz4740-i2s") Signed-off-by: Paul Cercueil Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200306222931.39664-2-paul@crapouillou.net Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/jz4740/jz4740-i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/jz4740/jz4740-i2s.c b/sound/soc/jz4740/jz4740-i2s.c index 99394c036998..e099c0505b76 100644 --- a/sound/soc/jz4740/jz4740-i2s.c +++ b/sound/soc/jz4740/jz4740-i2s.c @@ -92,7 +92,7 @@ #define JZ_AIC_I2S_STATUS_BUSY BIT(2) #define JZ_AIC_CLK_DIV_MASK 0xf -#define I2SDIV_DV_SHIFT 8 +#define I2SDIV_DV_SHIFT 0 #define I2SDIV_DV_MASK (0xf << I2SDIV_DV_SHIFT) #define I2SDIV_IDV_SHIFT 8 #define I2SDIV_IDV_MASK (0xf << I2SDIV_IDV_SHIFT) -- GitLab From 596bd9d7a516dd6adefd6c326bcbb94cc2d5e64c Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Thu, 26 Mar 2020 12:38:14 -0400 Subject: [PATCH 0821/1278] IB/hfi1: Call kobject_put() when kobject_init_and_add() fails commit dfb5394f804ed4fcea1fc925be275a38d66712ab upstream. When kobject_init_and_add() returns an error in the function hfi1_create_port_files(), the function kobject_put() is not called for the corresponding kobject, which potentially leads to memory leak. This patch fixes the issue by calling kobject_put() even if kobject_init_and_add() fails. 
Cc: Link: https://lore.kernel.org/r/20200326163813.21129.44280.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/sysfs.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 25e867393463..783a8b3be735 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -670,7 +670,11 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping sc2vl sysfs info, (err %d) port %u\n", ret, port_num); - goto bail; + /* + * Based on the documentation for kobject_init_and_add(), the + * caller should call kobject_put even if this call fails. + */ + goto bail_sc2vl; } kobject_uevent(&ppd->sc2vl_kobj, KOBJ_ADD); @@ -680,7 +684,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping sl2sc sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_sc2vl; + goto bail_sl2sc; } kobject_uevent(&ppd->sl2sc_kobj, KOBJ_ADD); @@ -690,7 +694,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping vl2mtu sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_sl2sc; + goto bail_vl2mtu; } kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD); @@ -700,7 +704,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping Congestion Control sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_vl2mtu; + goto bail_cc; } kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); @@ -738,7 +742,6 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, kobject_put(&ppd->sl2sc_kobj); bail_sc2vl: kobject_put(&ppd->sc2vl_kobj); -bail: return ret; } -- GitLab From 88c27291112c2bd765f3a12a83a8c64e6315fc0f Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Thu, 26 Mar 2020 
12:38:07 -0400 Subject: [PATCH 0822/1278] IB/hfi1: Fix memory leaks in sysfs registration and unregistration commit 5c15abc4328ad696fa61e2f3604918ed0c207755 upstream. When the hfi1 driver is unloaded, kmemleak will report the following issue: unreferenced object 0xffff8888461a4c08 (size 8): comm "kworker/0:0", pid 5, jiffies 4298601264 (age 2047.134s) hex dump (first 8 bytes): 73 64 6d 61 30 00 ff ff sdma0... backtrace: [<00000000311a6ef5>] kvasprintf+0x62/0xd0 [<00000000ade94d9f>] kobject_set_name_vargs+0x1c/0x90 [<0000000060657dbb>] kobject_init_and_add+0x5d/0xb0 [<00000000346fe72b>] 0xffffffffa0c5ecba [<000000006cfc5819>] 0xffffffffa0c866b9 [<0000000031c65580>] 0xffffffffa0c38e87 [<00000000e9739b3f>] local_pci_probe+0x41/0x80 [<000000006c69911d>] work_for_cpu_fn+0x16/0x20 [<00000000601267b5>] process_one_work+0x171/0x380 [<0000000049a0eefa>] worker_thread+0x1d1/0x3f0 [<00000000909cf2b9>] kthread+0xf8/0x130 [<0000000058f5f874>] ret_from_fork+0x35/0x40 This patch fixes the issue by: - Releasing dd->per_sdma[i].kobject in hfi1_unregister_sysfs(). - This will fix the memory leak. - Calling kobject_put() to unwind operations only for those entries in dd->per_sdma[] whose operations have succeeded (including the current one that has just failed) in hfi1_verbs_register_sysfs(). 
Cc: Fixes: 0cb2aa690c7e ("IB/hfi1: Add sysfs interface for affinity setup") Link: https://lore.kernel.org/r/20200326163807.21129.27371.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/sysfs.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 783a8b3be735..e3e8d65646e3 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -861,8 +861,13 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) device_remove_file(&dev->dev, hfi1_attributes[i]); - for (i = 0; i < dd->num_sdma; i++) - kobject_del(&dd->per_sdma[i].kobj); + /* + * The function kobject_put() will call kobject_del() if the kobject + * has been added successfully. The sysfs files created under the + * kobject directory will also be removed during the process. + */ + for (; i >= 0; i--) + kobject_put(&dd->per_sdma[i].kobj); return ret; } @@ -875,6 +880,10 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd) struct hfi1_pportdata *ppd; int i; + /* Unwind operations in hfi1_verbs_register_sysfs() */ + for (i = 0; i < dd->num_sdma; i++) + kobject_put(&dd->per_sdma[i].kobj); + for (i = 0; i < dd->num_pports; i++) { ppd = &dd->pport[i]; -- GitLab From 9218bca65e3b6d946c08e8a02b860d52143dcb7c Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 20 Dec 2019 09:34:04 -0500 Subject: [PATCH 0823/1278] ceph: remove the extra slashes in the server path commit 4fbc0c711b2464ee1551850b85002faae0b775d5 upstream. It's possible to pass the mount helper a server path that has more than one contiguous slash character. 
For example: $ mount -t ceph 192.168.195.165:40176:/// /mnt/cephfs/ On the MDS server side the extra slashes of the server path will be treated as snap dir, and then we can get the following debug logs: ceph: mount opening path // ceph: open_root_inode opening '//' ceph: fill_trace 0000000059b8a3bc is_dentry 0 is_target 1 ceph: alloc_inode 00000000dc4ca00b ceph: get_inode created new inode 00000000dc4ca00b 1.ffffffffffffffff ino 1 ceph: get_inode on 1=1.ffffffffffffffff got 00000000dc4ca00b And then when creating any new file or directory under the mount point, we can hit the following BUG_ON in ceph_fill_trace(): BUG_ON(ceph_snap(dir) != dvino.snap); Have the client ignore the extra slashes in the server path when mounting. This will also canonicalize the path, so that identical mounts can be consolidated. 1) "//mydir1///mydir//" 2) "/mydir1/mydir" 3) "/mydir1/mydir/" Regardless of the internal treatment of these paths, the kernel still stores the original string including the leading '/' for presentation to userland.
URL: https://tracker.ceph.com/issues/42771 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Luis Henriques Signed-off-by: Greg Kroah-Hartman --- fs/ceph/super.c | 120 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 101 insertions(+), 19 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 6b10b20bfe32..187fc47d7fbe 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -92,7 +92,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } - static int ceph_sync_fs(struct super_block *sb, int wait) { struct ceph_fs_client *fsc = ceph_sb_to_client(sb); @@ -374,6 +373,73 @@ static int strcmp_null(const char *s1, const char *s2) return strcmp(s1, s2); } +/** + * path_remove_extra_slash - Remove the extra slashes in the server path + * @server_path: the server path and could be NULL + * + * Return NULL if the path is NULL or only consists of "/", or a string + * without any extra slashes including the leading slash(es) and the + * slash(es) at the end of the server path, such as: + * "//dir1////dir2///" --> "dir1/dir2" + */ +static char *path_remove_extra_slash(const char *server_path) +{ + const char *path = server_path; + const char *cur, *end; + char *buf, *p; + int len; + + /* if the server path is omitted */ + if (!path) + return NULL; + + /* remove all the leading slashes */ + while (*path == '/') + path++; + + /* if the server path only consists of slashes */ + if (*path == '\0') + return NULL; + + len = strlen(path); + + buf = kmalloc(len + 1, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + end = path + len; + p = buf; + do { + cur = strchr(path, '/'); + if (!cur) + cur = end; + + len = cur - path; + + /* including one '/' */ + if (cur != end) + len += 1; + + memcpy(p, path, len); + p += len; + + while (cur <= end && *cur == '/') + cur++; + path = cur; + } while (path < end); + + *p = '\0'; + + /* + * remove the last slash if there has and just to make sure 
that + * we will get something like "dir1/dir2" + */ + if (*(--p) == '/') + *p = '\0'; + + return buf; +} + static int compare_mount_options(struct ceph_mount_options *new_fsopt, struct ceph_options *new_opt, struct ceph_fs_client *fsc) @@ -381,6 +447,7 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, struct ceph_mount_options *fsopt1 = new_fsopt; struct ceph_mount_options *fsopt2 = fsc->mount_options; int ofs = offsetof(struct ceph_mount_options, snapdir_name); + char *p1, *p2; int ret; ret = memcmp(fsopt1, fsopt2, ofs); @@ -393,9 +460,21 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); if (ret) return ret; - ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); + + p1 = path_remove_extra_slash(fsopt1->server_path); + if (IS_ERR(p1)) + return PTR_ERR(p1); + p2 = path_remove_extra_slash(fsopt2->server_path); + if (IS_ERR(p2)) { + kfree(p1); + return PTR_ERR(p2); + } + ret = strcmp_null(p1, p2); + kfree(p1); + kfree(p2); if (ret) return ret; + ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); if (ret) return ret; @@ -451,12 +530,14 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, */ dev_name_end = strchr(dev_name, '/'); if (dev_name_end) { - if (strlen(dev_name_end) > 1) { - fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); - if (!fsopt->server_path) { - err = -ENOMEM; - goto out; - } + /* + * The server_path will include the whole chars from userland + * including the leading '/'. + */ + fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); + if (!fsopt->server_path) { + err = -ENOMEM; + goto out; } } else { dev_name_end = dev_name + strlen(dev_name); @@ -760,7 +841,6 @@ static void destroy_caches(void) ceph_fscache_unregister(); } - /* * ceph_umount_begin - initiate forced umount. Tear down down the * mount, skipping steps that may hang while waiting for server(s). 
@@ -845,9 +925,6 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, return root; } - - - /* * mount: join the ceph cluster, and open root directory. */ @@ -861,7 +938,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) mutex_lock(&fsc->client->mount_mutex); if (!fsc->sb->s_root) { - const char *path; + const char *path, *p; err = __ceph_open_session(fsc->client, started); if (err < 0) goto out; @@ -873,19 +950,24 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) goto out; } - if (!fsc->mount_options->server_path) { - path = ""; - dout("mount opening path \\t\n"); - } else { - path = fsc->mount_options->server_path + 1; - dout("mount opening path %s\n", path); + p = path_remove_extra_slash(fsc->mount_options->server_path); + if (IS_ERR(p)) { + err = PTR_ERR(p); + goto out; } + /* if the server path is omitted or just consists of '/' */ + if (!p) + path = ""; + else + path = p; + dout("mount opening path '%s'\n", path); err = ceph_fs_debugfs_init(fsc); if (err < 0) goto out; root = open_root_dentry(fsc, path, started); + kfree(p); if (IS_ERR(root)) { err = PTR_ERR(root); goto out; -- GitLab From 4452a6c44d8836bca9452aaad1a05c21aa840949 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 10 Feb 2020 22:51:08 +0100 Subject: [PATCH 0824/1278] ceph: canonicalize server path in place commit b27a939e8376a3f1ed09b9c33ef44d20f18ec3d0 upstream. syzbot reported that 4fbc0c711b24 ("ceph: remove the extra slashes in the server path") had caused a regression where an allocation could be done under a spinlock -- compare_mount_options() is called by sget_fc() with sb_lock held. We don't really need the supplied server path, so canonicalize it in place and compare it directly. To make this work, the leading slash is kept around and the logic in ceph_real_mount() to skip it is restored. CEPH_MSG_CLIENT_SESSION now reports the same (i.e. canonicalized) path, with the leading slash of course. 
Fixes: 4fbc0c711b24 ("ceph: remove the extra slashes in the server path") Reported-by: syzbot+98704a51af8e3d9425a9@syzkaller.appspotmail.com Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Signed-off-by: Luis Henriques Signed-off-by: Greg Kroah-Hartman --- fs/ceph/super.c | 118 +++++++++++------------------------------------- fs/ceph/super.h | 2 +- 2 files changed, 28 insertions(+), 92 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 187fc47d7fbe..caa6780d8ff2 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -188,6 +188,26 @@ static match_table_t fsopt_tokens = { {-1, NULL} }; +/* + * Remove adjacent slashes and then the trailing slash, unless it is + * the only remaining character. + * + * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". + */ +static void canonicalize_path(char *path) +{ + int i, j = 0; + + for (i = 0; path[i] != '\0'; i++) { + if (path[i] != '/' || j < 1 || path[j - 1] != '/') + path[j++] = path[i]; + } + + if (j > 1 && path[j - 1] == '/') + j--; + path[j] = '\0'; +} + static int parse_fsopt_token(char *c, void *private) { struct ceph_mount_options *fsopt = private; @@ -373,73 +393,6 @@ static int strcmp_null(const char *s1, const char *s2) return strcmp(s1, s2); } -/** - * path_remove_extra_slash - Remove the extra slashes in the server path - * @server_path: the server path and could be NULL - * - * Return NULL if the path is NULL or only consists of "/", or a string - * without any extra slashes including the leading slash(es) and the - * slash(es) at the end of the server path, such as: - * "//dir1////dir2///" --> "dir1/dir2" - */ -static char *path_remove_extra_slash(const char *server_path) -{ - const char *path = server_path; - const char *cur, *end; - char *buf, *p; - int len; - - /* if the server path is omitted */ - if (!path) - return NULL; - - /* remove all the leading slashes */ - while (*path == '/') - path++; - - /* if the server path only consists of slashes */ - if (*path == '\0') - return 
NULL; - - len = strlen(path); - - buf = kmalloc(len + 1, GFP_KERNEL); - if (!buf) - return ERR_PTR(-ENOMEM); - - end = path + len; - p = buf; - do { - cur = strchr(path, '/'); - if (!cur) - cur = end; - - len = cur - path; - - /* including one '/' */ - if (cur != end) - len += 1; - - memcpy(p, path, len); - p += len; - - while (cur <= end && *cur == '/') - cur++; - path = cur; - } while (path < end); - - *p = '\0'; - - /* - * remove the last slash if there has and just to make sure that - * we will get something like "dir1/dir2" - */ - if (*(--p) == '/') - *p = '\0'; - - return buf; -} - static int compare_mount_options(struct ceph_mount_options *new_fsopt, struct ceph_options *new_opt, struct ceph_fs_client *fsc) @@ -447,7 +400,6 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, struct ceph_mount_options *fsopt1 = new_fsopt; struct ceph_mount_options *fsopt2 = fsc->mount_options; int ofs = offsetof(struct ceph_mount_options, snapdir_name); - char *p1, *p2; int ret; ret = memcmp(fsopt1, fsopt2, ofs); @@ -457,21 +409,12 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); if (ret) return ret; + ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); if (ret) return ret; - p1 = path_remove_extra_slash(fsopt1->server_path); - if (IS_ERR(p1)) - return PTR_ERR(p1); - p2 = path_remove_extra_slash(fsopt2->server_path); - if (IS_ERR(p2)) { - kfree(p1); - return PTR_ERR(p2); - } - ret = strcmp_null(p1, p2); - kfree(p1); - kfree(p2); + ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); if (ret) return ret; @@ -539,6 +482,8 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, err = -ENOMEM; goto out; } + + canonicalize_path(fsopt->server_path); } else { dev_name_end = dev_name + strlen(dev_name); } @@ -938,7 +883,9 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) mutex_lock(&fsc->client->mount_mutex); if 
(!fsc->sb->s_root) { - const char *path, *p; + const char *path = fsc->mount_options->server_path ? + fsc->mount_options->server_path + 1 : ""; + err = __ceph_open_session(fsc->client, started); if (err < 0) goto out; @@ -950,16 +897,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) goto out; } - p = path_remove_extra_slash(fsc->mount_options->server_path); - if (IS_ERR(p)) { - err = PTR_ERR(p); - goto out; - } - /* if the server path is omitted or just consists of '/' */ - if (!p) - path = ""; - else - path = p; dout("mount opening path '%s'\n", path); err = ceph_fs_debugfs_init(fsc); @@ -967,7 +904,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) goto out; root = open_root_dentry(fsc, path, started); - kfree(p); if (IS_ERR(root)) { err = PTR_ERR(root); goto out; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 46f600107cb5..dd5257dee6cb 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -85,7 +85,7 @@ struct ceph_mount_options { char *snapdir_name; /* default ".snap" */ char *mds_namespace; /* default NULL */ - char *server_path; /* default "/" */ + char *server_path; /* default NULL (means "/") */ char *fscache_uniq; /* default NULL */ }; -- GitLab From 0da9c032adbb6f83cc7f548c5155d26c64f44d63 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Sun, 8 Mar 2020 17:45:27 +0800 Subject: [PATCH 0825/1278] Bluetooth: RFCOMM: fix ODEBUG bug in rfcomm_dev_ioctl commit 71811cac8532b2387b3414f7cd8fe9e497482864 upstream. Needn't call 'rfcomm_dlc_put' here, because 'rfcomm_dlc_exists' didn't increase dlc->refcnt. 
Reported-by: syzbot+4496e82090657320efc6@syzkaller.appspotmail.com Signed-off-by: Qiujun Huang Suggested-by: Hillf Danton Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/rfcomm/tty.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 5f3074cb6b4d..b6f26ec9e90c 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -413,10 +413,8 @@ static int __rfcomm_create_dev(struct sock *sk, void __user *arg) dlc = rfcomm_dlc_exists(&req.src, &req.dst, req.channel); if (IS_ERR(dlc)) return PTR_ERR(dlc); - else if (dlc) { - rfcomm_dlc_put(dlc); + if (dlc) return -EBUSY; - } dlc = rfcomm_dlc_alloc(GFP_KERNEL); if (!dlc) return -ENOMEM; -- GitLab From b51bea602a48a18c980da3e7e28f917ab3b464d6 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Wed, 18 Mar 2020 12:17:41 +0200 Subject: [PATCH 0826/1278] RDMA/cm: Update num_paths in cma_resolve_iboe_route error flow commit 987914ab841e2ec281a35b54348ab109b4c0bb4e upstream. After a successful allocation of path_rec, num_paths is set to 1, but any error after such allocation will leave num_paths uncleared. This leads to dereferencing a NULL pointer later on. Hence, num_paths needs to be set back to 0 if such an error occurs. The following crash from syzkaller revealed it. 
kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI CPU: 0 PID: 357 Comm: syz-executor060 Not tainted 4.18.0+ #311 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:ib_copy_path_rec_to_user+0x94/0x3e0 Code: f1 f1 f1 f1 c7 40 0c 00 00 f4 f4 65 48 8b 04 25 28 00 00 00 48 89 45 c8 31 c0 e8 d7 60 24 ff 48 8d 7b 4c 48 89 f8 48 c1 e8 03 <42> 0f b6 14 30 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 RSP: 0018:ffff88006586f980 EFLAGS: 00010207 RAX: 0000000000000009 RBX: 0000000000000000 RCX: 1ffff1000d5fe475 RDX: ffff8800621e17c0 RSI: ffffffff820d45f9 RDI: 000000000000004c RBP: ffff88006586fa50 R08: ffffed000cb0df73 R09: ffffed000cb0df72 R10: ffff88006586fa70 R11: ffffed000cb0df73 R12: 1ffff1000cb0df30 R13: ffff88006586fae8 R14: dffffc0000000000 R15: ffff88006aff2200 FS: 00000000016fc880(0000) GS:ffff88006d000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000040 CR3: 0000000063fec000 CR4: 00000000000006b0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? ib_copy_path_rec_from_user+0xcc0/0xcc0 ? __mutex_unlock_slowpath+0xfc/0x670 ? wait_for_completion+0x3b0/0x3b0 ? ucma_query_route+0x818/0xc60 ucma_query_route+0x818/0xc60 ? ucma_listen+0x1b0/0x1b0 ? sched_clock_cpu+0x18/0x1d0 ? sched_clock_cpu+0x18/0x1d0 ? ucma_listen+0x1b0/0x1b0 ? ucma_write+0x292/0x460 ucma_write+0x292/0x460 ? ucma_close_id+0x60/0x60 ? sched_clock_cpu+0x18/0x1d0 ? sched_clock_cpu+0x18/0x1d0 __vfs_write+0xf7/0x620 ? ucma_close_id+0x60/0x60 ? kernel_read+0x110/0x110 ? time_hardirqs_on+0x19/0x580 ? lock_acquire+0x18b/0x3a0 ? finish_task_switch+0xf3/0x5d0 ? _raw_spin_unlock_irq+0x29/0x40 ? _raw_spin_unlock_irq+0x29/0x40 ? finish_task_switch+0x1be/0x5d0 ? 
__switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? security_file_permission+0x172/0x1e0 vfs_write+0x192/0x460 ksys_write+0xc6/0x1a0 ? __ia32_sys_read+0xb0/0xb0 ? entry_SYSCALL_64_after_hwframe+0x3e/0xbe ? do_syscall_64+0x1d/0x470 do_syscall_64+0x9e/0x470 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 3c86aa70bf67 ("RDMA/cm: Add RDMA CM support for IBoE devices") Link: https://lore.kernel.org/r/20200318101741.47211-1-leon@kernel.org Signed-off-by: Avihai Horon Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 1614f6f3677c..d901591db9c8 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2661,6 +2661,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) err2: kfree(route->path_rec); route->path_rec = NULL; + route->num_paths = 0; err1: kfree(work); return ret; -- GitLab From 90d2773a220d73f9fd00233b61717b58ad3b5866 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Sun, 29 Mar 2020 16:56:47 +0800 Subject: [PATCH 0827/1278] fbcon: fix null-ptr-deref in fbcon_switch commit b139f8b00db4a8ea75a4174346eafa48041aa489 upstream. Set logo_shown to FBCON_LOGO_CANSHOW when the vc was deallocated. 
syzkaller report: https://lkml.org/lkml/2020/3/27/403 general protection fault, probably for non-canonical address 0xdffffc000000006c: 0000 [#1] SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000360-0x0000000000000367] RIP: 0010:fbcon_switch+0x28f/0x1740 drivers/video/fbdev/core/fbcon.c:2260 Call Trace: redraw_screen+0x2a8/0x770 drivers/tty/vt/vt.c:1008 vc_do_resize+0xfe7/0x1360 drivers/tty/vt/vt.c:1295 fbcon_init+0x1221/0x1ab0 drivers/video/fbdev/core/fbcon.c:1219 visual_init+0x305/0x5c0 drivers/tty/vt/vt.c:1062 do_bind_con_driver+0x536/0x890 drivers/tty/vt/vt.c:3542 do_take_over_console+0x453/0x5b0 drivers/tty/vt/vt.c:4122 do_fbcon_takeover+0x10b/0x210 drivers/video/fbdev/core/fbcon.c:588 fbcon_fb_registered+0x26b/0x340 drivers/video/fbdev/core/fbcon.c:3259 do_register_framebuffer drivers/video/fbdev/core/fbmem.c:1664 [inline] register_framebuffer+0x56e/0x980 drivers/video/fbdev/core/fbmem.c:1832 dlfb_usb_probe.cold+0x1743/0x1ba3 drivers/video/fbdev/udlfb.c:1735 usb_probe_interface+0x310/0x800 drivers/usb/core/driver.c:374 accessing vc_cons[logo_shown].d->vc_top causes the bug. 
Reported-by: syzbot+732528bae351682f1f27@syzkaller.appspotmail.com Signed-off-by: Qiujun Huang Acked-by: Sam Ravnborg Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200329085647.25133-1-hqjagain@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fbcon.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 85787119bfbf..41360caff17f 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -1221,6 +1221,9 @@ static void fbcon_deinit(struct vc_data *vc) if (!con_is_bound(&fb_con)) fbcon_exit(); + if (vc->vc_num == logo_shown) + logo_shown = FBCON_LOGO_CANSHOW; + return; } -- GitLab From 63108f2a408abea7ecab063efa0f398da4d0d14b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 7 Feb 2019 14:56:50 -0800 Subject: [PATCH 0828/1278] acpi/nfit: Fix bus command validation commit ebe9f6f19d80d8978d16078dff3d5bd93ad8d102 upstream. Commit 11189c1089da "acpi/nfit: Fix command-supported detection" broke ND_CMD_CALL for bus-level commands. The "func = cmd" assumption is only valid for: ND_CMD_ARS_CAP ND_CMD_ARS_START ND_CMD_ARS_STATUS ND_CMD_CLEAR_ERROR The function number otherwise needs to be pulled from the command payload for: NFIT_CMD_TRANSLATE_SPA NFIT_CMD_ARS_INJECT_SET NFIT_CMD_ARS_INJECT_CLEAR NFIT_CMD_ARS_INJECT_GET Update cmd_to_func() for the bus case and call it in the common path. 
Fixes: 11189c1089da ("acpi/nfit: Fix command-supported detection") Cc: Reviewed-by: Vishal Verma Reported-by: Grzegorz Burzynski Tested-by: Jeff Moyer Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 05fb821c2558..68205002f561 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -214,7 +214,7 @@ static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd, if (call_pkg) { int i; - if (nfit_mem->family != call_pkg->nd_family) + if (nfit_mem && nfit_mem->family != call_pkg->nd_family) return -ENOTTY; for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++) @@ -223,6 +223,10 @@ static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd, return call_pkg->nd_command; } + /* In the !call_pkg case, bus commands == bus functions */ + if (!nfit_mem) + return cmd; + /* Linux ND commands == NVDIMM_FAMILY_INTEL function numbers */ if (nfit_mem->family == NVDIMM_FAMILY_INTEL) return cmd; @@ -238,6 +242,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); union acpi_object in_obj, in_buf, *out_obj; const struct nd_cmd_desc *desc = NULL; struct device *dev = acpi_desc->dev; @@ -252,18 +257,18 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, if (cmd_rc) *cmd_rc = -EINVAL; + if (cmd == ND_CMD_CALL) + call_pkg = buf; + func = cmd_to_func(nfit_mem, cmd, call_pkg); + if (func < 0) + return func; + if (nvdimm) { - struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); struct acpi_device *adev = nfit_mem->adev; if (!adev) return -ENOTTY; - if (cmd == ND_CMD_CALL) - call_pkg = buf; - func = cmd_to_func(nfit_mem, 
cmd, call_pkg); - if (func < 0) - return func; dimm_name = nvdimm_name(nvdimm); cmd_name = nvdimm_cmd_name(cmd); cmd_mask = nvdimm_cmd_mask(nvdimm); @@ -274,12 +279,9 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, } else { struct acpi_device *adev = to_acpi_dev(acpi_desc); - func = cmd; cmd_name = nvdimm_bus_cmd_name(cmd); cmd_mask = nd_desc->cmd_mask; - dsm_mask = cmd_mask; - if (cmd == ND_CMD_CALL) - dsm_mask = nd_desc->bus_dsm_mask; + dsm_mask = nd_desc->bus_dsm_mask; desc = nd_cmd_bus_desc(cmd); guid = to_nfit_uuid(NFIT_DEV_BUS); handle = adev->handle; -- GitLab From 2e390ca2158cdb6dd264d327d3e532a6cf641f5f Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Wed, 8 May 2019 23:54:53 +0530 Subject: [PATCH 0829/1278] clk: qcom: rcg: Return failure for RCG update commit 21ea4b62e1f3dc258001a68da98c9663a9dbd6c7 upstream. In case of update config failure, return -EBUSY, so that consumers could handle the failure gracefully. Signed-off-by: Taniya Das Link: https://lkml.kernel.org/r/1557339895-21952-2-git-send-email-tdas@codeaurora.org Signed-off-by: Stephen Boyd Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/clk/qcom/clk-rcg2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index d3953ea69fda..6091d9b6a27b 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -112,7 +112,7 @@ static int update_config(struct clk_rcg2 *rcg) } WARN(1, "%s: rcg didn't update its configuration.", name); - return 0; + return -EBUSY; } static int clk_rcg2_set_parent(struct clk_hw *hw, u8 index) -- GitLab From 3e1b1597690106327668b353beb988044dc11ebb Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 30 Jun 2019 05:47:22 -0700 Subject: [PATCH 0830/1278] drm/msm: stop abusing dma_map/unmap for cache commit 0036bc73ccbe7e600a3468bf8e8879b122252274 upstream. 
Recently splats like this started showing up: WARNING: CPU: 4 PID: 251 at drivers/iommu/dma-iommu.c:451 __iommu_dma_unmap+0xb8/0xc0 Modules linked in: ath10k_snoc ath10k_core fuse msm ath mac80211 uvcvideo cfg80211 videobuf2_vmalloc videobuf2_memops vide CPU: 4 PID: 251 Comm: kworker/u16:4 Tainted: G W 5.2.0-rc5-next-20190619+ #2317 Hardware name: LENOVO 81JL/LNVNB161216, BIOS 9UCN23WW(V1.06) 10/25/2018 Workqueue: msm msm_gem_free_work [msm] pstate: 80c00005 (Nzcv daif +PAN +UAO) pc : __iommu_dma_unmap+0xb8/0xc0 lr : __iommu_dma_unmap+0x54/0xc0 sp : ffff0000119abce0 x29: ffff0000119abce0 x28: 0000000000000000 x27: ffff8001f9946648 x26: ffff8001ec271068 x25: 0000000000000000 x24: ffff8001ea3580a8 x23: ffff8001f95ba010 x22: ffff80018e83ba88 x21: ffff8001e548f000 x20: fffffffffffff000 x19: 0000000000001000 x18: 00000000c00001fe x17: 0000000000000000 x16: 0000000000000000 x15: ffff000015b70068 x14: 0000000000000005 x13: 0003142cc1be1768 x12: 0000000000000001 x11: ffff8001f6de9100 x10: 0000000000000009 x9 : ffff000015b78000 x8 : 0000000000000000 x7 : 0000000000000001 x6 : fffffffffffff000 x5 : 0000000000000fff x4 : ffff00001065dbc8 x3 : 000000000000000d x2 : 0000000000001000 x1 : fffffffffffff000 x0 : 0000000000000000 Call trace: __iommu_dma_unmap+0xb8/0xc0 iommu_dma_unmap_sg+0x98/0xb8 put_pages+0x5c/0xf0 [msm] msm_gem_free_work+0x10c/0x150 [msm] process_one_work+0x1e0/0x330 worker_thread+0x40/0x438 kthread+0x12c/0x130 ret_from_fork+0x10/0x18 ---[ end trace afc0dc5ab81a06bf ]--- Not quite sure what triggered that, but we really shouldn't be abusing dma_{map,unmap}_sg() for cache maint. 
Cc: Stephen Boyd Tested-by: Stephen Boyd Reviewed-by: Jordan Crouse Signed-off-by: Rob Clark Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20190630124735.27786-1-robdclark@gmail.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/msm_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index f2df718af370..3a91ccd92c47 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -108,7 +108,7 @@ static struct page **get_pages(struct drm_gem_object *obj) * because display controller, GPU, etc. are not coherent: */ if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) - dma_map_sg(dev->dev, msm_obj->sgt->sgl, + dma_sync_sg_for_device(dev->dev, msm_obj->sgt->sgl, msm_obj->sgt->nents, DMA_BIDIRECTIONAL); } @@ -138,7 +138,7 @@ static void put_pages(struct drm_gem_object *obj) * GPU, etc. are not coherent: */ if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) - dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl, + dma_sync_sg_for_cpu(obj->dev->dev, msm_obj->sgt->sgl, msm_obj->sgt->nents, DMA_BIDIRECTIONAL); -- GitLab From 0b3c8d538ddbcf09ff6135544d70f9b24b0b8c82 Mon Sep 17 00:00:00 2001 From: Arun KS Date: Tue, 30 Apr 2019 16:05:04 +0530 Subject: [PATCH 0831/1278] arm64: Fix size of __early_cpu_boot_status commit 61cf61d81e326163ce1557ceccfca76e11d0e57c upstream. __early_cpu_boot_status is of type long. Use quad assembler directive to allocate proper size. Acked-by: Mark Rutland Signed-off-by: Arun KS Signed-off-by: Will Deacon Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 9c00fd2acc2a..bd24c8aed612 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -558,7 +558,7 @@ ENTRY(__boot_cpu_mode) * with MMU turned off. 
*/ ENTRY(__early_cpu_boot_status) - .long 0 + .quad 0 .popsection -- GitLab From c886880db39186c5ac7ecd0801c68d972ab29604 Mon Sep 17 00:00:00 2001 From: Chris Lew Date: Fri, 27 Jul 2018 17:47:27 +0530 Subject: [PATCH 0832/1278] rpmsg: glink: Remove chunk size word align warning commit f0beb4ba9b185d497c8efe7b349363700092aee0 upstream. It is possible for the chunk sizes coming from the non RPM remote procs to not be word aligned. Remove the alignment warning and continue to read from the FIFO so execution is not stalled. Signed-off-by: Chris Lew Signed-off-by: Arun Kumar Neelakantam Signed-off-by: Bjorn Andersson Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_glink_native.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index 1e6253f1e070..114481c9fba1 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -811,9 +811,6 @@ static int qcom_glink_rx_data(struct qcom_glink *glink, size_t avail) return -EAGAIN; } - if (WARN(chunk_size % 4, "Incoming data must be word aligned\n")) - return -EINVAL; - rcid = le16_to_cpu(hdr.msg.param1); spin_lock_irqsave(&glink->idr_lock, flags); channel = idr_find(&glink->rcids, rcid); -- GitLab From 55d9ff5ace1261435040f90e980daa54d5985ca0 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 26 Aug 2019 16:10:58 +0300 Subject: [PATCH 0833/1278] usb: dwc3: don't set gadget->is_otg flag commit c09b73cfac2a9317f1104169045c519c6021aa1d upstream. This reverts commit 6a4290cc28be1 ("usb: dwc3: gadget: set the OTG flag in dwc3 gadget driver.") We don't yet support any of the OTG mechanisms (HNP/SRP/ADP) and are not setting gadget->otg_caps, so don't set gadget->is_otg flag. If we do then we end up publishing a OTG1.0 descriptor in the gadget descriptor which causes device enumeration to fail if we are connected to a host with CONFIG_USB_OTG enabled. 
Host side log without this patch [ 96.720453] usb 1-1: new high-speed USB device number 2 using xhci-hcd [ 96.901391] usb 1-1: Dual-Role OTG device on non-HNP port [ 96.907552] usb 1-1: set a_alt_hnp_support failed: -32 [ 97.060447] usb 1-1: new high-speed USB device number 3 using xhci-hcd [ 97.241378] usb 1-1: Dual-Role OTG device on non-HNP port [ 97.247536] usb 1-1: set a_alt_hnp_support failed: -32 [ 97.253606] usb usb1-port1: attempt power cycle [ 97.960449] usb 1-1: new high-speed USB device number 4 using xhci-hcd [ 98.141383] usb 1-1: Dual-Role OTG device on non-HNP port [ 98.147540] usb 1-1: set a_alt_hnp_support failed: -32 [ 98.300453] usb 1-1: new high-speed USB device number 5 using xhci-hcd [ 98.481391] usb 1-1: Dual-Role OTG device on non-HNP port [ 98.487545] usb 1-1: set a_alt_hnp_support failed: -32 [ 98.493532] usb usb1-port1: unable to enumerate USB device Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index e96b22d6fa52..76a0020b0f2e 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3257,7 +3257,6 @@ int dwc3_gadget_init(struct dwc3 *dwc) dwc->gadget.speed = USB_SPEED_UNKNOWN; dwc->gadget.sg_supported = true; dwc->gadget.name = "dwc3-gadget"; - dwc->gadget.is_otg = dwc->dr_mode == USB_DR_MODE_OTG; /* * FIXME We might be setting max_speed to Date: Mon, 27 Aug 2018 10:07:42 +0200 Subject: [PATCH 0834/1278] drm_dp_mst_topology: fix broken drm_dp_sideband_parse_remote_dpcd_read() commit a4c30a4861c54af78c4eb8b7855524c1a96d9f80 upstream. When parsing the reply of a DP_REMOTE_DPCD_READ DPCD command the result is wrong due to a missing idx increment. This was never noticed since DP_REMOTE_DPCD_READ is currently not used, but if you enable it, then it is all wrong. 
Signed-off-by: Hans Verkuil Reviewed-by: Lyude Paul Acked-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/e72ddac2-1dc0-100a-d816-9ac98ac009dd@xs4all.nl Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index c8c83f84aced..f0d819fc16cd 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -433,6 +433,7 @@ static bool drm_dp_sideband_parse_remote_dpcd_read(struct drm_dp_sideband_msg_rx if (idx > raw->curlen) goto fail_len; repmsg->u.remote_dpcd_read_ack.num_bytes = raw->msg[idx]; + idx++; if (idx > raw->curlen) goto fail_len; -- GitLab From df9c43b0a8df15897747153f7d0d5f0ab1831237 Mon Sep 17 00:00:00 2001 From: Arun Kumar Neelakantam Date: Wed, 3 Oct 2018 17:08:20 +0530 Subject: [PATCH 0835/1278] rpmsg: glink: smem: Support rx peak for size less than 4 bytes commit 928002a5e9dab2ddc1a0fe3e00739e89be30dc6b upstream. The current rx peak function fails to read the data if size is less than 4 bytes. Use memcpy_fromio to support data reads of size less than 4 bytes. 
Cc: stable@vger.kernel.org Fixes: f0beb4ba9b18 ("rpmsg: glink: Remove chunk size word align warning") Signed-off-by: Arun Kumar Neelakantam Signed-off-by: Bjorn Andersson Cc: nobuhiro1.iwamatsu@toshiba.co.jp Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_glink_smem.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/rpmsg/qcom_glink_smem.c b/drivers/rpmsg/qcom_glink_smem.c index 53b3a43160f4..2b54e71886d9 100644 --- a/drivers/rpmsg/qcom_glink_smem.c +++ b/drivers/rpmsg/qcom_glink_smem.c @@ -99,15 +99,11 @@ static void glink_smem_rx_peak(struct qcom_glink_pipe *np, tail -= pipe->native.length; len = min_t(size_t, count, pipe->native.length - tail); - if (len) { - __ioread32_copy(data, pipe->fifo + tail, - len / sizeof(u32)); - } + if (len) + memcpy_fromio(data, pipe->fifo + tail, len); - if (len != count) { - __ioread32_copy(data + len, pipe->fifo, - (count - len) / sizeof(u32)); - } + if (len != count) + memcpy_fromio(data + len, pipe->fifo, (count - len)); } static void glink_smem_rx_advance(struct qcom_glink_pipe *np, -- GitLab From 7ed71842d3c88b71c3deae07f4aa1002eb9b5758 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 30 Jul 2019 14:46:28 -0700 Subject: [PATCH 0836/1278] drm/msm: Use the correct dma_sync calls in msm_gem commit 3de433c5b38af49a5fc7602721e2ab5d39f1e69c upstream. [subject was: drm/msm: shake fist angrily at dma-mapping] So, using dma_sync_* for our cache needs works out w/ dma iommu ops, but it falls apart with dma direct ops. The problem is that, depending on display generation, we can have either set of dma ops (mdp4 and dpu have iommu wired to mdss node, which maps to toplevel drm device, but mdp5 has iommu wired up to the mdp sub-node within mdss). 
Fixes this splat on mdp5 devices: Unable to handle kernel paging request at virtual address ffffffff80000000 Mem abort info: ESR = 0x96000144 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000144 CM = 1, WnR = 1 swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000810e4000 [ffffffff80000000] pgd=0000000000000000 Internal error: Oops: 96000144 [#1] SMP Modules linked in: btqcomsmd btqca bluetooth cfg80211 ecdh_generic ecc rfkill libarc4 panel_simple msm wcnss_ctrl qrtr_smd drm_kms_helper venus_enc venus_dec videobuf2_dma_sg videobuf2_memops drm venus_core ipv6 qrtr qcom_wcnss_pil v4l2_mem2mem qcom_sysmon videobuf2_v4l2 qmi_helpers videobuf2_common crct10dif_ce mdt_loader qcom_common videodev qcom_glink_smem remoteproc bmc150_accel_i2c bmc150_magn_i2c bmc150_accel_core bmc150_magn snd_soc_lpass_apq8016 snd_soc_msm8916_analog mms114 mc nf_defrag_ipv6 snd_soc_lpass_cpu snd_soc_apq8016_sbc industrialio_triggered_buffer kfifo_buf snd_soc_lpass_platform snd_soc_msm8916_digital drm_panel_orientation_quirks CPU: 2 PID: 33 Comm: kworker/2:1 Not tainted 5.3.0-rc2 #1 Hardware name: Samsung Galaxy A5U (EUR) (DT) Workqueue: events deferred_probe_work_func pstate: 80000005 (Nzcv daif -PAN -UAO) pc : __clean_dcache_area_poc+0x20/0x38 lr : arch_sync_dma_for_device+0x28/0x30 sp : ffff0000115736a0 x29: ffff0000115736a0 x28: 0000000000000001 x27: ffff800074830800 x26: ffff000011478000 x25: 0000000000000000 x24: 0000000000000001 x23: ffff000011478a98 x22: ffff800009fd1c10 x21: 0000000000000001 x20: ffff800075ad0a00 x19: 0000000000000000 x18: ffff0000112b2000 x17: 0000000000000000 x16: 0000000000000000 x15: 00000000fffffff0 x14: ffff000011455d70 x13: 0000000000000000 x12: 0000000000000028 x11: 0000000000000001 x10: ffff00001106c000 x9 : ffff7e0001d6b380 x8 : 0000000000001000 x7 : ffff7e0001d6b380 x6 : ffff7e0001d6b382 x5 : 0000000000000000 x4 : 0000000000001000 x3 : 000000000000003f x2 : 0000000000000040 x1 : 
ffffffff80001000 x0 : ffffffff80000000 Call trace: __clean_dcache_area_poc+0x20/0x38 dma_direct_sync_sg_for_device+0xb8/0xe8 get_pages+0x22c/0x250 [msm] msm_gem_get_and_pin_iova+0xdc/0x168 [msm] ... Fixes the combination of two patches: Fixes: 0036bc73ccbe (drm/msm: stop abusing dma_map/unmap for cache) Fixes: 449fa54d6815 (dma-direct: correct the physical addr in dma_direct_sync_sg_for_cpu/device) Tested-by: Stephan Gerhold Signed-off-by: Rob Clark [seanpaul changed subject to something more descriptive] Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20190730214633.17820-1-robdclark@gmail.com Cc: nobuhiro1.iwamatsu@toshiba.co.jp Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/msm_gem.c | 47 +++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 3a91ccd92c47..300c4624aa6c 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -43,6 +43,46 @@ static bool use_pages(struct drm_gem_object *obj) return !msm_obj->vram_node; } +/* + * Cache sync.. this is a bit over-complicated, to fit dma-mapping + * API. Really GPU cache is out of scope here (handled on cmdstream) + * and all we need to do is invalidate newly allocated pages before + * mapping to CPU as uncached/writecombine. + * + * On top of this, we have the added headache, that depending on + * display generation, the display's iommu may be wired up to either + * the toplevel drm device (mdss), or to the mdp sub-node, meaning + * that here we either have dma-direct or iommu ops. + * + * Let this be a cautionary tale of abstraction gone wrong. 
+ */ + +static void sync_for_device(struct msm_gem_object *msm_obj) +{ + struct device *dev = msm_obj->base.dev->dev; + + if (get_dma_ops(dev)) { + dma_sync_sg_for_device(dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + } else { + dma_map_sg(dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + } +} + +static void sync_for_cpu(struct msm_gem_object *msm_obj) +{ + struct device *dev = msm_obj->base.dev->dev; + + if (get_dma_ops(dev)) { + dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + } else { + dma_unmap_sg(dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + } +} + /* allocate pages from VRAM carveout, used when no IOMMU: */ static struct page **get_pages_vram(struct drm_gem_object *obj, int npages) { @@ -108,8 +148,7 @@ static struct page **get_pages(struct drm_gem_object *obj) * because display controller, GPU, etc. are not coherent: */ if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) - dma_sync_sg_for_device(dev->dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + sync_for_device(msm_obj); } return msm_obj->pages; @@ -138,9 +177,7 @@ static void put_pages(struct drm_gem_object *obj) * GPU, etc. 
are not coherent: */ if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) - dma_sync_sg_for_cpu(obj->dev->dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, - DMA_BIDIRECTIONAL); + sync_for_cpu(msm_obj); sg_free_table(msm_obj->sgt); kfree(msm_obj->sgt); -- GitLab From c10b57a567e4333b9fdf60b5ec36de9859263ca2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 13 Apr 2020 10:34:39 +0200 Subject: [PATCH 0837/1278] Linux 4.14.176 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3b792208fbc2..9db2e7f90769 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 175 +SUBLEVEL = 176 EXTRAVERSION = NAME = Petit Gorille -- GitLab From df76f38e95cce2ef95dbea16d14cb316cfb0639f Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Mon, 13 Apr 2020 08:35:34 -0700 Subject: [PATCH 0838/1278] Revert "ANDROID: Incremental fs: Fix initialization, use of bitfields" This change was never cherry-picked to other branches, is just syntactic sugar, and clashes with another change. Revert, and then apply cleanly over all branches as needed. This reverts commit 2664a43d88d91eb09f212cb26e111656a4acee49. 
Change-Id: I51dca9cb046cc15302c22e8f73e58dcf4849f84d Signed-off-by: Paul Lawrence --- fs/incfs/data_mgmt.c | 17 +++++++-------- fs/incfs/data_mgmt.h | 34 ++++-------------------------- include/uapi/linux/incrementalfs.h | 7 ------ 3 files changed, 12 insertions(+), 46 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index caa5770f7f4c..91541b46f771 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -98,11 +98,11 @@ static void data_file_segment_destroy(struct data_file_segment *segment) struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) { - struct data_file *df; - struct backing_file_context *bfc; + struct data_file *df = NULL; + struct backing_file_context *bfc = NULL; int md_records; u64 size; - int error; + int error = 0; int i; if (!bf || !mi) @@ -173,7 +173,7 @@ int make_inode_ready_for_data_ops(struct mount_info *mi, struct file *backing_file) { struct inode_info *node = get_incfs_node(inode); - struct data_file *df; + struct data_file *df = NULL; int err = 0; inode_lock(inode); @@ -194,7 +194,7 @@ int make_inode_ready_for_data_ops(struct mount_info *mi, struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf) { - struct dir_file *dir; + struct dir_file *dir = NULL; if (!S_ISDIR(bf->f_inode->i_mode)) return ERR_PTR(-EBADF); @@ -235,12 +235,11 @@ static void log_block_read(struct mount_info *mi, incfs_uuid_t *id, s64 now_us = ktime_to_us(ktime_get()); struct read_log_record record = { .file_id = *id, + .block_index = block_index, + .timed_out = timed_out, .timestamp_us = now_us }; - set_block_index(&record, block_index); - set_timed_out(&record, timed_out); - if (log->rl_size == 0) return; @@ -1166,7 +1165,7 @@ static void fill_pending_read_from_log_record( struct read_log_state *state, u64 log_size) { dest->file_id = src->file_id; - dest->block_index = get_block_index(src); + dest->block_index = src->block_index; dest->serial_number = state->current_pass_no * log_size + 
state->next_index; dest->timestamp_us = src->timestamp_us; diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 3cdb95a7661a..e8f2154c80d9 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -21,41 +21,15 @@ #define SEGMENTS_PER_FILE 3 struct read_log_record { - u32 bitfield; + u32 block_index : 31; + + u32 timed_out : 1; u64 timestamp_us; incfs_uuid_t file_id; } __packed; -#define RLR_BLOCK_INDEX_MASK 0x7fff -#define RLR_TIMED_OUT_MASK 0x8000 - -static inline u32 get_block_index(const struct read_log_record *rlr) -{ - return rlr->bitfield & RLR_BLOCK_INDEX_MASK; -} - -static inline void set_block_index(struct read_log_record *rlr, - u32 block_index) -{ - rlr->bitfield = (rlr->bitfield & ~RLR_BLOCK_INDEX_MASK) - | (block_index & RLR_BLOCK_INDEX_MASK); -} - -static inline bool get_timed_out(const struct read_log_record *rlr) -{ - return (rlr->bitfield & RLR_TIMED_OUT_MASK) == RLR_TIMED_OUT_MASK; -} - -static inline void set_timed_out(struct read_log_record *rlr, bool timed_out) -{ - if (timed_out) - rlr->bitfield |= RLR_TIMED_OUT_MASK; - else - rlr->bitfield &= ~RLR_TIMED_OUT_MASK; -} - struct read_log_state { /* Next slot in rl_ring_buf to write to. 
*/ u32 next_index; @@ -312,7 +286,7 @@ static inline struct inode_info *get_incfs_node(struct inode *inode) static inline struct data_file *get_incfs_data_file(struct file *f) { - struct inode_info *node; + struct inode_info *node = NULL; if (!f) return NULL; diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index ac775b64bdcf..13c3d5173e14 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -150,13 +150,10 @@ struct incfs_fill_block { /* Values from enum incfs_block_flags */ __u8 flags; - /* Reserved - must be 0 */ __u16 reserved1; - /* Reserved - must be 0 */ __u32 reserved2; - /* Reserved - must be 0 */ __aligned_u64 reserved3; }; @@ -206,10 +203,8 @@ struct incfs_new_file_args { */ __u16 mode; - /* Reserved - must be 0 */ __u16 reserved1; - /* Reserved - must be 0 */ __u32 reserved2; /* @@ -242,7 +237,6 @@ struct incfs_new_file_args { */ __u32 file_attr_len; - /* Reserved - must be 0 */ __u32 reserved4; /* @@ -270,7 +264,6 @@ struct incfs_new_file_args { /* Size of signature_info */ __aligned_u64 signature_size; - /* Reserved - must be 0 */ __aligned_u64 reserved6; }; -- GitLab From 1b7e2d075ef4412022f0ec5228d4c4a99e0cba67 Mon Sep 17 00:00:00 2001 From: Yurii Zubrytskyi Date: Fri, 3 Apr 2020 20:39:33 -0700 Subject: [PATCH 0839/1278] ANDROID: Incremental fs: Use 64-bit int for file_size when writing hash blocks Bug: 153210803 Test: manual Change-Id: Iafc888dbe906cd37e5b28dc2814f52aace175c0f Signed-off-by: Yurii Zubrytskyi --- fs/incfs/format.c | 3 ++- fs/incfs/format.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/incfs/format.c b/fs/incfs/format.c index 1a7c4646a291..ff3a2219fc24 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -450,7 +450,8 @@ int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, struct mem_range block, int block_index, loff_t hash_area_off, - loff_t bm_base_off, int file_size) + loff_t bm_base_off, + loff_t 
file_size) { struct incfs_blockmap_entry bm_entry = {}; int result; diff --git a/fs/incfs/format.h b/fs/incfs/format.h index deb5ca5bb0da..1a83349bb2eb 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -303,7 +303,8 @@ int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, struct mem_range block, int block_index, loff_t hash_area_off, - loff_t bm_base_off, int file_size); + loff_t bm_base_off, + loff_t file_size); int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, struct mem_range value, struct incfs_file_attr *attr); -- GitLab From 91ef6b614c4f655aea72368f5430bfc5baa846a0 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Fri, 3 Apr 2020 11:17:53 -0700 Subject: [PATCH 0840/1278] ANDROID: Incremental fs: Fix mislabeled __user ptrs Found by sparse Bug: 153174547 Test: make C=2 fs/incfs/incrementalfs.ko no errors, incfs_test pass Signed-off-by: Paul Lawrence Change-Id: I9ff4f4f35975fe09936724488b96cd8bdeeb719e --- fs/incfs/data_mgmt.c | 5 +++-- fs/incfs/integrity.c | 2 +- fs/incfs/vfs.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 91541b46f771..eac949243fd9 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -396,7 +396,8 @@ static int copy_one_range(struct incfs_filled_range *range, void __user *buffer, if (error) return error; - if (copy_to_user(((char *)buffer) + *size_out, range, sizeof(*range))) + if (copy_to_user(((char __user *)buffer) + *size_out, range, + sizeof(*range))) return -EFAULT; *size_out += sizeof(*range); @@ -437,7 +438,7 @@ int incfs_get_filled_blocks(struct data_file *df, int error = 0; bool in_range = false; struct incfs_filled_range range; - void *buffer = u64_to_user_ptr(arg->range_buffer); + void __user *buffer = u64_to_user_ptr(arg->range_buffer); u32 size = arg->range_buffer_size; u32 end_index = arg->end_index ? 
arg->end_index : df->df_total_block_count; diff --git a/fs/incfs/integrity.c b/fs/incfs/integrity.c index 96e016a91542..d049988ef037 100644 --- a/fs/incfs/integrity.c +++ b/fs/incfs/integrity.c @@ -62,7 +62,7 @@ static bool read_u32(u8 **p, u8 *top, u32 *result) if (*p + sizeof(u32) > top) return false; - *result = le32_to_cpu(*(u32 *)*p); + *result = le32_to_cpu(*(__le32 *)*p); *p += sizeof(u32); return true; } diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index cb4c787c825d..7ac3cbe34995 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -1277,7 +1277,7 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) { struct incfs_fill_blocks __user *usr_fill_blocks = arg; struct incfs_fill_blocks fill_blocks; - struct incfs_fill_block *usr_fill_block_array; + struct incfs_fill_block __user *usr_fill_block_array; struct data_file *df = get_incfs_data_file(f); const ssize_t data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE; u8 *data_buf = NULL; @@ -1347,7 +1347,7 @@ static long ioctl_permit_fill(struct file *f, void __user *arg) struct incfs_permit_fill __user *usr_permit_fill = arg; struct incfs_permit_fill permit_fill; long error = 0; - struct file *file = 0; + struct file *file = NULL; if (f->f_op != &incfs_pending_read_file_ops) return -EPERM; -- GitLab From 467d1f63e3baf0550e6524df58fd56adb974705b Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Fri, 3 Apr 2020 10:36:30 -0700 Subject: [PATCH 0841/1278] ANDROID: Incremental fs: Optimize get_filled_block This led to a 20x speed improvement on QEMU. 512 is somewhat arbitrary - most of the gains are already there reading 64 records at a time, but since the record size is 10 bytes, 512 is just over a page and seems a good choice. Bug: 153170997 Test: incfs_test passes. 
Adding logging to incfs_get_filled_blocks to measure performance shows a 20x improvement Signed-off-by: Paul Lawrence Change-Id: Ifb2da77cfd8c9d653c7047ba1eb7f39d795fa1c2 --- fs/incfs/data_mgmt.c | 57 ++++++++++++++++++++++++++++++++++---------- fs/incfs/format.c | 19 +++++++++++---- 2 files changed, 58 insertions(+), 18 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index eac949243fd9..193323ec6cd0 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -347,13 +347,28 @@ static bool is_data_block_present(struct data_file_block *block) (block->db_stored_size != 0); } +static void convert_data_file_block(struct incfs_blockmap_entry *bme, + struct data_file_block *res_block) +{ + u16 flags = le16_to_cpu(bme->me_flags); + + res_block->db_backing_file_data_offset = + le16_to_cpu(bme->me_data_offset_hi); + res_block->db_backing_file_data_offset <<= 32; + res_block->db_backing_file_data_offset |= + le32_to_cpu(bme->me_data_offset_lo); + res_block->db_stored_size = le16_to_cpu(bme->me_data_size); + res_block->db_comp_alg = (flags & INCFS_BLOCK_COMPRESSED_LZ4) ? + COMPRESSION_LZ4 : + COMPRESSION_NONE; +} + static int get_data_file_block(struct data_file *df, int index, struct data_file_block *res_block) { struct incfs_blockmap_entry bme = {}; struct backing_file_context *bfc = NULL; loff_t blockmap_off = 0; - u16 flags = 0; int error = 0; if (!df || !res_block) @@ -369,16 +384,7 @@ static int get_data_file_block(struct data_file *df, int index, if (error) return error; - flags = le16_to_cpu(bme.me_flags); - res_block->db_backing_file_data_offset = - le16_to_cpu(bme.me_data_offset_hi); - res_block->db_backing_file_data_offset <<= 32; - res_block->db_backing_file_data_offset |= - le32_to_cpu(bme.me_data_offset_lo); - res_block->db_stored_size = le16_to_cpu(bme.me_data_size); - res_block->db_comp_alg = (flags & INCFS_BLOCK_COMPRESSED_LZ4) ? 
- COMPRESSION_LZ4 : - COMPRESSION_NONE; + convert_data_file_block(&bme, res_block); return 0; } @@ -432,6 +438,7 @@ static int update_file_header_flags(struct data_file *df, u32 bits_to_reset, return result; } +#define READ_BLOCKMAP_ENTRIES 512 int incfs_get_filled_blocks(struct data_file *df, struct incfs_get_filled_blocks_args *arg) { @@ -443,6 +450,9 @@ int incfs_get_filled_blocks(struct data_file *df, u32 end_index = arg->end_index ? arg->end_index : df->df_total_block_count; u32 *size_out = &arg->range_buffer_size_out; + int i = READ_BLOCKMAP_ENTRIES - 1; + int entries_read = 0; + struct incfs_blockmap_entry *bme; *size_out = 0; if (end_index > df->df_total_block_count) @@ -474,13 +484,33 @@ int incfs_get_filled_blocks(struct data_file *df, return 0; } + bme = kzalloc(sizeof(*bme) * READ_BLOCKMAP_ENTRIES, GFP_NOFS); + if (!bme) + return -ENOMEM; + for (arg->index_out = arg->start_index; arg->index_out < end_index; ++arg->index_out) { struct data_file_block dfb; - error = get_data_file_block(df, arg->index_out, &dfb); - if (error) + if (++i == READ_BLOCKMAP_ENTRIES) { + entries_read = incfs_read_blockmap_entries( + df->df_backing_file_context, bme, + arg->index_out, READ_BLOCKMAP_ENTRIES, + df->df_blockmap_off); + if (entries_read < 0) { + error = entries_read; + break; + } + + i = 0; + } + + if (i >= entries_read) { + error = -EIO; break; + } + + convert_data_file_block(bme + i, &dfb); if (is_data_block_present(&dfb) == in_range) continue; @@ -520,6 +550,7 @@ int incfs_get_filled_blocks(struct data_file *df, pr_debug("Marked file full with result %d", result); } + kfree(bme); return error; } diff --git a/fs/incfs/format.c b/fs/incfs/format.c index ff3a2219fc24..8f3a3b3f8f7b 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -514,8 +514,19 @@ int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index, loff_t bm_base_off, struct incfs_blockmap_entry *bm_entry) { - return incfs_read_blockmap_entries(bfc, bm_entry, block_index, 1, - 
bm_base_off); + int error = incfs_read_blockmap_entries(bfc, bm_entry, block_index, 1, + bm_base_off); + + if (error < 0) + return error; + + if (error == 0) + return -EIO; + + if (error != 1) + return -EFAULT; + + return 0; } int incfs_read_blockmap_entries(struct backing_file_context *bfc, @@ -539,9 +550,7 @@ int incfs_read_blockmap_entries(struct backing_file_context *bfc, bm_entry_off); if (result < 0) return result; - if (result < bytes_to_read) - return -EIO; - return 0; + return result / sizeof(*entries); } int incfs_read_file_header(struct backing_file_context *bfc, -- GitLab From e158e207c2d2f30f5cd028328129741f029b84ce Mon Sep 17 00:00:00 2001 From: Yurii Zubrytskyi Date: Mon, 6 Apr 2020 12:49:41 -0700 Subject: [PATCH 0842/1278] ANDROID: Incremental fs: make remount log buffer change atomic Read log buffer can have multiple threads doing any of these operations simultaneously: - Polling for changes - Reading log records - Adding new log records - Updating log buffer size, or enabling/disabling it completely As we don't control the userspace, and it turns out that they all currently originate from different processes, code needs to be safe against parallel access to a read buffer and a request for reallocating it. This CL adds an r/w spinlock to protect the buffer and its size. Each remount takes the write lock, while everything else takes a read lock. Remount makes sure it doesn't take too long by preallocating and precalculating all updates, while other operations don't care much about their critical section size - they all can still run together. 
Bug: 152633648 Test: manual remount + reading Signed-off-by: Yurii Zubrytskyi Signed-off-by: Paul Lawrence Change-Id: I7271b4cb89f1ae2cbee6e5b073758f344c4ba66a --- fs/incfs/data_mgmt.c | 145 ++++++++++++++++++++++++++++++------------- fs/incfs/data_mgmt.h | 23 +++++-- fs/incfs/vfs.c | 17 +++-- 3 files changed, 131 insertions(+), 54 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 193323ec6cd0..0b789e62ac4b 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -34,7 +34,8 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, mutex_init(&mi->mi_pending_reads_mutex); init_waitqueue_head(&mi->mi_pending_reads_notif_wq); init_waitqueue_head(&mi->mi_log.ml_notif_wq); - spin_lock_init(&mi->mi_log.rl_writer_lock); + rwlock_init(&mi->mi_log.rl_access_lock); + spin_lock_init(&mi->mi_log.rl_logging_lock); INIT_LIST_HEAD(&mi->mi_reads_list_head); error = incfs_realloc_mount_info(mi, options); @@ -51,20 +52,38 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, int incfs_realloc_mount_info(struct mount_info *mi, struct mount_options *options) { - kfree(mi->mi_log.rl_ring_buf); - mi->mi_log.rl_ring_buf = NULL; - mi->mi_log.rl_size = 0; + void *new_buffer = NULL; + size_t new_buffer_size = 0; - mi->mi_options = *options; - if (options->read_log_pages != 0) { - size_t buf_size = PAGE_SIZE * options->read_log_pages; + if (options->read_log_pages != mi->mi_options.read_log_pages) { + struct read_log_state log_state; + /* + * Even though having two buffers allocated at once isn't + * usually good, allocating a multipage buffer under a spinlock + * is even worse, so let's optimize for the shorter lock + * duration. It's not end of the world if we fail to increase + * the buffer size anyway. 
+ */ + if (options->read_log_pages > 0) { + new_buffer_size = PAGE_SIZE * options->read_log_pages; + new_buffer = kzalloc(new_buffer_size, GFP_NOFS); + if (!new_buffer) + return -ENOMEM; + } - mi->mi_log.rl_size = buf_size / sizeof(*mi->mi_log.rl_ring_buf); - mi->mi_log.rl_ring_buf = kzalloc(buf_size, GFP_NOFS); - if (!mi->mi_log.rl_ring_buf) - return -ENOMEM; + write_lock(&mi->mi_log.rl_access_lock); + kfree(mi->mi_log.rl_ring_buf); + WRITE_ONCE(mi->mi_log.rl_ring_buf, new_buffer); + WRITE_ONCE(mi->mi_log.rl_size, + new_buffer_size / sizeof(*mi->mi_log.rl_ring_buf)); + log_state = READ_ONCE(mi->mi_log.rl_state); + log_state.generation_id++; + log_state.next_index = log_state.current_pass_no = 0; + WRITE_ONCE(mi->mi_log.rl_state, log_state); + write_unlock(&mi->mi_log.rl_access_lock); } + mi->mi_options = *options; return 0; } @@ -233,6 +252,7 @@ static void log_block_read(struct mount_info *mi, incfs_uuid_t *id, struct read_log *log = &mi->mi_log; struct read_log_state state; s64 now_us = ktime_to_us(ktime_get()); + int rl_size; struct read_log_record record = { .file_id = *id, .block_index = block_index, @@ -240,20 +260,23 @@ static void log_block_read(struct mount_info *mi, incfs_uuid_t *id, .timestamp_us = now_us }; - if (log->rl_size == 0) - return; - - spin_lock(&log->rl_writer_lock); - state = READ_ONCE(log->rl_state); - log->rl_ring_buf[state.next_index] = record; - if (++state.next_index == log->rl_size) { - state.next_index = 0; - ++state.current_pass_no; + read_lock(&log->rl_access_lock); + rl_size = READ_ONCE(log->rl_size); + if (rl_size != 0) { + spin_lock(&log->rl_logging_lock); + state = READ_ONCE(log->rl_state); + log->rl_ring_buf[state.next_index] = record; + if (++state.next_index == rl_size) { + state.next_index = 0; + ++state.current_pass_no; + } + WRITE_ONCE(log->rl_state, state); + spin_unlock(&log->rl_logging_lock); } - WRITE_ONCE(log->rl_state, state); - spin_unlock(&log->rl_writer_lock); + read_unlock(&log->rl_access_lock); - 
wake_up_all(&log->ml_notif_wq); + if (rl_size != 0) + wake_up_all(&log->ml_notif_wq); } static int validate_hash_tree(struct file *bf, struct data_file *df, @@ -1171,9 +1194,11 @@ struct read_log_state incfs_get_log_state(struct mount_info *mi) struct read_log *log = &mi->mi_log; struct read_log_state result; - spin_lock(&log->rl_writer_lock); + read_lock(&log->rl_access_lock); + spin_lock(&log->rl_logging_lock); result = READ_ONCE(log->rl_state); - spin_unlock(&log->rl_writer_lock); + spin_unlock(&log->rl_logging_lock); + read_unlock(&log->rl_access_lock); return result; } @@ -1186,10 +1211,21 @@ int incfs_get_uncollected_logs_count(struct mount_info *mi, struct read_log_state state) { struct read_log *log = &mi->mi_log; - - u64 count = calc_record_count(&log->rl_state, log->rl_size) - - calc_record_count(&state, log->rl_size); - return min_t(int, count, log->rl_size); + struct read_log_state rl_state; + int rl_size; + u64 count; + + read_lock(&log->rl_access_lock); + rl_size = READ_ONCE(log->rl_size); + spin_lock(&log->rl_logging_lock); + rl_state = READ_ONCE(log->rl_state); + spin_unlock(&log->rl_logging_lock); + read_unlock(&log->rl_access_lock); + + count = calc_record_count(&rl_state, rl_size); + if (rl_state.generation_id == state.generation_id) + count -= calc_record_count(&state, rl_size); + return min_t(int, count, rl_size); } static void fill_pending_read_from_log_record( @@ -1209,17 +1245,35 @@ int incfs_collect_logged_reads(struct mount_info *mi, int reads_size) { struct read_log *log = &mi->mi_log; - struct read_log_state live_state = incfs_get_log_state(mi); - u64 read_count = calc_record_count(reader_state, log->rl_size); - u64 written_count = calc_record_count(&live_state, log->rl_size); + struct read_log_state live_state; int dst_idx; + int rl_size; + int result = 0; + u64 read_count; + u64 written_count; - if (reader_state->next_index >= log->rl_size || - read_count > written_count) - return -ERANGE; + read_lock(&log->rl_access_lock); - if 
(read_count == written_count) - return 0; + rl_size = READ_ONCE(log->rl_size); + spin_lock(&log->rl_logging_lock); + live_state = READ_ONCE(log->rl_state); + spin_unlock(&log->rl_logging_lock); + + if (reader_state->generation_id != live_state.generation_id) { + reader_state->generation_id = live_state.generation_id; + reader_state->current_pass_no = reader_state->next_index = 0; + } + + read_count = calc_record_count(reader_state, rl_size); + written_count = calc_record_count(&live_state, rl_size); + if (read_count == written_count) { + result = 0; + goto out; + } + if (reader_state->next_index >= rl_size) { + result = -ERANGE; + goto out; + } if (read_count > written_count) { /* This reader is somehow ahead of the writer. */ @@ -1227,16 +1281,17 @@ int incfs_collect_logged_reads(struct mount_info *mi, *reader_state = live_state; } - if (written_count - read_count > log->rl_size) { + if (written_count - read_count > rl_size) { /* * Reading pointer is too far behind, * start from the record following the write pointer. 
*/ - pr_debug("incfs: read pointer is behind, moving: %u/%u -> %u/%u / %u\n", + pr_debug( + "incfs: read pointer is behind, moving: %u/%u -> %u/%u / %u\n", (u32)reader_state->next_index, (u32)reader_state->current_pass_no, (u32)live_state.next_index, - (u32)live_state.current_pass_no - 1, (u32)log->rl_size); + (u32)live_state.current_pass_no - 1, (u32)rl_size); *reader_state = (struct read_log_state){ .next_index = live_state.next_index, @@ -1252,15 +1307,19 @@ int incfs_collect_logged_reads(struct mount_info *mi, fill_pending_read_from_log_record( &reads[dst_idx], &log->rl_ring_buf[reader_state->next_index], - reader_state, log->rl_size); + reader_state, rl_size); reader_state->next_index++; - if (reader_state->next_index == log->rl_size) { + if (reader_state->next_index == rl_size) { reader_state->next_index = 0; reader_state->current_pass_no++; } } - return dst_idx; + result = dst_idx; + +out: + read_unlock(&log->rl_access_lock); + return result; } bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs) diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index e8f2154c80d9..b860997d0bb6 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -31,10 +31,13 @@ struct read_log_record { } __packed; struct read_log_state { - /* Next slot in rl_ring_buf to write to. */ - u32 next_index; + /* Log buffer generation id, incremented on configuration changes */ + u32 generation_id : 8; - /* Current number of writer pass over rl_ring_buf */ + /* Next slot in rl_ring_buf to write into. */ + u32 next_index : 24; + + /* Current number of writer passes over rl_ring_buf */ u32 current_pass_no; }; @@ -42,11 +45,21 @@ struct read_log_state { struct read_log { struct read_log_record *rl_ring_buf; + int rl_size; + struct read_log_state rl_state; - spinlock_t rl_writer_lock; + /* + * A lock for _all_ accesses to the struct, to protect against remounts. + * Taken for writing when resizing the buffer. 
+ */ + rwlock_t rl_access_lock; - int rl_size; + /* + * A lock to protect the actual logging - adding a new record. + * Note: ALWAYS taken after and under the |rl_access_lock|. + */ + spinlock_t rl_logging_lock; /* * A queue of waiters who want to be notified about reads. diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 7ac3cbe34995..838c5ea8e9d9 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -584,22 +584,27 @@ static ssize_t log_read(struct file *f, char __user *buf, size_t len, { struct log_file_state *log_state = f->private_data; struct mount_info *mi = get_mount_info(file_superblock(f)); - struct incfs_pending_read_info *reads_buf = - (struct incfs_pending_read_info *)__get_free_page(GFP_NOFS); - size_t reads_to_collect = len / sizeof(*reads_buf); - size_t reads_per_page = PAGE_SIZE / sizeof(*reads_buf); int total_reads_collected = 0; + int rl_size; ssize_t result = 0; + struct incfs_pending_read_info *reads_buf; + ssize_t reads_to_collect = len / sizeof(*reads_buf); + ssize_t reads_per_page = PAGE_SIZE / sizeof(*reads_buf); + + rl_size = READ_ONCE(mi->mi_log.rl_size); + if (rl_size == 0) + return 0; + reads_buf = (struct incfs_pending_read_info *)__get_free_page(GFP_NOFS); if (!reads_buf) return -ENOMEM; - reads_to_collect = min_t(size_t, mi->mi_log.rl_size, reads_to_collect); + reads_to_collect = min_t(ssize_t, rl_size, reads_to_collect); while (reads_to_collect > 0) { struct read_log_state next_state = READ_ONCE(log_state->state); int reads_collected = incfs_collect_logged_reads( mi, &next_state, reads_buf, - min_t(size_t, reads_to_collect, reads_per_page)); + min_t(ssize_t, reads_to_collect, reads_per_page)); if (reads_collected <= 0) { result = total_reads_collected ? 
total_reads_collected * -- GitLab From 3aee2b945b401a92b81d4b8c4f47fd990f644c4f Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 8 Apr 2020 10:10:53 -0700 Subject: [PATCH 0843/1278] ANDROID: Incremental fs: Clean up incfs_test build process Bug: 153557975 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: I57eef43a5d003e3d89a4c872d21e36376bc580a1 --- .../selftests/filesystems/incfs/Makefile | 17 ++----- .../selftests/filesystems/incfs/config | 1 - .../selftests/filesystems/incfs/incfs_test.c | 48 ++++++++++--------- .../selftests/filesystems/incfs/utils.c | 28 ++++++----- .../selftests/filesystems/incfs/utils.h | 2 +- 5 files changed, 46 insertions(+), 50 deletions(-) delete mode 100644 tools/testing/selftests/filesystems/incfs/config diff --git a/tools/testing/selftests/filesystems/incfs/Makefile b/tools/testing/selftests/filesystems/incfs/Makefile index 1f13573d3617..5b2e627ce883 100644 --- a/tools/testing/selftests/filesystems/incfs/Makefile +++ b/tools/testing/selftests/filesystems/incfs/Makefile @@ -1,18 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -lssl -lcrypto -llz4 -CFLAGS += -I../../../../../usr/include/ -CFLAGS += -I../../../../include/uapi/ -CFLAGS += -I../../../../lib +CFLAGS += -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall +CFLAGS += -I../.. -I../../../../.. 
+LDLIBS := -llz4 -lcrypto EXTRA_SOURCES := utils.c -CFLAGS += $(EXTRA_SOURCES) - TEST_GEN_PROGS := incfs_test -include ../../lib.mk - -$(OUTPUT)incfs_test: incfs_test.c $(EXTRA_SOURCES) -all: $(OUTPUT)incfs_test +$(TEST_GEN_PROGS): $(EXTRA_SOURCES) -clean: - rm -rf $(OUTPUT)incfs_test *.o +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/incfs/config b/tools/testing/selftests/filesystems/incfs/config deleted file mode 100644 index b6749837a318..000000000000 --- a/tools/testing/selftests/filesystems/incfs/config +++ /dev/null @@ -1 +0,0 @@ -CONFIG_INCREMENTAL_FS=y \ No newline at end of file diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index f9661a9eb3fa..150bd41d33cb 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -2,27 +2,29 @@ /* * Copyright 2018 Google LLC */ -#include -#include -#include +#include #include -#include +#include #include +#include +#include +#include +#include +#include +#include +#include + #include -#include +#include +#include #include #include -#include -#include -#include -#include -#include + #include #include -#include "../../kselftest.h" +#include -#include "lz4.h" #include "utils.h" #define TEST_FAILURE 1 @@ -208,7 +210,7 @@ int open_file_by_id(const char *mnt_dir, incfs_uuid_t id, bool use_ioctl) { char *path = get_index_filename(mnt_dir, id); int cmd_fd = open_commands_file(mnt_dir); - int fd = open(path, O_RDWR); + int fd = open(path, O_RDWR | O_CLOEXEC); struct incfs_permit_fill permit_fill = { .file_descriptor = fd, }; @@ -281,7 +283,7 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, .fill_blocks = ptr_to_u64(block_buf), }; ssize_t write_res = 0; - int fd; + int fd = -1; int error = 0; int i = 0; int blocks_written = 0; @@ -444,7 +446,7 @@ static loff_t read_whole_file(char *filename) loff_t bytes_read = 0; uint8_t buff[16 * 1024]; 
- fd = open(filename, O_RDONLY); + fd = open(filename, O_RDONLY | O_CLOEXEC); if (fd <= 0) return fd; @@ -476,7 +478,7 @@ static int read_test_file(uint8_t *buf, size_t len, char *filename, size_t bytes_to_read = len; off_t offset = ((off_t)block_idx) * INCFS_DATA_FILE_BLOCK_SIZE; - fd = open(filename, O_RDONLY); + fd = open(filename, O_RDONLY | O_CLOEXEC); if (fd <= 0) return fd; @@ -909,7 +911,7 @@ static bool iterate_directory(char *dir_to_iterate, bool root, int file_count) int i; /* Test directory iteration */ - int fd = open(dir_to_iterate, O_RDONLY | O_DIRECTORY); + int fd = open(dir_to_iterate, O_RDONLY | O_DIRECTORY | O_CLOEXEC); if (fd < 0) { print_error("Can't open directory\n"); @@ -1110,7 +1112,7 @@ static int basic_file_ops_test(char *mount_dir) char *path = concat_file_name(mount_dir, file->name); int fd; - fd = open(path, O_RDWR); + fd = open(path, O_RDWR | O_CLOEXEC); free(path); if (fd <= 0) { print_error("Can't open file"); @@ -1943,7 +1945,7 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file, char *filename = concat_file_name(mount_dir, file->name); int fd; - fd = open(filename, O_RDONLY); + fd = open(filename, O_RDONLY | O_CLOEXEC); free(filename); if (fd <= 0) return TEST_FAILURE; @@ -2125,7 +2127,7 @@ static int read_log_test(char *mount_dir) /* * Remount and check that logs start working again */ - drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY); + drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY | O_CLOEXEC); if (drop_caches == -1) goto failure; i = write(drop_caches, "3", 1); @@ -2215,7 +2217,7 @@ static int validate_ranges(const char *mount_dir, struct test_file *file) int cmd_fd = -1; struct incfs_permit_fill permit_fill; - fd = open(filename, O_RDONLY); + fd = open(filename, O_RDONLY | O_CLOEXEC); free(filename); if (fd <= 0) return TEST_FAILURE; @@ -2455,7 +2457,7 @@ static int validate_hash_ranges(const char *mount_dir, struct test_file *file) if (file->size <= 4096 / 32 * 4096) return 0; - 
fd = open(filename, O_RDONLY); + fd = open(filename, O_RDONLY | O_CLOEXEC); free(filename); if (fd <= 0) return TEST_FAILURE; @@ -2590,7 +2592,7 @@ int main(int argc, char *argv[]) // NOTE - this abuses the concept of randomness - do *not* ever do this // on a machine for production use - the device will think it has good // randomness when it does not. - fd = open("/dev/urandom", O_WRONLY); + fd = open("/dev/urandom", O_WRONLY | O_CLOEXEC); count = 4096; for (int i = 0; i < 128; ++i) ioctl(fd, RNDADDTOENTCNT, &count); diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c index 545497685d14..e194f63ba922 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.c +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -2,27 +2,29 @@ /* * Copyright 2018 Google LLC */ -#include -#include #include -#include -#include +#include #include +#include +#include +#include +#include #include + #include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include + #include #include #include "utils.h" +#ifndef __S_IFREG +#define __S_IFREG S_IFREG +#endif + int mount_fs(const char *mount_dir, const char *backing_dir, int read_timeout_ms) { @@ -184,7 +186,7 @@ int open_commands_file(const char *mount_dir) snprintf(cmd_file, ARRAY_SIZE(cmd_file), "%s/%s", mount_dir, INCFS_PENDING_READS_FILENAME); - cmd_fd = open(cmd_file, O_RDONLY); + cmd_fd = open(cmd_file, O_RDONLY | O_CLOEXEC); if (cmd_fd < 0) perror("Can't open commands file"); @@ -197,7 +199,7 @@ int open_log_file(const char *mount_dir) int cmd_fd; snprintf(cmd_file, ARRAY_SIZE(cmd_file), "%s/.log", mount_dir); - cmd_fd = open(cmd_file, O_RDWR); + cmd_fd = open(cmd_file, O_RDWR | O_CLOEXEC); if (cmd_fd < 0) perror("Can't open log file"); return cmd_fd; diff --git a/tools/testing/selftests/filesystems/incfs/utils.h b/tools/testing/selftests/filesystems/incfs/utils.h index 24b43287fcdd..9af63e4e922c 100644 --- 
a/tools/testing/selftests/filesystems/incfs/utils.h +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -5,7 +5,7 @@ #include #include -#include "../../include/uapi/linux/incrementalfs.h" +#include #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) -- GitLab From 3c76e8dfcb960a5442d96c44a95ac4cdcb54df2f Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 7 Apr 2020 14:48:14 -0700 Subject: [PATCH 0844/1278] ANDROID: Incremental fs: Fix compound page usercopy crash Bug: 153560805 Test: incfs_test passes on qemu and Pixel 4 Signed-off-by: Paul Lawrence Change-Id: I1b55341e4e4247a74f3f539b9d190fef0ca409b8 --- fs/incfs/data_mgmt.c | 3 ++- fs/incfs/vfs.c | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 0b789e62ac4b..4ec08eb52461 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -507,7 +507,8 @@ int incfs_get_filled_blocks(struct data_file *df, return 0; } - bme = kzalloc(sizeof(*bme) * READ_BLOCKMAP_ENTRIES, GFP_NOFS); + bme = kzalloc(sizeof(*bme) * READ_BLOCKMAP_ENTRIES, + GFP_NOFS | __GFP_COMP); if (!bme) return -ENOMEM; diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 838c5ea8e9d9..0a13821f5b59 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -861,7 +861,7 @@ static struct mem_range incfs_copy_signature_info_from_user(u8 __user *original, if (size > INCFS_MAX_SIGNATURE_SIZE) return range(ERR_PTR(-EFAULT), 0); - result = kzalloc(size, GFP_NOFS); + result = kzalloc(size, GFP_NOFS | __GFP_COMP); if (!result) return range(ERR_PTR(-ENOMEM), 0); @@ -1299,7 +1299,8 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) return -EFAULT; usr_fill_block_array = u64_to_user_ptr(fill_blocks.fill_blocks); - data_buf = (u8 *)__get_free_pages(GFP_NOFS, get_order(data_buf_size)); + data_buf = (u8 *)__get_free_pages(GFP_NOFS | __GFP_COMP, + get_order(data_buf_size)); if (!data_buf) return -ENOMEM; @@ -1414,7 +1415,7 @@ static long ioctl_read_file_signature(struct 
file *f, void __user *arg) if (sig_buf_size > INCFS_MAX_SIGNATURE_SIZE) return -E2BIG; - sig_buffer = kzalloc(sig_buf_size, GFP_NOFS); + sig_buffer = kzalloc(sig_buf_size, GFP_NOFS | __GFP_COMP); if (!sig_buffer) return -ENOMEM; -- GitLab From 4f81903f2276fc98095f8dbcfdf6c94c3c99e316 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 7 Apr 2020 14:46:13 -0700 Subject: [PATCH 0845/1278] ANDROID: Incremental fs: Fix create_file performance incfs only syncs at createfile time. This was making createfile take a very long time. It also appears to offer little actual value - whether we flush or no, if the device crashes the header will be partial. Bug: 15356649 Test: incfs_test passes, createfile takes less than half the time Change-Id: I8f1fa138226868ebfb4a6a41254444af453070c8 Signed-off-by: Paul Lawrence (cherry picked from commit 4cc78c93ada6d0d8744c5b1ae12fddb50ea6a620) --- fs/incfs/format.c | 45 +++++++++++++++------------------------------ 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/fs/incfs/format.c b/fs/incfs/format.c index 8f3a3b3f8f7b..c56e559b6893 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -94,7 +94,6 @@ static int append_zeros(struct backing_file_context *bfc, size_t len) { loff_t file_size = 0; loff_t new_last_byte_offset = 0; - int res = 0; if (!bfc) return -EFAULT; @@ -111,28 +110,18 @@ static int append_zeros(struct backing_file_context *bfc, size_t len) */ file_size = incfs_get_end_offset(bfc->bc_file); new_last_byte_offset = file_size + len - 1; - res = vfs_fallocate(bfc->bc_file, 0, new_last_byte_offset, 1); - if (res) - return res; - - res = vfs_fsync_range(bfc->bc_file, file_size, file_size + len, 1); - return res; + return vfs_fallocate(bfc->bc_file, 0, new_last_byte_offset, 1); } static int write_to_bf(struct backing_file_context *bfc, const void *buf, - size_t count, loff_t pos, bool sync) + size_t count, loff_t pos) { - ssize_t res = 0; + ssize_t res = incfs_kwrite(bfc->bc_file, buf, count, pos); - res = 
incfs_kwrite(bfc->bc_file, buf, count, pos); if (res < 0) return res; if (res != count) return -EIO; - - if (sync) - return vfs_fsync_range(bfc->bc_file, pos, pos + count, 1); - return 0; } @@ -186,7 +175,7 @@ static int append_md_to_backing_file(struct backing_file_context *bfc, /* Write the metadata record to the end of the backing file */ record_offset = file_pos; new_md_offset = cpu_to_le64(record_offset); - result = write_to_bf(bfc, record, record_size, file_pos, true); + result = write_to_bf(bfc, record, record_size, file_pos); if (result) return result; @@ -207,7 +196,7 @@ static int append_md_to_backing_file(struct backing_file_context *bfc, fh_first_md_offset); } result = write_to_bf(bfc, &new_md_offset, sizeof(new_md_offset), - file_pos, true); + file_pos); if (result) return result; @@ -222,8 +211,7 @@ int incfs_write_file_header_flags(struct backing_file_context *bfc, u32 flags) return write_to_bf(bfc, &flags, sizeof(flags), offsetof(struct incfs_file_header, - fh_file_header_flags), - false); + fh_file_header_flags)); } /* @@ -292,7 +280,7 @@ int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, file_attr.fa_offset = cpu_to_le64(value_offset); file_attr.fa_crc = cpu_to_le32(crc); - result = write_to_bf(bfc, value.data, value.len, value_offset, true); + result = write_to_bf(bfc, value.data, value.len, value_offset); if (result) return result; @@ -332,7 +320,7 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, sg.sg_sig_size = cpu_to_le32(sig.len); sg.sg_sig_offset = cpu_to_le64(pos); - result = write_to_bf(bfc, sig.data, sig.len, pos, false); + result = write_to_bf(bfc, sig.data, sig.len, pos); if (result) goto err; } @@ -365,10 +353,9 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, /* Write a hash tree metadata record pointing to the hash tree above. 
*/ result = append_md_to_backing_file(bfc, &sg.sg_header); err: - if (result) { + if (result) /* Error, rollback file changes */ truncate_backing_file(bfc, rollback_pos); - } return result; } @@ -402,7 +389,7 @@ int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, if (file_pos != 0) return -EEXIST; - return write_to_bf(bfc, &fh, sizeof(fh), file_pos, true); + return write_to_bf(bfc, &fh, sizeof(fh), file_pos); } /* Write a given data block and update file's blockmap to point it. */ @@ -431,7 +418,7 @@ int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, } /* Write the block data at the end of the backing file. */ - result = write_to_bf(bfc, block.data, block.len, data_offset, false); + result = write_to_bf(bfc, block.data, block.len, data_offset); if (result) return result; @@ -441,9 +428,8 @@ int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, bm_entry.me_data_size = cpu_to_le16((u16)block.len); bm_entry.me_flags = cpu_to_le16(flags); - result = write_to_bf(bfc, &bm_entry, sizeof(bm_entry), - bm_entry_off, false); - return result; + return write_to_bf(bfc, &bm_entry, sizeof(bm_entry), + bm_entry_off); } int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, @@ -474,7 +460,7 @@ int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, return -EINVAL; } - result = write_to_bf(bfc, block.data, block.len, data_offset, false); + result = write_to_bf(bfc, block.data, block.len, data_offset); if (result) return result; @@ -483,8 +469,7 @@ int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, bm_entry.me_data_size = cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE); bm_entry.me_flags = cpu_to_le16(INCFS_BLOCK_HASH); - return write_to_bf(bfc, &bm_entry, sizeof(bm_entry), bm_entry_off, - false); + return write_to_bf(bfc, &bm_entry, sizeof(bm_entry), bm_entry_off); } /* Initialize a new image in a given backing file. 
*/ -- GitLab From bb7cd18b3236a4427c4d7c404e013e50df575ddb Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 1 Jul 2019 09:58:43 +0900 Subject: [PATCH 0846/1278] UPSTREAM: kheaders: remove meaningless -R option of 'ls' The -R option of 'ls' is supposed to be used for directories. -R, --recursive list subdirectories recursively Since 'find ... -type f' only matches to regular files, we do not expect directories passed to the 'ls' command here. Giving -R is harmless at least, but unneeded. Change-Id: I73588f18e40824ccecc4149fbc467015b5c5e142 Signed-off-by: Masahiro Yamada Reviewed-by: Joel Fernandes (Google) (cherry picked from commit b60b7c2ea9b7f854d457fefd592c77f621a86580) Signed-off-by: Nathan Chancellor --- kernel/gen_kheaders.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 9a34e1d9bd7f..86a666f5cb17 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -33,8 +33,8 @@ arch/$SRCARCH/include/ # Uncomment it for debugging. # if [ ! -f /tmp/iter ]; then iter=1; echo 1 > /tmp/iter; # else iter=$(($(cat /tmp/iter) + 1)); echo $iter > /tmp/iter; fi -# find $src_file_list -type f | xargs ls -lR > /tmp/src-ls-$iter -# find $obj_file_list -type f | xargs ls -lR > /tmp/obj-ls-$iter +# find $src_file_list -type f | xargs ls -l > /tmp/src-ls-$iter +# find $obj_file_list -type f | xargs ls -l > /tmp/obj-ls-$iter # include/generated/compile.h is ignored because it is touched even when none # of the source files changed. 
This causes pointless regeneration, so let us @@ -46,7 +46,7 @@ src_files_md5="$(find $src_file_list -type f | grep -v "include/config/auto.conf" | grep -v "include/config/auto.conf.cmd" | grep -v "include/config/tristate.conf" | - xargs ls -lR | md5sum | cut -d ' ' -f1)" + xargs ls -l | md5sum | cut -d ' ' -f1)" popd > /dev/null obj_files_md5="$(find $obj_file_list -type f | grep -v "include/generated/compile.h" | @@ -54,7 +54,7 @@ obj_files_md5="$(find $obj_file_list -type f | grep -v "include/config/auto.conf" | grep -v "include/config/auto.conf.cmd" | grep -v "include/config/tristate.conf" | - xargs ls -lR | md5sum | cut -d ' ' -f1)" + xargs ls -l | md5sum | cut -d ' ' -f1)" if [ -f $tarfile ]; then tarfile_md5="$(md5sum $tarfile | cut -d ' ' -f1)"; fi if [ -f kernel/kheaders.md5 ] && -- GitLab From 4c13d6ace530f9352d85101c190979401508d94f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 1 Jul 2019 09:58:44 +0900 Subject: [PATCH 0847/1278] UPSTREAM: kheaders: include only headers into kheaders_data.tar.xz Currently, kheaders_data.tar.xz contains some build scripts as well as headers. None of them is needed in the header archive. For ARCH=x86, this commit excludes the following from the archive: arch/x86/include/asm/Kbuild arch/x86/include/uapi/asm/Kbuild include/asm-generic/Kbuild include/config/auto.conf include/config/kernel.release include/config/tristate.conf include/uapi/asm-generic/Kbuild include/uapi/Kbuild kernel/gen_kheaders.sh This change is actually motivated for the planned header compile-testing because it will generate more build artifacts, which should not be included in the archive. 
Change-Id: I688e041842740216cace0373ca9f358bc7704809 Signed-off-by: Masahiro Yamada Reviewed-by: Joel Fernandes (Google) (cherry picked from commit 7199ff7d74003b5aad1e6328bf6128cd8ceea735) Signed-off-by: Nathan Chancellor --- kernel/gen_kheaders.sh | 47 ++++++++++++++---------------------------- 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 86a666f5cb17..9ff449888d9c 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -4,24 +4,12 @@ # This script generates an archive consisting of kernel headers # for CONFIG_IKHEADERS. set -e -spath="$(dirname "$(readlink -f "$0")")" -kroot="$spath/.." +sfile="$(readlink -f "$0")" outdir="$(pwd)" tarfile=$1 cpio_dir=$outdir/$tarfile.tmp -# Script filename relative to the kernel source root -# We add it to the archive because it is small and any changes -# to this script will also cause a rebuild of the archive. -sfile="$(realpath --relative-to $kroot "$(readlink -f "$0")")" - -src_file_list=" -include/ -arch/$SRCARCH/include/ -$sfile -" - -obj_file_list=" +dir_list=" include/ arch/$SRCARCH/include/ " @@ -33,33 +21,29 @@ arch/$SRCARCH/include/ # Uncomment it for debugging. # if [ ! -f /tmp/iter ]; then iter=1; echo 1 > /tmp/iter; # else iter=$(($(cat /tmp/iter) + 1)); echo $iter > /tmp/iter; fi -# find $src_file_list -type f | xargs ls -l > /tmp/src-ls-$iter -# find $obj_file_list -type f | xargs ls -l > /tmp/obj-ls-$iter +# find $src_file_list -name "*.h" | xargs ls -l > /tmp/src-ls-$iter +# find $obj_file_list -name "*.h" | xargs ls -l > /tmp/obj-ls-$iter # include/generated/compile.h is ignored because it is touched even when none # of the source files changed. This causes pointless regeneration, so let us # ignore them for md5 calculation. 
-pushd $kroot > /dev/null -src_files_md5="$(find $src_file_list -type f | +pushd $srctree > /dev/null +src_files_md5="$(find $dir_list -name "*.h" | grep -v "include/generated/compile.h" | grep -v "include/generated/autoconf.h" | - grep -v "include/config/auto.conf" | - grep -v "include/config/auto.conf.cmd" | - grep -v "include/config/tristate.conf" | xargs ls -l | md5sum | cut -d ' ' -f1)" popd > /dev/null -obj_files_md5="$(find $obj_file_list -type f | +obj_files_md5="$(find $dir_list -name "*.h" | grep -v "include/generated/compile.h" | grep -v "include/generated/autoconf.h" | - grep -v "include/config/auto.conf" | - grep -v "include/config/auto.conf.cmd" | - grep -v "include/config/tristate.conf" | xargs ls -l | md5sum | cut -d ' ' -f1)" - +# Any changes to this script will also cause a rebuild of the archive. +this_file_md5="$(ls -l $sfile | md5sum | cut -d ' ' -f1)" if [ -f $tarfile ]; then tarfile_md5="$(md5sum $tarfile | cut -d ' ' -f1)"; fi if [ -f kernel/kheaders.md5 ] && [ "$(cat kernel/kheaders.md5|head -1)" == "$src_files_md5" ] && [ "$(cat kernel/kheaders.md5|head -2|tail -1)" == "$obj_files_md5" ] && + [ "$(cat kernel/kheaders.md5|head -3|tail -1)" == "$this_file_md5" ] && [ "$(cat kernel/kheaders.md5|tail -1)" == "$tarfile_md5" ]; then exit fi @@ -71,16 +55,16 @@ fi rm -rf $cpio_dir mkdir $cpio_dir -pushd $kroot > /dev/null -for f in $src_file_list; - do find "$f" ! -name "*.cmd" ! -name ".*"; +pushd $srctree > /dev/null +for f in $dir_list; + do find "$f" -name "*.h"; done | cpio --quiet -pd $cpio_dir popd > /dev/null # The second CPIO can complain if files already exist which can # happen with out of tree builds. Just silence CPIO for now. -for f in $obj_file_list; - do find "$f" ! -name "*.cmd" ! -name ".*"; +for f in $dir_list; + do find "$f" -name "*.h"; done | cpio --quiet -pd $cpio_dir >/dev/null 2>&1 # Remove comments except SDPX lines @@ -91,6 +75,7 @@ tar -Jcf $tarfile -C $cpio_dir/ . 
> /dev/null echo "$src_files_md5" > kernel/kheaders.md5 echo "$obj_files_md5" >> kernel/kheaders.md5 +echo "$this_file_md5" >> kernel/kheaders.md5 echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5 rm -rf $cpio_dir -- GitLab From 72f1c7085ecc7760571243644c4e95454de928e5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 Apr 2020 09:32:35 -0700 Subject: [PATCH 0848/1278] f2fs: introduce sysfs/data_io_flag to attach REQ_META/FUA This patch introduces a way to attach REQ_META/FUA explicitly to all the data writes given temperature. -> attach REQ_FUA to Hot Data writes -> attach REQ_FUA to Hot|Warm Data writes -> attach REQ_FUA to Hot|Warm|Cold Data writes -> attach REQ_FUA to Hot|Warm|Cold Data writes as well as REQ_META to Hot Data writes Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 9 +++++++++ fs/f2fs/data.c | 23 +++++++++++++++++++++++ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/sysfs.c | 3 ++- 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 3b4080fa15b8..be008a622713 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -323,3 +323,12 @@ What: /sys/fs/f2fs//mounted_time_sec Date: February 2020 Contact: "Jaegeuk Kim" Description: Show the mounted time in secs of this partition. + +What: /sys/fs/f2fs//data_io_flag +Date: April 2020 +Contact: "Jaegeuk Kim" +Description: Give a way to attach REQ_META|FUA to data writes + given temperature-based bits. 
Now the bits indicate: + * REQ_META | REQ_FUA | + * 5 | 4 | 3 | 2 | 1 | 0 | + * Cold | Warm | Hot | Cold | Warm | Hot | diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6ffa83937ca8..a894b45cc4f9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -514,6 +514,28 @@ void f2fs_submit_bio(struct f2fs_sb_info *sbi, __submit_bio(sbi, bio, type); } +static void __attach_data_io_flag(struct f2fs_io_info *fio) +{ + struct f2fs_sb_info *sbi = fio->sbi; + unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1; + unsigned int fua_flag = sbi->data_io_flag & temp_mask; + unsigned int meta_flag = (sbi->data_io_flag >> NR_TEMP_TYPE) & + temp_mask; + /* + * data io flag bits per temp: + * REQ_META | REQ_FUA | + * 5 | 4 | 3 | 2 | 1 | 0 | + * Cold | Warm | Hot | Cold | Warm | Hot | + */ + if (fio->type != DATA) + return; + + if ((1 << fio->temp) & meta_flag) + fio->op_flags |= REQ_META; + if ((1 << fio->temp) & fua_flag) + fio->op_flags |= REQ_FUA; +} + static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; @@ -521,6 +543,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) if (!io->bio) return; + __attach_data_io_flag(fio); bio_set_op_attrs(io->bio, fio->op, fio->op_flags); if (is_read_io(fio->op)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 58b6341d4971..29d7b540ce5e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1504,6 +1504,9 @@ struct f2fs_sb_info { unsigned long long write_iostat[NR_IO_TYPE]; bool iostat_enable; + /* to attach REQ_META|REQ_FUA flags */ + unsigned int data_io_flag; + /* For sysfs suppport */ struct kobject s_kobj; struct completion s_kobj_unregister; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 58a213a878e5..c70dc8450cf7 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -372,7 +372,6 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } - if (!strcmp(a->attr.name, "iostat_enable")) { sbi->iostat_enable = !!t; if (!sbi->iostat_enable) @@ -543,6 +542,7 @@ F2FS_RW_ATTR(F2FS_SBI, 
f2fs_super_block, extension_list, extension_list); F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag); F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(free_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); @@ -622,6 +622,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_rate), ATTR_LIST(inject_type), #endif + ATTR_LIST(data_io_flag), ATTR_LIST(dirty_segments), ATTR_LIST(free_segments), ATTR_LIST(unusable), -- GitLab From c40c0c80d383b98102f413d41ea7882e70d1eb73 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 30 Mar 2020 03:30:59 +0000 Subject: [PATCH 0849/1278] f2fs: add tracepoint for f2fs iostat Add a tracepoint to observe the iostat of f2fs. The default period is 3 seconds. This tracepoint can be used to monitor I/O statistics periodically. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 +++ fs/f2fs/f2fs.h | 16 +++++++- fs/f2fs/super.c | 1 + fs/f2fs/sysfs.c | 39 +++++++++++++++++++ include/trace/events/f2fs.h | 52 +++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index be008a622713..a67387006a0f 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -332,3 +332,9 @@ Description: Give a way to attach REQ_META|FUA to data writes * REQ_META | REQ_FUA | * 5 | 4 | 3 | 2 | 1 | 0 | * Cold | Warm | Hot | Cold | Warm | Hot | + +What: /sys/fs/f2fs//iostat_period_ms +Date: April 2020 +Contact: "Daeho Jeong" +Description: Give a way to change iostat_period time. 3secs by default. + The new iostat trace gives stats gap given the period.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 29d7b540ce5e..e2058d912bb9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1502,7 +1502,10 @@ struct f2fs_sb_info { /* For app/fs IO statistics */ spinlock_t iostat_lock; unsigned long long write_iostat[NR_IO_TYPE]; + unsigned long long prev_write_iostat[NR_IO_TYPE]; bool iostat_enable; + unsigned long iostat_next_period; + unsigned int iostat_period_ms; /* to attach REQ_META|REQ_FUA flags */ unsigned int data_io_flag; @@ -2997,16 +3000,25 @@ static inline int get_inline_xattr_addrs(struct inode *inode) sizeof((f2fs_inode)->field)) \ <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \ +#define DEFAULT_IOSTAT_PERIOD_MS 3000 +#define MIN_IOSTAT_PERIOD_MS 100 +/* maximum period of iostat tracing is 1 day */ +#define MAX_IOSTAT_PERIOD_MS 8640000 + static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) { int i; spin_lock(&sbi->iostat_lock); - for (i = 0; i < NR_IO_TYPE; i++) + for (i = 0; i < NR_IO_TYPE; i++) { sbi->write_iostat[i] = 0; + sbi->prev_write_iostat[i] = 0; + } spin_unlock(&sbi->iostat_lock); } +extern void f2fs_record_iostat(struct f2fs_sb_info *sbi); + static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, enum iostat_type type, unsigned long long io_bytes) { @@ -3020,6 +3032,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, sbi->write_iostat[APP_WRITE_IO] - sbi->write_iostat[APP_DIRECT_IO]; spin_unlock(&sbi->iostat_lock); + + f2fs_record_iostat(sbi); } #define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1f69ae192c0a..d5863b460696 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3452,6 +3452,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* init iostat info */ spin_lock_init(&sbi->iostat_lock); sbi->iostat_enable = false; + sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS; for (i = 0; i < NR_PAGE_TYPE; i++) { int n = (i == META) ? 
1: NR_TEMP_TYPE; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index c70dc8450cf7..aa01bf3a4b48 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -15,6 +15,7 @@ #include "f2fs.h" #include "segment.h" #include "gc.h" +#include static struct proc_dir_entry *f2fs_proc_root; @@ -379,6 +380,15 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "iostat_period_ms")) { + if (t < MIN_IOSTAT_PERIOD_MS || t > MAX_IOSTAT_PERIOD_MS) + return -EINVAL; + spin_lock(&sbi->iostat_lock); + sbi->iostat_period_ms = (unsigned int)t; + spin_unlock(&sbi->iostat_lock); + return count; + } + *ui = (unsigned int)t; return count; @@ -535,6 +545,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); @@ -615,6 +626,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_idle_interval), ATTR_LIST(umount_discard_timeout), ATTR_LIST(iostat_enable), + ATTR_LIST(iostat_period_ms), ATTR_LIST(readdir_ra), ATTR_LIST(gc_pin_file_thresh), ATTR_LIST(extension_list), @@ -749,6 +761,33 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, return 0; } +void f2fs_record_iostat(struct f2fs_sb_info *sbi) +{ + unsigned long long iostat_diff[NR_IO_TYPE]; + int i; + + if (time_is_after_jiffies(sbi->iostat_next_period)) + return; + + /* Need double check under the lock */ + spin_lock(&sbi->iostat_lock); + if (time_is_after_jiffies(sbi->iostat_next_period)) { + spin_unlock(&sbi->iostat_lock); + return; + } + sbi->iostat_next_period = jiffies + + 
msecs_to_jiffies(sbi->iostat_period_ms); + + for (i = 0; i < NR_IO_TYPE; i++) { + iostat_diff[i] = sbi->write_iostat[i] - + sbi->prev_write_iostat[i]; + sbi->prev_write_iostat[i] = sbi->write_iostat[i]; + } + spin_unlock(&sbi->iostat_lock); + + trace_f2fs_iostat(sbi, iostat_diff); +} + static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset) { diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index a4253b003122..e6c524f6a136 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1822,6 +1822,58 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end, TP_ARGS(inode, cluster_idx, compressed_size, ret) ); +TRACE_EVENT(f2fs_iostat, + + TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat), + + TP_ARGS(sbi, iostat), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long long, app_dio) + __field(unsigned long long, app_bio) + __field(unsigned long long, app_wio) + __field(unsigned long long, app_mio) + __field(unsigned long long, fs_dio) + __field(unsigned long long, fs_nio) + __field(unsigned long long, fs_mio) + __field(unsigned long long, fs_gc_dio) + __field(unsigned long long, fs_gc_nio) + __field(unsigned long long, fs_cp_dio) + __field(unsigned long long, fs_cp_nio) + __field(unsigned long long, fs_cp_mio) + __field(unsigned long long, fs_discard) + ), + + TP_fast_assign( + __entry->dev = sbi->sb->s_dev; + __entry->app_dio = iostat[APP_DIRECT_IO]; + __entry->app_bio = iostat[APP_BUFFERED_IO]; + __entry->app_wio = iostat[APP_WRITE_IO]; + __entry->app_mio = iostat[APP_MAPPED_IO]; + __entry->fs_dio = iostat[FS_DATA_IO]; + __entry->fs_nio = iostat[FS_NODE_IO]; + __entry->fs_mio = iostat[FS_META_IO]; + __entry->fs_gc_dio = iostat[FS_GC_DATA_IO]; + __entry->fs_gc_nio = iostat[FS_GC_NODE_IO]; + __entry->fs_cp_dio = iostat[FS_CP_DATA_IO]; + __entry->fs_cp_nio = iostat[FS_CP_NODE_IO]; + __entry->fs_cp_mio = iostat[FS_CP_META_IO]; + __entry->fs_discard = iostat[FS_DISCARD]; + ), + + 
TP_printk("dev = (%d,%d), " + "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu], " + "fs [data=%llu, node=%llu, meta=%llu, discard=%llu], " + "gc [data=%llu, node=%llu], " + "cp [data=%llu, node=%llu, meta=%llu]", + show_dev(__entry->dev), __entry->app_wio, __entry->app_dio, + __entry->app_bio, __entry->app_mio, __entry->fs_dio, + __entry->fs_nio, __entry->fs_mio, __entry->fs_discard, + __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio, + __entry->fs_cp_nio, __entry->fs_cp_mio) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ -- GitLab From 71a73add4eafd103defcd900e64274e13c9faee7 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 15 Apr 2020 14:37:53 +0530 Subject: [PATCH 0850/1278] f2fs: fix long latency due to discard during umount F2FS already has a default timeout of 5 secs for discards that can be issued during umount, but it can take more than the 5 sec timeout if the underlying UFS device queue is already full and there are no more available free tags to be used. Fix this by submitting a small batch of discard requests so that it won't cause the device queue to be full at any time and thus doesn't incur its wait time in the umount context. 
Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2158f27fc701..27193d31c311 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1101,7 +1101,6 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { - dpolicy->max_requests = UINT_MAX; dpolicy->io_aware = false; /* we need to issue all to keep CP_TRIMMED_FLAG */ dpolicy->granularity = 1; @@ -1463,6 +1462,8 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, return issued; } +static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy); static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) @@ -1471,12 +1472,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int i, issued = 0; + int i, issued; bool io_interrupted = false; if (dpolicy->timeout) f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT); +retry: + issued = 0; for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { if (dpolicy->timeout && f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT)) @@ -1523,6 +1526,11 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, break; } + if (dpolicy->type == DPOLICY_UMOUNT && issued) { + __wait_all_discard_cmd(sbi, dpolicy); + goto retry; + } + if (!issued && io_interrupted) issued = -1; -- GitLab From 3fc66540eef4dadd4a21cd2689d0797acc33ab05 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 15 Apr 2020 09:35:54 +0530 Subject: [PATCH 0851/1278] f2fs: report the discard cmd errors properly In case a discard_cmd is split into several bios, the dc->error must not be overwritten once an error is reported by a bio. Also, move it under dc->lock. 
Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 27193d31c311..1bfbc70e0d80 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1029,9 +1029,9 @@ static void f2fs_submit_discard_endio(struct bio *bio) struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; unsigned long flags; - dc->error = blk_status_to_errno(bio->bi_status); - spin_lock_irqsave(&dc->lock, flags); + if (!dc->error) + dc->error = blk_status_to_errno(bio->bi_status); dc->bio_ref--; if (!dc->bio_ref && dc->state == D_SUBMIT) { dc->state = D_DONE; -- GitLab From bca5bba8e1b0e85320d88541125a197e9fdb9b4d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 10 Apr 2020 18:07:20 +0800 Subject: [PATCH 0852/1278] f2fs: fix to handle error path of f2fs_ra_meta_pages() In f2fs_ra_meta_pages(), if f2fs_submit_page_bio() failed, we need to unlock page, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index e5669185a8bd..67a8624f9e01 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -220,6 +220,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .is_por = (type == META_POR), }; struct blk_plug plug; + int err; if (unlikely(type == META_POR)) fio.op_flags &= ~REQ_META; @@ -263,8 +264,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, } fio.page = page; - f2fs_submit_page_bio(&fio); - f2fs_put_page(page, 0); + err = f2fs_submit_page_bio(&fio); + f2fs_put_page(page, err ? 
1 : 0); } out: blk_finish_plug(&plug); -- GitLab From fd6563ca1a35c3687aa8c1c911ce9b9dccc1816e Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Thu, 16 Apr 2020 11:47:41 +0530 Subject: [PATCH 0853/1278] f2fs: Fix the accounting of dcc->undiscard_blks When a discard_cmd needs to be split due to dpolicy->max_requests, then for the remaining length it will be either merged into another cmd or a new discard_cmd will be created. In this case, there is double accounting of dcc->undiscard_blks for the remaining len, due to which it shows incorrect value in stats. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1bfbc70e0d80..021a1bb9eb43 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1214,8 +1214,10 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, len = total_len; } - if (!err && len) + if (!err && len) { + dcc->undiscard_blks -= len; __update_discard_tree_range(sbi, bdev, lstart, start, len); + } return err; } -- GitLab From 56f2766a86835acc3ccfa7c1f05a8b1f5f52aa34 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 16 Apr 2020 18:16:56 +0800 Subject: [PATCH 0854/1278] f2fs: support read iostat Adds to support accounting read IOs from userspace/kernel. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 ++++ fs/f2fs/data.c | 6 +++++ fs/f2fs/f2fs.h | 37 +++++++++++++++++++------- fs/f2fs/file.c | 12 ++++++++- fs/f2fs/gc.c | 6 +++++ fs/f2fs/node.c | 8 +++++- fs/f2fs/sysfs.c | 52 +++++++++++++++++++++++++------------ include/trace/events/f2fs.h | 23 ++++++++++++++-- 8 files changed, 118 insertions(+), 31 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 67a8624f9e01..3b7d5d23f3ee 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -86,6 +86,8 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, return ERR_PTR(err); } + f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE); + lock_page(page); if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); @@ -266,6 +268,9 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, fio.page = page; err = f2fs_submit_page_bio(&fio); f2fs_put_page(page, err ? 1 : 0); + + if (!err) + f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE); } out: blk_finish_plug(&plug); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a894b45cc4f9..e3781f87b3fa 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1034,6 +1034,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, } ClearPageError(page); inc_page_count(sbi, F2FS_RD_DATA); + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); __submit_bio(sbi, bio, DATA); return 0; } @@ -2039,6 +2040,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, goto submit_and_realloc; inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); + f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE); ClearPageError(page); *last_block_in_bio = block_nr; goto out; @@ -2174,6 +2176,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, goto submit_and_realloc; inc_page_count(sbi, F2FS_RD_DATA); + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); 
ClearPageError(page); *last_block_in_bio = blkaddr; } @@ -3527,6 +3530,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } else if (err < 0) { f2fs_write_failed(mapping, offset + count); } + } else { + if (err > 0) + f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err); } out: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e2058d912bb9..2e4a097096bc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1085,8 +1085,9 @@ enum cp_reason_type { }; enum iostat_type { - APP_DIRECT_IO, /* app direct IOs */ - APP_BUFFERED_IO, /* app buffered IOs */ + /* WRITE IO */ + APP_DIRECT_IO, /* app direct write IOs */ + APP_BUFFERED_IO, /* app buffered write IOs */ APP_WRITE_IO, /* app write IOs */ APP_MAPPED_IO, /* app mapped IOs */ FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */ @@ -1097,6 +1098,17 @@ enum iostat_type { FS_CP_DATA_IO, /* data IOs from checkpoint */ FS_CP_NODE_IO, /* node IOs from checkpoint */ FS_CP_META_IO, /* meta IOs from checkpoint */ + + /* READ IO */ + APP_DIRECT_READ_IO, /* app direct read IOs */ + APP_BUFFERED_READ_IO, /* app buffered read IOs */ + APP_READ_IO, /* app read IOs */ + APP_MAPPED_READ_IO, /* app mapped read IOs */ + FS_DATA_READ_IO, /* data read IOs */ + FS_NODE_READ_IO, /* node read IOs */ + FS_META_READ_IO, /* meta read IOs */ + + /* other */ FS_DISCARD, /* discard */ NR_IO_TYPE, }; @@ -1501,8 +1513,8 @@ struct f2fs_sb_info { /* For app/fs IO statistics */ spinlock_t iostat_lock; - unsigned long long write_iostat[NR_IO_TYPE]; - unsigned long long prev_write_iostat[NR_IO_TYPE]; + unsigned long long rw_iostat[NR_IO_TYPE]; + unsigned long long prev_rw_iostat[NR_IO_TYPE]; bool iostat_enable; unsigned long iostat_next_period; unsigned int iostat_period_ms; @@ -3011,8 +3023,8 @@ static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) spin_lock(&sbi->iostat_lock); for (i = 0; i < NR_IO_TYPE; i++) { - sbi->write_iostat[i] = 0; - sbi->prev_write_iostat[i] = 0; + sbi->rw_iostat[i] = 0; + 
sbi->prev_rw_iostat[i] = 0; } spin_unlock(&sbi->iostat_lock); } @@ -3025,12 +3037,17 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, if (!sbi->iostat_enable) return; spin_lock(&sbi->iostat_lock); - sbi->write_iostat[type] += io_bytes; + sbi->rw_iostat[type] += io_bytes; if (type == APP_WRITE_IO || type == APP_DIRECT_IO) - sbi->write_iostat[APP_BUFFERED_IO] = - sbi->write_iostat[APP_WRITE_IO] - - sbi->write_iostat[APP_DIRECT_IO]; + sbi->rw_iostat[APP_BUFFERED_IO] = + sbi->rw_iostat[APP_WRITE_IO] - + sbi->rw_iostat[APP_DIRECT_IO]; + + if (type == APP_READ_IO || type == APP_DIRECT_READ_IO) + sbi->rw_iostat[APP_BUFFERED_READ_IO] = + sbi->rw_iostat[APP_READ_IO] - + sbi->rw_iostat[APP_DIRECT_READ_IO]; spin_unlock(&sbi->iostat_lock); f2fs_record_iostat(sbi); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 509ef157105c..b32bbb9a524b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -39,6 +39,10 @@ static int f2fs_filemap_fault(struct vm_fault *vmf) err = filemap_fault(vmf); up_read(&F2FS_I(inode)->i_mmap_sem); + if (!err) + f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO, + F2FS_BLKSIZE); + trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)err); return err; @@ -3446,11 +3450,17 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + int ret; if (!f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; - return generic_file_read_iter(iocb, iter); + ret = generic_file_read_iter(iocb, iter); + + if (ret > 0) + f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret); + + return ret; } static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a74a5c53e4fc..bc36cd04daef 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -737,6 +737,9 @@ static int ra_data_block(struct inode *inode, pgoff_t index) goto put_encrypted_page; f2fs_put_page(fio.encrypted_page, 0); 
f2fs_put_page(page, 1); + + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + return 0; put_encrypted_page: f2fs_put_page(fio.encrypted_page, 1); @@ -840,6 +843,9 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_put_page(mpage, 1); goto up_out; } + + f2fs_update_iostat(fio.sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + lock_page(mpage); if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || !PageUptodate(mpage))) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7ef52f5ed3e1..1d9131a84ea8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1298,7 +1298,13 @@ static int read_node_page(struct page *page, int op_flags) } fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr; - return f2fs_submit_page_bio(&fio); + + err = f2fs_submit_page_bio(&fio); + + if (!err) + f2fs_update_iostat(sbi, FS_NODE_READ_IO, F2FS_BLKSIZE); + + return err; } /* diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index aa01bf3a4b48..a54d6daede28 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -779,9 +779,9 @@ void f2fs_record_iostat(struct f2fs_sb_info *sbi) msecs_to_jiffies(sbi->iostat_period_ms); for (i = 0; i < NR_IO_TYPE; i++) { - iostat_diff[i] = sbi->write_iostat[i] - - sbi->prev_write_iostat[i]; - sbi->prev_write_iostat[i] = sbi->write_iostat[i]; + iostat_diff[i] = sbi->rw_iostat[i] - + sbi->prev_rw_iostat[i]; + sbi->prev_rw_iostat[i] = sbi->rw_iostat[i]; } spin_unlock(&sbi->iostat_lock); @@ -800,33 +800,51 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, seq_printf(seq, "time: %-16llu\n", now); - /* print app IOs */ + /* print app write IOs */ seq_printf(seq, "app buffered: %-16llu\n", - sbi->write_iostat[APP_BUFFERED_IO]); + sbi->rw_iostat[APP_BUFFERED_IO]); seq_printf(seq, "app direct: %-16llu\n", - sbi->write_iostat[APP_DIRECT_IO]); + sbi->rw_iostat[APP_DIRECT_IO]); seq_printf(seq, "app mapped: %-16llu\n", - sbi->write_iostat[APP_MAPPED_IO]); + sbi->rw_iostat[APP_MAPPED_IO]); - /* print fs IOs */ + /* print fs write IOs */ 
seq_printf(seq, "fs data: %-16llu\n", - sbi->write_iostat[FS_DATA_IO]); + sbi->rw_iostat[FS_DATA_IO]); seq_printf(seq, "fs node: %-16llu\n", - sbi->write_iostat[FS_NODE_IO]); + sbi->rw_iostat[FS_NODE_IO]); seq_printf(seq, "fs meta: %-16llu\n", - sbi->write_iostat[FS_META_IO]); + sbi->rw_iostat[FS_META_IO]); seq_printf(seq, "fs gc data: %-16llu\n", - sbi->write_iostat[FS_GC_DATA_IO]); + sbi->rw_iostat[FS_GC_DATA_IO]); seq_printf(seq, "fs gc node: %-16llu\n", - sbi->write_iostat[FS_GC_NODE_IO]); + sbi->rw_iostat[FS_GC_NODE_IO]); seq_printf(seq, "fs cp data: %-16llu\n", - sbi->write_iostat[FS_CP_DATA_IO]); + sbi->rw_iostat[FS_CP_DATA_IO]); seq_printf(seq, "fs cp node: %-16llu\n", - sbi->write_iostat[FS_CP_NODE_IO]); + sbi->rw_iostat[FS_CP_NODE_IO]); seq_printf(seq, "fs cp meta: %-16llu\n", - sbi->write_iostat[FS_CP_META_IO]); + sbi->rw_iostat[FS_CP_META_IO]); + + /* print app read IOs */ + seq_printf(seq, "app buffered: %-16llu\n", + sbi->rw_iostat[APP_BUFFERED_READ_IO]); + seq_printf(seq, "app direct: %-16llu\n", + sbi->rw_iostat[APP_DIRECT_READ_IO]); + seq_printf(seq, "app mapped: %-16llu\n", + sbi->rw_iostat[APP_MAPPED_READ_IO]); + + /* print fs read IOs */ + seq_printf(seq, "fs data: %-16llu\n", + sbi->rw_iostat[FS_DATA_READ_IO]); + seq_printf(seq, "fs node: %-16llu\n", + sbi->rw_iostat[FS_NODE_READ_IO]); + seq_printf(seq, "fs meta: %-16llu\n", + sbi->rw_iostat[FS_META_READ_IO]); + + /* print other IOs */ seq_printf(seq, "fs discard: %-16llu\n", - sbi->write_iostat[FS_DISCARD]); + sbi->rw_iostat[FS_DISCARD]); return 0; } diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index e6c524f6a136..5b95f24125ed 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1842,6 +1842,13 @@ TRACE_EVENT(f2fs_iostat, __field(unsigned long long, fs_cp_dio) __field(unsigned long long, fs_cp_nio) __field(unsigned long long, fs_cp_mio) + __field(unsigned long long, app_drio) + __field(unsigned long long, app_brio) + __field(unsigned long 
long, app_rio) + __field(unsigned long long, app_mrio) + __field(unsigned long long, fs_drio) + __field(unsigned long long, fs_nrio) + __field(unsigned long long, fs_mrio) __field(unsigned long long, fs_discard) ), @@ -1859,6 +1866,13 @@ TRACE_EVENT(f2fs_iostat, __entry->fs_cp_dio = iostat[FS_CP_DATA_IO]; __entry->fs_cp_nio = iostat[FS_CP_NODE_IO]; __entry->fs_cp_mio = iostat[FS_CP_META_IO]; + __entry->app_drio = iostat[APP_DIRECT_READ_IO]; + __entry->app_brio = iostat[APP_BUFFERED_READ_IO]; + __entry->app_rio = iostat[APP_READ_IO]; + __entry->app_mrio = iostat[APP_MAPPED_READ_IO]; + __entry->fs_drio = iostat[FS_DATA_READ_IO]; + __entry->fs_nrio = iostat[FS_NODE_READ_IO]; + __entry->fs_mrio = iostat[FS_META_READ_IO]; __entry->fs_discard = iostat[FS_DISCARD]; ), @@ -1866,12 +1880,17 @@ TRACE_EVENT(f2fs_iostat, "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu], " "fs [data=%llu, node=%llu, meta=%llu, discard=%llu], " "gc [data=%llu, node=%llu], " - "cp [data=%llu, node=%llu, meta=%llu]", + "cp [data=%llu, node=%llu, meta=%llu], " + "app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], " + "fs [data=%llu, node=%llu, meta=%llu]", show_dev(__entry->dev), __entry->app_wio, __entry->app_dio, __entry->app_bio, __entry->app_mio, __entry->fs_dio, __entry->fs_nio, __entry->fs_mio, __entry->fs_discard, __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio, - __entry->fs_cp_nio, __entry->fs_cp_mio) + __entry->fs_cp_nio, __entry->fs_cp_mio, + __entry->app_rio, __entry->app_drio, __entry->app_brio, + __entry->app_mrio, __entry->fs_drio, __entry->fs_nrio, + __entry->fs_mrio) ); #endif /* _TRACE_F2FS_H */ -- GitLab From 7881a723155357dd5ef5f493e2d3260cba048bb3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Apr 2020 10:25:21 -0700 Subject: [PATCH 0855/1278] f2fs: fix quota_sync failure due to f2fs_lock_op f2fs_quota_sync() uses f2fs_lock_op() before flushing dirty pages, but f2fs_write_data_page() returns EAGAIN. 
Like dentry blocks, we can just bypass getting the lock, since quota blocks are also maintained by checkpoint. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- fs/f2fs/data.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 1bf22a430640..50dcbf76f7d9 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -990,7 +990,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, loff_t psize; int i, err; - if (!f2fs_trylock_op(sbi)) + if (!IS_NOQUOTA(inode) && !f2fs_trylock_op(sbi)) return -EAGAIN; set_new_dnode(&dn, cc->inode, NULL, NULL, 0); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e3781f87b3fa..7efe5fd5dcb8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2660,8 +2660,8 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, f2fs_available_free_memory(sbi, BASE_CHECK)))) goto redirty_out; - /* Dentry blocks are controlled by checkpoint */ - if (S_ISDIR(inode->i_mode)) { + /* Dentry/quota blocks are controlled by checkpoint */ + if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) { fio.need_lock = LOCK_DONE; err = f2fs_do_write_data_page(&fio); goto done; -- GitLab From d8cd62b59af5b71b9a1972a861acce2dfce0af6c Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Tue, 8 Jan 2019 11:59:30 -0300 Subject: [PATCH 0856/1278] BACKPORT: drm/virtio: Drop deprecated load/unload initialization Move the code around so the driver is probed from the bus .probe and removed from the bus .remove callbacks. This commit is just a cleanup and shouldn't affect functionality.
Signed-off-by: Ezequiel Garcia Link: http://patchwork.freedesktop.org/patch/msgid/20190108145930.15080-1-ezequiel@collabora.com Signed-off-by: Gerd Hoffmann (cherry picked from commit d516e75c71c9853ef70a9c476d11a97b69380147) Bug: 153580313 Signed-off-by: Alistair Delva Change-Id: I3ad8b26ab1a9c7dc6e0a5686845ccb9dc37c49da --- drivers/gpu/drm/virtio/Makefile | 2 +- drivers/gpu/drm/virtio/virtgpu_display.c | 3 +- drivers/gpu/drm/virtio/virtgpu_drm_bus.c | 121 ----------------------- drivers/gpu/drm/virtio/virtgpu_drv.c | 101 ++++++++++++++++++- drivers/gpu/drm/virtio/virtgpu_drv.h | 9 +- drivers/gpu/drm/virtio/virtgpu_kms.c | 9 +- 6 files changed, 106 insertions(+), 139 deletions(-) delete mode 100644 drivers/gpu/drm/virtio/virtgpu_drm_bus.c diff --git a/drivers/gpu/drm/virtio/Makefile b/drivers/gpu/drm/virtio/Makefile index 11e25e9a4c45..42949a17ff70 100644 --- a/drivers/gpu/drm/virtio/Makefile +++ b/drivers/gpu/drm/virtio/Makefile @@ -3,7 +3,7 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. 
-virtio-gpu-y := virtgpu_drv.o virtgpu_kms.o virtgpu_drm_bus.o virtgpu_gem.o \ +virtio-gpu-y := virtgpu_drv.o virtgpu_kms.o virtgpu_gem.o \ virtgpu_fb.o virtgpu_display.o virtgpu_vq.o virtgpu_ttm.o \ virtgpu_fence.o virtgpu_object.o virtgpu_debugfs.o virtgpu_plane.o \ virtgpu_ioctl.o virtgpu_prime.o virtgpu_trace_points.o diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index 443cf4c93074..3922de4a4f71 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -356,7 +356,7 @@ static const struct drm_mode_config_funcs virtio_gpu_mode_funcs = { .atomic_commit = drm_atomic_helper_commit, }; -int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev) +void virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev) { int i; @@ -374,7 +374,6 @@ int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev) vgdev_output_init(vgdev, i); drm_mode_config_reset(vgdev->ddev); - return 0; } void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev) diff --git a/drivers/gpu/drm/virtio/virtgpu_drm_bus.c b/drivers/gpu/drm/virtio/virtgpu_drm_bus.c deleted file mode 100644 index fe3b752cf6cd..000000000000 --- a/drivers/gpu/drm/virtio/virtgpu_drm_bus.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2015 Red Hat, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include - -#include "virtgpu_drv.h" - -static void virtio_pci_kick_out_firmware_fb(struct pci_dev *pci_dev) -{ - struct apertures_struct *ap; - bool primary; - - ap = alloc_apertures(1); - if (!ap) - return; - - ap->ranges[0].base = pci_resource_start(pci_dev, 0); - ap->ranges[0].size = pci_resource_len(pci_dev, 0); - - primary = pci_dev->resource[PCI_ROM_RESOURCE].flags - & IORESOURCE_ROM_SHADOW; - - drm_fb_helper_remove_conflicting_framebuffers(ap, "virtiodrmfb", primary); - - kfree(ap); -} - -int drm_virtio_init(struct drm_driver *driver, struct virtio_device *vdev) -{ - struct drm_device *dev; - int ret; - - dev = drm_dev_alloc(driver, &vdev->dev); - if (IS_ERR(dev)) - return PTR_ERR(dev); - vdev->priv = dev; - - if (strcmp(vdev->dev.parent->bus->name, "pci") == 0) { - struct pci_dev *pdev = to_pci_dev(vdev->dev.parent); - const char *pname = dev_name(&pdev->dev); - bool vga = (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; - char unique[20]; - - DRM_INFO("pci: %s detected at %s\n", - vga ? "virtio-vga" : "virtio-gpu-pci", - pname); - dev->pdev = pdev; - if (vga) - virtio_pci_kick_out_firmware_fb(pdev); - - /* - * Normally the drm_dev_set_unique() call is done by core DRM. - * The following comment covers, why virtio cannot rely on it. - * - * Unlike the other virtual GPU drivers, virtio abstracts the - * underlying bus type by using struct virtio_device. 
- * - * Hence the dev_is_pci() check, used in core DRM, will fail - * and the unique returned will be the virtio_device "virtio0", - * while a "pci:..." one is required. - * - * A few other ideas were considered: - * - Extend the dev_is_pci() check [in drm_set_busid] to - * consider virtio. - * Seems like a bigger hack than what we have already. - * - * - Point drm_device::dev to the parent of the virtio_device - * Semantic changes: - * * Using the wrong device for i2c, framebuffer_alloc and - * prime import. - * Visual changes: - * * Helpers such as DRM_DEV_ERROR, dev_info, drm_printer, - * will print the wrong information. - * - * We could address the latter issues, by introducing - * drm_device::bus_dev, ... which would be used solely for this. - * - * So for the moment keep things as-is, with a bulky comment - * for the next person who feels like removing this - * drm_dev_set_unique() quirk. - */ - snprintf(unique, sizeof(unique), "pci:%s", pname); - ret = drm_dev_set_unique(dev, unique); - if (ret) - goto err_free; - - } - - ret = drm_dev_register(dev, 0); - if (ret) - goto err_free; - - return 0; - -err_free: - drm_dev_unref(dev); - return ret; -} diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 9f592f9353a6..e2c26a2914c5 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -40,21 +40,118 @@ static int virtio_gpu_modeset = -1; MODULE_PARM_DESC(modeset, "Disable/Enable modesetting"); module_param_named(modeset, virtio_gpu_modeset, int, 0400); +static void virtio_pci_kick_out_firmware_fb(struct pci_dev *pci_dev) +{ + struct apertures_struct *ap; + bool primary; + + ap = alloc_apertures(1); + if (!ap) + return; + + ap->ranges[0].base = pci_resource_start(pci_dev, 0); + ap->ranges[0].size = pci_resource_len(pci_dev, 0); + + primary = pci_dev->resource[PCI_ROM_RESOURCE].flags + & IORESOURCE_ROM_SHADOW; + + drm_fb_helper_remove_conflicting_framebuffers(ap, "virtiodrmfb", 
primary); + + kfree(ap); +} + +static int virtio_gpu_pci_quirk(struct drm_device *dev, struct virtio_device *vdev) +{ + struct pci_dev *pdev = to_pci_dev(vdev->dev.parent); + const char *pname = dev_name(&pdev->dev); + bool vga = (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; + char unique[20]; + + DRM_INFO("pci: %s detected at %s\n", + vga ? "virtio-vga" : "virtio-gpu-pci", + pname); + dev->pdev = pdev; + if (vga) + virtio_pci_kick_out_firmware_fb(pdev); + + /* + * Normally the drm_dev_set_unique() call is done by core DRM. + * The following comment covers, why virtio cannot rely on it. + * + * Unlike the other virtual GPU drivers, virtio abstracts the + * underlying bus type by using struct virtio_device. + * + * Hence the dev_is_pci() check, used in core DRM, will fail + * and the unique returned will be the virtio_device "virtio0", + * while a "pci:..." one is required. + * + * A few other ideas were considered: + * - Extend the dev_is_pci() check [in drm_set_busid] to + * consider virtio. + * Seems like a bigger hack than what we have already. + * + * - Point drm_device::dev to the parent of the virtio_device + * Semantic changes: + * * Using the wrong device for i2c, framebuffer_alloc and + * prime import. + * Visual changes: + * * Helpers such as DRM_DEV_ERROR, dev_info, drm_printer, + * will print the wrong information. + * + * We could address the latter issues, by introducing + * drm_device::bus_dev, ... which would be used solely for this. + * + * So for the moment keep things as-is, with a bulky comment + * for the next person who feels like removing this + * drm_dev_set_unique() quirk. 
+ */ + snprintf(unique, sizeof(unique), "pci:%s", pname); + return drm_dev_set_unique(dev, unique); +} + static int virtio_gpu_probe(struct virtio_device *vdev) { + struct drm_device *dev; + int ret; + if (vgacon_text_force() && virtio_gpu_modeset == -1) return -EINVAL; if (virtio_gpu_modeset == 0) return -EINVAL; - return drm_virtio_init(&driver, vdev); + dev = drm_dev_alloc(&driver, &vdev->dev); + if (IS_ERR(dev)) + return PTR_ERR(dev); + vdev->priv = dev; + + if (!strcmp(vdev->dev.parent->bus->name, "pci")) { + ret = virtio_gpu_pci_quirk(dev, vdev); + if (ret) + goto err_free; + } + + ret = virtio_gpu_init(dev); + if (ret) + goto err_free; + + ret = drm_dev_register(dev, 0); + if (ret) + goto err_free; + + return 0; + +err_free: + drm_dev_unref(dev); + return ret; } static void virtio_gpu_remove(struct virtio_device *vdev) { struct drm_device *dev = vdev->priv; + drm_dev_unregister(dev); + virtio_gpu_deinit(dev); drm_put_dev(dev); } @@ -116,8 +213,6 @@ static const struct file_operations virtio_gpu_driver_fops = { static struct drm_driver driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | DRIVER_RENDER | DRIVER_ATOMIC, - .load = virtio_gpu_driver_load, - .unload = virtio_gpu_driver_unload, .open = virtio_gpu_driver_open, .postclose = virtio_gpu_driver_postclose, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 05936432632b..db60aabf9176 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -50,9 +50,6 @@ #define DRIVER_MINOR 1 #define DRIVER_PATCHLEVEL 0 -/* virtgpu_drm_bus.c */ -int drm_virtio_init(struct drm_driver *driver, struct virtio_device *vdev); - struct virtio_gpu_object_params { uint32_t format; uint32_t width; @@ -244,8 +241,8 @@ int virtio_gpu_object_list_validate(struct ww_acquire_ctx *ticket, void virtio_gpu_unref_list(struct list_head *head); /* virtio_kms.c */ -int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags); 
-void virtio_gpu_driver_unload(struct drm_device *dev); +int virtio_gpu_init(struct drm_device *dev); +void virtio_gpu_deinit(struct drm_device *dev); int virtio_gpu_driver_open(struct drm_device *dev, struct drm_file *file); void virtio_gpu_driver_postclose(struct drm_device *dev, struct drm_file *file); @@ -359,7 +356,7 @@ int virtio_gpu_framebuffer_init(struct drm_device *dev, struct virtio_gpu_framebuffer *vgfb, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj); -int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev); +void virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev); void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev); /* virtio_gpu_plane.c */ diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index d0f54b81eaba..1c721b39f2ba 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -127,7 +127,7 @@ static void virtio_gpu_get_capsets(struct virtio_gpu_device *vgdev, vgdev->num_capsets = num_capsets; } -int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) +int virtio_gpu_init(struct drm_device *dev) { static vq_callback_t *callbacks[] = { virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack @@ -216,9 +216,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) num_capsets, &num_capsets); DRM_INFO("number of cap sets: %d\n", num_capsets); - ret = virtio_gpu_modeset_init(vgdev); - if (ret) - goto err_modeset; + virtio_gpu_modeset_init(vgdev); virtio_device_ready(vgdev->vdev); vgdev->vqs_ready = true; @@ -235,7 +233,6 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) return 0; -err_modeset: err_scanouts: virtio_gpu_ttm_fini(vgdev); err_ttm: @@ -257,7 +254,7 @@ static void virtio_gpu_cleanup_cap_cache(struct virtio_gpu_device *vgdev) } } -void virtio_gpu_driver_unload(struct drm_device *dev) +void virtio_gpu_deinit(struct drm_device *dev) { struct virtio_gpu_device *vgdev = 
dev->dev_private; -- GitLab From 173d02a053d24bc12f2c0643c70466e784ecf37d Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Fri, 8 Feb 2019 15:04:09 +0100 Subject: [PATCH 0857/1278] BACKPORT: drm/virtio: do NOT reuse resource ids Bisected guest kernel changes crashing qemu. Landed at "6c1cd97bda drm/virtio: fix resource id handling". Looked again, and noticed we were not only leaking *some* ids, but *all* ids. The old code never ever called virtio_gpu_resource_id_put(). So, commit 6c1cd97bda effectively makes the Linux kernel start re-using IDs after releasing them, and apparently virglrenderer can't deal with that. Oops. This patch puts a temporary stopgap into place for the 5.0 release. Signed-off-by: Gerd Hoffmann Reviewed-by: Dave Airlie Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20190208140409.15280-1-kraxel@redhat.com (cherry picked from commit 67456f423317efe58fc5766a160fa066bf210db7) Bug: 153580313 Signed-off-by: Alistair Delva Change-Id: I85d16357377a3298a537f01e455f551287e86d8a --- drivers/gpu/drm/virtio/virtgpu_object.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index e88c00367782..cce7c99a3ae3 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -30,6 +30,7 @@ static int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, uint32_t *resid) { +#if 0 int handle; idr_preload(GFP_KERNEL); @@ -40,6 +41,16 @@ static int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, if (handle < 0) return handle; +#else + static int handle; + + /* + * FIXME: dirty hack to avoid re-using IDs, virglrenderer + * can't deal with that. Needs fixing in virglrenderer, also + * should figure a better way to handle that in the guest.
+ */ + handle++; +#endif *resid = handle; return 0; @@ -47,9 +58,11 @@ static int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, static void virtio_gpu_resource_id_put(struct virtio_gpu_device *vgdev, uint32_t id) { +#if 0 spin_lock(&vgdev->resource_idr_lock); idr_remove(&vgdev->resource_idr, id); spin_unlock(&vgdev->resource_idr_lock); +#endif } static void virtio_gpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) -- GitLab From 87d27faccddaac3c1d50b74a6fb4eb552a07222e Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 22 Aug 2019 12:26:14 +0200 Subject: [PATCH 0858/1278] BACKPORT: drm/virtio: make resource id workaround runtime switchable. commit 3e93bc2a58aa241081e043ef9e6e86c42808499a upstream. Also update the comment with a reference to the virglrenderer fix. Signed-off-by: Gerd Hoffmann Reviewed-by: Chia-I Wu Link: http://patchwork.freedesktop.org/patch/msgid/20190822102614.18164-1-kraxel@redhat.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit d41f6cfd9aa7c3f01b44fc1ee15bac5589fd1b60) Bug: 153580313 Signed-off-by: Alistair Delva Change-Id: If17d7c7303b9c4e9e03566a20386d46c3bf188b1 --- drivers/gpu/drm/virtio/virtgpu_object.c | 63 ++++++++++++++----------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index cce7c99a3ae3..60203c6ab57a 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -27,42 +27,49 @@ #include "virtgpu_drv.h" +static int virtio_gpu_virglrenderer_workaround = 1; +module_param_named(virglhack, virtio_gpu_virglrenderer_workaround, int, 0400); + static int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, uint32_t *resid) { -#if 0 - int handle; - - idr_preload(GFP_KERNEL); - spin_lock(&vgdev->resource_idr_lock); - handle = idr_alloc(&vgdev->resource_idr, NULL, 1, 0, GFP_NOWAIT); - spin_unlock(&vgdev->resource_idr_lock); - idr_preload_end(); - - if (handle < 0) 
- return handle; -#else - static int handle; - - /* - * FIXME: dirty hack to avoid re-using IDs, virglrenderer - * can't deal with that. Needs fixing in virglrenderer, also - * should figure a better way to handle that in the guest. - */ - handle++; -#endif - - *resid = handle; + if (virtio_gpu_virglrenderer_workaround) { + /* + * Hack to avoid re-using resource IDs. + * + * virglrenderer versions up to (and including) 0.7.0 + * can't deal with that. virglrenderer commit + * "f91a9dd35715 Fix unlinking resources from hash + * table." (Feb 2019) fixes the bug. + */ + static int handle; + handle++; + *resid = handle; + } else { + int handle; + + idr_preload(GFP_KERNEL); + spin_lock(&vgdev->resource_idr_lock); + handle = idr_alloc(&vgdev->resource_idr, NULL, 1, 0, + GFP_NOWAIT); + spin_unlock(&vgdev->resource_idr_lock); + idr_preload_end(); + + if (handle < 0) + return handle; + + *resid = handle; + } return 0; } static void virtio_gpu_resource_id_put(struct virtio_gpu_device *vgdev, uint32_t id) { -#if 0 - spin_lock(&vgdev->resource_idr_lock); - idr_remove(&vgdev->resource_idr, id); - spin_unlock(&vgdev->resource_idr_lock); -#endif + if (!virtio_gpu_virglrenderer_workaround) { + spin_lock(&vgdev->resource_idr_lock); + idr_remove(&vgdev->resource_idr, id); + spin_unlock(&vgdev->resource_idr_lock); + } } static void virtio_gpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) -- GitLab From f74faa4836c8d8e9b9528d7ef86cd000c17aca29 Mon Sep 17 00:00:00 2001 From: John Bates Date: Thu, 20 Feb 2020 14:53:19 -0800 Subject: [PATCH 0859/1278] BACKPORT: drm/virtio: fix resource id creation race commit fbb30168c7395b9cfeb9e6f7b0c0bca854a6552d upstream. The previous code was not thread safe and caused undefined behavior from spurious duplicate resource IDs. In this patch, an atomic_t is used instead. We no longer see any duplicate IDs in tests with this change. 
Fixes: 16065fcdd19d ("drm/virtio: do NOT reuse resource ids") Signed-off-by: John Bates Reviewed-by: Chia-I Wu Link: http://patchwork.freedesktop.org/patch/msgid/20200220225319.45621-1-jbates@chromium.org Signed-off-by: Gerd Hoffmann Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9998ebb5e73c72ba11a025434be9c2ff1c33b1f0) Bug: 153580313 Signed-off-by: Alistair Delva Change-Id: Iad0ca447e5be8b8098962b50e8d38356d1024126 --- drivers/gpu/drm/virtio/virtgpu_object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 60203c6ab57a..9d43989d8449 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -42,8 +42,8 @@ static int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, * "f91a9dd35715 Fix unlinking resources from hash * table." (Feb 2019) fixes the bug. */ - static int handle; - handle++; + static atomic_t seqno = ATOMIC_INIT(0); + int handle = atomic_inc_return(&seqno); *resid = handle; } else { int handle; -- GitLab From 7c3250e0dfdb8878ae2bf5e9f8fe79e7fa925ea8 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 28 Aug 2019 18:55:16 +1000 Subject: [PATCH 0860/1278] UPSTREAM: drm/virtio: module_param_named() requires linux/moduleparam.h commit b0138364da17617db052c4a738b58bf45e42f500 upstream. 
Fixes: 3e93bc2a58aa ("drm/virtio: make resource id workaround runtime switchable.") Signed-off-by: Stephen Rothwell Link: http://patchwork.freedesktop.org/patch/msgid/20190828185516.22b03da8@canb.auug.org.au Signed-off-by: Gerd Hoffmann Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 564f7c2b5691740b881ff1cb457386de218c7181) Bug: 153580313 Signed-off-by: Alistair Delva Change-Id: If763753355efee31efa6f434b4e5f83a2bceee94 --- drivers/gpu/drm/virtio/virtgpu_object.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 9d43989d8449..2092993e37d3 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -23,6 +23,8 @@ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include <linux/moduleparam.h> + #include #include "virtgpu_drv.h" -- GitLab From ffd01edc77002e702b13114632bc0c69132bf075 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 10 Apr 2019 13:42:25 +0200 Subject: [PATCH 0861/1278] UPSTREAM: virtio-gpu api: comment feature flags Add comments to the existing feature flags, documenting which commands belong to them. 
Signed-off-by: Gerd Hoffmann Reviewed-by: Gurchetan Singh Link: http://patchwork.freedesktop.org/patch/msgid/20190410114227.25846-2-kraxel@redhat.com (cherry picked from commit beb941b970fb129dae206050593d3e768859b146) Bug: 153580313 Signed-off-by: Alistair Delva Change-Id: I8130b6a04aea46f8f6ea8f4ab14acccf0c1932eb --- include/uapi/linux/virtio_gpu.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 3d6d63f9237c..01d4681e796d 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -40,8 +40,16 @@ #include -#define VIRTIO_GPU_F_VIRGL 0 -#define VIRTIO_GPU_F_EDID 1 +/* + * VIRTIO_GPU_CMD_CTX_* + * VIRTIO_GPU_CMD_*_3D + */ +#define VIRTIO_GPU_F_VIRGL 0 + +/* + * VIRTIO_GPU_CMD_GET_EDID + */ +#define VIRTIO_GPU_F_EDID 1 enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, -- GitLab From 21590d3fb39058165a39ad95779370e2714bfd85 Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Fri, 17 Apr 2020 14:05:42 -0700 Subject: [PATCH 0862/1278] ANDROID: drm: ttm: Add ttm_tt_create2 driver hook Implements the equivalent of dde5da2379319c08ceb2295467df6e60a3cf5da1 without requiring all drivers to be updated. 
Bug: 152417756 Signed-off-by: Alistair Delva Change-Id: Icb33fb848d6056736e2dcad45146ec7b539f8a76 --- drivers/gpu/drm/ttm/ttm_bo.c | 18 +++++++++++++----- include/drm/ttm/ttm_bo_driver.h | 16 ++++++++++++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 68eed684dff5..1669af08fafa 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -245,15 +245,23 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) if (zero_alloc) page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; case ttm_bo_type_kernel: - bo->ttm = bdev->driver->ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, - page_flags, glob->dummy_read_page); + if (bdev->driver->ttm_tt_create2) + bo->ttm = bdev->driver->ttm_tt_create2(bo, page_flags, + glob->dummy_read_page); + else + bo->ttm = bdev->driver->ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, + page_flags, glob->dummy_read_page); if (unlikely(bo->ttm == NULL)) ret = -ENOMEM; break; case ttm_bo_type_sg: - bo->ttm = bdev->driver->ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, - page_flags | TTM_PAGE_FLAG_SG, - glob->dummy_read_page); + if (bdev->driver->ttm_tt_create2) + bo->ttm = bdev->driver->ttm_tt_create2(bo, page_flags | TTM_PAGE_FLAG_SG, + glob->dummy_read_page); + else + bo->ttm = bdev->driver->ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, + page_flags | TTM_PAGE_FLAG_SG, + glob->dummy_read_page); if (unlikely(bo->ttm == NULL)) { ret = -ENOMEM; break; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 5f821a9b3a1f..49fdce1f5941 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -343,6 +343,22 @@ struct ttm_bo_driver { uint32_t page_flags, struct page *dummy_read_page); + /** + * ttm_tt_create2 + * + * @bo: pointer to a struct ttm_buffer_object + * @page_flags: Page flags as identified by TTM_PAGE_FLAG_XX flags. + * @dummy_read_page: See struct ttm_bo_device. 
+ * + * Create a struct ttm_tt to back data with system memory pages. + * No pages are actually allocated. + * Returns: + * NULL: Out of memory. + */ + struct ttm_tt *(*ttm_tt_create2)(struct ttm_buffer_object *bo, + uint32_t page_flags, + struct page *dummy_read_page); + /** * ttm_tt_populate * -- GitLab From 58d064884c8926e012b8c0bf53d1b4d1422b9477 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 18 Mar 2019 12:33:28 +0100 Subject: [PATCH 0863/1278] BACKPORT: drm/virtio: move virtio_gpu_object_{attach, detach} calls. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the dummy ttm backend implementation, add a real one for TTM_PL_FLAG_TT objects. The bind/unbind callbacks will call virtio_gpu_object_{attach,detach}, to update the object state on the host side, instead of invoking those calls from the move_notify() callback. With that in place the move and move_notify callbacks are not needed any more, so drop them. Signed-off-by: Gerd Hoffmann Acked-by: Noralf Trønnes Link: http://patchwork.freedesktop.org/patch/msgid/20190318113332.10900-2-kraxel@redhat.com (cherry picked from commit 42ca472603a210a03f4e5d34d2adbf4239f6b1aa) Bug: 152417756 Signed-off-by: Alistair Delva Change-Id: I2cff23af466264a7b8fac284eceeb0618d31d400 --- drivers/gpu/drm/virtio/virtgpu_ttm.c | 107 ++++++++------------------- 1 file changed, 31 insertions(+), 76 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c index aff33b9b47da..9a6a0923e6da 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ttm.c +++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c @@ -246,42 +246,45 @@ static void virtio_gpu_ttm_io_mem_free(struct ttm_bo_device *bdev, */ struct virtio_gpu_ttm_tt { struct ttm_dma_tt ttm; - struct virtio_gpu_device *vgdev; - u64 offset; + struct virtio_gpu_object *obj; }; -static int virtio_gpu_ttm_backend_bind(struct ttm_tt *ttm, - struct ttm_mem_reg *bo_mem) +static int virtio_gpu_ttm_tt_bind(struct ttm_tt 
*ttm, + struct ttm_mem_reg *bo_mem) { - struct virtio_gpu_ttm_tt *gtt = (void *)ttm; + struct virtio_gpu_ttm_tt *gtt = + container_of(ttm, struct virtio_gpu_ttm_tt, ttm.ttm); + struct virtio_gpu_device *vgdev = + virtio_gpu_get_vgdev(gtt->obj->tbo.bdev); - gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT); - if (!ttm->num_pages) - WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", - ttm->num_pages, bo_mem, ttm); - - /* Not implemented */ + virtio_gpu_object_attach(vgdev, gtt->obj, NULL); return 0; } -static int virtio_gpu_ttm_backend_unbind(struct ttm_tt *ttm) +static int virtio_gpu_ttm_tt_unbind(struct ttm_tt *ttm) { - /* Not implemented */ + struct virtio_gpu_ttm_tt *gtt = + container_of(ttm, struct virtio_gpu_ttm_tt, ttm.ttm); + struct virtio_gpu_device *vgdev = + virtio_gpu_get_vgdev(gtt->obj->tbo.bdev); + + virtio_gpu_object_detach(vgdev, gtt->obj); return 0; } -static void virtio_gpu_ttm_backend_destroy(struct ttm_tt *ttm) +static void virtio_gpu_ttm_tt_destroy(struct ttm_tt *ttm) { - struct virtio_gpu_ttm_tt *gtt = (void *)ttm; + struct virtio_gpu_ttm_tt *gtt = + container_of(ttm, struct virtio_gpu_ttm_tt, ttm.ttm); ttm_dma_tt_fini(&gtt->ttm); kfree(gtt); } -static struct ttm_backend_func virtio_gpu_backend_func = { - .bind = &virtio_gpu_ttm_backend_bind, - .unbind = &virtio_gpu_ttm_backend_unbind, - .destroy = &virtio_gpu_ttm_backend_destroy, +static struct ttm_backend_func virtio_gpu_tt_func = { + .bind = &virtio_gpu_ttm_tt_bind, + .unbind = &virtio_gpu_ttm_tt_unbind, + .destroy = &virtio_gpu_ttm_tt_destroy, }; static int virtio_gpu_ttm_tt_populate(struct ttm_tt *ttm) @@ -297,21 +300,21 @@ static void virtio_gpu_ttm_tt_unpopulate(struct ttm_tt *ttm) ttm_pool_unpopulate(ttm); } -static struct ttm_tt *virtio_gpu_ttm_tt_create(struct ttm_bo_device *bdev, - unsigned long size, - uint32_t page_flags, - struct page *dummy_read_page) +static struct ttm_tt *virtio_gpu_ttm_tt_create2(struct ttm_buffer_object *bo, + uint32_t page_flags, + struct page 
*dummy_read_page) { + unsigned long size = bo->num_pages << PAGE_SHIFT; struct virtio_gpu_device *vgdev; struct virtio_gpu_ttm_tt *gtt; - vgdev = virtio_gpu_get_vgdev(bdev); + vgdev = virtio_gpu_get_vgdev(bo->bdev); gtt = kzalloc(sizeof(struct virtio_gpu_ttm_tt), GFP_KERNEL); if (gtt == NULL) return NULL; - gtt->ttm.ttm.func = &virtio_gpu_backend_func; - gtt->vgdev = vgdev; - if (ttm_dma_tt_init(&gtt->ttm, bdev, size, page_flags, + gtt->ttm.ttm.func = &virtio_gpu_tt_func; + gtt->obj = container_of(bo, struct virtio_gpu_object, tbo); + if (ttm_dma_tt_init(&gtt->ttm, bo->bdev, size, page_flags, dummy_read_page)) { kfree(gtt); return NULL; @@ -319,52 +322,6 @@ static struct ttm_tt *virtio_gpu_ttm_tt_create(struct ttm_bo_device *bdev, return &gtt->ttm.ttm; } -static void virtio_gpu_move_null(struct ttm_buffer_object *bo, - struct ttm_mem_reg *new_mem) -{ - struct ttm_mem_reg *old_mem = &bo->mem; - - BUG_ON(old_mem->mm_node != NULL); - *old_mem = *new_mem; - new_mem->mm_node = NULL; -} - -static int virtio_gpu_bo_move(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, - struct ttm_mem_reg *new_mem) -{ - int ret; - - ret = ttm_bo_wait(bo, interruptible, no_wait_gpu); - if (ret) - return ret; - - virtio_gpu_move_null(bo, new_mem); - return 0; -} - -static void virtio_gpu_bo_move_notify(struct ttm_buffer_object *tbo, - bool evict, - struct ttm_mem_reg *new_mem) -{ - struct virtio_gpu_object *bo; - struct virtio_gpu_device *vgdev; - - bo = container_of(tbo, struct virtio_gpu_object, tbo); - vgdev = (struct virtio_gpu_device *)bo->gem_base.dev->dev_private; - - if (!new_mem || (new_mem->placement & TTM_PL_FLAG_SYSTEM)) { - if (bo->hw_res_handle) - virtio_gpu_object_detach(vgdev, bo); - - } else if (new_mem->placement & TTM_PL_FLAG_TT) { - if (bo->hw_res_handle) { - virtio_gpu_object_attach(vgdev, bo, NULL); - } - } -} - static void virtio_gpu_bo_swap_notify(struct ttm_buffer_object *tbo) { struct virtio_gpu_object *bo; @@ -376,19 +333,17 @@ static 
void virtio_gpu_bo_swap_notify(struct ttm_buffer_object *tbo) } static struct ttm_bo_driver virtio_gpu_bo_driver = { - .ttm_tt_create = &virtio_gpu_ttm_tt_create, + .ttm_tt_create2 = &virtio_gpu_ttm_tt_create2, .ttm_tt_populate = &virtio_gpu_ttm_tt_populate, .ttm_tt_unpopulate = &virtio_gpu_ttm_tt_unpopulate, .invalidate_caches = &virtio_gpu_invalidate_caches, .init_mem_type = &virtio_gpu_init_mem_type, .eviction_valuable = ttm_bo_eviction_valuable, .evict_flags = &virtio_gpu_evict_flags, - .move = &virtio_gpu_bo_move, .verify_access = &virtio_gpu_verify_access, .io_mem_reserve = &virtio_gpu_ttm_io_mem_reserve, .io_mem_free = &virtio_gpu_ttm_io_mem_free, .io_mem_pfn = ttm_bo_default_io_mem_pfn, - .move_notify = &virtio_gpu_bo_move_notify, .swap_notify = &virtio_gpu_bo_swap_notify, }; -- GitLab From b47c175e70b5e93f88c907b893a81fb713fc507d Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 09:45:10 -0700 Subject: [PATCH 0864/1278] CHROMIUM: drm/virtgpu: expose new ioctls to userspace These are necessary for creating new types of resources and memories. 
BUG=chromium:924405 TEST=compile Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/1629910 Reviewed-by: Robert Tarasov Commit-Queue: Gurchetan Singh Tested-by: Gurchetan Singh Bug: 153580313 Signed-off-by: Lingfeng Yang Change-Id: Iec22f634000bad1a8713e690db3774d0ab96408d --- include/uapi/drm/virtgpu_drm.h | 53 ++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index f06a789f34cd..a7c2ce87773b 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -46,6 +46,9 @@ extern "C" { #define DRM_VIRTGPU_TRANSFER_TO_HOST 0x07 #define DRM_VIRTGPU_WAIT 0x08 #define DRM_VIRTGPU_GET_CAPS 0x09 +#define DRM_VIRTGPU_RESOURCE_CREATE_V2 0x0a +#define DRM_VIRTGPU_ALLOCATION_METADATA_REQUEST 0x0b +#define DRM_VIRTGPU_ALLOCATION_METADATA_RESPONSE 0x0c #define VIRTGPU_EXECBUF_FENCE_FD_IN 0x01 #define VIRTGPU_EXECBUF_FENCE_FD_OUT 0x02 @@ -71,6 +74,19 @@ struct drm_virtgpu_execbuffer { #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ #define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */ +#define VIRTGPU_PARAM_RESOURCE_V2 3 +#define VIRTGPU_PARAM_SHARED_GUEST 4 +#define VIRTGPU_PARAM_HOST_COHERENT 5 + +#define VIRTGPU_MEMORY_UNDEFINED 0 +#define VIRTGPU_MEMORY_TRANSFER 1 +#define VIRTGPU_MEMORY_SHARED_GUEST 2 +#define VIRTGPU_MEMORY_HOST_COHERENT 3 + +#define VIRTGPU_UNDEFINED_CACHING 0 +#define VIRTGPU_CACHED 1 +#define VIRTGPU_WRITE_COMBINE 2 +#define VIRTGPU_UNCACHED 3 struct drm_virtgpu_getparam { __u64 param; @@ -140,6 +156,31 @@ struct drm_virtgpu_get_caps { __u32 pad; }; +struct drm_virtgpu_resource_create_v2 { + __u32 resource_id; + __u32 guest_memory_type; + __u32 caching_type; + __u32 args_size; + __u32 gem_handle; + __u64 size; + __u64 args; /* void */ +}; + +struct drm_virtgpu_allocation_metadata_request { + __u32 request_id; + __u32 pad; + __u32 request_size; + __u32 
response_size; + __u64 request; /* void */ +}; + +struct drm_virtgpu_allocation_metadata_response { + __u32 request_id; + __u32 pad; + __u32 response_size; + __u64 response; /* void */ +}; + #define DRM_IOCTL_VIRTGPU_MAP \ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map) @@ -175,6 +216,18 @@ struct drm_virtgpu_get_caps { DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GET_CAPS, \ struct drm_virtgpu_get_caps) +#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_V2 \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE_V2, \ + struct drm_virtgpu_resource_create_v2) + +#define DRM_IOCTL_VIRTGPU_ALLOCATION_METADATA_REQUEST \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_ALLOCATION_METADATA_REQUEST, \ + struct drm_virtgpu_allocation_metadata_request) + +#define DRM_IOCTL_VIRTGPU_ALLOCATION_METADATA_RESPONSE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_ALLOCATION_METADATA_RESPONSE, \ + struct drm_virtgpu_allocation_metadata_response) + #if defined(__cplusplus) } #endif -- GitLab From 9c350c6dbf6ddb6b643fc4d005dff1d94838fec3 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 09:50:45 -0700 Subject: [PATCH 0865/1278] CHROMIUM: virtwl: store plane info per virtio_gpu_object This change extends the drm_virtgpu_resource_info struct to include that plane info. 
BUG=chromium:875998 TEST=wayland-simple-egl Signed-off-by: Zach Reizner Reviewed-on: https://chromium-review.googlesource.com/1351813 Commit-Ready: David Riley Tested-by: David Riley Reviewed-by: Tomasz Figa Reviewed-by: Zach Reizner Reviewed-by: Gurchetan Singh [rebase54(groeck): Context conflicts] Signed-off-by: Guenter Roeck Bug: 153580313 Signed-off-by: Lingfeng Yang Change-Id: I1ef98e718a14204f6beb9fd81f48f15bda6052f9 --- drivers/gpu/drm/virtio/virtgpu_drv.h | 9 ++++- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 26 ++++++++++++- drivers/gpu/drm/virtio/virtgpu_vq.c | 51 +++++++++++++++++++++++++- include/uapi/drm/virtgpu_drm.h | 8 +++- include/uapi/linux/virtio_gpu.h | 10 +++++ 5 files changed, 98 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index db60aabf9176..e105fd15b99e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -71,6 +71,13 @@ struct virtio_gpu_object { struct drm_gem_object gem_base; uint32_t hw_res_handle; + bool create_callback_done; + /* These variables are only valid if create_callback_done is true */ + uint32_t num_planes; + uint64_t format_modifier; + uint32_t strides[4]; + uint32_t offsets[4]; + struct sg_table *pages; uint32_t mapped; void *vmap; @@ -339,7 +346,7 @@ void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, uint64_t offset, uint32_t level, struct virtio_gpu_box *box, struct virtio_gpu_fence *fence); -void +int virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo, struct virtio_gpu_object_params *params, diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 402d01ed255e..6e07c594fe03 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -334,9 +334,11 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data, static int 
virtio_gpu_resource_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct virtio_gpu_device *vgdev = dev->dev_private; struct drm_virtgpu_resource_info *ri = data; struct drm_gem_object *gobj = NULL; struct virtio_gpu_object *qobj = NULL; + int ret = 0; gobj = drm_gem_object_lookup(file_priv, ri->bo_handle); if (gobj == NULL) @@ -344,10 +346,30 @@ static int virtio_gpu_resource_info_ioctl(struct drm_device *dev, void *data, qobj = gem_to_virtio_gpu_obj(gobj); - ri->size = qobj->gem_base.size; ri->res_handle = qobj->hw_res_handle; + ri->size = qobj->gem_base.size; + + if (!qobj->create_callback_done) { + ret = wait_event_interruptible(vgdev->resp_wq, + qobj->create_callback_done); + if (ret) + goto out; + } + + if (qobj->num_planes) { + int i; + + ri->num_planes = qobj->num_planes; + for (i = 0; i < qobj->num_planes; i++) { + ri->strides[i] = qobj->strides[i]; + ri->offsets[i] = qobj->offsets[i]; + } + } + + ri->format_modifier = qobj->format_modifier; +out: drm_gem_object_put_unlocked(gobj); - return 0; + return ret; } static int virtio_gpu_transfer_from_host_ioctl(struct drm_device *dev, diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index feddc29b3f2c..70b63cb56cb7 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -856,7 +856,40 @@ void virtio_gpu_cmd_context_detach_resource(struct virtio_gpu_device *vgdev, virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); } -void +static void virtio_gpu_cmd_resource_create_cb(struct virtio_gpu_device *vgdev, + struct virtio_gpu_vbuffer *vbuf) +{ + struct virtio_gpu_resp_resource_plane_info *resp = + (struct virtio_gpu_resp_resource_plane_info *)vbuf->resp_buf; + struct virtio_gpu_object *obj = + (struct virtio_gpu_object *)vbuf->data_buf; + uint32_t resp_type = le32_to_cpu(resp->hdr.type); + int i; + + /* + * Keeps the data_buf, which points to this virtio_gpu_object, from + * getting kfree'd after this cb returns. 
+ */ + vbuf->data_buf = NULL; + + if (resp_type != VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO) + goto finish_pending; + + obj->num_planes = le32_to_cpu(resp->num_planes); + obj->format_modifier = le64_to_cpu(resp->format_modifier); + + for (i = 0; i < obj->num_planes; i++) { + obj->strides[i] = le32_to_cpu(resp->strides[i]); + obj->offsets[i] = le32_to_cpu(resp->offsets[i]); + } + +finish_pending: + obj->create_callback_done = true; + drm_gem_object_put_unlocked(&obj->gem_base); + wake_up_all(&vgdev->resp_wq); +} + +int virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo, struct virtio_gpu_object_params *params, @@ -864,8 +897,15 @@ virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, { struct virtio_gpu_resource_create_3d *cmd_p; struct virtio_gpu_vbuffer *vbuf; + struct virtio_gpu_resp_resource_plane_info *resp_buf; - cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + resp_buf = kzalloc(sizeof(*resp_buf), GFP_KERNEL); + if (!resp_buf) + return -ENOMEM; + + cmd_p = virtio_gpu_alloc_cmd_resp(vgdev, + virtio_gpu_cmd_resource_create_cb, &vbuf, sizeof(*cmd_p), + sizeof(struct virtio_gpu_resp_resource_plane_info), resp_buf); memset(cmd_p, 0, sizeof(*cmd_p)); cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D); @@ -882,8 +922,15 @@ virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, cmd_p->nr_samples = cpu_to_le32(params->nr_samples); cmd_p->flags = cpu_to_le32(params->flags); + /* Reuse the data_buf pointer for the object pointer. 
*/ + vbuf->data_buf = bo; + bo->create_callback_done = false; + drm_gem_object_get(&bo->gem_base); + virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence); bo->created = true; + + return 0; } void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index a7c2ce87773b..8211b48f2241 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -116,7 +116,13 @@ struct drm_virtgpu_resource_info { __u32 bo_handle; __u32 res_handle; __u32 size; - __u32 stride; + union { + __u32 stride; + __u32 strides[4]; /* strides[0] is accessible with stride. */ + }; + __u32 num_planes; + __u32 offsets[4]; + __u64 format_modifier; }; struct drm_virtgpu_3d_box { diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 01d4681e796d..67da04441e32 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -87,6 +87,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_OK_CAPSET_INFO, VIRTIO_GPU_RESP_OK_CAPSET, VIRTIO_GPU_RESP_OK_EDID, + VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO, /* error responses */ VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, @@ -316,6 +317,15 @@ struct virtio_gpu_resp_edid { __u8 edid[1024]; }; +/* VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO */ +struct virtio_gpu_resp_resource_plane_info { + struct virtio_gpu_ctrl_hdr hdr; + __le32 num_planes; + __le64 format_modifier; + __le32 strides[4]; + __le32 offsets[4]; +}; + #define VIRTIO_GPU_EVENT_DISPLAY (1 << 0) struct virtio_gpu_config { -- GitLab From 8e25efaff4917fd29853ecc38a8c56c3c455aca4 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 09:54:50 -0700 Subject: [PATCH 0866/1278] CHROMIUM: virtio-gpu api: VIRTIO_GPU_F_MEMORY Introduce the concept of memory types and memory regions to virtio-gpu. 
Initially only memory regions composed of guest pages are supported (VIRTIO_GPU_MEMORY_TRANSFER, pretty much like current backing storage for resources). I expect support for other memory types will be added later on. VIRTIO_GPU_CMD_MEMORY_CREATE: creates a new memory region. VIRTIO_GPU_CMD_MEMORY_UNREF: destroys a memory region. VIRTIO_GPU_CMD_RESOURCE_CREATE_V2: new command to create resources. It (a) allows specifying the memory type, (b) adds support for planar resources and (c) returns alignment, stride and size of the resource planes. The latter will be needed in case we support mapping host resources into the guest some day. VIRTIO_GPU_CMD_RESOURCE_ATTACH_MEMORY: Use memory region as backing storage for the given resource. The existing VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING command can be used to detach. Motivation: This separates storage management from resource management. It allows memory pooling (vulkan support will most likely need this). It makes things a bit more flexible in general, for example we can represent gem objects as memory regions even if we don't know the format yet (happens on dma-buf import for example). Signed-off-by: Gerd Hoffmann (cherry picked from commit 8eefa19d3e2ace7047c0082bf601c6f4f36f37fd) Signed-off-by: Gurchetan Singh BUG=chromium:924405 TEST=compile Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/1591998 Reviewed-by: Robert Tarasov Commit-Queue: Gurchetan Singh Tested-by: Gurchetan Singh [rebase54(groeck): Context conflicts Note that this patch does not match the patch with the same subject on the kernel mailing list (https://patchwork.kernel.org/patch/10893777/). No idea if that means that this version is more recent or older or unofficial, or that it was modified locally. 
] Signed-off-by: Guenter Roeck Bug: 153580313 Signed-off-by: Lingfeng Yang Change-Id: I258a3a735678ddefd75b968d3f6874f394ac3e46 --- include/uapi/linux/virtio_gpu.h | 82 +++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 67da04441e32..f1d86721fef8 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -51,6 +51,13 @@ */ #define VIRTIO_GPU_F_EDID 1 +/* + * VIRTIO_GPU_CMD_MEMORY_CREATE + * VIRTIO_GPU_CMD_MEMORY_UNREF + * VIRTIO_GPU_CMD_RESOURCE_CREATE_V2 + * VIRTIO_GPU_CMD_RESOURCE_ATTACH_MEMORY + */ +#define VIRTIO_GPU_F_MEMORY 2 enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, @@ -66,6 +73,10 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_GET_CAPSET_INFO, VIRTIO_GPU_CMD_GET_CAPSET, VIRTIO_GPU_CMD_GET_EDID, + VIRTIO_GPU_CMD_MEMORY_CREATE, + VIRTIO_GPU_CMD_MEMORY_UNREF, + VIRTIO_GPU_CMD_RESOURCE_ATTACH_MEMORY, + VIRTIO_GPU_CMD_RESOURCE_CREATE_V2, /* 3d commands */ VIRTIO_GPU_CMD_CTX_CREATE = 0x0200, @@ -88,6 +99,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_OK_CAPSET, VIRTIO_GPU_RESP_OK_EDID, VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO, + VIRTIO_GPU_RESP_OK_RESOURCE_INFO, /* error responses */ VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, @@ -96,6 +108,22 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID, VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, + VIRTIO_GPU_RESP_ERR_INVALID_MEMORY_ID, +}; + +enum virtio_gpu_memory_type { + VIRTIO_GPU_MEMORY_UNDEFINED = 0, + + /* + * Traditional virtio-gpu memory. + * Has both host and guest side storage. + * + * VIRTIO_GPU_CMD_TRANSFER_* commands are used + * to copy between guest and host storage. + * + * Created using VIRTIO_GPU_CMD_MEMORY_CREATE. 
+ */ + VIRTIO_GPU_MEMORY_TRANSFER, }; #define VIRTIO_GPU_FLAG_FENCE (1 << 0) @@ -147,6 +175,7 @@ struct virtio_gpu_resource_unref { struct virtio_gpu_resource_create_2d { struct virtio_gpu_ctrl_hdr hdr; __le32 resource_id; + /* memory_type is VIRTIO_GPU_MEMORY_TRANSFER */ __le32 format; __le32 width; __le32 height; @@ -188,6 +217,7 @@ struct virtio_gpu_resource_attach_backing { struct virtio_gpu_ctrl_hdr hdr; __le32 resource_id; __le32 nr_entries; + /* struct virtio_gpu_mem_entry entries follow here */ }; /* VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING */ @@ -231,6 +261,7 @@ struct virtio_gpu_transfer_host_3d { struct virtio_gpu_resource_create_3d { struct virtio_gpu_ctrl_hdr hdr; __le32 resource_id; + /* memory_type is VIRTIO_GPU_MEMORY_TRANSFER */ __le32 target; __le32 format; __le32 bind; @@ -271,6 +302,57 @@ struct virtio_gpu_cmd_submit { __le32 padding; }; +/* VIRTIO_GPU_CMD_MEMORY_CREATE */ +struct virtio_gpu_cmd_memory_create { + struct virtio_gpu_ctrl_hdr hdr; + __le32 memory_id; + __le32 memory_type; + __le32 flags; + __le32 nr_entries; + /* struct virtio_gpu_mem_entry entries follow here */ +}; + +/* VIRTIO_GPU_CMD_MEMORY_UNREF */ +struct virtio_gpu_cmd_memory_unref { + struct virtio_gpu_ctrl_hdr hdr; + __le32 memory_id; + __le32 padding; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_ATTACH_MEMORY */ +struct virtio_gpu_cmd_resource_attach_memory { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 memory_id; + __le64 offset[4]; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_CREATE_V2 */ +struct virtio_gpu_cmd_resource_create_v2 { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 memory_type; + __le32 format; + __le32 width; + __le32 height; + /* 3d only */ + __le32 target; + __le32 bind; + __le32 depth; + __le32 array_size; + __le32 last_level; + __le32 nr_samples; + __le32 flags; +}; + +/* VIRTIO_GPU_RESP_OK_RESOURCE_INFO */ +struct virtio_gpu_resp_resource_info { + struct virtio_gpu_ctrl_hdr hdr; + __le32 align[4]; + __le32 stride[4]; + __le32 size[4]; 
+}; + #define VIRTIO_GPU_CAPSET_VIRGL 1 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ -- GitLab From 08994a48fb16eadc8ddebd472d27fac2b8c02d78 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 09:57:16 -0700 Subject: [PATCH 0867/1278] CHROMIUM: drm/virtgpu: make memory and resource creation opaque It's not necessary for the guest kernel to know about parameters such as width, height, format, or queue indices (Vulkan). BUG=chromium:924405 TEST=compile Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/1702156 Reviewed-by: Robert Tarasov Commit-Queue: Gurchetan Singh Tested-by: Gurchetan Singh Conflicts: include/uapi/linux/virtio_gpu.h [rebase54(groeck): Context conflicts] Signed-off-by: Guenter Roeck Bug: 153580313 Signed-off-by: Lingfeng Yang Change-Id: I5582fbf1f623900564e90223d2806faefba247eb --- include/uapi/linux/virtio_gpu.h | 96 +++++++++++++++++---------------- 1 file changed, 50 insertions(+), 46 deletions(-) diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index f1d86721fef8..c6353769821e 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -52,12 +52,18 @@ #define VIRTIO_GPU_F_EDID 1 /* - * VIRTIO_GPU_CMD_MEMORY_CREATE - * VIRTIO_GPU_CMD_MEMORY_UNREF + * VIRTIO_GPU_CMD_ALLOCATION_METADATA * VIRTIO_GPU_CMD_RESOURCE_CREATE_V2 - * VIRTIO_GPU_CMD_RESOURCE_ATTACH_MEMORY */ -#define VIRTIO_GPU_F_MEMORY 2 +#define VIRTIO_GPU_F_RESOURCE_V2 2 +/* + * Ability to turn guest pages into host buffers. + */ +#define VIRTIO_GPU_F_SHARED_GUEST 3 +/* + * Can inject host pages into guest. 
+ */ +#define VIRTIO_GPU_F_HOST_COHERENT 4 enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, @@ -73,10 +79,6 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_GET_CAPSET_INFO, VIRTIO_GPU_CMD_GET_CAPSET, VIRTIO_GPU_CMD_GET_EDID, - VIRTIO_GPU_CMD_MEMORY_CREATE, - VIRTIO_GPU_CMD_MEMORY_UNREF, - VIRTIO_GPU_CMD_RESOURCE_ATTACH_MEMORY, - VIRTIO_GPU_CMD_RESOURCE_CREATE_V2, /* 3d commands */ VIRTIO_GPU_CMD_CTX_CREATE = 0x0200, @@ -87,6 +89,9 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D, VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D, VIRTIO_GPU_CMD_SUBMIT_3D, + VIRTIO_GPU_CMD_RESOURCE_CREATE_V2, + VIRTIO_GPU_CMD_RESOURCE_CREATE_V2_UNREF, + VIRTIO_GPU_CMD_ALLOCATION_METADATA, /* cursor commands */ VIRTIO_GPU_CMD_UPDATE_CURSOR = 0x0300, @@ -99,7 +104,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_OK_CAPSET, VIRTIO_GPU_RESP_OK_EDID, VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO, - VIRTIO_GPU_RESP_OK_RESOURCE_INFO, + VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA, /* error responses */ VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, @@ -121,9 +126,18 @@ enum virtio_gpu_memory_type { * VIRTIO_GPU_CMD_TRANSFER_* commands are used * to copy between guest and host storage. * - * Created using VIRTIO_GPU_CMD_MEMORY_CREATE. + * Created using VIRTIO_GPU_CMD_RESOURCE_CREATE_V2. 
*/ VIRTIO_GPU_MEMORY_TRANSFER, + VIRTIO_GPU_MEMORY_SHARED_GUEST, + VIRTIO_GPU_MEMORY_HOST_COHERENT, +}; + +enum virtio_gpu_caching_type { + VIRTIO_GPU_UNDEFINED_CACHING = 0, + VIRTIO_GPU_CACHED, + VIRTIO_GPU_WRITE_COMBINE, + VIRTIO_GPU_UNCACHED, }; #define VIRTIO_GPU_FLAG_FENCE (1 << 0) @@ -302,55 +316,45 @@ struct virtio_gpu_cmd_submit { __le32 padding; }; -/* VIRTIO_GPU_CMD_MEMORY_CREATE */ -struct virtio_gpu_cmd_memory_create { +/* VIRTIO_GPU_CMD_RESOURCE_CREATE_V2 */ +struct virtio_gpu_resource_create_v2 { struct virtio_gpu_ctrl_hdr hdr; - __le32 memory_id; - __le32 memory_type; - __le32 flags; + __le32 resource_id; + __le32 guest_memory_type; + __le32 caching_type; + __le32 pad; + __le64 size; + __le64 pci_addr; + __le32 args_size; __le32 nr_entries; - /* struct virtio_gpu_mem_entry entries follow here */ -}; - -/* VIRTIO_GPU_CMD_MEMORY_UNREF */ -struct virtio_gpu_cmd_memory_unref { - struct virtio_gpu_ctrl_hdr hdr; - __le32 memory_id; - __le32 padding; + /* ('nr_entries' * struct virtio_gpu_mem_entry) + 'args_size' + * bytes follow here. 
+ */ }; -/* VIRTIO_GPU_CMD_RESOURCE_ATTACH_MEMORY */ -struct virtio_gpu_cmd_resource_attach_memory { +/* VIRTIO_GPU_CMD_RESOURCE_CREATE_V2_UNREF */ +struct virtio_gpu_resource_v2_unref { struct virtio_gpu_ctrl_hdr hdr; __le32 resource_id; - __le32 memory_id; - __le64 offset[4]; + __le32 padding; }; /* VIRTIO_GPU_CMD_RESOURCE_CREATE_V2 */ -struct virtio_gpu_cmd_resource_create_v2 { +struct virtio_gpu_allocation_metadata { struct virtio_gpu_ctrl_hdr hdr; - __le32 resource_id; - __le32 memory_type; - __le32 format; - __le32 width; - __le32 height; - /* 3d only */ - __le32 target; - __le32 bind; - __le32 depth; - __le32 array_size; - __le32 last_level; - __le32 nr_samples; - __le32 flags; + __le32 request_id; + __le32 pad; + __le32 request_size; + __le32 response_size; + /* 'request_size' bytes go here */ }; -/* VIRTIO_GPU_RESP_OK_RESOURCE_INFO */ -struct virtio_gpu_resp_resource_info { +/* VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA */ +struct virtio_gpu_resp_allocation_metadata { struct virtio_gpu_ctrl_hdr hdr; - __le32 align[4]; - __le32 stride[4]; - __le32 size[4]; + __le32 request_id; + __le32 response_size; + /* 'response_size' bytes go here */ }; #define VIRTIO_GPU_CAPSET_VIRGL 1 -- GitLab From 400919b2ae0fae0490d0e286f1edecb4970f9d1d Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 10:42:00 -0700 Subject: [PATCH 0868/1278] CHROMIUM: drm/virtgpu: add memory type to virtio_gpu_object_params This will allow us to distinguish between host-coherent and guest memory. 
BUG=chromium:924405 TEST=compile Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/1629913 Reviewed-by: Robert Tarasov Commit-Queue: Gurchetan Singh Tested-by: Gurchetan Singh Bug: 153580313 Signed-off-by: Lingfeng Yang Change-Id: If875c25aabf9116f825cd681b1cebcb8ac5d25ae --- drivers/gpu/drm/virtio/virtgpu_drv.h | 6 ++++++ drivers/gpu/drm/virtio/virtgpu_gem.c | 4 ++-- drivers/gpu/drm/virtio/virtgpu_object.c | 3 +++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index e105fd15b99e..79f4159b191f 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -58,6 +58,9 @@ struct virtio_gpu_object_params { bool dumb; /* 3d */ bool virgl; + bool resource_v2; + enum virtio_gpu_memory_type guest_memory_type; + enum virtio_gpu_caching_type caching_type; uint32_t target; uint32_t bind; uint32_t depth; @@ -82,11 +85,14 @@ struct virtio_gpu_object { uint32_t mapped; void *vmap; bool dumb; + bool resource_v2; struct ttm_place placement_code; struct ttm_placement placement; struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; bool created; + enum virtio_gpu_memory_type guest_memory_type; + enum virtio_gpu_caching_type caching_type; }; #define gem_to_virtio_gpu_obj(gobj) \ container_of((gobj), struct virtio_gpu_object, gem_base) diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index a607bd154e38..ce6e4ec4a432 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -134,7 +134,7 @@ int virtio_gpu_gem_object_open(struct drm_gem_object *obj, struct virtio_gpu_object *qobj = gem_to_virtio_gpu_obj(obj); int r; - if (!vgdev->has_virgl_3d) + if (!vgdev->has_virgl_3d || qobj->resource_v2) return 0; r = virtio_gpu_object_reserve(qobj, false); @@ -155,7 +155,7 @@ void virtio_gpu_gem_object_close(struct drm_gem_object *obj, struct 
virtio_gpu_object *qobj = gem_to_virtio_gpu_obj(obj); int r; - if (!vgdev->has_virgl_3d) + if (!vgdev->has_virgl_3d || qobj->resource_v2) return; r = virtio_gpu_object_reserve(qobj, false); diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 2092993e37d3..7ae77d6e6ea2 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -139,6 +139,9 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, return ret; } bo->dumb = params->dumb; + bo->resource_v2 = params->resource_v2; + bo->guest_memory_type = params->guest_memory_type; + bo->caching_type = params->caching_type; if (params->virgl) { virtio_gpu_cmd_resource_create_3d(vgdev, bo, params, fence); -- GitLab From e9855d59354c32bde9c9e98fe4ce64862ee7f3d1 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 11:03:32 -0700 Subject: [PATCH 0869/1278] CHROMIUM: drm/virtgpu: check for revelant capabilites Initialize the TTM VRAM support when host coherent memory is detected. 
BUG=chromium:924405 TEST=compile Bug: b/153580313 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/1702157 Reviewed-by: Robert Tarasov Commit-Queue: Gurchetan Singh Tested-by: Gurchetan Singh [rebase54(groeck): Squashed: FIXUP: CHROMIUM: drm/virtgpu: check for revelant capabilites ] Change-Id: If2c6269d82cc4e9826660d8ffaefe19320fba2e1 Signed-off-by: Guenter Roeck Signed-off-by: Lingfeng Yang --- drivers/gpu/drm/virtio/virtgpu_debugfs.c | 3 + drivers/gpu/drm/virtio/virtgpu_drv.c | 3 + drivers/gpu/drm/virtio/virtgpu_drv.h | 8 +++ drivers/gpu/drm/virtio/virtgpu_kms.c | 24 +++++++ drivers/gpu/drm/virtio/virtgpu_object.c | 34 +++++++++- drivers/gpu/drm/virtio/virtgpu_ttm.c | 81 ++++++++++++++++++++---- 6 files changed, 138 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_debugfs.c b/drivers/gpu/drm/virtio/virtgpu_debugfs.c index ed0fcda713c3..7ff140c6ef3c 100644 --- a/drivers/gpu/drm/virtio/virtgpu_debugfs.c +++ b/drivers/gpu/drm/virtio/virtgpu_debugfs.c @@ -47,6 +47,9 @@ static int virtio_gpu_features(struct seq_file *m, void *data) virtio_add_bool(m, "virgl", vgdev->has_virgl_3d); virtio_add_bool(m, "edid", vgdev->has_edid); + virtio_add_bool(m, "resource v2", vgdev->has_resource_v2); + virtio_add_bool(m, "shared guest", vgdev->has_shared_guest); + virtio_add_bool(m, "host coherent", vgdev->has_host_coherent); virtio_add_int(m, "cap sets", vgdev->num_capsets); virtio_add_int(m, "scanouts", vgdev->num_scanouts); return 0; diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index e2c26a2914c5..a947a1901030 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -178,6 +178,9 @@ static unsigned int features[] = { VIRTIO_GPU_F_VIRGL, #endif VIRTIO_GPU_F_EDID, + VIRTIO_GPU_F_RESOURCE_V2, + VIRTIO_GPU_F_SHARED_GUEST, + VIRTIO_GPU_F_HOST_COHERENT, }; static struct virtio_driver virtio_gpu_driver = { .feature_table = features, diff 
--git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 79f4159b191f..4ab2496b7eae 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -234,12 +234,20 @@ struct virtio_gpu_device { bool has_virgl_3d; bool has_edid; + bool has_resource_v2; + bool has_shared_guest; + bool has_host_coherent; struct work_struct config_changed_work; struct virtio_gpu_drv_capset *capsets; uint32_t num_capsets; struct list_head cap_cache; + + /* coherent memory */ + int cbar; + unsigned long caddr; + unsigned long csize; }; struct virtio_gpu_fpriv { diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 1c721b39f2ba..d1d018e0fe54 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -23,6 +23,7 @@ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include #include #include #include @@ -181,6 +182,29 @@ int virtio_gpu_init(struct drm_device *dev) DRM_INFO("EDID support available.\n"); } + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_V2)) { + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_HOST_COHERENT)) { + vgdev->cbar = 4; + vgdev->caddr = pci_resource_start(dev->pdev, vgdev->cbar); + vgdev->csize = pci_resource_len(dev->pdev, vgdev->cbar); + ret = pci_request_region(dev->pdev, vgdev->cbar, "virtio-gpu-coherent"); + if (ret != 0) { + DRM_WARN("Cannot request coherent memory bar\n"); + } else { + DRM_INFO("coherent host resources enabled, using %s bar %d," + "at 0x%lx, size %ld MB", dev_name(&dev->pdev->dev), + vgdev->cbar, vgdev->caddr, vgdev->csize >> 20); + + vgdev->has_host_coherent = true; + } + } + + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_SHARED_GUEST)) + vgdev->has_shared_guest = true; + + vgdev->has_resource_v2 = true; + } + ret = virtio_find_vqs(vgdev->vdev, 2, vqs, callbacks, names, NULL); if (ret) { DRM_ERROR("failed to find virt queues\n"); diff --git 
a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 7ae77d6e6ea2..ef25eacf7fb7 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -96,14 +96,42 @@ static void virtio_gpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) static void virtio_gpu_init_ttm_placement(struct virtio_gpu_object *vgbo) { u32 c = 1; + u32 ttm_caching_flags = 0; vgbo->placement.placement = &vgbo->placement_code; vgbo->placement.busy_placement = &vgbo->placement_code; vgbo->placement_code.fpfn = 0; vgbo->placement_code.lpfn = 0; - vgbo->placement_code.flags = - TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT | - TTM_PL_FLAG_NO_EVICT; + + switch (vgbo->caching_type) { + case VIRTIO_GPU_CACHED: + ttm_caching_flags = TTM_PL_FLAG_CACHED; + break; + case VIRTIO_GPU_WRITE_COMBINE: + ttm_caching_flags = TTM_PL_FLAG_WC; + break; + case VIRTIO_GPU_UNCACHED: + ttm_caching_flags = TTM_PL_FLAG_UNCACHED; + break; + default: + ttm_caching_flags = TTM_PL_MASK_CACHING; + } + + + switch (vgbo->guest_memory_type) { + case VIRTIO_GPU_MEMORY_UNDEFINED: + case VIRTIO_GPU_MEMORY_TRANSFER: + case VIRTIO_GPU_MEMORY_SHARED_GUEST: + vgbo->placement_code.flags = + TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT | + TTM_PL_FLAG_NO_EVICT; + break; + case VIRTIO_GPU_MEMORY_HOST_COHERENT: + vgbo->placement_code.flags = + ttm_caching_flags | TTM_PL_FLAG_VRAM | + TTM_PL_FLAG_NO_EVICT; + break; + } vgbo->placement.num_placement = c; vgbo->placement.num_busy_placement = c; diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c index 9a6a0923e6da..89639d401fa5 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ttm.c +++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c @@ -185,6 +185,12 @@ static int virtio_gpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, man->available_caching = TTM_PL_MASK_CACHING; man->default_caching = TTM_PL_FLAG_CACHED; break; + case TTM_PL_VRAM: + man->func = &ttm_bo_manager_func; + man->flags = 
TTM_MEMTYPE_FLAG_MAPPABLE; + man->available_caching = TTM_PL_MASK_CACHING; + man->default_caching = TTM_PL_FLAG_CACHED; + break; default: DRM_ERROR("Unsupported memory type %u\n", (unsigned int)type); return -EINVAL; @@ -216,6 +222,7 @@ static int virtio_gpu_verify_access(struct ttm_buffer_object *bo, static int virtio_gpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) { + struct virtio_gpu_device *vgdev = virtio_gpu_get_vgdev(bdev); struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; mem->bus.addr = NULL; @@ -229,8 +236,18 @@ static int virtio_gpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, case TTM_PL_SYSTEM: case TTM_PL_TT: /* system memory */ + mem->bus.offset = 0; + mem->bus.base = 0; + mem->bus.is_iomem = false; + return 0; + case TTM_PL_VRAM: + /* coherent memory (pci bar) */ + mem->bus.offset = mem->start << PAGE_SHIFT; + mem->bus.base = vgdev->caddr; + mem->bus.is_iomem = true; return 0; default: + DRM_ERROR("Unsupported memory type %u\n", mem->mem_type); return -EINVAL; } return 0; @@ -249,8 +266,19 @@ struct virtio_gpu_ttm_tt { struct virtio_gpu_object *obj; }; -static int virtio_gpu_ttm_tt_bind(struct ttm_tt *ttm, - struct ttm_mem_reg *bo_mem) +static int virtio_gpu_ttm_vram_bind(struct ttm_tt *ttm, + struct ttm_mem_reg *bo_mem) +{ + return 0; +} + +static int virtio_gpu_ttm_vram_unbind(struct ttm_tt *ttm) +{ + return 0; +} + +static int virtio_gpu_ttm_backend_bind(struct ttm_tt *ttm, + struct ttm_mem_reg *bo_mem) { struct virtio_gpu_ttm_tt *gtt = container_of(ttm, struct virtio_gpu_ttm_tt, ttm.ttm); @@ -261,7 +289,7 @@ static int virtio_gpu_ttm_tt_bind(struct ttm_tt *ttm, return 0; } -static int virtio_gpu_ttm_tt_unbind(struct ttm_tt *ttm) +static int virtio_gpu_ttm_backend_unbind(struct ttm_tt *ttm) { struct virtio_gpu_ttm_tt *gtt = container_of(ttm, struct virtio_gpu_ttm_tt, ttm.ttm); @@ -281,9 +309,15 @@ static void virtio_gpu_ttm_tt_destroy(struct ttm_tt *ttm) kfree(gtt); } -static struct ttm_backend_func 
virtio_gpu_tt_func = { - .bind = &virtio_gpu_ttm_tt_bind, - .unbind = &virtio_gpu_ttm_tt_unbind, +static struct ttm_backend_func virtio_gpu_backend_func = { + .bind = &virtio_gpu_ttm_backend_bind, + .unbind = &virtio_gpu_ttm_backend_unbind, + .destroy = &virtio_gpu_ttm_tt_destroy, +}; + +static struct ttm_backend_func virtio_gpu_vram_func = { + .bind = &virtio_gpu_ttm_vram_bind, + .unbind = &virtio_gpu_ttm_vram_unbind, .destroy = &virtio_gpu_ttm_tt_destroy, }; @@ -306,19 +340,33 @@ static struct ttm_tt *virtio_gpu_ttm_tt_create2(struct ttm_buffer_object *bo, { unsigned long size = bo->num_pages << PAGE_SHIFT; struct virtio_gpu_device *vgdev; + struct virtio_gpu_object *obj; struct virtio_gpu_ttm_tt *gtt; vgdev = virtio_gpu_get_vgdev(bo->bdev); + obj = container_of(bo, struct virtio_gpu_object, tbo); + gtt = kzalloc(sizeof(struct virtio_gpu_ttm_tt), GFP_KERNEL); if (gtt == NULL) return NULL; - gtt->ttm.ttm.func = &virtio_gpu_tt_func; - gtt->obj = container_of(bo, struct virtio_gpu_object, tbo); - if (ttm_dma_tt_init(&gtt->ttm, bo->bdev, size, page_flags, - dummy_read_page)) { - kfree(gtt); - return NULL; + gtt->obj = obj; + + if (obj->guest_memory_type == VIRTIO_GPU_MEMORY_HOST_COHERENT) { + gtt->ttm.ttm.func = &virtio_gpu_vram_func; + if (ttm_tt_init(&gtt->ttm.ttm, bo->bdev, size, page_flags, + dummy_read_page)) { + kfree(gtt); + return NULL; + } + } else { + gtt->ttm.ttm.func = &virtio_gpu_backend_func; + if (ttm_dma_tt_init(&gtt->ttm, bo->bdev, size, page_flags, + dummy_read_page)) { + kfree(gtt); + return NULL; + } } + return &gtt->ttm.ttm; } @@ -370,6 +418,15 @@ int virtio_gpu_ttm_init(struct virtio_gpu_device *vgdev) DRM_ERROR("Failed initializing GTT heap.\n"); goto err_mm_init; } + + if (vgdev->has_host_coherent) { + r = ttm_bo_init_mm(&vgdev->mman.bdev, TTM_PL_VRAM, + vgdev->csize >> PAGE_SHIFT); + if (r) { + DRM_ERROR("Failed initializing VRAM heap.\n"); + goto err_mm_init; + } + } return 0; err_mm_init: -- GitLab From 73738beaa7074cf60b1fd0b4b8d930115bfd4aff Mon
Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 11:35:40 -0700 Subject: [PATCH 0870/1278] CHROMIUM: drm/virtgpu: add stub ioctl implementation Add plumbing for new ioctls. BUG=chromium:924405 TEST=compile Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/1629911 Reviewed-by: Robert Tarasov Commit-Queue: Gurchetan Singh Tested-by: Gurchetan Singh [rebase54(groeck): Context conflicts] Signed-off-by: Guenter Roeck Bug: 153580313 Signed-off-by: Lingfeng Yang Change-Id: I3c4a27e4f656d0884ef9e1f056a89aa875ec6d2a --- drivers/gpu/drm/virtio/virtgpu_drv.h | 2 +- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 30 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 4ab2496b7eae..965b91ab8cf8 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -255,7 +255,7 @@ struct virtio_gpu_fpriv { }; /* virtio_ioctl.c */ -#define DRM_VIRTIO_NUM_IOCTLS 10 +#define DRM_VIRTIO_NUM_IOCTLS 13 extern struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS]; int virtio_gpu_object_list_validate(struct ww_acquire_ctx *ticket, struct list_head *head); diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 6e07c594fe03..fdc370ae2071 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -572,6 +572,24 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev, return 0; } +static int virtio_gpu_resource_create_v2_ioctl(struct drm_device *dev, + void *data, struct drm_file *file) +{ + return 0; +} + +static int virtio_gpu_allocation_metadata_request_ioctl(struct drm_device *dev, + void *data, struct drm_file *file) +{ + return 0; +} + +static int virtio_gpu_allocation_metadata_response_ioctl(struct drm_device *dev, + void *data, struct drm_file *file) +{ + return 0; +} + struct drm_ioctl_desc 
virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS] = { DRM_IOCTL_DEF_DRV(VIRTGPU_MAP, virtio_gpu_map_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), @@ -604,4 +622,16 @@ struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS] = { DRM_IOCTL_DEF_DRV(VIRTGPU_GET_CAPS, virtio_gpu_get_caps_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), + + DRM_IOCTL_DEF_DRV(VIRTGPU_RESOURCE_CREATE_V2, + virtio_gpu_resource_create_v2_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + + DRM_IOCTL_DEF_DRV(VIRTGPU_ALLOCATION_METADATA_REQUEST, + virtio_gpu_allocation_metadata_request_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + + DRM_IOCTL_DEF_DRV(VIRTGPU_ALLOCATION_METADATA_RESPONSE, + virtio_gpu_allocation_metadata_response_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), }; -- GitLab From dc92d4ac179f5e6a766616af82827e6c0a17d276 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 11:39:34 -0700 Subject: [PATCH 0871/1278] CHROMIUM: drm/virtgpu: implement DRM_VIRTGPU_RESOURCE_CREATE_V2 Pass 'args_size' bytes to the host, where it can be decoded. Allocate 'size' bytes from the pool specified by 'guest_memory_type'. 
BUG=chromium:924405 TEST=compile Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/1629915 Reviewed-by: Robert Tarasov Commit-Queue: Gurchetan Singh Tested-by: Gurchetan Singh [rebase54(groeck): Add missing include file Renamed reservation_object_add_excl_fence -> dma_resv_add_excl_fence ] Signed-off-by: Guenter Roeck Bug: 153580313 Signed-off-by: Lingfeng Yang Change-Id: I7901019384017cae2f7b341c60ff91bb774abe94 --- drivers/gpu/drm/virtio/virtgpu_drv.h | 11 +++ drivers/gpu/drm/virtio/virtgpu_ioctl.c | 115 ++++++++++++++++++++++++- drivers/gpu/drm/virtio/virtgpu_ttm.c | 7 ++ drivers/gpu/drm/virtio/virtgpu_vq.c | 48 +++++++++++ 4 files changed, 180 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 965b91ab8cf8..54773d96e641 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -365,6 +365,17 @@ virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo, struct virtio_gpu_object_params *params, struct virtio_gpu_fence *fence); +void +virtio_gpu_cmd_resource_create_v2(struct virtio_gpu_device *vgdev, + uint32_t resource_id, uint32_t guest_memory_type, + uint32_t caching_type, uint64_t size, + uint64_t pci_addr, uint32_t nents, + uint32_t args_size, void *data, uint32_t data_size, + struct virtio_gpu_fence *fence); +void +virtio_gpu_cmd_resource_v2_unref(struct virtio_gpu_device *vgdev, + uint32_t resource_id, + struct virtio_gpu_fence *fence); void virtio_gpu_ctrl_ack(struct virtqueue *vq); void virtio_gpu_cursor_ack(struct virtqueue *vq); void virtio_gpu_fence_ack(struct virtqueue *vq); diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index fdc370ae2071..21a16df5e7ee 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -25,10 +25,13 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include +#include +#include + #include #include #include -#include #include "virtgpu_drv.h" @@ -575,7 +578,117 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev, static int virtio_gpu_resource_create_v2_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + void *buf; + int ret, si, nents; + uint32_t handle = 0; + uint64_t pci_addr = 0; + struct scatterlist *sg; + size_t total_size, offset; + struct virtio_gpu_object *obj; + struct virtio_gpu_fence *fence; + struct virtio_gpu_mem_entry *ents; + struct drm_virtgpu_resource_create_v2 *rc_v2 = data; + struct virtio_gpu_object_params params = { 0 }; + struct virtio_gpu_device *vgdev = dev->dev_private; + bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); + void __user *args = u64_to_user_ptr(rc_v2->args); + + total_size = offset = 0; + params.size = rc_v2->size; + params.guest_memory_type = rc_v2->guest_memory_type; + params.resource_v2 = true; + params.caching_type = rc_v2->caching_type; + + obj = virtio_gpu_alloc_object(dev, &params, NULL); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (!obj->pages) { + int ret; + ret = virtio_gpu_object_get_sg_table(vgdev, obj); + if (ret) + goto err_free_obj; + } + + if (rc_v2->guest_memory_type == VIRTGPU_MEMORY_HOST_COHERENT) { + nents = 0; + } else if (use_dma_api) { + obj->mapped = dma_map_sg(vgdev->vdev->dev.parent, + obj->pages->sgl, obj->pages->nents, + DMA_TO_DEVICE); + nents = obj->mapped; + } else { + nents = obj->pages->nents; + } + + total_size = nents * sizeof(struct virtio_gpu_mem_entry) + + rc_v2->args_size; + + buf = kzalloc(total_size, GFP_KERNEL); + if (!buf) { + ret = -ENOMEM; + goto err_free_obj; + } + + ents = buf; + if (rc_v2->guest_memory_type == VIRTGPU_MEMORY_HOST_COHERENT) { + pci_addr = vgdev->caddr + obj->tbo.offset; + } else { + for_each_sg(obj->pages->sgl, sg, nents, si) { + ents[si].addr = cpu_to_le64(use_dma_api + ?
sg_dma_address(sg) + : sg_phys(sg)); + ents[si].length = cpu_to_le32(sg->length); + ents[si].padding = 0; + offset += sizeof(struct virtio_gpu_mem_entry); + } + } + + if (rc_v2->args_size) { + if (copy_from_user(buf + offset, args, + rc_v2->args_size)) { + ret = -EFAULT; + goto err_free_buf; + } + } + + fence = virtio_gpu_fence_alloc(vgdev); + if (!fence) { + ret = -ENOMEM; + goto err_free_buf; + } + + ret = drm_gem_handle_create(file, &obj->gem_base, &handle); + if (ret) + goto err_fence_put; + + virtio_gpu_cmd_resource_create_v2(vgdev, obj->hw_res_handle, + rc_v2->guest_memory_type, + rc_v2->caching_type, rc_v2->size, + pci_addr, nents, rc_v2->args_size, + buf, total_size, fence); + + /* + * No need to call virtio_gpu_object_reserve since the buffer is not + * being used for ttm validation and no other processes can access + * the reservation object at this point. + */ + reservation_object_add_excl_fence(obj->tbo.resv, &fence->f); + + dma_fence_put(&fence->f); + drm_gem_object_put_unlocked(&obj->gem_base); + + rc_v2->resource_id = obj->hw_res_handle; + rc_v2->gem_handle = handle; return 0; + +err_fence_put: + dma_fence_put(&fence->f); +err_free_buf: + kfree(buf); +err_free_obj: + drm_gem_object_release(&obj->gem_base); + return ret; } static int virtio_gpu_allocation_metadata_request_ioctl(struct drm_device *dev, diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c index 89639d401fa5..f381a477cd00 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ttm.c +++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c @@ -274,6 +274,13 @@ static int virtio_gpu_ttm_vram_bind(struct ttm_tt *ttm, static int virtio_gpu_ttm_vram_unbind(struct ttm_tt *ttm) { + struct virtio_gpu_ttm_tt *gtt = + container_of(ttm, struct virtio_gpu_ttm_tt, ttm.ttm); + struct virtio_gpu_device *vgdev = + virtio_gpu_get_vgdev(gtt->obj->tbo.bdev); + struct virtio_gpu_object *obj = gtt->obj; + + virtio_gpu_cmd_resource_v2_unref(vgdev, obj->hw_res_handle, NULL); return 0; } diff --git 
a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 70b63cb56cb7..cf2419515119 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -532,6 +532,54 @@ virtio_gpu_cmd_resource_attach_backing(struct virtio_gpu_device *vgdev, virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence); } +void +virtio_gpu_cmd_resource_create_v2(struct virtio_gpu_device *vgdev, + uint32_t resource_id, + uint32_t guest_memory_type, + uint32_t caching_type, uint64_t size, + uint64_t pci_addr, uint32_t nents, + uint32_t args_size, void *data, + uint32_t data_size, + struct virtio_gpu_fence *fence) +{ + struct virtio_gpu_resource_create_v2 *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_CREATE_V2); + cmd_p->resource_id = cpu_to_le32(resource_id); + cmd_p->guest_memory_type = cpu_to_le32(guest_memory_type); + cmd_p->caching_type = cpu_to_le32(caching_type); + cmd_p->size = cpu_to_le64(size); + cmd_p->pci_addr = cpu_to_le64(pci_addr); + cmd_p->args_size = cpu_to_le32(args_size); + cmd_p->nr_entries = cpu_to_le32(nents); + + vbuf->data_buf = data; + vbuf->data_size = data_size; + + virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence); +} + +void +virtio_gpu_cmd_resource_v2_unref(struct virtio_gpu_device *vgdev, + uint32_t resource_id, + struct virtio_gpu_fence *fence) +{ + struct virtio_gpu_resource_v2_unref *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_CREATE_V2_UNREF); + cmd_p->resource_id = cpu_to_le32(resource_id); + + virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence); +} + static void virtio_gpu_cmd_get_display_info_cb(struct virtio_gpu_device *vgdev, struct 
virtio_gpu_vbuffer *vbuf) { -- GitLab From d5e291eac302db1299f9d5e973b1749a8c1927c9 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 11:42:40 -0700 Subject: [PATCH 0872/1278] CHROMIUM: drm/virtgpu: introduce request IDRs This way we can lookup allocation metadata requests from the host. BUG=chromium:924405 TEST=compile Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/1702158 Reviewed-by: Robert Tarasov Commit-Queue: Gurchetan Singh Tested-by: Gurchetan Singh Bug: 153580313 Signed-off-by: Lingfeng Yang Change-Id: Ic32335aa6a3f97489321c4fae2be8c00748906dd --- drivers/gpu/drm/virtio/virtgpu_drv.h | 3 +++ drivers/gpu/drm/virtio/virtgpu_kms.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 54773d96e641..87133224344e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -248,6 +248,9 @@ struct virtio_gpu_device { int cbar; unsigned long caddr; unsigned long csize; + + struct idr request_idr; + spinlock_t request_idr_lock; }; struct virtio_gpu_fpriv { diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index d1d018e0fe54..dff2cd32f783 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -158,6 +158,8 @@ int virtio_gpu_init(struct drm_device *dev) idr_init(&vgdev->ctx_id_idr); spin_lock_init(&vgdev->resource_idr_lock); idr_init(&vgdev->resource_idr); + spin_lock_init(&vgdev->request_idr_lock); + idr_init(&vgdev->request_idr); init_waitqueue_head(&vgdev->resp_wq); virtio_gpu_init_vq(&vgdev->ctrlq, virtio_gpu_dequeue_ctrl_func); virtio_gpu_init_vq(&vgdev->cursorq, virtio_gpu_dequeue_cursor_func); -- GitLab From 0bacdf8f4b7718d9949b1a168b4cce9b104331e8 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 11:45:01 -0700 Subject: [PATCH 0873/1278] CHROMIUM: drm/virtgpu: implement metadata allocation 
ioctl No guest side buffers are allocated with this. This just sends and recieves metadata from/to host. With this, we can expose new features to userspace. BUG=chromium:924405 TEST=compile Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/1629917 Reviewed-by: Robert Tarasov Commit-Queue: Gurchetan Singh Tested-by: Gurchetan Singh Bug: 153580313 Signed-off-by: Lingfeng Yang Change-Id: I1c55b4f4cc0a320c35586380ec37a6e03436274c --- drivers/gpu/drm/virtio/virtgpu_drv.h | 13 +++++ drivers/gpu/drm/virtio/virtgpu_ioctl.c | 78 +++++++++++++++++++++++++- drivers/gpu/drm/virtio/virtgpu_vq.c | 66 ++++++++++++++++++++++ 3 files changed, 156 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 87133224344e..a2efe4bdb7ea 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -201,6 +201,12 @@ struct virtio_gpu_drv_cap_cache { atomic_t is_valid; }; +struct virtio_gpu_allocation_metadata_response { + bool callback_done; + struct virtio_gpu_resp_allocation_metadata info; + uint32_t response_data[]; +}; + struct virtio_gpu_device { struct device *dev; struct drm_device *ddev; @@ -379,6 +385,13 @@ void virtio_gpu_cmd_resource_v2_unref(struct virtio_gpu_device *vgdev, uint32_t resource_id, struct virtio_gpu_fence *fence); +int +virtio_gpu_cmd_allocation_metadata(struct virtio_gpu_device *vgdev, + uint32_t request_id, + uint32_t request_size, + uint32_t response_size, + void *request, + struct virtio_gpu_fence *fence); void virtio_gpu_ctrl_ack(struct virtqueue *vq); void virtio_gpu_cursor_ack(struct virtqueue *vq); void virtio_gpu_fence_ack(struct virtqueue *vq); diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 21a16df5e7ee..1850619a5d58 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -694,13 +694,89 @@ static int 
virtio_gpu_resource_create_v2_ioctl(struct drm_device *dev, static int virtio_gpu_allocation_metadata_request_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + void *request; + uint32_t request_id; + struct drm_virtgpu_allocation_metadata_request *amr = data; + struct virtio_gpu_device *vgdev = dev->dev_private; + struct virtio_gpu_allocation_metadata_response *response; + void __user *params = u64_to_user_ptr(amr->request); + + if (!amr->request_size) + return -EINVAL; + + request = kzalloc(amr->request_size, GFP_KERNEL); + if (!request) { + return -ENOMEM; + } + + if (copy_from_user(request, params, + amr->request_size)) { + kfree(request); + return -EFAULT; + } + + if (amr->response_size) { + response = kzalloc(sizeof(struct virtio_gpu_allocation_metadata_response) + + amr->response_size, GFP_KERNEL); + if (!response) { + kfree(request); + return -ENOMEM; + } + + response->callback_done = false; + idr_preload(GFP_KERNEL); + spin_lock(&vgdev->request_idr_lock); + request_id = idr_alloc(&vgdev->request_idr, response, 1, 0, + GFP_NOWAIT); + spin_unlock(&vgdev->request_idr_lock); + idr_preload_end(); + amr->request_id = request_id; + } + + virtio_gpu_cmd_allocation_metadata(vgdev, request_id, + amr->request_size, + amr->response_size, + request, + NULL); return 0; } static int virtio_gpu_allocation_metadata_response_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - return 0; + int ret = -EINVAL; + struct virtio_gpu_allocation_metadata_response *response; + struct virtio_gpu_device *vgdev = dev->dev_private; + struct drm_virtgpu_allocation_metadata_response *rcr = data; + void __user *user_data = u64_to_user_ptr(rcr->response); + + spin_lock(&vgdev->request_idr_lock); + response = idr_find(&vgdev->request_idr, rcr->request_id); + spin_unlock(&vgdev->request_idr_lock); + + if (!response) + goto out; + + ret = wait_event_interruptible(vgdev->resp_wq, + response->callback_done); + if (ret) + goto out_remove; + + if 
(copy_to_user(user_data, &response->response_data, + rcr->response_size)) { + ret = -EFAULT; + goto out_remove; + } + + ret = 0; + +out_remove: + spin_lock(&vgdev->request_idr_lock); + response = idr_remove(&vgdev->request_idr, rcr->request_id); + spin_unlock(&vgdev->request_idr_lock); + kfree(response); +out: + return ret; } struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS] = { diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index cf2419515119..0165722af485 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -937,6 +937,31 @@ static void virtio_gpu_cmd_resource_create_cb(struct virtio_gpu_device *vgdev, wake_up_all(&vgdev->resp_wq); } +static void virtio_gpu_cmd_allocation_metadata_cb(struct virtio_gpu_device *vgdev, + struct virtio_gpu_vbuffer *vbuf) +{ + struct virtio_gpu_allocation_metadata_response *response; + struct virtio_gpu_resp_allocation_metadata *resp = + (struct virtio_gpu_resp_allocation_metadata *)vbuf->resp_buf; + uint32_t resp_type = le32_to_cpu(resp->hdr.type); + uint32_t handle = le32_to_cpu(resp->request_id); + size_t total_size = sizeof(struct virtio_gpu_resp_allocation_metadata) + + le32_to_cpu(resp->response_size); + + spin_lock(&vgdev->request_idr_lock); + response = idr_find(&vgdev->request_idr, handle); + spin_unlock(&vgdev->request_idr_lock); + + if (!response) + return; + + if (resp_type == VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA) + memcpy(&response->info, resp, total_size); + + response->callback_done = true; + wake_up_all(&vgdev->resp_wq); +} + int virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo, @@ -981,6 +1006,47 @@ virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, return 0; } +int +virtio_gpu_cmd_allocation_metadata(struct virtio_gpu_device *vgdev, + uint32_t request_id, + uint32_t request_size, + uint32_t response_size, + void *request, + struct virtio_gpu_fence *fence) +{ + 
struct virtio_gpu_vbuffer *vbuf; + struct virtio_gpu_allocation_metadata *cmd_p; + + if (response_size) { + struct virtio_gpu_resp_allocation_metadata *resp_buf; + size_t resp_size = sizeof(struct virtio_gpu_resp_allocation_metadata) + + response_size; + resp_buf = kzalloc(resp_size, GFP_KERNEL); + if (!resp_buf) + return -ENOMEM; + + cmd_p = virtio_gpu_alloc_cmd_resp(vgdev, + &virtio_gpu_cmd_allocation_metadata_cb, &vbuf, + sizeof(*cmd_p), resp_size, + resp_buf); + resp_buf->request_id = cpu_to_le32(request_id); + } else { + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + } + + memset(cmd_p, 0, sizeof(*cmd_p)); + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_ALLOCATION_METADATA); + cmd_p->request_id = cpu_to_le32(request_id); + cmd_p->request_size = request_size; + cmd_p->response_size = response_size; + + vbuf->data_buf = request; + vbuf->data_size = request_size; + + virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence); + return 0; +} + void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo, uint32_t ctx_id, -- GitLab From f5c353b091839f4cdcc7f11090cf5d3514f6f70c Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 11:49:07 -0700 Subject: [PATCH 0874/1278] CHROMIUM: drm/virtgpu: fix various warnings >> drivers/gpu/drm/virtio/virtgpu_ioctl.c:688:9: warning: 'ret' may be used uninitialized in this function [-Wmaybe-uninitialized] >> drivers/gpu/drm/virtio/virtgpu_ioctl.c:742:2: warning: 'request_id' may be used uninitialized in this function [-Wmaybe-uninitialized] >> drivers/gpu/drm/virtio/virtgpu_vq.c:1097:30: sparse: expected restricted __le32 [usertype] response_size BUG=chromium:924405 TEST=glxgears on Crostini Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/1818731 Commit-Queue: Gurchetan Singh Tested-by: Gurchetan Singh Reviewed-by: David Riley Bug: 153580313 Signed-off-by: Lingfeng Yang Change-Id: 
Ie5d93558d597dabb2dd673248aa8a1301913947c --- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 5 ++--- drivers/gpu/drm/virtio/virtgpu_vq.c | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 1850619a5d58..360de6fbd00f 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -593,7 +593,7 @@ static int virtio_gpu_resource_create_v2_ioctl(struct drm_device *dev, bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); void __user *args = u64_to_user_ptr(rc_v2->args); - total_size = offset = 0; + ret = total_size = offset = 0; params.size = rc_v2->size; params.guest_memory_type = rc_v2->guest_memory_type; params.resource_v2 = true; @@ -604,7 +604,6 @@ static int virtio_gpu_resource_create_v2_ioctl(struct drm_device *dev, return PTR_ERR(obj); if (!obj->pages) { - int ret; ret = virtio_gpu_object_get_sg_table(vgdev, obj); if (ret) goto err_free_obj; @@ -695,7 +694,7 @@ static int virtio_gpu_allocation_metadata_request_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { void *request; - uint32_t request_id; + uint32_t request_id = 0; struct drm_virtgpu_allocation_metadata_request *amr = data; struct virtio_gpu_device *vgdev = dev->dev_private; struct virtio_gpu_allocation_metadata_response *response; diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 0165722af485..a0479a96a1b6 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -1037,8 +1037,8 @@ virtio_gpu_cmd_allocation_metadata(struct virtio_gpu_device *vgdev, memset(cmd_p, 0, sizeof(*cmd_p)); cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_ALLOCATION_METADATA); cmd_p->request_id = cpu_to_le32(request_id); - cmd_p->request_size = request_size; - cmd_p->response_size = response_size; + cmd_p->request_size = cpu_to_le32(request_size); + cmd_p->response_size = cpu_to_le32(response_size); 
vbuf->data_buf = request; vbuf->data_size = request_size; -- GitLab From c2d015d2d8a6e1575369f4cf3de867bceef6e2d4 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 11:53:41 -0700 Subject: [PATCH 0875/1278] CHROMIUM: drm/virtgpu: add legacy VIRTIO_GPU_* values for non-upstream variants Upstream is foolishly using values for VIRTIO_GPU_RESP_OK_* that we are already using in crosvm's virtio-gpu impl. In order to work around this, this change renumbers to values that are unlikely to collide with existing ones, and renaming the existing ones as *_LEGACY so that the kernel may be compatible with both old and new versions of crosvm. BUG=chromium:1047867 TEST=glxgears on crostini Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2051223 Reviewed-by: Daniel Verkamp Tested-by: Zach Reizner Commit-Queue: Daniel Verkamp Bug: 153580313 Signed-off-by: Lingfeng Yang Change-Id: I40d37c877d13392cc26274561f13684aabc54a23 --- drivers/gpu/drm/virtio/virtgpu_vq.c | 12 ++++++++++-- include/uapi/linux/virtio_gpu.h | 10 ++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index a0479a96a1b6..305705a3a05a 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -920,8 +920,13 @@ static void virtio_gpu_cmd_resource_create_cb(struct virtio_gpu_device *vgdev, */ vbuf->data_buf = NULL; - if (resp_type != VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO) + switch (resp_type) { + case VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO: + case VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO_LEGACY: + break; + default: goto finish_pending; + } obj->num_planes = le32_to_cpu(resp->num_planes); obj->format_modifier = le64_to_cpu(resp->format_modifier); @@ -955,8 +960,11 @@ static void virtio_gpu_cmd_allocation_metadata_cb(struct virtio_gpu_device *vgde if (!response) return; - if (resp_type == VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA) + switch 
(resp_type) { + case VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA: + case VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA_LEGACY: memcpy(&response->info, resp, total_size); + } response->callback_done = true; wake_up_all(&vgdev->resp_wq); diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index c6353769821e..f483dff166e3 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -103,8 +103,14 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_OK_CAPSET_INFO, VIRTIO_GPU_RESP_OK_CAPSET, VIRTIO_GPU_RESP_OK_EDID, - VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO, - VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA, + + /* CHROMIUM: legacy responses */ + VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO_LEGACY = 0x1104, + VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA_LEGACY = 0x1106, + + /* CHROMIUM: success responses */ + VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO = 0x11FF, + VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA = 0x11FE, /* error responses */ VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, -- GitLab From 8c6da983ea97c2d9ee39205e4e6541928b5b53e1 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 11:56:27 -0700 Subject: [PATCH 0876/1278] CHROMIUM: virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature This feature allows the guest to request a UUID from the host for a particular virtio_gpu resource. The UUID can then be shared with other virtio devices, to allow the other host devices to access the virtio_gpu's corresponding host resource. Signed-off-by: David Stevens (am from https://patchwork.kernel.org/patch/11431391/) Fixes: - Renumber out-of-tree virtio_gpu resource v2 feature bits. Although that is technically a breaking change with respect to crosvm, all usage of the bits is currently hidden behind flags which aren't enabled outside of local builds. 
BUG=b:136269340 TEST=boot ARCVM and launch play store Signed-off-by: Keiichi Watanabe Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2090952 Reviewed-by: Gurchetan Singh Bug: 153580313 Signed-off-by: Lingfeng Yang Change-Id: Ie48129c56b521ded1d33c4b4d3ea6ae05104447d --- include/uapi/linux/virtio_gpu.h | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index f483dff166e3..3c3bb3342b7b 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -50,20 +50,24 @@ * VIRTIO_GPU_CMD_GET_EDID */ #define VIRTIO_GPU_F_EDID 1 +/* + * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID + */ +#define VIRTIO_GPU_F_RESOURCE_UUID 2 /* * VIRTIO_GPU_CMD_ALLOCATION_METADATA * VIRTIO_GPU_CMD_RESOURCE_CREATE_V2 */ -#define VIRTIO_GPU_F_RESOURCE_V2 2 +#define VIRTIO_GPU_F_RESOURCE_V2 3 /* * Ability to turn guest pages into host buffers. */ -#define VIRTIO_GPU_F_SHARED_GUEST 3 +#define VIRTIO_GPU_F_SHARED_GUEST 4 /* * Can inject host pages into guest. 
*/ -#define VIRTIO_GPU_F_HOST_COHERENT 4 +#define VIRTIO_GPU_F_HOST_COHERENT 5 enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, @@ -79,6 +83,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_GET_CAPSET_INFO, VIRTIO_GPU_CMD_GET_CAPSET, VIRTIO_GPU_CMD_GET_EDID, + VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID, /* 3d commands */ VIRTIO_GPU_CMD_CTX_CREATE = 0x0200, @@ -103,6 +108,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_OK_CAPSET_INFO, VIRTIO_GPU_RESP_OK_CAPSET, VIRTIO_GPU_RESP_OK_EDID, + VIRTIO_GPU_RESP_OK_RESOURCE_UUID, /* CHROMIUM: legacy responses */ VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO_LEGACY = 0x1104, @@ -441,4 +447,17 @@ enum virtio_gpu_formats { VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM = 134, }; +/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */ +struct virtio_gpu_resource_assign_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 padding; +}; + +/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */ +struct virtio_gpu_resp_resource_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __u8 uuid[16]; +}; + #endif -- GitLab From 4d701a3899580b291122ab7b147bc20981afd349 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 16 Apr 2020 12:19:08 -0700 Subject: [PATCH 0877/1278] CHROMIUM: drm/virtio: rebase zero-copy patches to virgl/drm-misc-next * Adds RESOURCE_MAP/RESOURCE_UNMAP * Removes guest_memory_type/guest_caching_type in favor of a bitmask * Removes EXECBUFFER_v2 until Q3 * Renames HOST_COHERENT to HOST_VISIBLE BUG=chromium:924405 TEST=compile Test: - dEQP-VK.smoke* pass w/ gfxstream and host coherent memory enabled - launch_cvd with 2d, virgl, and gfxstream modes work with current - launch_cvd with 2d, virgl, and gfxstream modes work w/ crosvm modified for host coherent memory (https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2035595) Signed-off-by: Lingfeng Yang Bug: 153580313 Change-Id: I04052c3d164c77c713bbc7251c357fd43653fa50 --- drivers/gpu/drm/virtio/virtgpu_debugfs.c | 5 +- drivers/gpu/drm/virtio/virtgpu_drv.c | 5 +- 
drivers/gpu/drm/virtio/virtgpu_drv.h | 54 +++--- drivers/gpu/drm/virtio/virtgpu_gem.c | 4 +- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 186 ++++++--------------- drivers/gpu/drm/virtio/virtgpu_kms.c | 13 +- drivers/gpu/drm/virtio/virtgpu_object.c | 37 +++-- drivers/gpu/drm/virtio/virtgpu_ttm.c | 8 +- drivers/gpu/drm/virtio/virtgpu_vq.c | 199 ++++++++++------------- include/uapi/drm/virtgpu_drm.h | 73 +++------ include/uapi/linux/virtio_gpu.h | 146 ++++++++--------- 11 files changed, 280 insertions(+), 450 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_debugfs.c b/drivers/gpu/drm/virtio/virtgpu_debugfs.c index 7ff140c6ef3c..0e29f1ce3f69 100644 --- a/drivers/gpu/drm/virtio/virtgpu_debugfs.c +++ b/drivers/gpu/drm/virtio/virtgpu_debugfs.c @@ -47,9 +47,8 @@ static int virtio_gpu_features(struct seq_file *m, void *data) virtio_add_bool(m, "virgl", vgdev->has_virgl_3d); virtio_add_bool(m, "edid", vgdev->has_edid); - virtio_add_bool(m, "resource v2", vgdev->has_resource_v2); - virtio_add_bool(m, "shared guest", vgdev->has_shared_guest); - virtio_add_bool(m, "host coherent", vgdev->has_host_coherent); + virtio_add_bool(m, "resource blob", vgdev->has_resource_blob); + virtio_add_bool(m, "host visible", vgdev->has_host_visible); virtio_add_int(m, "cap sets", vgdev->num_capsets); virtio_add_int(m, "scanouts", vgdev->num_scanouts); return 0; diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index a947a1901030..a58e9d52c07e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -178,9 +178,8 @@ static unsigned int features[] = { VIRTIO_GPU_F_VIRGL, #endif VIRTIO_GPU_F_EDID, - VIRTIO_GPU_F_RESOURCE_V2, - VIRTIO_GPU_F_SHARED_GUEST, - VIRTIO_GPU_F_HOST_COHERENT, + VIRTIO_GPU_F_RESOURCE_BLOB, + VIRTIO_GPU_F_HOST_VISIBLE, }; static struct virtio_driver virtio_gpu_driver = { .feature_table = features, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 
a2efe4bdb7ea..cb4ec764b522 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -58,9 +58,8 @@ struct virtio_gpu_object_params { bool dumb; /* 3d */ bool virgl; - bool resource_v2; - enum virtio_gpu_memory_type guest_memory_type; - enum virtio_gpu_caching_type caching_type; + bool blob; + uint32_t blob_flags; uint32_t target; uint32_t bind; uint32_t depth; @@ -85,14 +84,13 @@ struct virtio_gpu_object { uint32_t mapped; void *vmap; bool dumb; - bool resource_v2; + bool blob; struct ttm_place placement_code; struct ttm_placement placement; struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; bool created; - enum virtio_gpu_memory_type guest_memory_type; - enum virtio_gpu_caching_type caching_type; + uint32_t blob_flags; }; #define gem_to_virtio_gpu_obj(gobj) \ container_of((gobj), struct virtio_gpu_object, gem_base) @@ -201,12 +199,6 @@ struct virtio_gpu_drv_cap_cache { atomic_t is_valid; }; -struct virtio_gpu_allocation_metadata_response { - bool callback_done; - struct virtio_gpu_resp_allocation_metadata info; - uint32_t response_data[]; -}; - struct virtio_gpu_device { struct device *dev; struct drm_device *ddev; @@ -240,9 +232,8 @@ struct virtio_gpu_device { bool has_virgl_3d; bool has_edid; - bool has_resource_v2; - bool has_shared_guest; - bool has_host_coherent; + bool has_resource_blob; + bool has_host_visible; struct work_struct config_changed_work; @@ -374,24 +365,23 @@ virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo, struct virtio_gpu_object_params *params, struct virtio_gpu_fence *fence); + void -virtio_gpu_cmd_resource_create_v2(struct virtio_gpu_device *vgdev, - uint32_t resource_id, uint32_t guest_memory_type, - uint32_t caching_type, uint64_t size, - uint64_t pci_addr, uint32_t nents, - uint32_t args_size, void *data, uint32_t data_size, - struct virtio_gpu_fence *fence); -void -virtio_gpu_cmd_resource_v2_unref(struct virtio_gpu_device *vgdev, - uint32_t 
resource_id, - struct virtio_gpu_fence *fence); -int -virtio_gpu_cmd_allocation_metadata(struct virtio_gpu_device *vgdev, - uint32_t request_id, - uint32_t request_size, - uint32_t response_size, - void *request, - struct virtio_gpu_fence *fence); +virtio_gpu_cmd_resource_create_blob(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object *bo, + uint32_t ctx_id, uint32_t flags, + uint64_t size, uint64_t memory_id, + uint32_t nents, + struct virtio_gpu_mem_entry *ents); + +void virtio_gpu_cmd_map(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object *bo, + uint64_t offset, + struct virtio_gpu_fence *fence); + +void virtio_gpu_cmd_unmap(struct virtio_gpu_device *vgdev, + uint32_t resource_id); + void virtio_gpu_ctrl_ack(struct virtqueue *vq); void virtio_gpu_cursor_ack(struct virtqueue *vq); void virtio_gpu_fence_ack(struct virtqueue *vq); diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index ce6e4ec4a432..a607bd154e38 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -134,7 +134,7 @@ int virtio_gpu_gem_object_open(struct drm_gem_object *obj, struct virtio_gpu_object *qobj = gem_to_virtio_gpu_obj(obj); int r; - if (!vgdev->has_virgl_3d || qobj->resource_v2) + if (!vgdev->has_virgl_3d) return 0; r = virtio_gpu_object_reserve(qobj, false); @@ -155,7 +155,7 @@ void virtio_gpu_gem_object_close(struct drm_gem_object *obj, struct virtio_gpu_object *qobj = gem_to_virtio_gpu_obj(obj); int r; - if (!vgdev->has_virgl_3d || qobj->resource_v2) + if (!vgdev->has_virgl_3d) return; r = virtio_gpu_object_reserve(qobj, false); diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 360de6fbd00f..df0a86e3562c 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -261,6 +261,12 @@ static int virtio_gpu_getparam_ioctl(struct drm_device *dev, void *data, case VIRTGPU_PARAM_CAPSET_QUERY_FIX: value = 1; 
break; + case VIRTGPU_PARAM_RESOURCE_BLOB: + value = vgdev->has_resource_blob == true ? 1 : 0; + break; + case VIRTGPU_PARAM_HOST_VISIBLE: + value = vgdev->has_host_visible == true ? 1 : 0; + break; default: return -EINVAL; } @@ -575,29 +581,44 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev, return 0; } -static int virtio_gpu_resource_create_v2_ioctl(struct drm_device *dev, +static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { void *buf; int ret, si, nents; uint32_t handle = 0; - uint64_t pci_addr = 0; struct scatterlist *sg; - size_t total_size, offset; struct virtio_gpu_object *obj; struct virtio_gpu_fence *fence; struct virtio_gpu_mem_entry *ents; - struct drm_virtgpu_resource_create_v2 *rc_v2 = data; + struct drm_virtgpu_resource_create_blob *rc_blob = data; struct virtio_gpu_object_params params = { 0 }; struct virtio_gpu_device *vgdev = dev->dev_private; + struct virtio_gpu_fpriv *vfpriv = file->driver_priv; bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); - void __user *args = u64_to_user_ptr(rc_v2->args); + bool mappable = rc_blob->flags & VIRTGPU_RES_BLOB_USE_MAPPABLE; + bool guest = rc_blob->flags & VIRTGPU_RES_BLOB_GUEST_MASK; + + params.size = rc_blob->size; + params.blob_flags = rc_blob->flags; + params.blob = true; + + if (rc_blob->cmd_size && vfpriv) { + void *buf; + void __user *cmd = u64_to_user_ptr(rc_blob->cmd); + + buf = kzalloc(rc_blob->cmd_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, cmd, rc_blob->cmd_size)) { + kfree(buf); + return -EFAULT; + } - ret = total_size = offset = 0; - params.size = rc_v2->size; - params.guest_memory_type = rc_v2->guest_memory_type; - params.resource_v2 = true; - params.caching_type = rc_v2->caching_type; + virtio_gpu_cmd_submit(vgdev, buf, rc_blob->cmd_size, + vfpriv->ctx_id, NULL); + } obj = virtio_gpu_alloc_object(dev, ¶ms, NULL); if (IS_ERR(obj)) @@ -609,7 +630,7 @@ static int 
virtio_gpu_resource_create_v2_ioctl(struct drm_device *dev, goto err_free_obj; } - if (rc_v2->guest_memory_type == VIRTGPU_MEMORY_HOST_COHERENT) { + if (!guest) { nents = 0; } else if (use_dma_api) { obj->mapped = dma_map_sg(vgdev->vdev->dev.parent, @@ -620,34 +641,14 @@ static int virtio_gpu_resource_create_v2_ioctl(struct drm_device *dev, nents = obj->pages->nents; } - total_size = nents * sizeof(struct virtio_gpu_mem_entry) + - rc_v2->args_size; - - buf = kzalloc(total_size, GFP_KERNEL); - if (!buf) { - ret = -ENOMEM; - goto err_free_obj; - } - - ents = buf; - if (rc_v2->guest_memory_type == VIRTGPU_MEMORY_HOST_COHERENT) { - pci_addr = vgdev->caddr + obj->tbo.offset; - } else { + ents = kzalloc(nents * sizeof(struct virtio_gpu_mem_entry), GFP_KERNEL); + if (guest) { for_each_sg(obj->pages->sgl, sg, nents, si) { ents[si].addr = cpu_to_le64(use_dma_api ? sg_dma_address(sg) : sg_phys(sg)); ents[si].length = cpu_to_le32(sg->length); ents[si].padding = 0; - offset += sizeof(struct virtio_gpu_mem_entry); - } - } - - if (rc_v2->args_size) { - if (copy_from_user(buf + offset, args, - rc_v2->args_size)) { - ret = -EFAULT; - goto err_free_buf; } } @@ -657,15 +658,18 @@ static int virtio_gpu_resource_create_v2_ioctl(struct drm_device *dev, goto err_free_buf; } + virtio_gpu_cmd_resource_create_blob(vgdev, obj, vfpriv->ctx_id, + rc_blob->flags, rc_blob->size, + rc_blob->memory_id, nents, + ents); + ret = drm_gem_handle_create(file, &obj->gem_base, &handle); if (ret) goto err_fence_put; - virtio_gpu_cmd_resource_create_v2(vgdev, obj->hw_res_handle, - rc_v2->guest_memory_type, - rc_v2->caching_type, rc_v2->size, - pci_addr, nents, rc_v2->args_size, - buf, total_size, fence); + if (!guest && mappable) { + virtio_gpu_cmd_map(vgdev, obj, obj->tbo.offset, fence); + } /* * No need to call virtio_gpu_object_reserve since the buffer is not @@ -677,8 +681,8 @@ static int virtio_gpu_resource_create_v2_ioctl(struct drm_device *dev, dma_fence_put(&fence->f); 
drm_gem_object_put_unlocked(&obj->gem_base); - rc_v2->resource_id = obj->hw_res_handle; - rc_v2->gem_handle = handle; + rc_blob->res_handle = obj->hw_res_handle; + rc_blob->bo_handle = handle; return 0; err_fence_put: @@ -690,94 +694,6 @@ static int virtio_gpu_resource_create_v2_ioctl(struct drm_device *dev, return ret; } -static int virtio_gpu_allocation_metadata_request_ioctl(struct drm_device *dev, - void *data, struct drm_file *file) -{ - void *request; - uint32_t request_id = 0; - struct drm_virtgpu_allocation_metadata_request *amr = data; - struct virtio_gpu_device *vgdev = dev->dev_private; - struct virtio_gpu_allocation_metadata_response *response; - void __user *params = u64_to_user_ptr(amr->request); - - if (!amr->request_size) - return -EINVAL; - - request = kzalloc(amr->request_size, GFP_KERNEL); - if (!request) { - return -ENOMEM; - } - - if (copy_from_user(request, params, - amr->request_size)) { - kfree(request); - return -EFAULT; - } - - if (amr->response_size) { - response = kzalloc(sizeof(struct virtio_gpu_allocation_metadata_response) + - amr->response_size, GFP_KERNEL); - if (!response) { - kfree(request); - return -ENOMEM; - } - - response->callback_done = false; - idr_preload(GFP_KERNEL); - spin_lock(&vgdev->request_idr_lock); - request_id = idr_alloc(&vgdev->request_idr, response, 1, 0, - GFP_NOWAIT); - spin_unlock(&vgdev->request_idr_lock); - idr_preload_end(); - amr->request_id = request_id; - } - - virtio_gpu_cmd_allocation_metadata(vgdev, request_id, - amr->request_size, - amr->response_size, - request, - NULL); - return 0; -} - -static int virtio_gpu_allocation_metadata_response_ioctl(struct drm_device *dev, - void *data, struct drm_file *file) -{ - int ret = -EINVAL; - struct virtio_gpu_allocation_metadata_response *response; - struct virtio_gpu_device *vgdev = dev->dev_private; - struct drm_virtgpu_allocation_metadata_response *rcr = data; - void __user *user_data = u64_to_user_ptr(rcr->response); - - 
spin_lock(&vgdev->request_idr_lock); - response = idr_find(&vgdev->request_idr, rcr->request_id); - spin_unlock(&vgdev->request_idr_lock); - - if (!response) - goto out; - - ret = wait_event_interruptible(vgdev->resp_wq, - response->callback_done); - if (ret) - goto out_remove; - - if (copy_to_user(user_data, &response->response_data, - rcr->response_size)) { - ret = -EFAULT; - goto out_remove; - } - - ret = 0; - -out_remove: - spin_lock(&vgdev->request_idr_lock); - response = idr_remove(&vgdev->request_idr, rcr->request_id); - spin_unlock(&vgdev->request_idr_lock); - kfree(response); -out: - return ret; -} - struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS] = { DRM_IOCTL_DEF_DRV(VIRTGPU_MAP, virtio_gpu_map_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), @@ -811,15 +727,7 @@ struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS] = { DRM_IOCTL_DEF_DRV(VIRTGPU_GET_CAPS, virtio_gpu_get_caps_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(VIRTGPU_RESOURCE_CREATE_V2, - virtio_gpu_resource_create_v2_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), - - DRM_IOCTL_DEF_DRV(VIRTGPU_ALLOCATION_METADATA_REQUEST, - virtio_gpu_allocation_metadata_request_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), - - DRM_IOCTL_DEF_DRV(VIRTGPU_ALLOCATION_METADATA_RESPONSE, - virtio_gpu_allocation_metadata_response_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VIRTGPU_RESOURCE_CREATE_BLOB, + virtio_gpu_resource_create_blob_ioctl, + DRM_RENDER_ALLOW) }; diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index dff2cd32f783..22434e34b4d8 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -184,8 +184,8 @@ int virtio_gpu_init(struct drm_device *dev) DRM_INFO("EDID support available.\n"); } - if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_V2)) { - if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_HOST_COHERENT)) { + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_BLOB)) { + if 
(virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_HOST_VISIBLE)) { vgdev->cbar = 4; vgdev->caddr = pci_resource_start(dev->pdev, vgdev->cbar); vgdev->csize = pci_resource_len(dev->pdev, vgdev->cbar); @@ -197,14 +197,13 @@ int virtio_gpu_init(struct drm_device *dev) "at 0x%lx, size %ld MB", dev_name(&dev->pdev->dev), vgdev->cbar, vgdev->caddr, vgdev->csize >> 20); - vgdev->has_host_coherent = true; + vgdev->has_host_visible = true; } } - if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_SHARED_GUEST)) - vgdev->has_shared_guest = true; - - vgdev->has_resource_v2 = true; + vgdev->has_resource_blob = true; + DRM_INFO("resource_v2: %u, host visible %u\n", + vgdev->has_resource_blob, vgdev->has_host_visible); } ret = virtio_find_vqs(vgdev->vdev, 2, vqs, callbacks, names, NULL); diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index ef25eacf7fb7..67d52189cfb8 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -28,6 +28,7 @@ #include #include "virtgpu_drv.h" +#include static int virtio_gpu_virglrenderer_workaround = 1; module_param_named(virglhack, virtio_gpu_virglrenderer_workaround, int, 0400); @@ -93,44 +94,47 @@ static void virtio_gpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) kfree(bo); } +// define internally for testing purposes +#define VIRTGPU_RESOURCE_CACHE_MASK 0xf000 +#define VIRTGPU_RESOURCE_CACHE_CACHED 0x1000 +#define VIRTGPU_RESOURCE_CACHE_UNCACHED 0x2000 +#define VIRTGPU_RESOURCE_CACHE_WC 0x3000 + static void virtio_gpu_init_ttm_placement(struct virtio_gpu_object *vgbo) { u32 c = 1; u32 ttm_caching_flags = 0; + u32 cache_type = (vgbo->blob_flags & VIRTGPU_RESOURCE_CACHE_MASK); + u32 guest = (vgbo->blob_flags & VIRTGPU_RES_BLOB_GUEST_MASK); + vgbo->placement.placement = &vgbo->placement_code; vgbo->placement.busy_placement = &vgbo->placement_code; vgbo->placement_code.fpfn = 0; vgbo->placement_code.lpfn = 0; - switch (vgbo->caching_type) { - case 
VIRTIO_GPU_CACHED: + switch (cache_type) { + case VIRTGPU_RESOURCE_CACHE_CACHED: ttm_caching_flags = TTM_PL_FLAG_CACHED; break; - case VIRTIO_GPU_WRITE_COMBINE: + case VIRTGPU_RESOURCE_CACHE_WC: ttm_caching_flags = TTM_PL_FLAG_WC; break; - case VIRTIO_GPU_UNCACHED: + case VIRTGPU_RESOURCE_CACHE_UNCACHED: ttm_caching_flags = TTM_PL_FLAG_UNCACHED; break; default: ttm_caching_flags = TTM_PL_MASK_CACHING; } - - switch (vgbo->guest_memory_type) { - case VIRTIO_GPU_MEMORY_UNDEFINED: - case VIRTIO_GPU_MEMORY_TRANSFER: - case VIRTIO_GPU_MEMORY_SHARED_GUEST: + if (!guest && vgbo->blob) { vgbo->placement_code.flags = - TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT | + ttm_caching_flags | TTM_PL_FLAG_VRAM | TTM_PL_FLAG_NO_EVICT; - break; - case VIRTIO_GPU_MEMORY_HOST_COHERENT: + } else { vgbo->placement_code.flags = - ttm_caching_flags | TTM_PL_FLAG_VRAM | + TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT | TTM_PL_FLAG_NO_EVICT; - break; } vgbo->placement.num_placement = c; vgbo->placement.num_busy_placement = c; @@ -167,9 +171,8 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, return ret; } bo->dumb = params->dumb; - bo->resource_v2 = params->resource_v2; - bo->guest_memory_type = params->guest_memory_type; - bo->caching_type = params->caching_type; + bo->blob = params->blob; + bo->blob_flags = params->blob_flags; if (params->virgl) { virtio_gpu_cmd_resource_create_3d(vgdev, bo, params, fence); diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c index f381a477cd00..5202fc4f51cd 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ttm.c +++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c @@ -280,7 +280,7 @@ static int virtio_gpu_ttm_vram_unbind(struct ttm_tt *ttm) virtio_gpu_get_vgdev(gtt->obj->tbo.bdev); struct virtio_gpu_object *obj = gtt->obj; - virtio_gpu_cmd_resource_v2_unref(vgdev, obj->hw_res_handle, NULL); + virtio_gpu_cmd_unmap(vgdev, obj->hw_res_handle); return 0; } @@ -349,6 +349,7 @@ static struct ttm_tt *virtio_gpu_ttm_tt_create2(struct 
ttm_buffer_object *bo, struct virtio_gpu_device *vgdev; struct virtio_gpu_object *obj; struct virtio_gpu_ttm_tt *gtt; + uint32_t guest; vgdev = virtio_gpu_get_vgdev(bo->bdev); obj = container_of(bo, struct virtio_gpu_object, tbo); @@ -357,8 +358,9 @@ static struct ttm_tt *virtio_gpu_ttm_tt_create2(struct ttm_buffer_object *bo, if (gtt == NULL) return NULL; gtt->obj = obj; + guest = (obj->blob_flags & VIRTGPU_RES_BLOB_GUEST_MASK); - if (obj->guest_memory_type == VIRTIO_GPU_MEMORY_HOST_COHERENT) { + if (!guest && obj->blob) { gtt->ttm.ttm.func = &virtio_gpu_vram_func; if (ttm_tt_init(>t->ttm.ttm, bo->bdev, size, page_flags, dummy_read_page)) { @@ -426,7 +428,7 @@ int virtio_gpu_ttm_init(struct virtio_gpu_device *vgdev) goto err_mm_init; } - if (vgdev->has_host_coherent) { + if (vgdev->has_host_visible) { r = ttm_bo_init_mm(&vgdev->mman.bdev, TTM_PL_VRAM, vgdev->csize >> PAGE_SHIFT); if (r) { diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 305705a3a05a..cb54e1ac5343 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -532,54 +532,6 @@ virtio_gpu_cmd_resource_attach_backing(struct virtio_gpu_device *vgdev, virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence); } -void -virtio_gpu_cmd_resource_create_v2(struct virtio_gpu_device *vgdev, - uint32_t resource_id, - uint32_t guest_memory_type, - uint32_t caching_type, uint64_t size, - uint64_t pci_addr, uint32_t nents, - uint32_t args_size, void *data, - uint32_t data_size, - struct virtio_gpu_fence *fence) -{ - struct virtio_gpu_resource_create_v2 *cmd_p; - struct virtio_gpu_vbuffer *vbuf; - - cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); - memset(cmd_p, 0, sizeof(*cmd_p)); - - cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_CREATE_V2); - cmd_p->resource_id = cpu_to_le32(resource_id); - cmd_p->guest_memory_type = cpu_to_le32(guest_memory_type); - cmd_p->caching_type = cpu_to_le32(caching_type); - 
cmd_p->size = cpu_to_le64(size); - cmd_p->pci_addr = cpu_to_le64(pci_addr); - cmd_p->args_size = cpu_to_le32(args_size); - cmd_p->nr_entries = cpu_to_le32(nents); - - vbuf->data_buf = data; - vbuf->data_size = data_size; - - virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence); -} - -void -virtio_gpu_cmd_resource_v2_unref(struct virtio_gpu_device *vgdev, - uint32_t resource_id, - struct virtio_gpu_fence *fence) -{ - struct virtio_gpu_resource_v2_unref *cmd_p; - struct virtio_gpu_vbuffer *vbuf; - - cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); - memset(cmd_p, 0, sizeof(*cmd_p)); - - cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_CREATE_V2_UNREF); - cmd_p->resource_id = cpu_to_le32(resource_id); - - virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence); -} - static void virtio_gpu_cmd_get_display_info_cb(struct virtio_gpu_device *vgdev, struct virtio_gpu_vbuffer *vbuf) { @@ -942,34 +894,6 @@ static void virtio_gpu_cmd_resource_create_cb(struct virtio_gpu_device *vgdev, wake_up_all(&vgdev->resp_wq); } -static void virtio_gpu_cmd_allocation_metadata_cb(struct virtio_gpu_device *vgdev, - struct virtio_gpu_vbuffer *vbuf) -{ - struct virtio_gpu_allocation_metadata_response *response; - struct virtio_gpu_resp_allocation_metadata *resp = - (struct virtio_gpu_resp_allocation_metadata *)vbuf->resp_buf; - uint32_t resp_type = le32_to_cpu(resp->hdr.type); - uint32_t handle = le32_to_cpu(resp->request_id); - size_t total_size = sizeof(struct virtio_gpu_resp_allocation_metadata) + - le32_to_cpu(resp->response_size); - - spin_lock(&vgdev->request_idr_lock); - response = idr_find(&vgdev->request_idr, handle); - spin_unlock(&vgdev->request_idr_lock); - - if (!response) - return; - - switch (resp_type) { - case VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA: - case VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA_LEGACY: - memcpy(&response->info, resp, total_size); - } - - response->callback_done = true; - wake_up_all(&vgdev->resp_wq); -} - int 
virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo, @@ -1014,47 +938,6 @@ virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, return 0; } -int -virtio_gpu_cmd_allocation_metadata(struct virtio_gpu_device *vgdev, - uint32_t request_id, - uint32_t request_size, - uint32_t response_size, - void *request, - struct virtio_gpu_fence *fence) -{ - struct virtio_gpu_vbuffer *vbuf; - struct virtio_gpu_allocation_metadata *cmd_p; - - if (response_size) { - struct virtio_gpu_resp_allocation_metadata *resp_buf; - size_t resp_size = sizeof(struct virtio_gpu_resp_allocation_metadata) + - response_size; - resp_buf = kzalloc(resp_size, GFP_KERNEL); - if (!resp_buf) - return -ENOMEM; - - cmd_p = virtio_gpu_alloc_cmd_resp(vgdev, - &virtio_gpu_cmd_allocation_metadata_cb, &vbuf, - sizeof(*cmd_p), resp_size, - resp_buf); - resp_buf->request_id = cpu_to_le32(request_id); - } else { - cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); - } - - memset(cmd_p, 0, sizeof(*cmd_p)); - cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_ALLOCATION_METADATA); - cmd_p->request_id = cpu_to_le32(request_id); - cmd_p->request_size = cpu_to_le32(request_size); - cmd_p->response_size = cpu_to_le32(response_size); - - vbuf->data_buf = request; - vbuf->data_size = request_size; - - virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence); - return 0; -} - void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo, uint32_t ctx_id, @@ -1135,6 +1018,9 @@ int virtio_gpu_object_attach(struct virtio_gpu_device *vgdev, struct scatterlist *sg; int si, nents; + if (obj->blob) + return 0; + if (WARN_ON_ONCE(!obj->created)) return -EINVAL; @@ -1210,3 +1096,82 @@ void virtio_gpu_cursor_ping(struct virtio_gpu_device *vgdev, memcpy(cur_p, &output->cursor, sizeof(output->cursor)); virtio_gpu_queue_cursor(vgdev, vbuf); } + +static void virtio_gpu_cmd_resource_map_cb(struct virtio_gpu_device *vgdev, + 
struct virtio_gpu_vbuffer *vbuf) +{ + /* + * No-op for v5.4. + */ +} + +void virtio_gpu_cmd_map(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object *bo, + uint64_t offset, + struct virtio_gpu_fence *fence) +{ + struct virtio_gpu_resource_map *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + struct virtio_gpu_resp_map_info *resp_buf; + + resp_buf = kzalloc(sizeof(*resp_buf), GFP_KERNEL); + if (!resp_buf) { + DRM_ERROR("allocation failure\n"); + return; + } + + cmd_p = virtio_gpu_alloc_cmd_resp(vgdev, + virtio_gpu_cmd_resource_map_cb, &vbuf, sizeof(*cmd_p), + sizeof(struct virtio_gpu_resp_map_info), resp_buf); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_MAP); + cmd_p->resource_id = cpu_to_le32(bo->hw_res_handle); + cmd_p->offset = offset; + + virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence); +} + +void virtio_gpu_cmd_unmap(struct virtio_gpu_device *vgdev, + uint32_t resource_id) +{ + struct virtio_gpu_resource_unmap *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_UNMAP); + cmd_p->resource_id = cpu_to_le32(resource_id); + + virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); +} + +void +virtio_gpu_cmd_resource_create_blob(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object *bo, + uint32_t ctx_id, uint32_t flags, + uint64_t size, uint64_t memory_id, + uint32_t nents, + struct virtio_gpu_mem_entry *ents) +{ + struct virtio_gpu_resource_create_blob *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB); + cmd_p->hdr.ctx_id = cpu_to_le32(ctx_id); + cmd_p->resource_id = cpu_to_le32(bo->hw_res_handle); + cmd_p->flags = cpu_to_le32(flags); + cmd_p->size = cpu_to_le64(size); + cmd_p->memory_id = 
cpu_to_le64(memory_id); + cmd_p->nr_entries = cpu_to_le32(nents); + + vbuf->data_buf = ents; + vbuf->data_size = sizeof(*ents) * nents; + + virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); + bo->created = true; +} diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index 8211b48f2241..6b9c8a5f87e7 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -46,9 +46,7 @@ extern "C" { #define DRM_VIRTGPU_TRANSFER_TO_HOST 0x07 #define DRM_VIRTGPU_WAIT 0x08 #define DRM_VIRTGPU_GET_CAPS 0x09 -#define DRM_VIRTGPU_RESOURCE_CREATE_V2 0x0a -#define DRM_VIRTGPU_ALLOCATION_METADATA_REQUEST 0x0b -#define DRM_VIRTGPU_ALLOCATION_METADATA_RESPONSE 0x0c +#define DRM_VIRTGPU_RESOURCE_CREATE_BLOB 0x0a #define VIRTGPU_EXECBUF_FENCE_FD_IN 0x01 #define VIRTGPU_EXECBUF_FENCE_FD_OUT 0x02 @@ -74,19 +72,8 @@ struct drm_virtgpu_execbuffer { #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ #define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */ -#define VIRTGPU_PARAM_RESOURCE_V2 3 -#define VIRTGPU_PARAM_SHARED_GUEST 4 -#define VIRTGPU_PARAM_HOST_COHERENT 5 - -#define VIRTGPU_MEMORY_UNDEFINED 0 -#define VIRTGPU_MEMORY_TRANSFER 1 -#define VIRTGPU_MEMORY_SHARED_GUEST 2 -#define VIRTGPU_MEMORY_HOST_COHERENT 3 - -#define VIRTGPU_UNDEFINED_CACHING 0 -#define VIRTGPU_CACHED 1 -#define VIRTGPU_WRITE_COMBINE 2 -#define VIRTGPU_UNCACHED 3 +#define VIRTGPU_PARAM_RESOURCE_BLOB 3 /* DRM_VIRTGPU_RESOURCE_CREATE_BLOB */ +#define VIRTGPU_PARAM_HOST_VISIBLE 4 struct drm_virtgpu_getparam { __u64 param; @@ -162,29 +149,27 @@ struct drm_virtgpu_get_caps { __u32 pad; }; -struct drm_virtgpu_resource_create_v2 { - __u32 resource_id; - __u32 guest_memory_type; - __u32 caching_type; - __u32 args_size; - __u32 gem_handle; - __u64 size; - __u64 args; /* void */ -}; +struct drm_virtgpu_resource_create_blob { +#define VIRTGPU_RES_BLOB_GUEST_MASK 0x000f +#define VIRTGPU_RES_BLOB_GUEST_NONE 0x0000 +#define VIRTGPU_RES_BLOB_GUEST_SYSTEM 
0x0001 -struct drm_virtgpu_allocation_metadata_request { - __u32 request_id; - __u32 pad; - __u32 request_size; - __u32 response_size; - __u64 request; /* void */ -}; +#define VIRTGPU_RES_BLOB_HOST_MASK 0x00f0 +#define VIRTGPU_RES_BLOB_HOST_NONE 0x0000 +#define VIRTGPU_RES_BLOB_HOST 0x0010 -struct drm_virtgpu_allocation_metadata_response { - __u32 request_id; - __u32 pad; - __u32 response_size; - __u64 response; /* void */ +#define VIRTGPU_RES_BLOB_USE_MASK 0x0f00 +#define VIRTGPU_RES_BLOB_USE_NONE 0x0000 +#define VIRTGPU_RES_BLOB_USE_MAPPABLE 0x0100 +#define VIRTGPU_RES_BLOB_USE_SHAREABLE 0x0200 +#define VIRTGPU_RES_BLOB_USE_CROSS_DEVICE 0x0400 + __u32 flags; + __u32 bo_handle; + __u32 res_handle; + __u32 cmd_size; + __u64 cmd; + __u64 size; + __u64 memory_id; }; #define DRM_IOCTL_VIRTGPU_MAP \ @@ -222,17 +207,9 @@ struct drm_virtgpu_allocation_metadata_response { DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GET_CAPS, \ struct drm_virtgpu_get_caps) -#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_V2 \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE_V2, \ - struct drm_virtgpu_resource_create_v2) - -#define DRM_IOCTL_VIRTGPU_ALLOCATION_METADATA_REQUEST \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_ALLOCATION_METADATA_REQUEST, \ - struct drm_virtgpu_allocation_metadata_request) - -#define DRM_IOCTL_VIRTGPU_ALLOCATION_METADATA_RESPONSE \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_ALLOCATION_METADATA_RESPONSE, \ - struct drm_virtgpu_allocation_metadata_response) +#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE_BLOB, \ + struct drm_virtgpu_resource_create_blob) #if defined(__cplusplus) } diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 3c3bb3342b7b..cd303076225e 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -54,20 +54,20 @@ * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */ #define VIRTIO_GPU_F_RESOURCE_UUID 2 - /* - * 
VIRTIO_GPU_CMD_ALLOCATION_METADATA - * VIRTIO_GPU_CMD_RESOURCE_CREATE_V2 + * VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB */ -#define VIRTIO_GPU_F_RESOURCE_V2 3 +#define VIRTIO_GPU_F_RESOURCE_BLOB 3 /* - * Ability to turn guest pages into host buffers. + * VIRTIO_GPU_CMD_RESOURCE_MAP + * VIRTIO_GPU_CMD_RESOURCE_UNMAP */ -#define VIRTIO_GPU_F_SHARED_GUEST 4 +#define VIRTIO_GPU_F_HOST_VISIBLE 4 /* - * Can inject host pages into guest. + * VIRTIO_GPU_CMD_CTX_CREATE_V2 */ -#define VIRTIO_GPU_F_HOST_COHERENT 5 +#define VIRTIO_GPU_F_VULKAN 5 + enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, @@ -94,9 +94,9 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D, VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D, VIRTIO_GPU_CMD_SUBMIT_3D, - VIRTIO_GPU_CMD_RESOURCE_CREATE_V2, - VIRTIO_GPU_CMD_RESOURCE_CREATE_V2_UNREF, - VIRTIO_GPU_CMD_ALLOCATION_METADATA, + VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB, + VIRTIO_GPU_CMD_RESOURCE_MAP, + VIRTIO_GPU_CMD_RESOURCE_UNMAP, /* cursor commands */ VIRTIO_GPU_CMD_UPDATE_CURSOR = 0x0300, @@ -109,14 +109,13 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_OK_CAPSET, VIRTIO_GPU_RESP_OK_EDID, VIRTIO_GPU_RESP_OK_RESOURCE_UUID, + VIRTIO_GPU_RESP_OK_MAP_INFO, /* CHROMIUM: legacy responses */ VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO_LEGACY = 0x1104, - VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA_LEGACY = 0x1106, /* CHROMIUM: success responses */ VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO = 0x11FF, - VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA = 0x11FE, /* error responses */ VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, @@ -128,30 +127,6 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_ERR_INVALID_MEMORY_ID, }; -enum virtio_gpu_memory_type { - VIRTIO_GPU_MEMORY_UNDEFINED = 0, - - /* - * Traditional virtio-gpu memory. - * Has both host and guest side storage. - * - * VIRTIO_GPU_CMD_TRANSFER_* commands are used - * to copy between guest and host storage. - * - * Created using VIRTIO_GPU_CMD_RESOURCE_CREATE_V2.
- */ - VIRTIO_GPU_MEMORY_TRANSFER, - VIRTIO_GPU_MEMORY_SHARED_GUEST, - VIRTIO_GPU_MEMORY_HOST_COHERENT, -}; - -enum virtio_gpu_caching_type { - VIRTIO_GPU_UNDEFINED_CACHING = 0, - VIRTIO_GPU_CACHED, - VIRTIO_GPU_WRITE_COMBINE, - VIRTIO_GPU_UNCACHED, -}; - #define VIRTIO_GPU_FLAG_FENCE (1 << 0) struct virtio_gpu_ctrl_hdr { @@ -287,7 +262,6 @@ struct virtio_gpu_transfer_host_3d { struct virtio_gpu_resource_create_3d { struct virtio_gpu_ctrl_hdr hdr; __le32 resource_id; - /* memory_type is VIRTIO_GPU_MEMORY_TRANSFER */ __le32 target; __le32 format; __le32 bind; @@ -328,47 +302,6 @@ struct virtio_gpu_cmd_submit { __le32 padding; }; -/* VIRTIO_GPU_CMD_RESOURCE_CREATE_V2 */ -struct virtio_gpu_resource_create_v2 { - struct virtio_gpu_ctrl_hdr hdr; - __le32 resource_id; - __le32 guest_memory_type; - __le32 caching_type; - __le32 pad; - __le64 size; - __le64 pci_addr; - __le32 args_size; - __le32 nr_entries; - /* ('nr_entries' * struct virtio_gpu_mem_entry) + 'args_size' - * bytes follow here. - */ -}; - -/* VIRTIO_GPU_CMD_RESOURCE_CREATE_V2_UNREF */ -struct virtio_gpu_resource_v2_unref { - struct virtio_gpu_ctrl_hdr hdr; - __le32 resource_id; - __le32 padding; -}; - -/* VIRTIO_GPU_CMD_RESOURCE_CREATE_V2 */ -struct virtio_gpu_allocation_metadata { - struct virtio_gpu_ctrl_hdr hdr; - __le32 request_id; - __le32 pad; - __le32 request_size; - __le32 response_size; - /* 'request_size' bytes go here */ -}; - -/* VIRTIO_GPU_RESP_OK_ALLOCATION_METADATA */ -struct virtio_gpu_resp_allocation_metadata { - struct virtio_gpu_ctrl_hdr hdr; - __le32 request_id; - __le32 response_size; - /* 'response_size' bytes go here */ -}; - #define VIRTIO_GPU_CAPSET_VIRGL 1 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ @@ -460,4 +393,59 @@ struct virtio_gpu_resp_resource_uuid { __u8 uuid[16]; }; + +/* VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB */ +struct virtio_gpu_resource_create_blob { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; +#define VIRTIO_GPU_RES_BLOB_GUEST_MASK 0x000f +#define 
VIRTIO_GPU_RES_BLOB_GUEST_NONE 0x0000 +#define VIRTIO_GPU_RES_BLOB_GUEST_SYSTEM 0x0001 + +#define VIRTIO_GPU_RES_BLOB_HOST_MASK 0x00f0 +#define VIRTIO_GPU_RES_BLOB_HOST_NONE 0x0000 +#define VIRTIO_GPU_RES_BLOB_HOST 0x0010 + +#define VIRTIO_GPU_RES_BLOB_USE_MASK 0x0f00 +#define VIRTIO_GPU_RES_BLOB_USE_NONE 0x0000 +#define VIRTIO_GPU_RES_BLOB_USE_MAPPABLE 0x0100 +#define VIRTIO_GPU_RES_BLOB_USE_SHAREABLE 0x0200 +#define VIRTIO_GPU_RES_BLOB_USE_CROSS_DEVICE 0x0400 + __le32 flags; + __le64 size; + __le64 memory_id; + __le32 nr_entries; + __le32 padding; + /* + * sizeof(nr_entries * virtio_gpu_mem_entry) bytes follow + */ +}; + +/* VIRTIO_GPU_CMD_RESOURCE_MAP */ +struct virtio_gpu_resource_map { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 padding; + __le64 offset; +}; + +/* VIRTIO_GPU_RESP_OK_MAP_INFO */ +#define VIRTIO_GPU_MAP_CACHE_MASK 0x0f +#define VIRTIO_GPU_MAP_CACHE_NONE 0x00 +#define VIRTIO_GPU_MAP_CACHE_CACHED 0x01 +#define VIRTIO_GPU_MAP_CACHE_UNCACHED 0x02 +#define VIRTIO_GPU_MAP_CACHE_WC 0x03 +struct virtio_gpu_resp_map_info { + struct virtio_gpu_ctrl_hdr hdr; + __u32 map_flags; + __u32 padding; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_UNMAP */ +struct virtio_gpu_resource_unmap { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 padding; +}; + #endif -- GitLab From 3a5930a6027c5825c408a131b2b7567ea14e5f47 Mon Sep 17 00:00:00 2001 From: Yiwei Zhang Date: Mon, 2 Mar 2020 15:50:44 -0800 Subject: [PATCH 0878/1278] UPSTREAM: gpu/trace: add a gpu total memory usage tracepoint This change adds the below gpu memory tracepoint: gpu_mem/gpu_mem_total: track global or proc gpu memory total usages Per process tracking of total gpu memory usage in the gem layer is not appropriate and hard to implement with trivial overhead. 
So for the gfx device driver layer to track total gpu memory usage both globally and per process in an easy and uniform way is to integrate the tracepoint in this patch to the underlying varied implementations of gpu memory tracking system from vendors. Putting this tracepoint in the common trace events can not only help wean the gfx drivers off of debugfs but also greatly help the downstream Android gpu vendors because debugfs is to be deprecated in the upcoming Android release. Then the gpu memory tracking of both Android kernel and the upstream linux kernel can stay closely, which can benefit the whole kernel eco-system in the long term. Link: http://lkml.kernel.org/r/20200302235044.59163-1-zzyiwei@google.com Bug: 154523335 Test: saw this tracepoint on Pixel after gfx driver integration Change-Id: Ia6f273015c656e7a286041d850e83e49fd063aa4 (cherry picked from commit bbd9d05618a6d608c72640b1d3d651a75913456a) Acked-by: Greg Kroah-Hartman Signed-off-by: Yiwei Zhang Signed-off-by: Steven Rostedt (VMware) --- drivers/Kconfig | 2 ++ drivers/gpu/Makefile | 1 + drivers/gpu/trace/Kconfig | 4 +++ drivers/gpu/trace/Makefile | 3 ++ drivers/gpu/trace/trace_gpu_mem.c | 13 +++++++ include/trace/events/gpu_mem.h | 57 +++++++++++++++++++++++++++++++ 6 files changed, 80 insertions(+) create mode 100644 drivers/gpu/trace/Kconfig create mode 100644 drivers/gpu/trace/Makefile create mode 100644 drivers/gpu/trace/trace_gpu_mem.c create mode 100644 include/trace/events/gpu_mem.h diff --git a/drivers/Kconfig b/drivers/Kconfig index 52ff5b25d146..4ac9b0379377 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -193,6 +193,8 @@ source "drivers/thunderbolt/Kconfig" source "drivers/android/Kconfig" +source "drivers/gpu/trace/Kconfig" + source "drivers/nvdimm/Kconfig" source "drivers/dax/Kconfig" diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile index e9ed439a5b65..99251b5b3be3 100644 --- a/drivers/gpu/Makefile +++ b/drivers/gpu/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_TEGRA_HOST1X) += 
host1x/ obj-y += drm/ vga/ obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/ +obj-$(CONFIG_TRACE_GPU_MEM) += trace/ diff --git a/drivers/gpu/trace/Kconfig b/drivers/gpu/trace/Kconfig new file mode 100644 index 000000000000..c24e9edd022e --- /dev/null +++ b/drivers/gpu/trace/Kconfig @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config TRACE_GPU_MEM + bool diff --git a/drivers/gpu/trace/Makefile b/drivers/gpu/trace/Makefile new file mode 100644 index 000000000000..b70fbdc5847f --- /dev/null +++ b/drivers/gpu/trace/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_TRACE_GPU_MEM) += trace_gpu_mem.o diff --git a/drivers/gpu/trace/trace_gpu_mem.c b/drivers/gpu/trace/trace_gpu_mem.c new file mode 100644 index 000000000000..01e855897b6d --- /dev/null +++ b/drivers/gpu/trace/trace_gpu_mem.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GPU memory trace points + * + * Copyright (C) 2020 Google, Inc. + */ + +#include <linux/module.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/gpu_mem.h> + +EXPORT_TRACEPOINT_SYMBOL(gpu_mem_total); diff --git a/include/trace/events/gpu_mem.h b/include/trace/events/gpu_mem.h new file mode 100644 index 000000000000..1897822a9150 --- /dev/null +++ b/include/trace/events/gpu_mem.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * GPU memory trace points + * + * Copyright (C) 2020 Google, Inc. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gpu_mem + +#if !defined(_TRACE_GPU_MEM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_GPU_MEM_H + +#include <linux/tracepoint.h> + +/* + * The gpu_mem_total event indicates that there's an update to either the + * global or process total gpu memory counters. + * + * This event should be emitted whenever the kernel device driver allocates, + * frees, imports, unimports memory in the GPU addressable space. + * + * @gpu_id: This is the gpu id. + * + * @pid: Put 0 for global total, while positive pid for process total. + * + * @size: Virtual size of the allocation in bytes.
+ * + */ +TRACE_EVENT(gpu_mem_total, + + TP_PROTO(uint32_t gpu_id, uint32_t pid, uint64_t size), + + TP_ARGS(gpu_id, pid, size), + + TP_STRUCT__entry( + __field(uint32_t, gpu_id) + __field(uint32_t, pid) + __field(uint64_t, size) + ), + + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->pid = pid; + __entry->size = size; + ), + + TP_printk("gpu_id=%u pid=%u size=%llu", + __entry->gpu_id, + __entry->pid, + __entry->size) +); + +#endif /* _TRACE_GPU_MEM_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> -- GitLab From f20eb0ba961c8dc720b96a8671e95bb8b7acaa08 Mon Sep 17 00:00:00 2001 From: Kelly Rossmoyer Date: Tue, 21 Apr 2020 22:46:29 -0700 Subject: [PATCH 0879/1278] ANDROID: fix wakeup reason findings The 0-day test bot found three minor issues in the wakeup_reason enhancements patch, including two undeclared functions that should have been static, an allegedly uninitialized pointer (which is actually set in the line immediately prior to cppcheck's complaint), and a type mismatch when printing timespec64 fields on a 32-bit build. These changes address those findings.
Fixes: 8c29afa60138 ("ANDROID: power: wakeup_reason: wake reason enhancements") Bug: 153727431 Reported-by: kbuild test robot Change-Id: I9194f85d0ca7921461866b73dc24e1783b1da6c6 Signed-off-by: Kelly Rossmoyer --- kernel/power/wakeup_reason.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 503a71fc49fc..3c118c044633 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -102,7 +102,7 @@ static void delete_list(struct list_head *head) static bool add_sibling_node_sorted(struct list_head *head, int irq) { - struct wakeup_irq_node *n; + struct wakeup_irq_node *n = NULL; struct list_head *predecessor = head; if (unlikely(WARN_ON(!head))) @@ -196,7 +196,8 @@ void log_threaded_irq_wakeup_reason(int irq, int parent_irq) spin_unlock_irqrestore(&wakeup_reason_lock, flags); } -void __log_abort_or_abnormal_wake(bool abort, const char *fmt, va_list args) +static void __log_abort_or_abnormal_wake(bool abort, const char *fmt, + va_list args) { unsigned long flags; @@ -330,8 +331,10 @@ static ssize_t last_suspend_time_show(struct kobject *kobj, /* Export suspend_resume_time and sleep_time in pair here. 
*/ return sprintf(buf, "%llu.%09lu %llu.%09lu\n", - suspend_resume_time.tv_sec, suspend_resume_time.tv_nsec, - sleep_time.tv_sec, sleep_time.tv_nsec); + (unsigned long long)suspend_resume_time.tv_sec, + suspend_resume_time.tv_nsec, + (unsigned long long)sleep_time.tv_sec, + sleep_time.tv_nsec); } static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason); @@ -375,7 +378,7 @@ static struct notifier_block wakeup_reason_pm_notifier_block = { .notifier_call = wakeup_reason_pm_event, }; -int __init wakeup_reason_init(void) +static int __init wakeup_reason_init(void) { if (register_pm_notifier(&wakeup_reason_pm_notifier_block)) { pr_warn("[%s] failed to register PM notifier\n", __func__); -- GitLab From 5645b6891509dad2eda5b7d22b1b7fde5dcac144 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Fri, 21 Feb 2020 21:27:26 +0100 Subject: [PATCH 0880/1278] bus: sunxi-rsb: Return correct data when mixing 16-bit and 8-bit reads [ Upstream commit a43ab30dcd4a1abcdd0d2461bf1cf7c0817f6cd3 ] When doing a 16-bit read that returns data in the MSB byte, the RSB_DATA register will keep the MSB byte unchanged when doing the following 8-bit read. sunxi_rsb_read() will then return a result that contains high byte from 16-bit read mixed with the 8-bit result. The consequence is that after this happens the PMIC's regmap will look like this: (0x33 is the high byte from the 16-bit read) % cat /sys/kernel/debug/regmap/sunxi-rsb-3a3/registers 00: 33 01: 33 02: 33 03: 33 04: 33 05: 33 06: 33 07: 33 08: 33 09: 33 0a: 33 0b: 33 0c: 33 0d: 33 0e: 33 [snip] Fix this by masking the result of the read with the correct mask based on the size of the read. There are no 16-bit users in the mainline kernel, so this doesn't need to get into the stable tree. 
Signed-off-by: Ondrej Jirman Acked-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/bus/sunxi-rsb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 1b76d9585902..2ca2cc56bcef 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -345,7 +345,7 @@ static int sunxi_rsb_read(struct sunxi_rsb *rsb, u8 rtaddr, u8 addr, if (ret) goto unlock; - *buf = readl(rsb->regs + RSB_DATA); + *buf = readl(rsb->regs + RSB_DATA) & GENMASK(len * 8 - 1, 0); unlock: mutex_unlock(&rsb->lock); -- GitLab From 2cd620744fc3f7d81423ba42f50f24f03d6d8956 Mon Sep 17 00:00:00 2001 From: Zheng Wei Date: Mon, 16 Mar 2020 22:23:47 +0800 Subject: [PATCH 0881/1278] net: vxge: fix wrong __VA_ARGS__ usage [ Upstream commit b317538c47943f9903860d83cc0060409e12d2ff ] printk in macro vxge_debug_ll uses __VA_ARGS__ without "##" prefix, it causes a build error when there is no variable arguments(e.g. only fmt is specified.). Signed-off-by: Zheng Wei Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/neterion/vxge/vxge-config.h | 2 +- drivers/net/ethernet/neterion/vxge/vxge-main.h | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.h b/drivers/net/ethernet/neterion/vxge/vxge-config.h index cfa970417f81..fe4a4315d20d 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.h +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.h @@ -2065,7 +2065,7 @@ vxge_hw_vpath_strip_fcs_check(struct __vxge_hw_device *hldev, u64 vpath_mask); if ((level >= VXGE_ERR && VXGE_COMPONENT_LL & VXGE_DEBUG_ERR_MASK) || \ (level >= VXGE_TRACE && VXGE_COMPONENT_LL & VXGE_DEBUG_TRACE_MASK))\ if ((mask & VXGE_DEBUG_MASK) == mask) \ - printk(fmt "\n", __VA_ARGS__); \ + printk(fmt "\n", ##__VA_ARGS__); \ } while (0) #else #define vxge_debug_ll(level, mask, fmt, ...) 
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.h b/drivers/net/ethernet/neterion/vxge/vxge-main.h index 3a79d93b8445..5b535aa10d23 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.h +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.h @@ -454,49 +454,49 @@ int vxge_fw_upgrade(struct vxgedev *vdev, char *fw_name, int override); #if (VXGE_DEBUG_LL_CONFIG & VXGE_DEBUG_MASK) #define vxge_debug_ll_config(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_LL_CONFIG, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_LL_CONFIG, fmt, ##__VA_ARGS__) #else #define vxge_debug_ll_config(level, fmt, ...) #endif #if (VXGE_DEBUG_INIT & VXGE_DEBUG_MASK) #define vxge_debug_init(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_INIT, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_INIT, fmt, ##__VA_ARGS__) #else #define vxge_debug_init(level, fmt, ...) #endif #if (VXGE_DEBUG_TX & VXGE_DEBUG_MASK) #define vxge_debug_tx(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_TX, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_TX, fmt, ##__VA_ARGS__) #else #define vxge_debug_tx(level, fmt, ...) #endif #if (VXGE_DEBUG_RX & VXGE_DEBUG_MASK) #define vxge_debug_rx(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_RX, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_RX, fmt, ##__VA_ARGS__) #else #define vxge_debug_rx(level, fmt, ...) #endif #if (VXGE_DEBUG_MEM & VXGE_DEBUG_MASK) #define vxge_debug_mem(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_MEM, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_MEM, fmt, ##__VA_ARGS__) #else #define vxge_debug_mem(level, fmt, ...) #endif #if (VXGE_DEBUG_ENTRYEXIT & VXGE_DEBUG_MASK) #define vxge_debug_entryexit(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_ENTRYEXIT, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_ENTRYEXIT, fmt, ##__VA_ARGS__) #else #define vxge_debug_entryexit(level, fmt, ...) #endif #if (VXGE_DEBUG_INTR & VXGE_DEBUG_MASK) #define vxge_debug_intr(level, fmt, ...) 
\ - vxge_debug_ll(level, VXGE_DEBUG_INTR, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_INTR, fmt, ##__VA_ARGS__) #else #define vxge_debug_intr(level, fmt, ...) #endif -- GitLab From e70596018d9cd7eb5615a1b0ddfb9b381bb4f348 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Fri, 20 Mar 2020 23:13:16 +0000 Subject: [PATCH 0882/1278] hinic: fix a bug of waitting for IO stopped [ Upstream commit 96758117dc528e6d84bd23d205e8cf7f31eda029 ] it's unreliable for fw to check whether IO is stopped, so driver wait for enough time to ensure IO process is done in hw before freeing resources Signed-off-by: Luo bin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/huawei/hinic/hinic_hw_dev.c | 51 +------------------ 1 file changed, 2 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 46aba02b8672..5763e333a9af 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -373,50 +373,6 @@ static int wait_for_db_state(struct hinic_hwdev *hwdev) return -EFAULT; } -static int wait_for_io_stopped(struct hinic_hwdev *hwdev) -{ - struct hinic_cmd_io_status cmd_io_status; - struct hinic_hwif *hwif = hwdev->hwif; - struct pci_dev *pdev = hwif->pdev; - struct hinic_pfhwdev *pfhwdev; - unsigned long end; - u16 out_size; - int err; - - if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) { - dev_err(&pdev->dev, "Unsupported PCI Function type\n"); - return -EINVAL; - } - - pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev); - - cmd_io_status.func_idx = HINIC_HWIF_FUNC_IDX(hwif); - - end = jiffies + msecs_to_jiffies(IO_STATUS_TIMEOUT); - do { - err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM, - HINIC_COMM_CMD_IO_STATUS_GET, - &cmd_io_status, sizeof(cmd_io_status), - &cmd_io_status, &out_size, - HINIC_MGMT_MSG_SYNC); - if ((err) || (out_size != sizeof(cmd_io_status))) { - dev_err(&pdev->dev, 
"Failed to get IO status, ret = %d\n", - err); - return err; - } - - if (cmd_io_status.status == IO_STOPPED) { - dev_info(&pdev->dev, "IO stopped\n"); - return 0; - } - - msleep(20); - } while (time_before(jiffies, end)); - - dev_err(&pdev->dev, "Wait for IO stopped - Timeout\n"); - return -ETIMEDOUT; -} - /** * clear_io_resource - set the IO resources as not active in the NIC * @hwdev: the NIC HW device @@ -436,11 +392,8 @@ static int clear_io_resources(struct hinic_hwdev *hwdev) return -EINVAL; } - err = wait_for_io_stopped(hwdev); - if (err) { - dev_err(&pdev->dev, "IO has not stopped yet\n"); - return err; - } + /* sleep 100ms to wait for firmware stopping I/O */ + msleep(100); cmd_clear_io_res.func_idx = HINIC_HWIF_FUNC_IDX(hwif); -- GitLab From 82146d1de45651ddd02a2c693382b732e4d428bb Mon Sep 17 00:00:00 2001 From: Luo bin Date: Fri, 20 Mar 2020 23:13:19 +0000 Subject: [PATCH 0883/1278] hinic: fix wrong para of wait_for_completion_timeout [ Upstream commit 0da7c322f116210ebfdda59c7da663a6fc5e9cc8 ] the second input parameter of wait_for_completion_timeout should be jiffies instead of millisecond Signed-off-by: Luo bin Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c | 3 ++- drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c index 7d95f0866fb0..e1de97effcd2 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c @@ -398,7 +398,8 @@ static int cmdq_sync_cmd_direct_resp(struct hinic_cmdq *cmdq, spin_unlock_bh(&cmdq->cmdq_lock); - if (!wait_for_completion_timeout(&done, CMDQ_TIMEOUT)) { + if (!wait_for_completion_timeout(&done, + msecs_to_jiffies(CMDQ_TIMEOUT))) { spin_lock_bh(&cmdq->cmdq_lock); if (cmdq->errcode[curr_prod_idx] == &errcode) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c index 278dc13f3dae..9fcf2e5e0003 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c @@ -52,7 +52,7 @@ #define MSG_NOT_RESP 0xFFFF -#define MGMT_MSG_TIMEOUT 1000 +#define MGMT_MSG_TIMEOUT 5000 #define mgmt_to_pfhwdev(pf_mgmt) \ container_of(pf_mgmt, struct hinic_pfhwdev, pf_to_mgmt) @@ -276,7 +276,8 @@ static int msg_to_mgmt_sync(struct hinic_pf_to_mgmt *pf_to_mgmt, goto unlock_sync_msg; } - if (!wait_for_completion_timeout(recv_done, MGMT_MSG_TIMEOUT)) { + if (!wait_for_completion_timeout(recv_done, + msecs_to_jiffies(MGMT_MSG_TIMEOUT))) { dev_err(&pdev->dev, "MGMT timeout, MSG id = %d\n", msg_id); err = -ETIMEDOUT; goto unlock_sync_msg; -- GitLab From 43fde2214bfc82bf4f015350ad882f258bbde356 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Tue, 24 Mar 2020 17:10:00 +0530 Subject: [PATCH 0884/1278] cxgb4/ptp: pass the sign of offset delta in FW CMD [ Upstream commit 50e0d28d3808146cc19b0d5564ef4ba9e5bf3846 ] cxgb4_ptp_fineadjtime() doesn't pass the signedness of offset delta in FW_PTP_CMD. 
Fix it by passing correct sign. Signed-off-by: Raju Rangoju Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c index 9f9d6cae39d5..758f2b836328 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c @@ -246,6 +246,9 @@ static int cxgb4_ptp_fineadjtime(struct adapter *adapter, s64 delta) FW_PTP_CMD_PORTID_V(0)); c.retval_len16 = cpu_to_be32(FW_CMD_LEN16_V(sizeof(c) / 16)); c.u.ts.sc = FW_PTP_SC_ADJ_FTIME; + c.u.ts.sign = (delta < 0) ? 1 : 0; + if (delta < 0) + delta = -delta; c.u.ts.tm = cpu_to_be64(delta); err = t4_wr_mbox(adapter, adapter->mbox, &c, sizeof(c), NULL); -- GitLab From d2bca2f3e947f34c22fde7d2ddfd8d2715ee0987 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Thu, 26 Mar 2020 18:14:29 +0800 Subject: [PATCH 0885/1278] qlcnic: Fix bad kzalloc null test [ Upstream commit bcaeb886ade124331a6f3a5cef34a3f1484c0a03 ] In qlcnic_83xx_get_reset_instruction_template, the variable of null test is bad, so correct it. Signed-off-by: Xu Wang Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index 07f9067affc6..cda5b0a9e948 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -1720,7 +1720,7 @@ static int qlcnic_83xx_get_reset_instruction_template(struct qlcnic_adapter *p_d ahw->reset.seq_error = 0; ahw->reset.buff = kzalloc(QLC_83XX_RESTART_TEMPLATE_SIZE, GFP_KERNEL); - if (p_dev->ahw->reset.buff == NULL) + if (ahw->reset.buff == NULL) return -ENOMEM; p_buff = p_dev->ahw->reset.buff; -- GitLab From 7a737ff66d257cd626811c1a9ccf8896d3e86b60 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Thu, 26 Mar 2020 22:22:43 +0100 Subject: [PATCH 0886/1278] i2c: st: fix missing struct parameter description [ Upstream commit f491c6687332920e296d0209e366fe2ca7eab1c6 ] Fix a missing struct parameter description to allow warning free W=1 compilation. Signed-off-by: Alain Volmat Reviewed-by: Patrice Chotard Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-st.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-st.c b/drivers/i2c/busses/i2c-st.c index 9e62f893958a..81158ae8bfe3 100644 --- a/drivers/i2c/busses/i2c-st.c +++ b/drivers/i2c/busses/i2c-st.c @@ -437,6 +437,7 @@ static void st_i2c_wr_fill_tx_fifo(struct st_i2c_dev *i2c_dev) /** * st_i2c_rd_fill_tx_fifo() - Fill the Tx FIFO in read mode * @i2c_dev: Controller's private data + * @max: Maximum amount of data to fill into the Tx FIFO * * This functions fills the Tx FIFO with fixed pattern when * in read mode to trigger clock. 
-- GitLab From a1200a6c59585b37c0fd7d180c41d856a427596c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 9 Mar 2020 21:26:21 -0700 Subject: [PATCH 0887/1278] null_blk: Fix the null_add_dev() error path [ Upstream commit 2004bfdef945fe55196db6b9cdf321fbc75bb0de ] If null_add_dev() fails, clear dev->nullb. This patch fixes the following KASAN complaint: BUG: KASAN: use-after-free in nullb_device_submit_queues_store+0xcf/0x160 [null_blk] Read of size 8 at addr ffff88803280fc30 by task check/8409 Call Trace: dump_stack+0xa5/0xe6 print_address_description.constprop.0+0x26/0x260 __kasan_report.cold+0x7b/0x99 kasan_report+0x16/0x20 __asan_load8+0x58/0x90 nullb_device_submit_queues_store+0xcf/0x160 [null_blk] configfs_write_file+0x1c4/0x250 [configfs] __vfs_write+0x4c/0x90 vfs_write+0x145/0x2c0 ksys_write+0xd7/0x180 __x64_sys_write+0x47/0x50 do_syscall_64+0x6f/0x2f0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7ff370926317 Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007fff2dd2da48 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007ff370926317 RDX: 0000000000000002 RSI: 0000559437ef23f0 RDI: 0000000000000001 RBP: 0000559437ef23f0 R08: 000000000000000a R09: 0000000000000001 R10: 0000559436703471 R11: 0000000000000246 R12: 0000000000000002 R13: 00007ff370a006a0 R14: 00007ff370a014a0 R15: 00007ff370a008a0 Allocated by task 8409: save_stack+0x23/0x90 __kasan_kmalloc.constprop.0+0xcf/0xe0 kasan_kmalloc+0xd/0x10 kmem_cache_alloc_node_trace+0x129/0x4c0 null_add_dev+0x24a/0xe90 [null_blk] nullb_device_power_store+0x1b6/0x270 [null_blk] configfs_write_file+0x1c4/0x250 [configfs] __vfs_write+0x4c/0x90 vfs_write+0x145/0x2c0 ksys_write+0xd7/0x180 __x64_sys_write+0x47/0x50 do_syscall_64+0x6f/0x2f0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 8409: 
save_stack+0x23/0x90 __kasan_slab_free+0x112/0x160 kasan_slab_free+0x12/0x20 kfree+0xdf/0x250 null_add_dev+0xaf3/0xe90 [null_blk] nullb_device_power_store+0x1b6/0x270 [null_blk] configfs_write_file+0x1c4/0x250 [configfs] __vfs_write+0x4c/0x90 vfs_write+0x145/0x2c0 ksys_write+0xd7/0x180 __x64_sys_write+0x47/0x50 do_syscall_64+0x6f/0x2f0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 2984c8684f96 ("nullb: factor disk parameters") Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Cc: Johannes Thumshirn Cc: Hannes Reinecke Cc: Ming Lei Cc: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/null_blk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index f01d4a8a783a..e9776ca0996b 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1919,6 +1919,7 @@ static int null_add_dev(struct nullb_device *dev) cleanup_queues(nullb); out_free_nullb: kfree(nullb); + dev->nullb = NULL; out: return rv; } -- GitLab From df92d602b7305702bb784f83db4c0ccb125a5a17 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 9 Mar 2020 21:26:22 -0700 Subject: [PATCH 0888/1278] null_blk: Handle null_add_dev() failures properly [ Upstream commit 9b03b713082a31a5b90e0a893c72aa620e255c26 ] If null_add_dev() fails then null_del_dev() is called with a NULL argument. Make null_del_dev() handle this scenario correctly. 
This patch fixes the following KASAN complaint: null-ptr-deref in null_del_dev+0x28/0x280 [null_blk] Read of size 8 at addr 0000000000000000 by task find/1062 Call Trace: dump_stack+0xa5/0xe6 __kasan_report.cold+0x65/0x99 kasan_report+0x16/0x20 __asan_load8+0x58/0x90 null_del_dev+0x28/0x280 [null_blk] nullb_group_drop_item+0x7e/0xa0 [null_blk] client_drop_item+0x53/0x80 [configfs] configfs_rmdir+0x395/0x4e0 [configfs] vfs_rmdir+0xb6/0x220 do_rmdir+0x238/0x2c0 __x64_sys_unlinkat+0x75/0x90 do_syscall_64+0x6f/0x2f0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Cc: Johannes Thumshirn Cc: Hannes Reinecke Cc: Ming Lei Cc: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/null_blk.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index e9776ca0996b..b4078901dbcb 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1593,7 +1593,12 @@ static void null_nvm_unregister(struct nullb *nullb) {} static void null_del_dev(struct nullb *nullb) { - struct nullb_device *dev = nullb->dev; + struct nullb_device *dev; + + if (!nullb) + return; + + dev = nullb->dev; ida_simple_remove(&nullb_indexes, nullb->index); -- GitLab From 063ec77ac61b1990796f70f05ee1df352bd8461b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 12 Feb 2020 23:23:20 +0300 Subject: [PATCH 0889/1278] null_blk: fix spurious IO errors after failed past-wp access [ Upstream commit ff77042296d0a54535ddf74412c5ae92cb4ec76a ] Steps to reproduce: BLKRESETZONE zone 0 // force EIO pwrite(fd, buf, 4096, 4096); [issue more IO including zone ioctls] It will start failing randomly including IO to unrelated zones because of ->error "reuse". Trigger can be partition detection as well if test is not run immediately which is even more entertaining. The fix is of course to clear ->error where necessary. 
Reviewed-by: Christoph Hellwig Signed-off-by: Alexey Dobriyan (SK hynix) Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/null_blk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index b4078901dbcb..b12e373aa956 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -622,6 +622,7 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) if (tag != -1U) { cmd = &nq->cmds[tag]; cmd->tag = tag; + cmd->error = BLK_STS_OK; cmd->nq = nq; if (nq->dev->irqmode == NULL_IRQ_TIMER) { hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, @@ -1399,6 +1400,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, cmd->timer.function = null_cmd_timer_expired; } cmd->rq = bd->rq; + cmd->error = BLK_STS_OK; cmd->nq = nq; blk_mq_start_request(bd->rq); -- GitLab From 7037828ef7e37b2f2d8b27128c64b57e32e3fc4c Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 4 Mar 2020 12:45:26 +0100 Subject: [PATCH 0890/1278] x86: Don't let pgprot_modify() change the page encryption bit [ Upstream commit 6db73f17c5f155dbcfd5e48e621c706270b84df0 ] When SEV or SME is enabled and active, vm_get_page_prot() typically returns with the encryption bit set. This means that users of pgprot_modify(, vm_get_page_prot()) (mprotect_fixup(), do_mmap()) end up with a value of vma->vm_pg_prot that is not consistent with the intended protection of the PTEs. This is also important for fault handlers that rely on the VMA vm_page_prot to set the page protection. Fix this by not allowing pgprot_modify() to change the encryption bit, similar to how it's done for PAT bits. 
Signed-off-by: Thomas Hellstrom Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Acked-by: Tom Lendacky Link: https://lkml.kernel.org/r/20200304114527.3636-2-thomas_os@shipmail.org Signed-off-by: Sasha Levin --- arch/x86/include/asm/pgtable.h | 7 +++++-- arch/x86/include/asm/pgtable_types.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 6a4b1a54ff47..98a337e3835d 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -588,12 +588,15 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return __pmd(val); } -/* mprotect needs to preserve PAT bits when updating vm_page_prot */ +/* + * mprotect needs to preserve PAT and encryption bits when updating + * vm_page_prot + */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) { pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; - pgprotval_t addbits = pgprot_val(newprot); + pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK; return __pgprot(preservebits | addbits); } diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 85f8279c885a..e6c870c24065 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -124,7 +124,7 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* -- GitLab From 6695041218fd683b906afd20ca6ada7e3a7512f7 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 28 Feb 2020 17:51:48 +0300 Subject: [PATCH 0891/1278] block: keep bdi->io_pages in sync with max_sectors_kb for stacked devices [ Upstream commit e74d93e96d721c4297f2a900ad0191890d2fc2b0 ] Field bdi->io_pages added in commit 9491ae4aade6 ("mm: 
don't cap request size based on read-ahead setting") removes unneeded split of read requests. Stacked drivers do not call blk_queue_max_hw_sectors(). Instead they set limits of their devices by blk_set_stacking_limits() + disk_stack_limits(). Field bdi->io_pages stays zero until the user sets max_sectors_kb via sysfs. This patch updates io_pages after merging limits in disk_stack_limits(). Commit c6d6e9b0f6b4 ("dm: do not allow readahead to limit IO size") fixed the same problem for device-mapper devices, this one fixes MD RAIDs. Fixes: 9491ae4aade6 ("mm: don't cap request size based on read-ahead setting") Reviewed-by: Paul Menzel Reviewed-by: Bob Liu Signed-off-by: Konstantin Khlebnikov Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- block/blk-settings.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-settings.c b/block/blk-settings.c index 6c2faaa38cc1..e0a744921ed3 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -717,6 +717,9 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n", top, bottom); } + + t->backing_dev_info->io_pages = + t->limits.max_sectors >> (PAGE_SHIFT - 9); } EXPORT_SYMBOL(disk_stack_limits); -- GitLab From 151996b56cbef447c17aa4c91ab1c88e9ce71ebc Mon Sep 17 00:00:00 2001 From: Sungbo Eo Date: Thu, 19 Mar 2020 11:34:48 +0900 Subject: [PATCH 0892/1278] irqchip/versatile-fpga: Handle chained IRQs properly [ Upstream commit 486562da598c59e9f835b551d7cf19507de2d681 ] Enclose the chained handler with chained_irq_{enter,exit}(), so that the muxed interrupts get properly acked. This patch also fixes a reboot bug on OX820 SoC, where the jiffies timer interrupt is never acked. The kernel waits a clock tick forever in calibrate_delay_converge(), which leads to a boot hang. 
Fixes: c41b16f8c9d9 ("ARM: integrator/versatile: consolidate FPGA IRQ handling code") Signed-off-by: Sungbo Eo Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200319023448.1479701-1-mans0n@gorani.run Signed-off-by: Sasha Levin --- drivers/irqchip/irq-versatile-fpga.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c index 928858dada75..70e2cfff8175 100644 --- a/drivers/irqchip/irq-versatile-fpga.c +++ b/drivers/irqchip/irq-versatile-fpga.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -68,12 +69,16 @@ static void fpga_irq_unmask(struct irq_data *d) static void fpga_irq_handle(struct irq_desc *desc) { + struct irq_chip *chip = irq_desc_get_chip(desc); struct fpga_irq_data *f = irq_desc_get_handler_data(desc); - u32 status = readl(f->base + IRQ_STATUS); + u32 status; + + chained_irq_enter(chip, desc); + status = readl(f->base + IRQ_STATUS); if (status == 0) { do_bad_IRQ(desc); - return; + goto out; } do { @@ -82,6 +87,9 @@ static void fpga_irq_handle(struct irq_desc *desc) status &= ~(1 << irq); generic_handle_irq(irq_find_mapping(f->domain, irq)); } while (status); + +out: + chained_irq_exit(chip, desc); } /* -- GitLab From 964a86d29150870d22b70f72c02a13d01cca909a Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 18 Mar 2020 10:15:15 +0800 Subject: [PATCH 0893/1278] sched: Avoid scale real weight down to zero [ Upstream commit 26cf52229efc87e2effa9d788f9b33c40fb3358a ] During our testing, we found a case that shares no longer working correctly, the cgroup topology is like: /sys/fs/cgroup/cpu/A (shares=102400) /sys/fs/cgroup/cpu/A/B (shares=2) /sys/fs/cgroup/cpu/A/B/C (shares=1024) /sys/fs/cgroup/cpu/D (shares=1024) /sys/fs/cgroup/cpu/D/E (shares=1024) /sys/fs/cgroup/cpu/D/E/F (shares=1024) The same benchmark is running in group C & F, no other tasks are running, the benchmark is capable to consumed all the 
CPUs. We suppose the group C will win more CPU resources since it could enjoy all the shares of group A, but it's F who wins much more. The reason is because we have group B with shares as 2, since A->cfs_rq.load.weight == B->se.load.weight == B->shares/nr_cpus, so A->cfs_rq.load.weight become very small. And in calc_group_shares() we calculate shares as: load = max(scale_load_down(cfs_rq->load.weight), cfs_rq->avg.load_avg); shares = (tg_shares * load) / tg_weight; Since the 'cfs_rq->load.weight' is too small, the load become 0 after scale down, although 'tg_shares' is 102400, shares of the se which stand for group A on root cfs_rq become 2. While the se of D on root cfs_rq is far more bigger than 2, so it wins the battle. Thus when scale_load_down() scale real weight down to 0, it's no longer telling the real story, the caller will have the wrong information and the calculation will be buggy. This patch add check in scale_load_down(), so the real weight will be >= MIN_SHARES after scale, after applied the group C wins as expected. 
Suggested-by: Peter Zijlstra Signed-off-by: Michael Wang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Link: https://lkml.kernel.org/r/38e8e212-59a1-64b2-b247-b6d0b52d8dc1@linux.alibaba.com Signed-off-by: Sasha Levin --- kernel/sched/sched.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 268f560ec998..391d73a12ad7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -89,7 +89,13 @@ static inline void cpu_load_update_active(struct rq *this_rq) { } #ifdef CONFIG_64BIT # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT) # define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT) -# define scale_load_down(w) ((w) >> SCHED_FIXEDPOINT_SHIFT) +# define scale_load_down(w) \ +({ \ + unsigned long __w = (w); \ + if (__w) \ + __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \ + __w; \ +}) #else # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT) # define scale_load(w) (w) -- GitLab From d012deee1cb9501a899bdf0387dafcdcbe9dfbea Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 12 Mar 2020 15:35:51 -0700 Subject: [PATCH 0894/1278] selftests/x86/ptrace_syscall_32: Fix no-vDSO segfault [ Upstream commit 630b99ab60aa972052a4202a1ff96c7e45eb0054 ] If AT_SYSINFO is not present, don't try to call a NULL pointer. 
Reported-by: kbuild test robot Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/faaf688265a7e1a5b944d6f8bc0f6368158306d3.1584052409.git.luto@kernel.org Signed-off-by: Sasha Levin --- tools/testing/selftests/x86/ptrace_syscall.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index 6f22238f3217..12aaa063196e 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -414,8 +414,12 @@ int main() #if defined(__i386__) && (!defined(__GLIBC__) || __GLIBC__ > 2 || __GLIBC_MINOR__ >= 16) vsyscall32 = (void *)getauxval(AT_SYSINFO); - printf("[RUN]\tCheck AT_SYSINFO return regs\n"); - test_sys32_regs(do_full_vsyscall32); + if (vsyscall32) { + printf("[RUN]\tCheck AT_SYSINFO return regs\n"); + test_sys32_regs(do_full_vsyscall32); + } else { + printf("[SKIP]\tAT_SYSINFO is not available\n"); + } #endif test_ptrace_syscall_restart(); -- GitLab From 16a55efd94e6e36fc2cf347e22848d9895868118 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Sat, 21 Mar 2020 12:25:45 +0100 Subject: [PATCH 0895/1278] PCI/switchtec: Fix init_completion race condition with poll_wait() [ Upstream commit efbdc769601f4d50018bf7ca50fc9f7c67392ece ] The call to init_completion() in mrpc_queue_cmd() can theoretically race with the call to poll_wait() in switchtec_dev_poll(). poll() write() switchtec_dev_poll() switchtec_dev_write() poll_wait(&s->comp.wait); mrpc_queue_cmd() init_completion(&s->comp) init_waitqueue_head(&s->comp.wait) To my knowledge, no one has hit this bug. Fix this by using reinit_completion() instead of init_completion() in mrpc_queue_cmd(). 
Fixes: 080b47def5e5 ("MicroSemi Switchtec management interface driver") Reported-by: Sebastian Andrzej Siewior Signed-off-by: Logan Gunthorpe Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Acked-by: Bjorn Helgaas Link: https://lkml.kernel.org/r/20200313183608.2646-1-logang@deltatee.com Signed-off-by: Sasha Levin --- drivers/pci/switch/switchtec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index bf229b442e72..6ef0d4b756f0 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -412,7 +412,7 @@ static int mrpc_queue_cmd(struct switchtec_user *stuser) kref_get(&stuser->kref); stuser->read_len = sizeof(stuser->data); stuser_set_state(stuser, MRPC_QUEUED); - init_completion(&stuser->comp); + reinit_completion(&stuser->comp); list_add_tail(&stuser->list, &stdev->mrpc_queue); mrpc_cmd_submit(stdev); -- GitLab From 401aacd5191dbc7036f3569467399f78c3686e13 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 28 Feb 2020 19:33:35 +0800 Subject: [PATCH 0896/1278] libata: Remove extra scsi_host_put() in ata_scsi_add_hosts() [ Upstream commit 1d72f7aec3595249dbb83291ccac041a2d676c57 ] If the call to scsi_add_host_with_dma() in ata_scsi_add_hosts() fails, then we may get use-after-free KASAN warns: ================================================================== BUG: KASAN: use-after-free in kobject_put+0x24/0x180 Read of size 1 at addr ffff0026b8c80364 by task swapper/0/1 CPU: 1 PID: 1 Comm: swapper/0 Tainted: G W 5.6.0-rc3-00004-g5a71b206ea82-dirty #1765 Hardware name: Huawei TaiShan 200 (Model 2280)/BC82AMDD, BIOS 2280-V2 CS V3.B160.01 02/24/2020 Call trace: dump_backtrace+0x0/0x298 show_stack+0x14/0x20 dump_stack+0x118/0x190 print_address_description.isra.9+0x6c/0x3b8 __kasan_report+0x134/0x23c kasan_report+0xc/0x18 __asan_load1+0x5c/0x68 kobject_put+0x24/0x180 put_device+0x10/0x20 scsi_host_put+0x10/0x18 ata_devres_release+0x74/0xb0 
release_nodes+0x2d0/0x470 devres_release_all+0x50/0x78 really_probe+0x2d4/0x560 driver_probe_device+0x7c/0x148 device_driver_attach+0x94/0xa0 __driver_attach+0xa8/0x110 bus_for_each_dev+0xe8/0x158 driver_attach+0x30/0x40 bus_add_driver+0x220/0x2e0 driver_register+0xbc/0x1d0 __pci_register_driver+0xbc/0xd0 ahci_pci_driver_init+0x20/0x28 do_one_initcall+0xf0/0x608 kernel_init_freeable+0x31c/0x384 kernel_init+0x10/0x118 ret_from_fork+0x10/0x18 Allocated by task 5: save_stack+0x28/0xc8 __kasan_kmalloc.isra.8+0xbc/0xd8 kasan_kmalloc+0xc/0x18 __kmalloc+0x1a8/0x280 scsi_host_alloc+0x44/0x678 ata_scsi_add_hosts+0x74/0x268 ata_host_register+0x228/0x488 ahci_host_activate+0x1c4/0x2a8 ahci_init_one+0xd18/0x1298 local_pci_probe+0x74/0xf0 work_for_cpu_fn+0x2c/0x48 process_one_work+0x488/0xc08 worker_thread+0x330/0x5d0 kthread+0x1c8/0x1d0 ret_from_fork+0x10/0x18 Freed by task 5: save_stack+0x28/0xc8 __kasan_slab_free+0x118/0x180 kasan_slab_free+0x10/0x18 slab_free_freelist_hook+0xa4/0x1a0 kfree+0xd4/0x3a0 scsi_host_dev_release+0x100/0x148 device_release+0x7c/0xe0 kobject_put+0xb0/0x180 put_device+0x10/0x20 scsi_host_put+0x10/0x18 ata_scsi_add_hosts+0x210/0x268 ata_host_register+0x228/0x488 ahci_host_activate+0x1c4/0x2a8 ahci_init_one+0xd18/0x1298 local_pci_probe+0x74/0xf0 work_for_cpu_fn+0x2c/0x48 process_one_work+0x488/0xc08 worker_thread+0x330/0x5d0 kthread+0x1c8/0x1d0 ret_from_fork+0x10/0x18 There is also refcount issue, as well: WARNING: CPU: 1 PID: 1 at lib/refcount.c:28 refcount_warn_saturate+0xf8/0x170 The issue is that we make an erroneous extra call to scsi_host_put() for that host: So in ahci_init_one()->ata_host_alloc_pinfo()->ata_host_alloc(), we setup a device release method - ata_devres_release() - which intends to release the SCSI hosts: static void ata_devres_release(struct device *gendev, void *res) { ... for (i = 0; i < host->n_ports; i++) { struct ata_port *ap = host->ports[i]; if (!ap) continue; if (ap->scsi_host) scsi_host_put(ap->scsi_host); } ... 
} However in the ata_scsi_add_hosts() error path, we also call scsi_host_put() for the SCSI hosts. Fix by removing the scsi_host_put() calls in ata_scsi_add_hosts() and leaving this to ata_devres_release(). Fixes: f31871951b38 ("libata: separate out ata_host_alloc() and ata_host_register()") Signed-off-by: John Garry Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/libata-scsi.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index eb0c4ee20525..2f81d6534270 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4571,22 +4571,19 @@ int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht) */ shost->max_host_blocked = 1; - rc = scsi_add_host_with_dma(ap->scsi_host, - &ap->tdev, ap->host->dev); + rc = scsi_add_host_with_dma(shost, &ap->tdev, ap->host->dev); if (rc) - goto err_add; + goto err_alloc; } return 0; - err_add: - scsi_host_put(host->ports[i]->scsi_host); err_alloc: while (--i >= 0) { struct Scsi_Host *shost = host->ports[i]->scsi_host; + /* scsi_host_put() is in ata_devres_release() */ scsi_remove_host(shost); - scsi_host_put(shost); } return rc; } -- GitLab From 790829dc62decb248e0304a90d0ed959ebf8bc2e Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 13 Nov 2019 14:08:45 -0600 Subject: [PATCH 0897/1278] gfs2: Don't demote a glock until its revokes are written [ Upstream commit df5db5f9ee112e76b5202fbc331f990a0fc316d6 ] Before this patch, run_queue would demote glocks based on whether there are any more holders. But if the glock has pending revokes that haven't been written to the media, giving up the glock might end in file system corruption if the revokes never get written due to io errors, node crashes and fences, etc. 
In that case, another node will replay the metadata blocks associated with the glock, but because the revoke was never written, it could replay that block even though the glock had since been granted to another node who might have made changes. This patch changes the logic in run_queue so that it never demotes a glock until its count of pending revokes reaches zero. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/glock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index aea1ed0aebd0..1e2ff4b32c79 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -636,6 +636,9 @@ __acquires(&gl->gl_lockref.lock) goto out_unlock; if (nonblock) goto out_sched; + smp_mb(); + if (atomic_read(&gl->gl_revokes) != 0) + goto out_sched; set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); gl->gl_target = gl->gl_demote_state; -- GitLab From bf849b3523ae65b55db7558422bd67ea330579ff Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Sun, 8 Mar 2020 09:08:44 +0100 Subject: [PATCH 0898/1278] x86/boot: Use unsigned comparison for addresses [ Upstream commit 81a34892c2c7c809f9c4e22c5ac936ae673fb9a2 ] The load address is compared with LOAD_PHYSICAL_ADDR using a signed comparison currently (using jge instruction). When loading a 64-bit kernel using the new efi32_pe_entry() point added by: 97aa276579b2 ("efi/x86: Add true mixed mode entry point into .compat section") using Qemu with -m 3072, the firmware actually loads us above 2Gb, resulting in a very early crash. Use the JAE instruction to perform an unsigned comparison instead, as physical addresses should be considered unsigned. 
Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200301230436.2246909-6-nivedita@alum.mit.edu Link: https://lore.kernel.org/r/20200308080859.21568-14-ardb@kernel.org Signed-off-by: Sasha Levin --- arch/x86/boot/compressed/head_32.S | 2 +- arch/x86/boot/compressed/head_64.S | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 37380c0d5999..01d628ea3402 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -106,7 +106,7 @@ ENTRY(startup_32) notl %eax andl %eax, %ebx cmpl $LOAD_PHYSICAL_ADDR, %ebx - jge 1f + jae 1f #endif movl $LOAD_PHYSICAL_ADDR, %ebx 1: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 39fdede523f2..a25127916e67 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -105,7 +105,7 @@ ENTRY(startup_32) notl %eax andl %eax, %ebx cmpl $LOAD_PHYSICAL_ADDR, %ebx - jge 1f + jae 1f #endif movl $LOAD_PHYSICAL_ADDR, %ebx 1: @@ -280,7 +280,7 @@ ENTRY(startup_64) notq %rax andq %rax, %rbp cmpq $LOAD_PHYSICAL_ADDR, %rbp - jge 1f + jae 1f #endif movq $LOAD_PHYSICAL_ADDR, %rbp 1: -- GitLab From 5cfce5a0db3822adf577aa23fa520fdbfe2acd38 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 8 Mar 2020 09:08:51 +0100 Subject: [PATCH 0899/1278] efi/x86: Ignore the memory attributes table on i386 [ Upstream commit dd09fad9d2caad2325a39b766ce9e79cfc690184 ] Commit: 3a6b6c6fb23667fa ("efi: Make EFI_MEMORY_ATTRIBUTES_TABLE initialization common across all architectures") moved the call to efi_memattr_init() from ARM specific to the generic EFI init code, in order to be able to apply the restricted permissions described in that table on x86 as well. We never enabled this feature fully on i386, and so mapping and reserving this table is pointless. 
However, due to the early call to memblock_reserve(), the memory bookkeeping gets confused to the point where it produces the splat below when we try to map the memory later on: ------------[ cut here ]------------ ioremap on RAM at 0x3f251000 - 0x3fa1afff WARNING: CPU: 0 PID: 0 at arch/x86/mm/ioremap.c:166 __ioremap_caller ... Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.20.0 #48 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 EIP: __ioremap_caller.constprop.0+0x249/0x260 Code: 90 0f b7 05 4e 38 40 de 09 45 e0 e9 09 ff ff ff 90 8d 45 ec c6 05 ... EAX: 00000029 EBX: 00000000 ECX: de59c228 EDX: 00000001 ESI: 3f250fff EDI: 00000000 EBP: de3edf20 ESP: de3edee0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00200296 CR0: 80050033 CR2: ffd17000 CR3: 1e58c000 CR4: 00040690 Call Trace: ioremap_cache+0xd/0x10 ? old_map_region+0x72/0x9d old_map_region+0x72/0x9d efi_map_region+0x8/0xa efi_enter_virtual_mode+0x260/0x43b start_kernel+0x329/0x3aa i386_start_kernel+0xa7/0xab startup_32_smp+0x164/0x168 ---[ end trace e15ccf6b9f356833 ]--- Let's work around this by disregarding the memory attributes table altogether on i386, which does not result in a loss of functionality or protection, given that we never consumed the contents. Fixes: 3a6b6c6fb23667fa ("efi: Make EFI_MEMORY_ATTRIBUTES_TABLE ... 
") Tested-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200304165917.5893-1-ardb@kernel.org Link: https://lore.kernel.org/r/20200308080859.21568-21-ardb@kernel.org Signed-off-by: Sasha Levin --- drivers/firmware/efi/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index f50072b51aef..b39b7e6d4e4d 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -550,7 +550,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz, } } - if (efi_enabled(EFI_MEMMAP)) + if (!IS_ENABLED(CONFIG_X86_32) && efi_enabled(EFI_MEMMAP)) efi_memattr_init(); /* Parse the EFI Properties table if it exists */ -- GitLab From b8efdd0c43f8cbb61d8afdb8ecad5b9be33cc451 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 6 Mar 2020 18:47:20 +0100 Subject: [PATCH 0900/1278] genirq/irqdomain: Check pointer in irq_domain_alloc_irqs_hierarchy() [ Upstream commit 87f2d1c662fa1761359fdf558246f97e484d177a ] irq_domain_alloc_irqs_hierarchy() has 3 call sites in the compilation unit but only one of them checks for the pointer which is being dereferenced inside the called function. Move the check into the function. This allows for catching the error instead of the following crash: Unable to handle kernel NULL pointer dereference at virtual address 00000000 PC is at 0x0 LR is at gpiochip_hierarchy_irq_domain_alloc+0x11f/0x140 ... 
[] (gpiochip_hierarchy_irq_domain_alloc) [] (__irq_domain_alloc_irqs) [] (irq_create_fwspec_mapping) [] (gpiochip_to_irq) [] (gpiod_to_irq) [] (gpio_irqs_init [gpio_irqs]) [] (gpio_irqs_exit+0xecc/0xe84 [gpio_irqs]) Code: bad PC value Signed-off-by: Alexander Sverdlin Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200306174720.82604-1-alexander.sverdlin@nokia.com Signed-off-by: Sasha Levin --- kernel/irq/irqdomain.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index b269ae16b10c..0d54f8256b9f 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1372,6 +1372,11 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg) { + if (!domain->ops->alloc) { + pr_debug("domain->ops->alloc() is NULL\n"); + return -ENOSYS; + } + return domain->ops->alloc(domain, irq_base, nr_irqs, arg); } @@ -1409,11 +1414,6 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, return -EINVAL; } - if (!domain->ops->alloc) { - pr_debug("domain->ops->alloc() is NULL\n"); - return -ENOSYS; - } - if (realloc && irq_base >= 0) { virq = irq_base; } else { -- GitLab From d9e73ab4434e0f2052a0245fdfa3c5c06df7b95a Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 11 Mar 2020 16:07:50 +0530 Subject: [PATCH 0901/1278] block: Fix use-after-free issue accessing struct io_cq [ Upstream commit 30a2da7b7e225ef6c87a660419ea04d3cef3f6a7 ] There is a potential race between ioc_release_fn() and ioc_clear_queue() as shown below, due to which below kernel crash is observed. It also can result into use-after-free issue. 
context#1: context#2: ioc_release_fn() __ioc_clear_queue() gets the same icq ->spin_lock(&ioc->lock); ->spin_lock(&ioc->lock); ->ioc_destroy_icq(icq); ->list_del_init(&icq->q_node); ->call_rcu(&icq->__rcu_head, icq_free_icq_rcu); ->spin_unlock(&ioc->lock); ->ioc_destroy_icq(icq); ->hlist_del_init(&icq->ioc_node); This results into below crash as this memory is now used by icq->__rcu_head in context#1. There is a chance that icq could be free'd as well. 22150.386550: <6> Unable to handle kernel write to read-only memory at virtual address ffffffaa8d31ca50 ... Call trace: 22150.607350: <2> ioc_destroy_icq+0x44/0x110 22150.611202: <2> ioc_clear_queue+0xac/0x148 22150.615056: <2> blk_cleanup_queue+0x11c/0x1a0 22150.619174: <2> __scsi_remove_device+0xdc/0x128 22150.623465: <2> scsi_forget_host+0x2c/0x78 22150.627315: <2> scsi_remove_host+0x7c/0x2a0 22150.631257: <2> usb_stor_disconnect+0x74/0xc8 22150.635371: <2> usb_unbind_interface+0xc8/0x278 22150.639665: <2> device_release_driver_internal+0x198/0x250 22150.644897: <2> device_release_driver+0x24/0x30 22150.649176: <2> bus_remove_device+0xec/0x140 22150.653204: <2> device_del+0x270/0x460 22150.656712: <2> usb_disable_device+0x120/0x390 22150.660918: <2> usb_disconnect+0xf4/0x2e0 22150.664684: <2> hub_event+0xd70/0x17e8 22150.668197: <2> process_one_work+0x210/0x480 22150.672222: <2> worker_thread+0x32c/0x4c8 Fix this by adding a new ICQ_DESTROYED flag in ioc_destroy_icq() to indicate this icq is once marked as destroyed. Also, ensure __ioc_clear_queue() is accessing icq within rcu_read_lock/unlock so that icq doesn't get free'd up while it is still using it. 
Signed-off-by: Sahitya Tummala Co-developed-by: Pradeep P V K Signed-off-by: Pradeep P V K Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-ioc.c | 7 +++++++ include/linux/iocontext.h | 1 + 2 files changed, 8 insertions(+) diff --git a/block/blk-ioc.c b/block/blk-ioc.c index f23311e4b201..e56a480b6f92 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -87,6 +87,7 @@ static void ioc_destroy_icq(struct io_cq *icq) * making it impossible to determine icq_cache. Record it in @icq. */ icq->__rcu_icq_cache = et->icq_cache; + icq->flags |= ICQ_DESTROYED; call_rcu(&icq->__rcu_head, icq_free_icq_rcu); } @@ -230,15 +231,21 @@ static void __ioc_clear_queue(struct list_head *icq_list) { unsigned long flags; + rcu_read_lock(); while (!list_empty(icq_list)) { struct io_cq *icq = list_entry(icq_list->next, struct io_cq, q_node); struct io_context *ioc = icq->ioc; spin_lock_irqsave(&ioc->lock, flags); + if (icq->flags & ICQ_DESTROYED) { + spin_unlock_irqrestore(&ioc->lock, flags); + continue; + } ioc_destroy_icq(icq); spin_unlock_irqrestore(&ioc->lock, flags); } + rcu_read_unlock(); } /** diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index dba15ca8e60b..1dcd9198beb7 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -8,6 +8,7 @@ enum { ICQ_EXITED = 1 << 2, + ICQ_DESTROYED = 1 << 3, }; /* -- GitLab From a752b857872fb6f9281564e94898aad13a403d51 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 21 Feb 2020 10:15:31 +0100 Subject: [PATCH 0902/1278] usb: dwc3: core: add support for disabling SS instances in park mode [ Upstream commit 7ba6b09fda5e0cb741ee56f3264665e0edc64822 ] In certain circumstances, the XHCI SuperSpeed instance in park mode can fail to recover, thus on Amlogic G12A/G12B/SM1 SoCs when there is high load on the single XHCI SuperSpeed instance, the controller can crash like: xhci-hcd xhci-hcd.0.auto: xHCI host not responding to stop endpoint command. 
xhci-hcd xhci-hcd.0.auto: Host halt failed, -110 xhci-hcd xhci-hcd.0.auto: xHCI host controller not responding, assume dead xhci-hcd xhci-hcd.0.auto: xHCI host not responding to stop endpoint command. hub 2-1.1:1.0: hub_ext_port_status failed (err = -22) xhci-hcd xhci-hcd.0.auto: HC died; cleaning up usb 2-1.1-port1: cannot reset (err = -22) Setting the PARKMODE_DISABLE_SS bit in the DWC3_USB3_GUCTL1 mitigates the issue. The bit is described as : "When this bit is set to '1' all SS bus instances in park mode are disabled" Synopsys explains: The GUCTL1.PARKMODE_DISABLE_SS is only available in dwc_usb3 controller running in host mode. This should not be set for other IPs. This can be disabled by default based on IP, but I recommend to have a property to enable this feature for devices that need this. CC: Dongjin Kim Cc: Jianxin Pan Cc: Thinh Nguyen Cc: Jun Li Reported-by: Tim Signed-off-by: Neil Armstrong Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/core.c | 5 +++++ drivers/usb/dwc3/core.h | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 021899c58028..010201dbd029 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -867,6 +867,9 @@ static int dwc3_core_init(struct dwc3 *dwc) if (dwc->dis_tx_ipgap_linecheck_quirk) reg |= DWC3_GUCTL1_TX_IPGAP_LINECHECK_DIS; + if (dwc->parkmode_disable_ss_quirk) + reg |= DWC3_GUCTL1_PARKMODE_DISABLE_SS; + dwc3_writel(dwc->regs, DWC3_GUCTL1, reg); } @@ -1107,6 +1110,8 @@ static void dwc3_get_properties(struct dwc3 *dwc) "snps,dis-del-phy-power-chg-quirk"); dwc->dis_tx_ipgap_linecheck_quirk = device_property_read_bool(dev, "snps,dis-tx-ipgap-linecheck-quirk"); + dwc->parkmode_disable_ss_quirk = device_property_read_bool(dev, + "snps,parkmode-disable-ss-quirk"); dwc->tx_de_emphasis_quirk = device_property_read_bool(dev, "snps,tx_de_emphasis_quirk"); diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 
40bf0e0768d9..8747f9f02229 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -206,6 +206,7 @@ #define DWC3_GCTL_DSBLCLKGTNG BIT(0) /* Global User Control 1 Register */ +#define DWC3_GUCTL1_PARKMODE_DISABLE_SS BIT(17) #define DWC3_GUCTL1_TX_IPGAP_LINECHECK_DIS BIT(28) #define DWC3_GUCTL1_DEV_L1_EXIT_BY_HW BIT(24) @@ -863,6 +864,8 @@ struct dwc3_scratchpad_array { * change quirk. * @dis_tx_ipgap_linecheck_quirk: set if we disable u2mac linestate * check during HS transmit. + * @parkmode_disable_ss_quirk: set if we need to disable all SuperSpeed + * instances in park mode. * @tx_de_emphasis_quirk: set if we enable Tx de-emphasis quirk * @tx_de_emphasis: Tx de-emphasis value * 0 - -6dB de-emphasis @@ -1022,6 +1025,7 @@ struct dwc3 { unsigned dis_u2_freeclk_exists_quirk:1; unsigned dis_del_phy_power_chg_quirk:1; unsigned dis_tx_ipgap_linecheck_quirk:1; + unsigned parkmode_disable_ss_quirk:1; unsigned tx_de_emphasis_quirk:1; unsigned tx_de_emphasis:2; -- GitLab From 117970c7634aafb18103435fcc9ccae705e12727 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 10 Mar 2020 18:49:21 +0000 Subject: [PATCH 0903/1278] irqchip/gic-v4: Provide irq_retrigger to avoid circular locking dependency [ Upstream commit 7809f7011c3bce650e502a98afeb05961470d865 ] On a very heavily loaded D05 with GICv4, I managed to trigger the following lockdep splat: [ 6022.598864] ====================================================== [ 6022.605031] WARNING: possible circular locking dependency detected [ 6022.611200] 5.6.0-rc4-00026-geee7c7b0f498 #680 Tainted: G E [ 6022.618061] ------------------------------------------------------ [ 6022.624227] qemu-system-aar/7569 is trying to acquire lock: [ 6022.629789] ffff042f97606808 (&p->pi_lock){-.-.}, at: try_to_wake_up+0x54/0x7a0 [ 6022.637102] [ 6022.637102] but task is already holding lock: [ 6022.642921] ffff002fae424cf0 (&irq_desc_lock_class){-.-.}, at: __irq_get_desc_lock+0x5c/0x98 [ 6022.651350] [ 6022.651350] which lock already 
depends on the new lock. [ 6022.651350] [ 6022.659512] [ 6022.659512] the existing dependency chain (in reverse order) is: [ 6022.666980] [ 6022.666980] -> #2 (&irq_desc_lock_class){-.-.}: [ 6022.672983] _raw_spin_lock_irqsave+0x50/0x78 [ 6022.677848] __irq_get_desc_lock+0x5c/0x98 [ 6022.682453] irq_set_vcpu_affinity+0x40/0xc0 [ 6022.687236] its_make_vpe_non_resident+0x6c/0xb8 [ 6022.692364] vgic_v4_put+0x54/0x70 [ 6022.696273] vgic_v3_put+0x20/0xd8 [ 6022.700183] kvm_vgic_put+0x30/0x48 [ 6022.704182] kvm_arch_vcpu_put+0x34/0x50 [ 6022.708614] kvm_sched_out+0x34/0x50 [ 6022.712700] __schedule+0x4bc/0x7f8 [ 6022.716697] schedule+0x50/0xd8 [ 6022.720347] kvm_arch_vcpu_ioctl_run+0x5f0/0x978 [ 6022.725473] kvm_vcpu_ioctl+0x3d4/0x8f8 [ 6022.729820] ksys_ioctl+0x90/0xd0 [ 6022.733642] __arm64_sys_ioctl+0x24/0x30 [ 6022.738074] el0_svc_common.constprop.3+0xa8/0x1e8 [ 6022.743373] do_el0_svc+0x28/0x88 [ 6022.747198] el0_svc+0x14/0x40 [ 6022.750761] el0_sync_handler+0x124/0x2b8 [ 6022.755278] el0_sync+0x140/0x180 [ 6022.759100] [ 6022.759100] -> #1 (&rq->lock){-.-.}: [ 6022.764143] _raw_spin_lock+0x38/0x50 [ 6022.768314] task_fork_fair+0x40/0x128 [ 6022.772572] sched_fork+0xe0/0x210 [ 6022.776484] copy_process+0x8c4/0x18d8 [ 6022.780742] _do_fork+0x88/0x6d8 [ 6022.784478] kernel_thread+0x64/0x88 [ 6022.788563] rest_init+0x30/0x270 [ 6022.792390] arch_call_rest_init+0x14/0x1c [ 6022.796995] start_kernel+0x498/0x4c4 [ 6022.801164] [ 6022.801164] -> #0 (&p->pi_lock){-.-.}: [ 6022.806382] __lock_acquire+0xdd8/0x15c8 [ 6022.810813] lock_acquire+0xd0/0x218 [ 6022.814896] _raw_spin_lock_irqsave+0x50/0x78 [ 6022.819761] try_to_wake_up+0x54/0x7a0 [ 6022.824018] wake_up_process+0x1c/0x28 [ 6022.828276] wakeup_softirqd+0x38/0x40 [ 6022.832533] __tasklet_schedule_common+0xc4/0xf0 [ 6022.837658] __tasklet_schedule+0x24/0x30 [ 6022.842176] check_irq_resend+0xc8/0x158 [ 6022.846609] irq_startup+0x74/0x128 [ 6022.850606] __enable_irq+0x6c/0x78 [ 6022.854602] enable_irq+0x54/0xa0 [ 
6022.858431] its_make_vpe_non_resident+0xa4/0xb8 [ 6022.863557] vgic_v4_put+0x54/0x70 [ 6022.867469] kvm_arch_vcpu_blocking+0x28/0x38 [ 6022.872336] kvm_vcpu_block+0x48/0x490 [ 6022.876594] kvm_handle_wfx+0x18c/0x310 [ 6022.880938] handle_exit+0x138/0x198 [ 6022.885022] kvm_arch_vcpu_ioctl_run+0x4d4/0x978 [ 6022.890148] kvm_vcpu_ioctl+0x3d4/0x8f8 [ 6022.894494] ksys_ioctl+0x90/0xd0 [ 6022.898317] __arm64_sys_ioctl+0x24/0x30 [ 6022.902748] el0_svc_common.constprop.3+0xa8/0x1e8 [ 6022.908046] do_el0_svc+0x28/0x88 [ 6022.911871] el0_svc+0x14/0x40 [ 6022.915434] el0_sync_handler+0x124/0x2b8 [ 6022.919951] el0_sync+0x140/0x180 [ 6022.923773] [ 6022.923773] other info that might help us debug this: [ 6022.923773] [ 6022.931762] Chain exists of: [ 6022.931762] &p->pi_lock --> &rq->lock --> &irq_desc_lock_class [ 6022.931762] [ 6022.942101] Possible unsafe locking scenario: [ 6022.942101] [ 6022.948007] CPU0 CPU1 [ 6022.952523] ---- ---- [ 6022.957039] lock(&irq_desc_lock_class); [ 6022.961036] lock(&rq->lock); [ 6022.966595] lock(&irq_desc_lock_class); [ 6022.973109] lock(&p->pi_lock); [ 6022.976324] [ 6022.976324] *** DEADLOCK *** This is happening because we have a pending doorbell that requires retrigger. As SW retriggering is done in a tasklet, we trigger the circular dependency above. The easy cop-out is to provide a retrigger callback that doesn't require acquiring any extra lock. 
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200310184921.23552-5-maz@kernel.org Signed-off-by: Sasha Levin --- drivers/irqchip/irq-gic-v3-its.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 799df1e598db..84b23d902d5b 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2591,12 +2591,18 @@ static int its_vpe_set_irqchip_state(struct irq_data *d, return 0; } +static int its_vpe_retrigger(struct irq_data *d) +{ + return !its_vpe_set_irqchip_state(d, IRQCHIP_STATE_PENDING, true); +} + static struct irq_chip its_vpe_irq_chip = { .name = "GICv4-vpe", .irq_mask = its_vpe_mask_irq, .irq_unmask = its_vpe_unmask_irq, .irq_eoi = irq_chip_eoi_parent, .irq_set_affinity = its_vpe_set_affinity, + .irq_retrigger = its_vpe_retrigger, .irq_set_irqchip_state = its_vpe_set_irqchip_state, .irq_set_vcpu_affinity = its_vpe_set_vcpu_affinity, }; -- GitLab From 105d88c02faabd99472211a00d62c4a6d6d4a776 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 12 Mar 2020 23:12:55 +0800 Subject: [PATCH 0904/1278] locking/lockdep: Avoid recursion in lockdep_count_{for,back}ward_deps() [ Upstream commit 25016bd7f4caf5fc983bbab7403d08e64cba3004 ] Qian Cai reported a bug when PROVE_RCU_LIST=y, and read on /proc/lockdep triggered a warning: [ ] DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled) ... [ ] Call Trace: [ ] lock_is_held_type+0x5d/0x150 [ ] ? rcu_lockdep_current_cpu_online+0x64/0x80 [ ] rcu_read_lock_any_held+0xac/0x100 [ ] ? rcu_read_lock_held+0xc0/0xc0 [ ] ? __slab_free+0x421/0x540 [ ] ? kasan_kmalloc+0x9/0x10 [ ] ? __kmalloc_node+0x1d7/0x320 [ ] ? kvmalloc_node+0x6f/0x80 [ ] __bfs+0x28a/0x3c0 [ ] ? 
class_equal+0x30/0x30 [ ] lockdep_count_forward_deps+0x11a/0x1a0 The warning got triggered because lockdep_count_forward_deps() call __bfs() without current->lockdep_recursion being set, as a result a lockdep internal function (__bfs()) is checked by lockdep, which is unexpected, and the inconsistency between the irq-off state and the state traced by lockdep caused the warning. Apart from this warning, lockdep internal functions like __bfs() should always be protected by current->lockdep_recursion to avoid potential deadlocks and data inconsistency, therefore add the current->lockdep_recursion on-and-off section to protect __bfs() in both lockdep_count_forward_deps() and lockdep_count_backward_deps() Reported-by: Qian Cai Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200312151258.128036-1-boqun.feng@gmail.com Signed-off-by: Sasha Levin --- kernel/locking/lockdep.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 90a3469a7a88..03e3ab61a2ed 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1297,9 +1297,11 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class) this.class = class; raw_local_irq_save(flags); + current->lockdep_recursion = 1; arch_spin_lock(&lockdep_lock); ret = __lockdep_count_forward_deps(&this); arch_spin_unlock(&lockdep_lock); + current->lockdep_recursion = 0; raw_local_irq_restore(flags); return ret; @@ -1324,9 +1326,11 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class) this.class = class; raw_local_irq_save(flags); + current->lockdep_recursion = 1; arch_spin_lock(&lockdep_lock); ret = __lockdep_count_backward_deps(&this); arch_spin_unlock(&lockdep_lock); + current->lockdep_recursion = 0; raw_local_irq_restore(flags); return ret; -- GitLab From f83b75ef20ac868046f90a7006b44bfea671a7c3 Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Thu, 19 Mar 2020 19:18:13 +0800 Subject: 
[PATCH 0905/1278] block, bfq: fix use-after-free in bfq_idle_slice_timer_body [ Upstream commit 2f95fa5c955d0a9987ffdc3a095e2f4e62c5f2a9 ] In bfq_idle_slice_timer func, bfqq = bfqd->in_service_queue is not in bfqd-lock critical section. The bfqq, which is not equal to NULL in bfq_idle_slice_timer, may be freed after passing to bfq_idle_slice_timer_body. So we will access the freed memory. In addition, considering the bfqq may be in race, we should firstly check whether bfqq is in service before doing something on it in bfq_idle_slice_timer_body func. If the bfqq in race is not in service, it means the bfqq has been expired through __bfq_bfqq_expire func, and wait_request flags has been cleared in __bfq_bfqd_reset_in_service func. So we do not need to re-clear the wait_request of bfqq which is not in service. KASAN log is given as follows: [13058.354613] ================================================================== [13058.354640] BUG: KASAN: use-after-free in bfq_idle_slice_timer+0xac/0x290 [13058.354644] Read of size 8 at addr ffffa02cf3e63f78 by task fork13/19767 [13058.354646] [13058.354655] CPU: 96 PID: 19767 Comm: fork13 [13058.354661] Call trace: [13058.354667] dump_backtrace+0x0/0x310 [13058.354672] show_stack+0x28/0x38 [13058.354681] dump_stack+0xd8/0x108 [13058.354687] print_address_description+0x68/0x2d0 [13058.354690] kasan_report+0x124/0x2e0 [13058.354697] __asan_load8+0x88/0xb0 [13058.354702] bfq_idle_slice_timer+0xac/0x290 [13058.354707] __hrtimer_run_queues+0x298/0x8b8 [13058.354710] hrtimer_interrupt+0x1b8/0x678 [13058.354716] arch_timer_handler_phys+0x4c/0x78 [13058.354722] handle_percpu_devid_irq+0xf0/0x558 [13058.354731] generic_handle_irq+0x50/0x70 [13058.354735] __handle_domain_irq+0x94/0x110 [13058.354739] gic_handle_irq+0x8c/0x1b0 [13058.354742] el1_irq+0xb8/0x140 [13058.354748] do_wp_page+0x260/0xe28 [13058.354752] __handle_mm_fault+0x8ec/0x9b0 [13058.354756] handle_mm_fault+0x280/0x460 [13058.354762] do_page_fault+0x3ec/0x890 
[13058.354765] do_mem_abort+0xc0/0x1b0 [13058.354768] el0_da+0x24/0x28 [13058.354770] [13058.354773] Allocated by task 19731: [13058.354780] kasan_kmalloc+0xe0/0x190 [13058.354784] kasan_slab_alloc+0x14/0x20 [13058.354788] kmem_cache_alloc_node+0x130/0x440 [13058.354793] bfq_get_queue+0x138/0x858 [13058.354797] bfq_get_bfqq_handle_split+0xd4/0x328 [13058.354801] bfq_init_rq+0x1f4/0x1180 [13058.354806] bfq_insert_requests+0x264/0x1c98 [13058.354811] blk_mq_sched_insert_requests+0x1c4/0x488 [13058.354818] blk_mq_flush_plug_list+0x2d4/0x6e0 [13058.354826] blk_flush_plug_list+0x230/0x548 [13058.354830] blk_finish_plug+0x60/0x80 [13058.354838] read_pages+0xec/0x2c0 [13058.354842] __do_page_cache_readahead+0x374/0x438 [13058.354846] ondemand_readahead+0x24c/0x6b0 [13058.354851] page_cache_sync_readahead+0x17c/0x2f8 [13058.354858] generic_file_buffered_read+0x588/0xc58 [13058.354862] generic_file_read_iter+0x1b4/0x278 [13058.354965] ext4_file_read_iter+0xa8/0x1d8 [ext4] [13058.354972] __vfs_read+0x238/0x320 [13058.354976] vfs_read+0xbc/0x1c0 [13058.354980] ksys_read+0xdc/0x1b8 [13058.354984] __arm64_sys_read+0x50/0x60 [13058.354990] el0_svc_common+0xb4/0x1d8 [13058.354994] el0_svc_handler+0x50/0xa8 [13058.354998] el0_svc+0x8/0xc [13058.354999] [13058.355001] Freed by task 19731: [13058.355007] __kasan_slab_free+0x120/0x228 [13058.355010] kasan_slab_free+0x10/0x18 [13058.355014] kmem_cache_free+0x288/0x3f0 [13058.355018] bfq_put_queue+0x134/0x208 [13058.355022] bfq_exit_icq_bfqq+0x164/0x348 [13058.355026] bfq_exit_icq+0x28/0x40 [13058.355030] ioc_exit_icq+0xa0/0x150 [13058.355035] put_io_context_active+0x250/0x438 [13058.355038] exit_io_context+0xd0/0x138 [13058.355045] do_exit+0x734/0xc58 [13058.355050] do_group_exit+0x78/0x220 [13058.355054] __wake_up_parent+0x0/0x50 [13058.355058] el0_svc_common+0xb4/0x1d8 [13058.355062] el0_svc_handler+0x50/0xa8 [13058.355066] el0_svc+0x8/0xc [13058.355067] [13058.355071] The buggy address belongs to the object at ffffa02cf3e63e70#012 
which belongs to the cache bfq_queue of size 464 [13058.355075] The buggy address is located 264 bytes inside of#012 464-byte region [ffffa02cf3e63e70, ffffa02cf3e64040) [13058.355077] The buggy address belongs to the page: [13058.355083] page:ffff7e80b3cf9800 count:1 mapcount:0 mapping:ffff802db5c90780 index:0xffffa02cf3e606f0 compound_mapcount: 0 [13058.366175] flags: 0x2ffffe0000008100(slab|head) [13058.370781] raw: 2ffffe0000008100 ffff7e80b53b1408 ffffa02d730c1c90 ffff802db5c90780 [13058.370787] raw: ffffa02cf3e606f0 0000000000370023 00000001ffffffff 0000000000000000 [13058.370789] page dumped because: kasan: bad access detected [13058.370791] [13058.370792] Memory state around the buggy address: [13058.370797] ffffa02cf3e63e00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fb fb [13058.370801] ffffa02cf3e63e80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [13058.370805] >ffffa02cf3e63f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [13058.370808] ^ [13058.370811] ffffa02cf3e63f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [13058.370815] ffffa02cf3e64000: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [13058.370817] ================================================================== [13058.370820] Disabling lock debugging due to kernel taint Here, we directly pass the bfqd to bfq_idle_slice_timer_body func. -- V2->V3: rewrite the comment as suggested by Paolo Valente V1->V2: add one comment, and add Fixes and Reported-by tag. 
Fixes: aee69d78d ("block, bfq: introduce the BFQ-v0 I/O scheduler as an extra scheduler") Acked-by: Paolo Valente Reported-by: Wang Wang Signed-off-by: Zhiqiang Liu Signed-off-by: Feilong Lin Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-iosched.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 93863c6173e6..959bee9fa911 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4541,20 +4541,28 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio) spin_unlock_irq(&bfqd->lock); } -static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq) +static void +bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq) { - struct bfq_data *bfqd = bfqq->bfqd; enum bfqq_expiration reason; unsigned long flags; spin_lock_irqsave(&bfqd->lock, flags); - bfq_clear_bfqq_wait_request(bfqq); + /* + * Considering that bfqq may be in race, we should firstly check + * whether bfqq is in service before doing something on it. If + * the bfqq in race is not in service, it has already been expired + * through __bfq_bfqq_expire func and its wait_request flags has + * been cleared in __bfq_bfqd_reset_in_service func. + */ if (bfqq != bfqd->in_service_queue) { spin_unlock_irqrestore(&bfqd->lock, flags); return; } + bfq_clear_bfqq_wait_request(bfqq); + if (bfq_bfqq_budget_timeout(bfqq)) /* * Also here the queue can be safely expired @@ -4599,7 +4607,7 @@ static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer) * early. 
*/ if (bfqq) - bfq_idle_slice_timer_body(bfqq); + bfq_idle_slice_timer_body(bfqd, bfqq); return HRTIMER_NORESTART; } -- GitLab From c77f5f03518bf1794e7b8e6814fb43e4f367c871 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 4 Mar 2020 11:18:30 -0500 Subject: [PATCH 0906/1278] btrfs: remove a BUG_ON() from merge_reloc_roots() [ Upstream commit 7b7b74315b24dc064bc1c683659061c3d48f8668 ] This was pretty subtle, we default to reloc roots having 0 root refs, so if we crash in the middle of the relocation they can just be deleted. If we successfully complete the relocation operations we'll set our root refs to 1 in prepare_to_merge() and then go on to merge_reloc_roots(). At prepare_to_merge() time if any of the reloc roots have a 0 reference still, we will remove that reloc root from our reloc root rb tree, and then clean it up later. However this only happens if we successfully start a transaction. If we've aborted previously we will skip this step completely, and only have reloc roots with a reference count of 0, but were never properly removed from the reloc control's rb tree. This isn't a problem per-se, our references are held by the list the reloc roots are on, and by the original root the reloc root belongs to. If we end up in this situation all the reloc roots will be added to the dirty_reloc_list, and then properly dropped at that point. The reloc control will be free'd and the rb tree is no longer used. There were two options when fixing this, one was to remove the BUG_ON(), the other was to make prepare_to_merge() handle the case where we couldn't start a trans handle. IMO this is the cleaner solution. I started with handling the error in prepare_to_merge(), but it turned out super ugly. And in the end this BUG_ON() simply doesn't matter, the cleanup was happening properly, we were just panicing because this BUG_ON() only matters in the success case. So I've opted to just remove it and add a comment where it was. 
Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/relocation.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d4c00edd16d2..42f388ed0796 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2480,7 +2480,21 @@ void merge_reloc_roots(struct reloc_control *rc) free_reloc_roots(&reloc_roots); } - BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); + /* + * We used to have + * + * BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); + * + * here, but it's wrong. If we fail to start the transaction in + * prepare_to_merge() we will have only 0 ref reloc roots, none of which + * have actually been removed from the reloc_root_tree rb tree. This is + * fine because we're bailing here, and we hold a reference on the root + * for the list that holds it, so these roots will be cleaned up when we + * do the reloc_dirty_list afterwards. Meanwhile the root->reloc_root + * will be cleaned up on unmount. + * + * The remaining nodes will be cleaned up by free_reloc_control. + */ } static void free_block_list(struct rb_root *blocks) -- GitLab From 2ee871239dbc8a25e0379c94894807d09d8db3fd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 Mar 2020 17:17:08 -0400 Subject: [PATCH 0907/1278] btrfs: track reloc roots based on their commit root bytenr [ Upstream commit ea287ab157c2816bf12aad4cece41372f9d146b4 ] We always search the commit root of the extent tree for looking up back references, however we track the reloc roots based on their current bytenr. This is wrong, if we commit the transaction between relocating tree blocks we could end up in this code in build_backref_tree if (key.objectid == key.offset) { /* * Only root blocks of reloc trees use backref * pointing to itself. 
*/ root = find_reloc_root(rc, cur->bytenr); ASSERT(root); cur->root = root; break; } find_reloc_root() is looking based on the bytenr we had in the commit root, but if we've COWed this reloc root we will not find that bytenr, and we will trip over the ASSERT(root). Fix this by using the commit_root->start bytenr for indexing the commit root. Then we change the __update_reloc_root() caller to be used when we switch the commit root for the reloc root during commit. This fixes the panic I was seeing when we started throttling relocation for delayed refs. Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/relocation.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 42f388ed0796..04db7c5f5ca4 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1306,7 +1306,7 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) if (!node) return -ENOMEM; - node->bytenr = root->node->start; + node->bytenr = root->commit_root->start; node->data = root; spin_lock(&rc->reloc_root_tree.lock); @@ -1337,10 +1337,11 @@ static void __del_reloc_root(struct btrfs_root *root) if (rc && root->node) { spin_lock(&rc->reloc_root_tree.lock); rb_node = tree_search(&rc->reloc_root_tree.rb_root, - root->node->start); + root->commit_root->start); if (rb_node) { node = rb_entry(rb_node, struct mapping_node, rb_node); rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); + RB_CLEAR_NODE(&node->rb_node); } spin_unlock(&rc->reloc_root_tree.lock); if (!node) @@ -1358,7 +1359,7 @@ static void __del_reloc_root(struct btrfs_root *root) * helper to update the 'address of tree root -> reloc tree' * mapping */ -static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) +static int __update_reloc_root(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; struct rb_node *rb_node; @@ -1367,7 +1368,7 @@ static int 
__update_reloc_root(struct btrfs_root *root, u64 new_bytenr) spin_lock(&rc->reloc_root_tree.lock); rb_node = tree_search(&rc->reloc_root_tree.rb_root, - root->node->start); + root->commit_root->start); if (rb_node) { node = rb_entry(rb_node, struct mapping_node, rb_node); rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); @@ -1379,7 +1380,7 @@ static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) BUG_ON((struct btrfs_root *)node->data != root); spin_lock(&rc->reloc_root_tree.lock); - node->bytenr = new_bytenr; + node->bytenr = root->node->start; rb_node = tree_insert(&rc->reloc_root_tree.rb_root, node->bytenr, &node->rb_node); spin_unlock(&rc->reloc_root_tree.lock); @@ -1524,6 +1525,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, } if (reloc_root->commit_root != reloc_root->node) { + __update_reloc_root(reloc_root); btrfs_set_root_node(root_item, reloc_root->node); free_extent_buffer(reloc_root->commit_root); reloc_root->commit_root = btrfs_root_node(reloc_root); @@ -4712,11 +4714,6 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, BUG_ON(rc->stage == UPDATE_DATA_PTRS && root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); - if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { - if (buf == root->node) - __update_reloc_root(root, cow->start); - } - level = btrfs_header_level(buf); if (btrfs_header_generation(buf) <= btrfs_root_last_snapshot(&root->root_item)) -- GitLab From 4eb9d5bc7065b263edb2afee53c5d6cdf2487164 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 30 Jan 2020 22:16:40 -0800 Subject: [PATCH 0908/1278] uapi: rename ext2_swab() to swab() and share globally in swab.h [ Upstream commit d5767057c9a76a29f073dad66b7fa12a90e8c748 ] ext2_swab() is defined locally in lib/find_bit.c However it is not specific to ext2, neither to bitmaps. 
There are many potential users of it, so rename it to just swab() and move to include/uapi/linux/swab.h ABI guarantees that size of unsigned long corresponds to BITS_PER_LONG, therefore drop unneeded cast. Link: http://lkml.kernel.org/r/20200103202846.21616-1-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Allison Randal Cc: Joe Perches Cc: Thomas Gleixner Cc: William Breathitt Gray Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/swab.h | 1 + include/uapi/linux/swab.h | 10 ++++++++++ lib/find_bit.c | 16 ++-------------- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/include/linux/swab.h b/include/linux/swab.h index e466fd159c85..bcff5149861a 100644 --- a/include/linux/swab.h +++ b/include/linux/swab.h @@ -7,6 +7,7 @@ # define swab16 __swab16 # define swab32 __swab32 # define swab64 __swab64 +# define swab __swab # define swahw32 __swahw32 # define swahb32 __swahb32 # define swab16p __swab16p diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index 23cd84868cc3..fa7f97da5b76 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/compiler.h> +#include <asm/bitsperlong.h> #include <asm/swab.h> /* @@ -132,6 +133,15 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) __fswab64(x)) #endif +static __always_inline unsigned long __swab(const unsigned long y) +{ +#if BITS_PER_LONG == 64 + return __swab64(y); +#else /* BITS_PER_LONG == 32 */ + return __swab32(y); +#endif +} + /** * __swahw32 - return a word-swapped 32-bit value * @x: value to wordswap diff --git a/lib/find_bit.c b/lib/find_bit.c index 6ed74f78380c..883ef3755a1c 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -133,18 +133,6 @@ EXPORT_SYMBOL(find_last_bit); #ifdef __BIG_ENDIAN -/* include/linux/byteorder does not support "unsigned long" type */ -static inline unsigned long ext2_swab(const unsigned long y) -{ -#if BITS_PER_LONG == 64 - return (unsigned long) __swab64((u64) y); -#elif BITS_PER_LONG == 
32 - return (unsigned long) __swab32((u32) y); -#else -#error BITS_PER_LONG not defined -#endif -} - #if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) static unsigned long _find_next_bit_le(const unsigned long *addr, unsigned long nbits, unsigned long start, unsigned long invert) @@ -157,7 +145,7 @@ static unsigned long _find_next_bit_le(const unsigned long *addr, tmp = addr[start / BITS_PER_LONG] ^ invert; /* Handle 1st word. */ - tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start)); + tmp &= swab(BITMAP_FIRST_WORD_MASK(start)); start = round_down(start, BITS_PER_LONG); while (!tmp) { @@ -168,7 +156,7 @@ static unsigned long _find_next_bit_le(const unsigned long *addr, tmp = addr[start / BITS_PER_LONG] ^ invert; } - return min(start + __ffs(ext2_swab(tmp)), nbits); + return min(start + __ffs(swab(tmp)), nbits); } #endif -- GitLab From 809d05196ea3512931c31d62bca7ca26dbee2588 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 26 Mar 2020 11:26:18 +0800 Subject: [PATCH 0909/1278] misc: rtsx: set correct pcr_ops for rts522A [ Upstream commit 10cea23b6aae15e8324f4101d785687f2c514fe5 ] rts522a should use rts522a_pcr_ops, which is diffrent with rts5227 in phy/hw init setting. 
Fixes: ce6a5acc9387 ("mfd: rtsx: Add support for rts522A") Signed-off-by: YueHaibing Cc: stable Link: https://lore.kernel.org/r/20200326032618.20472-1-yuehaibing@huawei.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/mfd/rts5227.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/rts5227.c b/drivers/mfd/rts5227.c index ff296a4bf3d2..dc6a9432a4b6 100644 --- a/drivers/mfd/rts5227.c +++ b/drivers/mfd/rts5227.c @@ -369,6 +369,7 @@ static const struct pcr_ops rts522a_pcr_ops = { void rts522a_init_params(struct rtsx_pcr *pcr) { rts5227_init_params(pcr); + pcr->ops = &rts522a_pcr_ops; pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3; } -- GitLab From 2a77146ab15518e6090a4e4b5a64e9a1436e42cf Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 1 Apr 2020 21:04:23 -0700 Subject: [PATCH 0910/1278] slub: improve bit diffusion for freelist ptr obfuscation commit 1ad53d9fa3f6168ebcf48a50e08b170432da2257 upstream. Under CONFIG_SLAB_FREELIST_HARDENED=y, the obfuscation was relatively weak in that the ptr and ptr address were usually so close that the first XOR would result in an almost entirely 0-byte value[1], leaving most of the "secret" number ultimately being stored after the third XOR. A single blind memory content exposure of the freelist was generally sufficient to learn the secret. Add a swab() call to mix bits a little more. This is a cheap way (1 cycle) to make attacks need more than a single exposure to learn the secret (or to know _where_ the exposure is in memory). kmalloc-32 freelist walk, before: ptr ptr_addr stored value secret ffff90c22e019020@ffff90c22e019000 is 86528eb656b3b5bd (86528eb656b3b59d) ffff90c22e019040@ffff90c22e019020 is 86528eb656b3b5fd (86528eb656b3b59d) ffff90c22e019060@ffff90c22e019040 is 86528eb656b3b5bd (86528eb656b3b59d) ffff90c22e019080@ffff90c22e019060 is 86528eb656b3b57d (86528eb656b3b59d) ffff90c22e0190a0@ffff90c22e019080 is 86528eb656b3b5bd (86528eb656b3b59d) ... 
after: ptr ptr_addr stored value secret ffff9eed6e019020@ffff9eed6e019000 is 793d1135d52cda42 (86528eb656b3b59d) ffff9eed6e019040@ffff9eed6e019020 is 593d1135d52cda22 (86528eb656b3b59d) ffff9eed6e019060@ffff9eed6e019040 is 393d1135d52cda02 (86528eb656b3b59d) ffff9eed6e019080@ffff9eed6e019060 is 193d1135d52cdae2 (86528eb656b3b59d) ffff9eed6e0190a0@ffff9eed6e019080 is f93d1135d52cdac2 (86528eb656b3b59d) [1] https://blog.infosectcbr.com.au/2020/03/weaknesses-in-linux-kernel-heap.html Fixes: 2482ddec670f ("mm: add SLUB free list pointer obfuscation") Reported-by: Silvio Cesare Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Link: http://lkml.kernel.org/r/202003051623.AF4F8CB@keescook Signed-off-by: Linus Torvalds [kees: Backport to v4.19 which doesn't call kasan_reset_untag()] Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 958a8f7a3c25..3c1a16f03b2b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -248,7 +248,7 @@ static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr, unsigned long ptr_addr) { #ifdef CONFIG_SLAB_FREELIST_HARDENED - return (void *)((unsigned long)ptr ^ s->random ^ ptr_addr); + return (void *)((unsigned long)ptr ^ s->random ^ swab(ptr_addr)); #else return ptr; #endif -- GitLab From bfe67beb9ef1c50e1fddc309aaec7c4c49dcfe75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EA=B2=BD=ED=83=9D?= Date: Mon, 30 Mar 2020 16:35:59 +0900 Subject: [PATCH 0911/1278] ASoC: fix regwmask commit 0ab070917afdc93670c2d0ea02ab6defb6246a7c upstream. 
If regwshift is 32 and the selected architecture compiles '<<' operator for signed int literal into rotating shift, '1<<regwshift' became 1 and it makes regwmask to 0x0. The literal is set to unsigned long to get intended regwmask. Signed-off-by: Gyeongtaek Lee Link: https://lore.kernel.org/r/001001d60665$db7af3e0$9270dba0$@samsung.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 500f98c730b9..d5ef627e93be 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -837,7 +837,7 @@ int snd_soc_get_xr_sx(struct snd_kcontrol *kcontrol, unsigned int regbase = mc->regbase; unsigned int regcount = mc->regcount; unsigned int regwshift = component->val_bytes * BITS_PER_BYTE; - unsigned int regwmask = (1<<regwshift)-1; + unsigned int regwmask = (1UL<<regwshift)-1; unsigned int invert = mc->invert; unsigned long mask = (1UL<<mc->nbits)-1; long min = mc->min; @@ -886,7 +886,7 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, unsigned int regbase = mc->regbase; unsigned int regcount = mc->regcount; unsigned int regwshift = component->val_bytes * BITS_PER_BYTE; - unsigned int regwmask = (1<<regwshift)-1; + unsigned int regwmask = (1UL<<regwshift)-1; unsigned int invert = mc->invert; unsigned long mask = (1UL<<mc->nbits)-1; long max = mc->max; -- GitLab From ee2e66295f5fcbe193c566ba9077f8dde8fe95d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EA=B2=BD=ED=83=9D?= Date: Tue, 31 Mar 2020 16:55:16 +0900 Subject: [PATCH 0912/1278] ASoC: dapm: connect virtual mux with default value commit 3bbbb7728fc853d71dbce4073fef9f281fbfb4dd upstream. Since a virtual mixer has no backing registers to decide which path to connect, it will try to match with initial state. This is to ensure that the default mixer choice will be correctly powered up during initialization. Invert flag is used to select initial state of the virtual switch. Since actual hardware can't be disconnected by virtual switch, connected is better choice as initial state in many cases.
Signed-off-by: Gyeongtaek Lee Link: https://lore.kernel.org/r/01a301d60731$b724ea10$256ebe30$@samsung.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-dapm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index fb2fef166672..7861cf7a4488 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -799,7 +799,13 @@ static void dapm_set_mixer_path_status(struct snd_soc_dapm_path *p, int i, val = max - val; p->connect = !!val; } else { - p->connect = 0; + /* since a virtual mixer has no backing registers to + * decide which path to connect, it will try to match + * with initial state. This is to ensure + * that the default mixer choice will be + * correctly powered up during initialization. + */ + p->connect = invert; } } -- GitLab From 11f9bc47e5ecd98304d234291e7ead6f20491164 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EA=B2=BD=ED=83=9D?= Date: Wed, 1 Apr 2020 10:04:21 +0900 Subject: [PATCH 0913/1278] ASoC: dpcm: allow start or stop during pause for backend commit 21fca8bdbb64df1297e8c65a746c4c9f4a689751 upstream. soc_compr_trigger_fe() allows start or stop after pause_push. In dpcm_be_dai_trigger(), however, only pause_release is allowed command after pause_push. So, start or stop after pause in compress offload is always returned as error if the compress offload is used with dpcm. To fix the problem, SND_SOC_DPCM_STATE_PAUSED should be allowed for start or stop command. 
Signed-off-by: Gyeongtaek Lee Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/004d01d607c1$7a3d5250$6eb7f6f0$@samsung.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-pcm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index e75822dd9930..fd4b71729eed 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2048,7 +2048,8 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, switch (cmd) { case SNDRV_PCM_TRIGGER_START: if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) && - (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP)) + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED)) continue; ret = dpcm_do_trigger(dpcm, be_substream, cmd); @@ -2078,7 +2079,8 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, be->dpcm[stream].state = SND_SOC_DPCM_STATE_START; break; case SNDRV_PCM_TRIGGER_STOP: - if (be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) + if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED)) continue; if (!snd_soc_dpcm_can_be_free_stop(fe, be, stream)) -- GitLab From bdf7505f814a6aaea188a3c075979be36ee35c59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EA=B2=BD=ED=83=9D?= Date: Wed, 1 Apr 2020 18:05:24 +0900 Subject: [PATCH 0914/1278] ASoC: topology: use name_prefix for new kcontrol commit abca9e4a04fbe9c6df4d48ca7517e1611812af25 upstream. Current topology doesn't add prefix of component to new kcontrol. 
Signed-off-by: Gyeongtaek Lee Link: https://lore.kernel.org/r/009b01d60804$ae25c2d0$0a714870$@samsung.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 1a912f72bddd..a215b9ad148c 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -421,7 +421,7 @@ static int soc_tplg_add_kcontrol(struct soc_tplg *tplg, struct snd_soc_component *comp = tplg->comp; return soc_tplg_add_dcontrol(comp->card->snd_card, - comp->dev, k, NULL, comp, kcontrol); + comp->dev, k, comp->name_prefix, comp, kcontrol); } /* remove a mixer kcontrol */ -- GitLab From 03ee78583a9e1a7e52819a2f95ef6c9f70397dbb Mon Sep 17 00:00:00 2001 From: Sriharsha Allenki Date: Thu, 26 Mar 2020 17:26:20 +0530 Subject: [PATCH 0915/1278] usb: gadget: f_fs: Fix use after free issue as part of queue failure commit f63ec55ff904b2f2e126884fcad93175f16ab4bb upstream. In AIO case, the request is freed up if ep_queue fails. However, io_data->req still has the reference to this freed request. In the case of this failure if there is aio_cancel call on this io_data it will lead to an invalid dequeue operation and a potential use after free issue. Fix this by setting the io_data->req to NULL when the request is freed as part of queue failure. 
Fixes: 2e4c7553cd6f ("usb: gadget: f_fs: add aio support") Signed-off-by: Sriharsha Allenki CC: stable Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/20200326115620.12571-1-sallenki@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 282396e8eec6..819fd77a2da4 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1037,6 +1037,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) ret = usb_ep_queue(ep->ep, req, GFP_ATOMIC); if (unlikely(ret)) { + io_data->req = NULL; usb_ep_free_request(ep->ep, req); goto error_lock; } -- GitLab From e591d7ca6e9cd24bff5fcef70d65544585656488 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 3 Feb 2020 18:05:32 -0800 Subject: [PATCH 0916/1278] usb: gadget: composite: Inform controller driver of self-powered commit 5e5caf4fa8d3039140b4548b6ab23dd17fce9b2c upstream. Different configuration/condition may draw different power. Inform the controller driver of the change so it can respond properly (e.g. GET_STATUS request). This fixes an issue with setting MaxPower from configfs. The composite driver doesn't check this value when setting self-powered. 
Cc: stable@vger.kernel.org Fixes: 88af8bbe4ef7 ("usb: gadget: the start of the configfs interface") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 5a4cf779b269..c1f037af9702 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -850,6 +850,11 @@ static int set_config(struct usb_composite_dev *cdev, else power = min(power, 900U); done: + if (power <= USB_SELF_POWER_VBUS_MAX_DRAW) + usb_gadget_set_selfpowered(gadget); + else + usb_gadget_clear_selfpowered(gadget); + usb_gadget_vbus_draw(gadget, power); if (result >= 0 && cdev->delayed_status) result = USB_GADGET_DELAYED_STATUS; @@ -2281,6 +2286,7 @@ void composite_suspend(struct usb_gadget *gadget) cdev->suspended = 1; + usb_gadget_set_selfpowered(gadget); usb_gadget_vbus_draw(gadget, 2); } @@ -2309,6 +2315,9 @@ void composite_resume(struct usb_gadget *gadget) else maxpower = min(maxpower, 900U); + if (maxpower > USB_SELF_POWER_VBUS_MAX_DRAW) + usb_gadget_clear_selfpowered(gadget); + usb_gadget_vbus_draw(gadget, maxpower); } -- GitLab From cedb741414ac69775b04d39f8ce881501e45f21b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 8 Apr 2020 16:04:48 +0200 Subject: [PATCH 0917/1278] ALSA: usb-audio: Add mixer workaround for TRX40 and co commit 2a48218f8e23d47bd3e23cfdfb8aa9066f7dc3e6 upstream. Some recent boards (supposedly with a new AMD platform) contain the USB audio class 2 device that is often tied with HD-audio. The device exposes an Input Gain Pad control (id=19, control=12) but this node doesn't behave correctly, returning an error for each inquiry of GET_MIN and GET_MAX that should have been mandatory. As a workaround, simply ignore this node by adding a usbmix_name_map table entry. 
The currently known devices are: * 0414:a002 - Gigabyte TRX40 Aorus Pro WiFi * 0b05:1916 - ASUS ROG Zenith II * 0b05:1917 - ASUS ROG Strix * 0db0:0d64 - MSI TRX40 Creator * 0db0:543d - MSI TRX40 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=206543 Cc: Link: https://lore.kernel.org/r/20200408140449.22319-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_maps.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index eaa03acd4686..26ce6838e842 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -363,6 +363,14 @@ static const struct usbmix_name_map dell_alc4020_map[] = { { 0 } }; +/* Some mobos shipped with a dummy HD-audio show the invalid GET_MIN/GET_MAX + * response for Input Gain Pad (id=19, control=12). Skip it. + */ +static const struct usbmix_name_map asus_rog_map[] = { + { 19, NULL, 12 }, /* FU, Input Gain Pad */ + {} +}; + /* * Control map entries */ @@ -482,6 +490,26 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x05a7, 0x1020), .map = bose_companion5_map, }, + { /* Gigabyte TRX40 Aorus Pro WiFi */ + .id = USB_ID(0x0414, 0xa002), + .map = asus_rog_map, + }, + { /* ASUS ROG Zenith II */ + .id = USB_ID(0x0b05, 0x1916), + .map = asus_rog_map, + }, + { /* ASUS ROG Strix */ + .id = USB_ID(0x0b05, 0x1917), + .map = asus_rog_map, + }, + { /* MSI TRX40 Creator */ + .id = USB_ID(0x0db0, 0x0d64), + .map = asus_rog_map, + }, + { /* MSI TRX40 */ + .id = USB_ID(0x0db0, 0x543d), + .map = asus_rog_map, + }, { 0 } /* terminator */ }; -- GitLab From 7b53723ea89cb3754dcb1d7e8b3b305b650cb532 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 8 Apr 2020 16:04:49 +0200 Subject: [PATCH 0918/1278] ALSA: hda: Add driver blacklist commit 3c6fd1f07ed03a04debbb9a9d782205f1ef5e2ab upstream. 
The recent AMD platform exposes an HD-audio bus but without any actual codecs, which is internally tied with a USB-audio device, supposedly. It results in "no codecs" error of HD-audio bus driver, and it's nothing but a waste of resources. This patch introduces a static blacklist table for skipping such a known bogus PCI SSID entry. As of writing this patch, the known SSIDs are: * 1043:874f - ASUS ROG Zenith II / Strix * 1462:cb59 - MSI TRX40 Creator * 1462:cb60 - MSI TRX40 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=206543 Cc: Link: https://lore.kernel.org/r/20200408140449.22319-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 890793ad85ca..5729e1fe69e9 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2177,6 +2177,17 @@ static const struct hdac_io_ops pci_hda_io_ops = { .dma_free_pages = dma_free_pages, }; +/* Blacklist for skipping the whole probe: + * some HD-audio PCI entries are exposed without any codecs, and such devices + * should be ignored from the beginning. 
+ */ +static const struct snd_pci_quirk driver_blacklist[] = { + SND_PCI_QUIRK(0x1043, 0x874f, "ASUS ROG Zenith II / Strix", 0), + SND_PCI_QUIRK(0x1462, 0xcb59, "MSI TRX40 Creator", 0), + SND_PCI_QUIRK(0x1462, 0xcb60, "MSI TRX40", 0), + {} +}; + static const struct hda_controller_ops pci_hda_ops = { .disable_msi_reset_irq = disable_msi_reset_irq, .substream_alloc_pages = substream_alloc_pages, @@ -2196,6 +2207,11 @@ static int azx_probe(struct pci_dev *pci, bool schedule_probe; int err; + if (snd_pci_quirk_lookup(pci, driver_blacklist)) { + dev_info(&pci->dev, "Skipping the blacklisted device\n"); + return -ENODEV; + } + if (dev >= SNDRV_CARDS) return -ENODEV; if (!enable[dev]) { -- GitLab From cbeab3f615cf5d76e266e689b6ea19e0bb59a6a9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 7 Apr 2020 10:44:01 +0200 Subject: [PATCH 0919/1278] ALSA: hda: Fix potential access overflow in beep helper commit 0ad3f0b384d58f3bd1f4fb87d0af5b8f6866f41a upstream. The beep control helper function blindly stores the values in two stereo channels no matter whether the actual control is mono or stereo. This is practically harmless, but it annoys the recently introduced sanity check, resulting in an error when the checker is enabled. This patch corrects the behavior to store only on the defined array member. 
Fixes: 0401e8548eac ("ALSA: hda - Move beep helper functions to hda_beep.c") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=207139 Reviewed-by: Jaroslav Kysela Cc: Link: https://lore.kernel.org/r/20200407084402.25589-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_beep.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_beep.c b/sound/pci/hda/hda_beep.c index c397e7da0eac..7ccfb09535e1 100644 --- a/sound/pci/hda/hda_beep.c +++ b/sound/pci/hda/hda_beep.c @@ -310,8 +310,12 @@ int snd_hda_mixer_amp_switch_get_beep(struct snd_kcontrol *kcontrol, { struct hda_codec *codec = snd_kcontrol_chip(kcontrol); struct hda_beep *beep = codec->beep; + int chs = get_amp_channels(kcontrol); + if (beep && (!beep->enabled || !ctl_has_mute(kcontrol))) { - ucontrol->value.integer.value[0] = + if (chs & 1) + ucontrol->value.integer.value[0] = beep->enabled; + if (chs & 2) ucontrol->value.integer.value[1] = beep->enabled; return 0; } -- GitLab From c9f2ce5f04b6a407623e0f270cb345fdd6a351d1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 7 Apr 2020 10:44:02 +0200 Subject: [PATCH 0920/1278] ALSA: ice1724: Fix invalid access for enumerated ctl items commit c47914c00be346bc5b48c48de7b0da5c2d1a296c upstream. The access to Analog Capture Source control value implemented in prodigy_hifi.c is wrong, as caught by the recently introduced sanity check; it should be accessing value.enumerated.item[] instead of value.integer.value[]. This patch corrects the wrong access pattern. 
Fixes: 6b8d6e5518e2 ("[ALSA] ICE1724: Added support for Audiotrak Prodigy 7.1 HiFi & HD2, Hercules Fortissimo IV") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=207139 Reviewed-by: Jaroslav Kysela Cc: Link: https://lore.kernel.org/r/20200407084402.25589-3-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/ice1712/prodigy_hifi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/ice1712/prodigy_hifi.c b/sound/pci/ice1712/prodigy_hifi.c index 2697402b5195..41f6450a2539 100644 --- a/sound/pci/ice1712/prodigy_hifi.c +++ b/sound/pci/ice1712/prodigy_hifi.c @@ -569,7 +569,7 @@ static int wm_adc_mux_enum_get(struct snd_kcontrol *kcontrol, struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol); mutex_lock(&ice->gpio_mutex); - ucontrol->value.integer.value[0] = wm_get(ice, WM_ADC_MUX) & 0x1f; + ucontrol->value.enumerated.item[0] = wm_get(ice, WM_ADC_MUX) & 0x1f; mutex_unlock(&ice->gpio_mutex); return 0; } @@ -583,7 +583,7 @@ static int wm_adc_mux_enum_put(struct snd_kcontrol *kcontrol, mutex_lock(&ice->gpio_mutex); oval = wm_get(ice, WM_ADC_MUX); - nval = (oval & 0xe0) | ucontrol->value.integer.value[0]; + nval = (oval & 0xe0) | ucontrol->value.enumerated.item[0]; if (nval != oval) { wm_put(ice, WM_ADC_MUX, nval); change = 1; -- GitLab From b53356c9939bd672040a3aaa6e201d21cc06e6ca Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 3 Apr 2020 09:25:15 +0200 Subject: [PATCH 0921/1278] ALSA: pcm: oss: Fix regression by buffer overflow fix commit ae769d3556644888c964635179ef192995f40793 upstream. The recent fix for the OOB access in PCM OSS plugins (commit f2ecf903ef06: "ALSA: pcm: oss: Avoid plugin buffer overflow") caused a regression on OSS applications. The patch introduced the size check in client and slave size calculations to limit to each plugin's buffer size, but I overlooked that some code paths call those without allocating the buffer but just for estimation. 
This patch fixes the bug by skipping the size check for those code paths while keeping checking in the actual transfer calls. Fixes: f2ecf903ef06 ("ALSA: pcm: oss: Avoid plugin buffer overflow") Tested-and-reported-by: Jari Ruusu Cc: Link: https://lore.kernel.org/r/20200403072515.25539-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_plugin.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 40d2d39151bf..3ecc070738e8 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -196,7 +196,9 @@ int snd_pcm_plugin_free(struct snd_pcm_plugin *plugin) return 0; } -snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_pcm_uframes_t drv_frames) +static snd_pcm_sframes_t plug_client_size(struct snd_pcm_substream *plug, + snd_pcm_uframes_t drv_frames, + bool check_size) { struct snd_pcm_plugin *plugin, *plugin_prev, *plugin_next; int stream; @@ -209,7 +211,7 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p if (stream == SNDRV_PCM_STREAM_PLAYBACK) { plugin = snd_pcm_plug_last(plug); while (plugin && drv_frames > 0) { - if (drv_frames > plugin->buf_frames) + if (check_size && drv_frames > plugin->buf_frames) drv_frames = plugin->buf_frames; plugin_prev = plugin->prev; if (plugin->src_frames) @@ -222,7 +224,7 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p plugin_next = plugin->next; if (plugin->dst_frames) drv_frames = plugin->dst_frames(plugin, drv_frames); - if (drv_frames > plugin->buf_frames) + if (check_size && drv_frames > plugin->buf_frames) drv_frames = plugin->buf_frames; plugin = plugin_next; } @@ -231,7 +233,9 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p return drv_frames; } -snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, 
snd_pcm_uframes_t clt_frames) +static snd_pcm_sframes_t plug_slave_size(struct snd_pcm_substream *plug, + snd_pcm_uframes_t clt_frames, + bool check_size) { struct snd_pcm_plugin *plugin, *plugin_prev, *plugin_next; snd_pcm_sframes_t frames; @@ -252,14 +256,14 @@ snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pc if (frames < 0) return frames; } - if (frames > plugin->buf_frames) + if (check_size && frames > plugin->buf_frames) frames = plugin->buf_frames; plugin = plugin_next; } } else if (stream == SNDRV_PCM_STREAM_CAPTURE) { plugin = snd_pcm_plug_last(plug); while (plugin) { - if (frames > plugin->buf_frames) + if (check_size && frames > plugin->buf_frames) frames = plugin->buf_frames; plugin_prev = plugin->prev; if (plugin->src_frames) { @@ -274,6 +278,18 @@ snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pc return frames; } +snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, + snd_pcm_uframes_t drv_frames) +{ + return plug_client_size(plug, drv_frames, false); +} + +snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, + snd_pcm_uframes_t clt_frames) +{ + return plug_slave_size(plug, clt_frames, false); +} + static int snd_pcm_plug_formats(const struct snd_mask *mask, snd_pcm_format_t format) { @@ -629,7 +645,7 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st src_channels = dst_channels; plugin = next; } - return snd_pcm_plug_client_size(plug, frames); + return plug_client_size(plug, frames, true); } snd_pcm_sframes_t snd_pcm_plug_read_transfer(struct snd_pcm_substream *plug, struct snd_pcm_plugin_channel *dst_channels_final, snd_pcm_uframes_t size) @@ -639,7 +655,7 @@ snd_pcm_sframes_t snd_pcm_plug_read_transfer(struct snd_pcm_substream *plug, str snd_pcm_sframes_t frames = size; int err; - frames = snd_pcm_plug_slave_size(plug, frames); + frames = plug_slave_size(plug, frames, true); if (frames < 0) return frames; -- 
GitLab From f47413b98c0a6378760205657bfff1da5e8da11f Mon Sep 17 00:00:00 2001 From: Thomas Hebb Date: Mon, 30 Mar 2020 12:09:37 -0400 Subject: [PATCH 0922/1278] ALSA: doc: Document PC Beep Hidden Register on Realtek ALC256 commit f128090491c3f5aacef91a863f8c52abf869c436 upstream. This codec (among others) has a hidden set of audio routes, apparently designed to allow PC Beep output without a mixer widget on the output path, which are controlled by an undocumented Realtek vendor register. The default configuration of these routes means that certain inputs aren't accessible, necessitating driver control of the register. However, Realtek has provided no documentation of the register, instead opting to fix issues by providing magic numbers, most of which have been at least somewhat erroneous. These magic numbers then get copied by others into model-specific fixups, leading to a fragmented and buggy set of configurations. To get out of this situation, I've reverse engineered the register by flipping bits and observing how the codec's behavior changes. This commit documents my findings. It does not change any code. 
Cc: stable@vger.kernel.org Signed-off-by: Thomas Hebb Link: https://lore.kernel.org/r/bd69dfdeaf40ff31c4b7b797c829bb320031739c.1585584498.git.tommyhebb@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- Documentation/sound/hd-audio/index.rst | 1 + .../sound/hd-audio/realtek-pc-beep.rst | 129 ++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 Documentation/sound/hd-audio/realtek-pc-beep.rst diff --git a/Documentation/sound/hd-audio/index.rst b/Documentation/sound/hd-audio/index.rst index f8a72ffffe66..6e12de9fc34e 100644 --- a/Documentation/sound/hd-audio/index.rst +++ b/Documentation/sound/hd-audio/index.rst @@ -8,3 +8,4 @@ HD-Audio models controls dp-mst + realtek-pc-beep diff --git a/Documentation/sound/hd-audio/realtek-pc-beep.rst b/Documentation/sound/hd-audio/realtek-pc-beep.rst new file mode 100644 index 000000000000..be47c6f76a6e --- /dev/null +++ b/Documentation/sound/hd-audio/realtek-pc-beep.rst @@ -0,0 +1,129 @@ +=============================== +Realtek PC Beep Hidden Register +=============================== + +This file documents the "PC Beep Hidden Register", which is present in certain +Realtek HDA codecs and controls a muxer and pair of passthrough mixers that can +route audio between pins but aren't themselves exposed as HDA widgets. As far +as I can tell, these hidden routes are designed to allow flexible PC Beep output +for codecs that don't have mixer widgets in their output paths. Why it's easier +to hide a mixer behind an undocumented vendor register than to just expose it +as a widget, I have no idea. + +Register Description +==================== + +The register is accessed via processing coefficient 0x36 on NID 20h. 
Bits not +identified below have no discernible effect on my machine, a Dell XPS 13 9350:: + + MSB LSB + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | |h|S|L| | B |R| | Known bits + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ + |0|0|1|1| 0x7 |0|0x0|1| 0x7 | Reset value + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +1Ah input select (B): 2 bits + When zero, expose the PC Beep line (from the internal beep generator, when + enabled with the Set Beep Generation verb on NID 01h, or else from the + external PCBEEP pin) on the 1Ah pin node. When nonzero, expose the headphone + jack (or possibly Line In on some machines) input instead. If PC Beep is + selected, the 1Ah boost control has no effect. + +Amplify 1Ah loopback, left (L): 1 bit + Amplify the left channel of 1Ah before mixing it into outputs as specified + by h and S bits. Does not affect the level of 1Ah exposed to other widgets. + +Amplify 1Ah loopback, right (R): 1 bit + Amplify the right channel of 1Ah before mixing it into outputs as specified + by h and S bits. Does not affect the level of 1Ah exposed to other widgets. + +Loopback 1Ah to 21h [active low] (h): 1 bit + When zero, mix 1Ah (possibly with amplification, depending on L and R bits) + into 21h (headphone jack on my machine). Mixed signal respects the mute + setting on 21h. + +Loopback 1Ah to 14h (S): 1 bit + When one, mix 1Ah (possibly with amplification, depending on L and R bits) + into 14h (internal speaker on my machine). Mixed signal **ignores** the mute + setting on 14h and is present whenever 14h is configured as an output. 
+ +Path diagrams +============= + +1Ah input selection (DIV is the PC Beep divider set on NID 01h):: + + <Beep generator> <PCBEEP pin> <Headphone jack> + | | | + +--DIV--+--!DIV--+ {1Ah boost control} + | | + +--(b == 0)--+--(b != 0)--+ + | + >1Ah (Beep/Headphone Mic/Line In)< + +Loopback of 1Ah to 21h/14h:: + + <1Ah (Beep/Headphone Mic/Line In)> + | + {amplify if L/R} + | + +-----!h-----+-----S-----+ + | | + {21h mute control} | + | | + >21h (Headphone)< >14h (Internal Speaker)< + +Background +========== + +All Realtek HDA codecs have a vendor-defined widget with node ID 20h which +provides access to a bank of registers that control various codec functions. +Registers are read and written via the standard HDA processing coefficient +verbs (Set/Get Coefficient Index, Set/Get Processing Coefficient). The node is +named "Realtek Vendor Registers" in public datasheets' verb listings and, +apart from that, is entirely undocumented. + +This particular register, exposed at coefficient 0x36 and named in commits from +Realtek, is of note: unlike most registers, which seem to control detailed +amplifier parameters not in scope of the HDA specification, it controls audio +routing which could just as easily have been defined using standard HDA mixer +and selector widgets. + +Specifically, it selects between two sources for the input pin widget with Node +ID (NID) 1Ah: the widget's signal can come either from an audio jack (on my +laptop, a Dell XPS 13 9350, it's the headphone jack, but comments in Realtek +commits indicate that it might be a Line In on some machines) or from the PC +Beep line (which is itself multiplexed between the codec's internal beep +generator and external PCBEEP pin, depending on if the beep generator is +enabled via verbs on NID 01h). Additionally, it can mix (with optional +amplification) that signal onto the 21h and/or 14h output pins. + +The register's reset value is 0x3717, corresponding to PC Beep on 1Ah that is +then amplified and mixed into both the headphones and the speakers.
Not only +does this violate the HDA specification, which says that "[a vendor defined +beep input pin] connection may be maintained *only* while the Link reset +(**RST#**) is asserted", it means that we cannot ignore the register if we care +about the input that 1Ah would otherwise expose or if the PCBEEP trace is +poorly shielded and picks up chassis noise (both of which are the case on my +machine). + +Unfortunately, there are lots of ways to get this register configuration wrong. +Linux, it seems, has gone through most of them. For one, the register resets +after S3 suspend: judging by existing code, this isn't the case for all vendor +registers, and it's led to some fixes that improve behavior on cold boot but +don't last after suspend. Other fixes have successfully switched the 1Ah input +away from PC Beep but have failed to disable both loopback paths. On my +machine, this means that the headphone input is amplified and looped back to +the headphone output, which uses the exact same pins! As you might expect, this +causes terrible headphone noise, the character of which is controlled by the +1Ah boost control. (If you've seen instructions online to fix XPS 13 headphone +noise by changing "Headphone Mic Boost" in ALSA, now you know why.) + +The information here has been obtained through black-box reverse engineering of +the ALC256 codec's behavior and is not guaranteed to be correct. It likely +also applies for the ALC255, ALC257, ALC235, and ALC236, since those codecs +seem to be close relatives of the ALC256. (They all share one initialization +function.) Additionally, other codecs like the ALC225 and ALC285 also have this +register, judging by existing fixups in ``patch_realtek.c``, but specific +data (e.g. node IDs, bit positions, pin mappings) for those codecs may differ +from what I've described here. 
-- GitLab From c3c960145eb8942de21af2e18cc4e1512bebec4b Mon Sep 17 00:00:00 2001 From: Thomas Hebb Date: Mon, 30 Mar 2020 12:09:38 -0400 Subject: [PATCH 0923/1278] ALSA: hda/realtek - Set principled PC Beep configuration for ALC256 commit c44737449468a0bdc50e09ec75e530f208391561 upstream. The Realtek PC Beep Hidden Register[1] is currently set by patch_realtek.c in two different places: In alc_fill_eapd_coef(), it's set to the value 0x5757, corresponding to non-beep input on 1Ah and no 1Ah loopback to either headphones or speakers. (Although, curiously, the loopback amp is still enabled.) This write was added fairly recently by commit e3743f431143 ("ALSA: hda/realtek - Dell headphone has noise on unmute for ALC236") and is a safe default. However, it happens in the wrong place: alc_fill_eapd_coef() runs on module load and cold boot but not on S3 resume, meaning the register loses its value after suspend. Conversely, in alc256_init(), the register is updated to unset bit 13 (disable speaker loopback) and set bit 5 (set non-beep input on 1Ah). Although this write does run on S3 resume, it's not quite enough to fix up the register's default value of 0x3717. What's missing is a set of bit 14 to disable headphone loopback. Without that, we end up with a feedback loop where the headphone jack is being driven by amplified samples of itself[2]. This change eliminates the update in alc256_init() and replaces it with the 0x5757 write from alc_fill_eapd_coef(). Kailang says that 0x5757 is supposed to be the codec's default value, so using it will make debugging easier for Realtek. Affects the ALC255, ALC256, ALC257, ALC235, and ALC236 codecs. [1] Newly documented in Documentation/sound/hd-audio/realtek-pc-beep.rst [2] Setting the "Headphone Mic Boost" control from userspace changes this feedback loop and has been a widely-shared workaround for headphone noise on laptops like the Dell XPS 13 9350. This commit eliminates the feedback loop and makes the workaround unnecessary. 
Fixes: e1e8c1fdce8b ("ALSA: hda/realtek - Dell headphone has noise on unmute for ALC236") Cc: stable@vger.kernel.org Signed-off-by: Thomas Hebb Link: https://lore.kernel.org/r/bf22b417d1f2474b12011c2a39ed6cf8b06d3bf5.1585584498.git.tommyhebb@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b500dad33ea9..3fded87817c6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -333,7 +333,9 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0215: case 0x10ec0233: case 0x10ec0235: + case 0x10ec0236: case 0x10ec0255: + case 0x10ec0256: case 0x10ec0257: case 0x10ec0282: case 0x10ec0283: @@ -345,11 +347,6 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0300: alc_update_coef_idx(codec, 0x10, 1<<9, 0); break; - case 0x10ec0236: - case 0x10ec0256: - alc_write_coef_idx(codec, 0x36, 0x5757); - alc_update_coef_idx(codec, 0x10, 1<<9, 0); - break; case 0x10ec0275: alc_update_coef_idx(codec, 0xe, 0, 1<<0); break; @@ -3122,7 +3119,13 @@ static void alc256_init(struct hda_codec *codec) alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x4); /* Hight power */ alc_update_coefex_idx(codec, 0x53, 0x02, 0x8000, 1 << 15); /* Clear bit */ alc_update_coefex_idx(codec, 0x53, 0x02, 0x8000, 0 << 15); - alc_update_coef_idx(codec, 0x36, 1 << 13, 1 << 5); /* Switch pcbeep path to Line in path*/ + /* + * Expose headphone mic (or possibly Line In on some machines) instead + * of PC Beep on 1Ah, and disable 1Ah loopback for all outputs. See + * Documentation/sound/hd-audio/realtek-pc-beep.rst for details of + * this register. 
+ */ + alc_write_coef_idx(codec, 0x36, 0x5757); } static void alc256_shutup(struct hda_codec *codec) -- GitLab From 49174d5d54b2d0b1b11a807d6b06b507ad715bd9 Mon Sep 17 00:00:00 2001 From: Benoit Parrot Date: Mon, 2 Mar 2020 14:56:52 +0100 Subject: [PATCH 0924/1278] media: ti-vpe: cal: fix disable_irqs to only the intended target commit 1db56284b9da9056093681f28db48a09a243274b upstream. disable_irqs() was mistakenly disabling all interrupts when called. This causes all port streams to stop even if only stopping one of them. Cc: stable Signed-off-by: Benoit Parrot Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/ti-vpe/cal.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/media/platform/ti-vpe/cal.c b/drivers/media/platform/ti-vpe/cal.c index 42e383a48ffe..b6dcae1ecc1b 100644 --- a/drivers/media/platform/ti-vpe/cal.c +++ b/drivers/media/platform/ti-vpe/cal.c @@ -544,16 +544,16 @@ static void enable_irqs(struct cal_ctx *ctx) static void disable_irqs(struct cal_ctx *ctx) { + u32 val; + /* Disable IRQ_WDMA_END 0/1 */ - reg_write_field(ctx->dev, - CAL_HL_IRQENABLE_CLR(2), - CAL_HL_IRQ_CLEAR, - CAL_HL_IRQ_MASK(ctx->csi2_port)); + val = 0; + set_field(&val, CAL_HL_IRQ_CLEAR, CAL_HL_IRQ_MASK(ctx->csi2_port)); + reg_write(ctx->dev, CAL_HL_IRQENABLE_CLR(2), val); /* Disable IRQ_WDMA_START 0/1 */ - reg_write_field(ctx->dev, - CAL_HL_IRQENABLE_CLR(3), - CAL_HL_IRQ_CLEAR, - CAL_HL_IRQ_MASK(ctx->csi2_port)); + val = 0; + set_field(&val, CAL_HL_IRQ_CLEAR, CAL_HL_IRQ_MASK(ctx->csi2_port)); + reg_write(ctx->dev, CAL_HL_IRQENABLE_CLR(3), val); /* Todo: Add VC_IRQ and CSI2_COMPLEXIO_IRQ handling */ reg_write(ctx->dev, CAL_CSI2_VC_IRQENABLE(1), 0); } -- GitLab From c9c13860da59b8b80224cada557eb36f4a4ac3b2 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 5 Mar 2020 13:24:25 +0100 Subject: [PATCH 0925/1278] acpi/x86: ignore unspecified bit positions in the ACPI
global lock field commit ecb9c790999fd6c5af0f44783bd0217f0b89ec2b upstream. The value in "new" is constructed from "old" such that all bits defined as reserved by the ACPI spec[1] are left untouched. But if those bits do not happen to be all zero, "new < 3" will not evaluate to true. The firmware of the laptop(s) Medion MD63490 / Akoya P15648 comes with garbage inside the "FACS" ACPI table. The starting value is old=0x4944454d, therefore new=0x4944454e, which is >= 3. Mask off the reserved bits. [1] https://uefi.org/sites/default/files/resources/ACPI_6_2.pdf Link: https://bugzilla.kernel.org/show_bug.cgi?id=206553 Cc: All applicable Signed-off-by: Jan Engelhardt Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/acpi/boot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6dda3595acf8..40d7072be709 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1738,7 +1738,7 @@ int __acpi_acquire_global_lock(unsigned int *lock) new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); val = cmpxchg(lock, old, new); } while (unlikely (val != old)); - return (new < 3) ? -1 : 0; + return ((new & 0x3) < 3) ? -1 : 0; } int __acpi_release_global_lock(unsigned int *lock) -- GitLab From fa545a5f3bb5e6d2cd76dd776af529caa183fc7e Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Fri, 3 Apr 2020 22:51:33 +0200 Subject: [PATCH 0926/1278] thermal: devfreq_cooling: inline all stubs for CONFIG_DEVFREQ_THERMAL=n commit 3f5b9959041e0db6dacbea80bb833bff5900999f upstream. When CONFIG_DEVFREQ_THERMAL is disabled all functions except of_devfreq_cooling_register_power() were already inlined. Also inline the last function to avoid compile errors when multiple drivers call of_devfreq_cooling_register_power() when CONFIG_DEVFREQ_THERMAL is not set. 
Compilation failed with the following message: multiple definition of `of_devfreq_cooling_register_power' (which then lists all usages of of_devfreq_cooling_register_power()) Thomas Zimmermann reported this problem [0] on a kernel config with CONFIG_DRM_LIMA={m,y}, CONFIG_DRM_PANFROST={m,y} and CONFIG_DEVFREQ_THERMAL=n after both, the lima and panfrost drivers gained devfreq cooling support. [0] https://www.spinics.net/lists/dri-devel/msg252825.html Fixes: a76caf55e5b356 ("thermal: Add devfreq cooling") Cc: stable@vger.kernel.org Reported-by: Thomas Zimmermann Signed-off-by: Martin Blumenstingl Tested-by: Thomas Zimmermann Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20200403205133.1101808-1-martin.blumenstingl@googlemail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/devfreq_cooling.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h index 4635f95000a4..79a6e37a1d6f 100644 --- a/include/linux/devfreq_cooling.h +++ b/include/linux/devfreq_cooling.h @@ -75,7 +75,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *dfc); #else /* !CONFIG_DEVFREQ_THERMAL */ -struct thermal_cooling_device * +static inline struct thermal_cooling_device * of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, struct devfreq_cooling_power *dfc_power) { -- GitLab From 2e68019fb4e20281072f7b3dd4db863c9e9f01d5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 3 Apr 2020 07:33:20 -0700 Subject: [PATCH 0927/1278] nvme-fc: Revert "add module to ops template to allow module references" commit 8c5c660529209a0e324c1c1a35ce3f83d67a2aa5 upstream. The original patch was to resolve the lldd being able to be unloaded while being used to talk to the boot device of the system. However, the end result of the original patch is that any driver unload while a nvme controller is live via the lldd is now being prohibited. 
Given the module reference, the module teardown routine can't be called, thus there's no way, other than manual actions to terminate the controllers. Fixes: 863fbae929c7 ("nvme_fc: add module to ops template to allow module references") Cc: # v5.4+ Signed-off-by: James Smart Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/fc.c | 14 ++------------ drivers/nvme/target/fcloop.c | 1 - drivers/scsi/lpfc/lpfc_nvme.c | 2 -- drivers/scsi/qla2xxx/qla_nvme.c | 1 - include/linux/nvme-fc-driver.h | 4 ---- 5 files changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 9e4d2ecf736d..058d542647dd 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -337,8 +337,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, !template->ls_req || !template->fcp_io || !template->ls_abort || !template->fcp_abort || !template->max_hw_queues || !template->max_sgl_segments || - !template->max_dif_sgl_segments || !template->dma_boundary || - !template->module) { + !template->max_dif_sgl_segments || !template->dma_boundary) { ret = -EINVAL; goto out_reghost_failed; } @@ -1763,7 +1762,6 @@ nvme_fc_ctrl_free(struct kref *ref) { struct nvme_fc_ctrl *ctrl = container_of(ref, struct nvme_fc_ctrl, ref); - struct nvme_fc_lport *lport = ctrl->lport; unsigned long flags; if (ctrl->ctrl.tagset) { @@ -1789,7 +1787,6 @@ nvme_fc_ctrl_free(struct kref *ref) if (ctrl->ctrl.opts) nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); - module_put(lport->ops->module); } static void @@ -2768,15 +2765,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, goto out_fail; } - if (!try_module_get(lport->ops->module)) { - ret = -EUNATCH; - goto out_free_ctrl; - } - idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; - goto out_mod_put; + goto out_free_ctrl; } ctrl->ctrl.opts = opts; @@ -2923,8 +2915,6 @@ 
nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, out_free_ida: put_device(ctrl->dev); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); -out_mod_put: - module_put(lport->ops->module); out_free_ctrl: kfree(ctrl); out_fail: diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index b8fe8702065b..096523d8dd42 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -693,7 +693,6 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) #define FCLOOP_DMABOUND_4G 0xFFFFFFFF static struct nvme_fc_port_template fctemplate = { - .module = THIS_MODULE, .localport_delete = fcloop_localport_delete, .remoteport_delete = fcloop_remoteport_delete, .create_queue = fcloop_create_queue, diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index af937b91765e..fcf4b4175d77 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1591,8 +1591,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* Declare and initialization an instance of the FC NVME template. 
*/ static struct nvme_fc_port_template lpfc_nvme_template = { - .module = THIS_MODULE, - /* initiator-based functions */ .localport_delete = lpfc_nvme_localport_delete, .remoteport_delete = lpfc_nvme_remoteport_delete, diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 7dceed021236..6b33a1f24f56 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -578,7 +578,6 @@ static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport) } static struct nvme_fc_port_template qla_nvme_fc_transport = { - .module = THIS_MODULE, .localport_delete = qla_nvme_localport_delete, .remoteport_delete = qla_nvme_remoteport_delete, .create_queue = qla_nvme_alloc_queue, diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index e9c3b98df3e2..a726f96010d5 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -279,8 +279,6 @@ struct nvme_fc_remote_port { * * Host/Initiator Transport Entrypoints/Parameters: * - * @module: The LLDD module using the interface - * * @localport_delete: The LLDD initiates deletion of a localport via * nvme_fc_deregister_localport(). However, the teardown is * asynchronous. This routine is called upon the completion of the @@ -394,8 +392,6 @@ struct nvme_fc_remote_port { * Value is Mandatory. Allowed to be zero. */ struct nvme_fc_port_template { - struct module *module; - /* initiator-based functions */ void (*localport_delete)(struct nvme_fc_local_port *); void (*remoteport_delete)(struct nvme_fc_remote_port *); -- GitLab From fac29b1f331dc4f87046282a365cfc21f856e40d Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 13 Mar 2020 17:53:47 +0800 Subject: [PATCH 0928/1278] PCI/ASPM: Clear the correct bits when enabling L1 substates commit 58a3862a10a317a81097ab0c78aecebabb1704f5 upstream. In pcie_config_aspm_l1ss(), we cleared the wrong bits when enabling ASPM L1 Substates. 
Instead of the L1.x enable bits (PCI_L1SS_CTL1_L1SS_MASK, 0xf), we cleared the Link Activation Interrupt Enable bit (PCI_L1SS_CAP_L1_PM_SS, 0x10). Clear the L1.x enable bits before writing the new L1.x configuration. [bhelgaas: changelog] Fixes: aeda9adebab8 ("PCI/ASPM: Configure L1 substate settings") Link: https://lore.kernel.org/r/1584093227-1292-1-git-send-email-yangyicong@hisilicon.com Signed-off-by: Yicong Yang Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v4.11+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/aspm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 6b4e82a4b64e..6f58767b5190 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -693,9 +693,9 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) /* Enable what we need to enable */ pci_clear_and_set_dword(parent, up_cap_ptr + PCI_L1SS_CTL1, - PCI_L1SS_CAP_L1_PM_SS, val); + PCI_L1SS_CTL1_L1SS_MASK, val); pci_clear_and_set_dword(child, dw_cap_ptr + PCI_L1SS_CTL1, - PCI_L1SS_CAP_L1_PM_SS, val); + PCI_L1SS_CTL1_L1SS_MASK, val); } static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val) -- GitLab From 50a58e3542530eca3f3840acc89a151300ae397e Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 24 Feb 2020 15:23:36 +0530 Subject: [PATCH 0929/1278] PCI: endpoint: Fix for concurrent memory allocation in OB address region commit 04e046ca57ebed3943422dee10eec9e73aec081e upstream. pci-epc-mem uses a bitmap to manage the Endpoint outbound (OB) address region. This address region will be shared by multiple endpoint functions (in the case of multi function endpoint) and it has to be protected from concurrent access to avoid updating an inconsistent state. Use a mutex to protect bitmap updates to prevent the memory allocation API from returning incorrect addresses. 
Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/endpoint/pci-epc-mem.c | 10 ++++++++-- include/linux/pci-epc.h | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/pci/endpoint/pci-epc-mem.c b/drivers/pci/endpoint/pci-epc-mem.c index 83b7d5d3fc3e..60fbfe92e0ef 100644 --- a/drivers/pci/endpoint/pci-epc-mem.c +++ b/drivers/pci/endpoint/pci-epc-mem.c @@ -90,6 +90,7 @@ int __pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_base, size_t size, mem->page_size = page_size; mem->pages = pages; mem->size = size; + mutex_init(&mem->lock); epc->mem = mem; @@ -133,7 +134,7 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, phys_addr_t *phys_addr, size_t size) { int pageno; - void __iomem *virt_addr; + void __iomem *virt_addr = NULL; struct pci_epc_mem *mem = epc->mem; unsigned int page_shift = ilog2(mem->page_size); int order; @@ -141,15 +142,18 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, size = ALIGN(size, mem->page_size); order = pci_epc_mem_get_order(mem, size); + mutex_lock(&mem->lock); pageno = bitmap_find_free_region(mem->bitmap, mem->pages, order); if (pageno < 0) - return NULL; + goto ret; *phys_addr = mem->phys_base + (pageno << page_shift); virt_addr = ioremap(*phys_addr, size); if (!virt_addr) bitmap_release_region(mem->bitmap, pageno, order); +ret: + mutex_unlock(&mem->lock); return virt_addr; } EXPORT_SYMBOL_GPL(pci_epc_mem_alloc_addr); @@ -175,7 +179,9 @@ void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr, pageno = (phys_addr - mem->phys_base) >> page_shift; size = ALIGN(size, mem->page_size); order = pci_epc_mem_get_order(mem, size); + mutex_lock(&mem->lock); bitmap_release_region(mem->bitmap, pageno, order); + mutex_unlock(&mem->lock); } EXPORT_SYMBOL_GPL(pci_epc_mem_free_addr); diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index f7a04e1af112..abbc74621f38 
100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -63,6 +63,7 @@ struct pci_epc_ops { * @bitmap: bitmap to manage the PCI address space * @pages: number of bits representing the address region * @page_size: size of each page + * @lock: mutex to protect bitmap */ struct pci_epc_mem { phys_addr_t phys_base; @@ -70,6 +71,8 @@ struct pci_epc_mem { unsigned long *bitmap; size_t page_size; int pages; + /* mutex to protect against concurrent access for memory allocation*/ + struct mutex lock; }; /** -- GitLab From e0c85c527d584a7891635a517987b0f9d7575768 Mon Sep 17 00:00:00 2001 From: Yang Xu Date: Fri, 28 Feb 2020 12:41:51 +0800 Subject: [PATCH 0930/1278] KEYS: reaching the keys quotas correctly commit 2e356101e72ab1361821b3af024d64877d9a798d upstream. Currently, when we add a new user key, the calltrace is as below: add_key() key_create_or_update() key_alloc() __key_instantiate_and_link generic_key_instantiate key_payload_reserve ...... Since commit a08bf91ce28e ("KEYS: allow reaching the keys quotas exactly"), we can reach max bytes/keys in key_alloc, but we forgot to remove this limit when we reserve space for payload in key_payload_reserve. So we can only reach max keys but not max bytes when having delta between plen and type->def_datalen. Remove this limit when instantiating the key, so we can keep consistent with key_alloc. Also, fix the similar problem in keyctl_chown_key().
Fixes: 0b77f5bfb45c ("keys: make the keyring quotas controllable through /proc/sys") Fixes: a08bf91ce28e ("KEYS: allow reaching the keys quotas exactly") Cc: stable@vger.kernel.org # 5.0.x Cc: Eric Biggers Signed-off-by: Yang Xu Reviewed-by: Jarkko Sakkinen Reviewed-by: Eric Biggers Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- security/keys/key.c | 2 +- security/keys/keyctl.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/security/keys/key.c b/security/keys/key.c index 17244f5f54c6..5f4cb271464a 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -383,7 +383,7 @@ int key_payload_reserve(struct key *key, size_t datalen) spin_lock(&key->user->lock); if (delta > 0 && - (key->user->qnbytes + delta >= maxbytes || + (key->user->qnbytes + delta > maxbytes || key->user->qnbytes + delta < key->user->qnbytes)) { ret = -EDQUOT; } diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index ca31af186abd..e00e20204de0 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -882,8 +882,8 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group) key_quota_root_maxbytes : key_quota_maxbytes; spin_lock(&newowner->lock); - if (newowner->qnkeys + 1 >= maxkeys || - newowner->qnbytes + key->quotalen >= maxbytes || + if (newowner->qnkeys + 1 > maxkeys || + newowner->qnbytes + key->quotalen > maxbytes || newowner->qnbytes + key->quotalen < newowner->qnbytes) goto quota_overrun; -- GitLab From 1bc389539138520a765c931e35cc8bfe920f1221 Mon Sep 17 00:00:00 2001 From: Sungbo Eo Date: Sat, 21 Mar 2020 22:38:42 +0900 Subject: [PATCH 0931/1278] irqchip/versatile-fpga: Apply clear-mask earlier commit 6a214a28132f19ace3d835a6d8f6422ec80ad200 upstream. Clear its own IRQs before the parent IRQ get enabled, so that the remaining IRQs do not accidentally interrupt the parent IRQ controller. This patch also fixes a reboot bug on OX820 SoC, where the remaining rps-timer IRQ raises a GIC interrupt that is left pending. 
After that, the rps-timer IRQ is cleared during driver initialization, and there's no IRQ left in rps-irq when local_irq_enable() is called, which evokes an error message "unexpected IRQ trap". Fixes: bdd272cbb97a ("irqchip: versatile FPGA: support cascaded interrupts from DT") Signed-off-by: Sungbo Eo Signed-off-by: Marc Zyngier Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200321133842.2408823-1-mans0n@gorani.run Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-versatile-fpga.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c index 70e2cfff8175..f1386733d3bc 100644 --- a/drivers/irqchip/irq-versatile-fpga.c +++ b/drivers/irqchip/irq-versatile-fpga.c @@ -212,6 +212,9 @@ int __init fpga_irq_of_init(struct device_node *node, if (of_property_read_u32(node, "valid-mask", &valid_mask)) valid_mask = 0; + writel(clear_mask, base + IRQ_ENABLE_CLEAR); + writel(clear_mask, base + FIQ_ENABLE_CLEAR); + /* Some chips are cascaded from a parent IRQ */ parent_irq = irq_of_parse_and_map(node, 0); if (!parent_irq) { @@ -221,9 +224,6 @@ int __init fpga_irq_of_init(struct device_node *node, fpga_irq_init(base, node->name, 0, parent_irq, valid_mask, node); - writel(clear_mask, base + IRQ_ENABLE_CLEAR); - writel(clear_mask, base + FIQ_ENABLE_CLEAR); - /* * On Versatile AB/PB, some secondary interrupts have a direct * pass-thru to the primary controller for IRQs 20 and 22-31 which need -- GitLab From 10d0da30ec935de07431c87aaac07a052934ca3f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 22 Jan 2019 14:18:42 -0600 Subject: [PATCH 0932/1278] MIPS: OCTEON: irq: Fix potential NULL pointer dereference commit 792a402c2840054533ef56279c212ef6da87d811 upstream. There is a potential NULL pointer dereference in case kzalloc() fails and returns NULL. 
Fix this by adding a NULL check on *cd* This bug was detected with the help of Coccinelle. Fixes: 64b139f97c01 ("MIPS: OCTEON: irq: add CIB and other fixes") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- arch/mips/cavium-octeon/octeon-irq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index b3aec101a65d..a27b3d70393f 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -2199,6 +2199,9 @@ static int octeon_irq_cib_map(struct irq_domain *d, } cd = kzalloc(sizeof(*cd), GFP_KERNEL); + if (!cd) + return -ENOMEM; + cd->host_data = host_data; cd->bit = hw; -- GitLab From 8003b03b1e0511fa915842c76050e7e2463fe1f6 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Sat, 29 Feb 2020 17:13:47 +0100 Subject: [PATCH 0933/1278] ath9k: Handle txpower changes even when TPC is disabled commit 968ae2caad0782db5dbbabb560d3cdefd2945d38 upstream. When TPC is disabled IEEE80211_CONF_CHANGE_POWER event can be handled to reconfigure HW's maximum txpower. 
This fixes 0dBm txpower setting when user attaches to an interface for the first time with the following scenario: ieee80211_do_open() ath9k_add_interface() ath9k_set_txpower() /* Set TX power with not yet initialized sc->hw->conf.power_level */ ieee80211_hw_config() /* Initialize sc->hw->conf.power_level and raise IEEE80211_CONF_CHANGE_POWER */ ath9k_config() /* IEEE80211_CONF_CHANGE_POWER is ignored */ This issue can be reproduced with the following: $ modprobe -r ath9k $ modprobe ath9k $ wpa_supplicant -i wlan0 -c /tmp/wpa.conf & $ iw dev /* Here TX power is either 0 or 3 depending on RF chain */ $ killall wpa_supplicant $ iw dev /* TX power goes back to calibrated value and subsequent calls will be fine */ Fixes: 283dd11994cde ("ath9k: add per-vif TX power capability") Cc: stable@vger.kernel.org Signed-off-by: Remi Pommarel Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 72ad84fde5c1..8e084670c3c2 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1456,6 +1456,9 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) ath_chanctx_set_channel(sc, ctx, &hw->conf.chandef); } + if (changed & IEEE80211_CONF_CHANGE_POWER) + ath9k_set_txpower(sc, NULL); + mutex_unlock(&sc->mutex); ath9k_ps_restore(sc); -- GitLab From 28c63ef17d620f0e95458c56c5d839ea3de3e500 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 30 Mar 2020 19:01:04 -0500 Subject: [PATCH 0934/1278] signal: Extend exec_id to 64bits commit d1e7fd6462ca9fc76650fbe6ca800e35b24267da upstream. Replace the 32bit exec_id with a 64bit exec_id to make it impossible to wrap the exec_id counter. With care an attacker can cause exec_id wrap and send arbitrary signals to a newly exec'd parent.
This bypasses the signal sending checks if the parent changes their credentials during exec. The severity of this problem can be seen in that in my limited testing of a 32bit exec_id it can take as little as 19s to exec 65536 times. Which means that it can take as little as 14 days to wrap a 32bit exec_id. Adam Zabrocki has succeeded wrapping the self_exec_id in 7 days. Even my slower timing is in the uptime of a typical server. Which means self_exec_id is simply a speed bump today, and if exec gets noticeably faster self_exec_id won't even be a speed bump. Extending self_exec_id to 64bits introduces a problem on 32bit architectures where reading self_exec_id is no longer atomic and can take two read instructions. Which means that it is possible to hit a window where the read value of exec_id does not match the written value. So with very lucky timing after this change this still remains exploitable. I have updated the update of exec_id on exec to use WRITE_ONCE and the read of exec_id in do_notify_parent to use READ_ONCE to make it clear that there is no locking between these two locations. Link: https://lore.kernel.org/kernel-hardening/20200324215049.GA3710@pi3.com.pl Fixes: 2.3.23pre2 Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 2 +- include/linux/sched.h | 4 ++-- kernel/signal.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 7def97f6aac2..f687e7d59beb 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1373,7 +1373,7 @@ void setup_new_exec(struct linux_binprm * bprm) /* An exec changes our domain.
We are no longer part of the thread group */ - current->self_exec_id++; + WRITE_ONCE(current->self_exec_id, current->self_exec_id + 1); flush_signal_handlers(current, 0); } EXPORT_SYMBOL(setup_new_exec); diff --git a/include/linux/sched.h b/include/linux/sched.h index b06577652643..99650f05c271 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -839,8 +839,8 @@ struct task_struct { struct seccomp seccomp; /* Thread group tracking: */ - u32 parent_exec_id; - u32 self_exec_id; + u64 parent_exec_id; + u64 self_exec_id; /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ spinlock_t alloc_lock; diff --git a/kernel/signal.c b/kernel/signal.c index c066168f8854..deb36b35c30b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1675,7 +1675,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig) * This is only possible if parent == real_parent. * Check if it has changed security domain. */ - if (tsk->parent_exec_id != tsk->parent->self_exec_id) + if (tsk->parent_exec_id != READ_ONCE(tsk->parent->self_exec_id)) sig = SIGCHLD; } -- GitLab From ddcc66f15195b2b96a54599ce390b3a1c26041e6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 22:36:37 +0100 Subject: [PATCH 0935/1278] x86/entry/32: Add missing ASM_CLAC to general_protection entry commit 3d51507f29f2153a658df4a0674ec5b592b62085 upstream. All exception entry points must have ASM_CLAC right at the beginning. The general_protection entry is missing one. 
Fixes: e59d1b0a2419 ("x86-32, smap: Add STAC/CLAC instructions to 32-bit kernel entry") Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Reviewed-by: Alexandre Chartre Reviewed-by: Andy Lutomirski Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200225220216.219537887@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 49adabd94f88..c19974a49378 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1057,6 +1057,7 @@ ENTRY(int3) END(int3) ENTRY(general_protection) + ASM_CLAC pushl $do_general_protection jmp common_exception END(general_protection) -- GitLab From 803e9e93b079f6ac40485c27a4179e26acd7dc75 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 2 Mar 2020 22:27:35 -0800 Subject: [PATCH 0936/1278] KVM: nVMX: Properly handle userspace interrupt window request commit a1c77abb8d93381e25a8d2df3a917388244ba776 upstream. Return true for vmx_interrupt_allowed() if the vCPU is in L2 and L1 has external interrupt exiting enabled. IRQs are never blocked in hardware if the CPU is in the guest (L2 from L1's perspective) when IRQs trigger VM-Exit. The new check percolates up to kvm_vcpu_ready_for_interrupt_injection() and thus vcpu_run(), and so KVM will exit to userspace if userspace has requested an interrupt window (to inject an IRQ into L1). Remove the @external_intr param from vmx_check_nested_events(), which is actually an indicator that userspace wants an interrupt window, e.g. it's named @req_int_win further up the stack. Injecting a VM-Exit into L1 to try and bounce out to L0 userspace is all kinds of broken and is no longer necessary. Remove the hack in nested_vmx_vmexit() that attempted to workaround the breakage in vmx_check_nested_events() by only filling interrupt info if there's an actual interrupt pending. 
The hack actually made things worse because it caused KVM to _never_ fill interrupt info when the LAPIC resides in userspace (kvm_cpu_has_interrupt() queries interrupt.injected, which is always cleared by prepare_vmcs12() before reaching the hack in nested_vmx_vmexit()). Fixes: 6550c4df7e50 ("KVM: nVMX: Fix interrupt window request with "Acknowledge interrupt on exit"") Cc: stable@vger.kernel.org Cc: Liran Alon Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/vmx.c | 27 +++++++++++---------------- arch/x86/kvm/x86.c | 10 +++++----- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2cdf654ed132..9529fe69e1d9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1032,7 +1032,7 @@ struct kvm_x86_ops { bool (*mpx_supported)(void); bool (*xsaves_supported)(void); - int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + int (*check_nested_events)(struct kvm_vcpu *vcpu); void (*sched_in)(struct kvm_vcpu *kvm, int cpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f85680b86524..8704623304d3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6198,8 +6198,13 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { - return (!to_vmx(vcpu)->nested.nested_run_pending && - vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + if (to_vmx(vcpu)->nested.nested_run_pending) + return false; + + if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) + return true; + + return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); } @@ -11659,7 +11664,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, } } -static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool 
external_intr) +static int vmx_check_nested_events(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qual; @@ -11697,8 +11702,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) return 0; } - if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && - nested_exit_on_intr(vcpu)) { + if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(vcpu)) { if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); @@ -12254,17 +12258,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; if (likely(!vmx->fail)) { - /* - * TODO: SDM says that with acknowledge interrupt on - * exit, bit 31 of the VM-exit interrupt information - * (valid interrupt) is always set to 1 on - * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't - * need kvm_cpu_has_interrupt(). See the commit - * message for details. - */ - if (nested_exit_intr_ack_set(vcpu) && - exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && - kvm_cpu_has_interrupt(vcpu)) { + if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && + nested_exit_intr_ack_set(vcpu)) { int irq = kvm_cpu_get_interrupt(vcpu); WARN_ON(irq < 0); vmcs12->vm_exit_intr_info = irq | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d6851636edab..6375beaf2af4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6638,7 +6638,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); } -static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) +static int inject_pending_event(struct kvm_vcpu *vcpu) { int r; @@ -6665,7 +6665,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) } if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { - r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); + r = kvm_x86_ops->check_nested_events(vcpu); if (r != 0) return r; } @@ -6706,7 +6706,7 @@ 
static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) * KVM_REQ_EVENT only on certain events and not unconditionally? */ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { - r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); + r = kvm_x86_ops->check_nested_events(vcpu); if (r != 0) return r; } @@ -7152,7 +7152,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto out; } - if (inject_pending_event(vcpu, req_int_win) != 0) + if (inject_pending_event(vcpu) != 0) req_immediate_exit = true; else { /* Enable NMI/IRQ window open exits if needed. @@ -7360,7 +7360,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) - kvm_x86_ops->check_nested_events(vcpu, false); + kvm_x86_ops->check_nested_events(vcpu); return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted); -- GitLab From 2b463cdfe7260474875c4c03479158451b616016 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 3 Apr 2020 17:30:46 +0200 Subject: [PATCH 0937/1278] KVM: s390: vsie: Fix region 1 ASCE sanity shadow address checks commit a1d032a49522cb5368e5dfb945a85899b4c74f65 upstream. In case we have a region 1 the following calculation (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11) results in 64. As shifts beyond the size are undefined the compiler is free to use instructions like sllg. sllg will only use 6 bits of the shift value (here 64) resulting in no shift at all. That means that ALL addresses will be rejected. This can result in endless loops, e.g. when prefix cannot get mapped.
Fixes: 4be130a08420 ("s390/mm: add shadow gmap support") Tested-by: Janosch Frank Reported-by: Janosch Frank Cc: # v4.8+ Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20200403153050.20569-2-david@redhat.com Reviewed-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger [borntraeger@de.ibm.com: fix patch description, remove WARN_ON_ONCE] Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/gmap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index ec9292917d3f..e297efa6e648 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -762,14 +762,18 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start, static inline unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) { + const int asce_type = gmap->asce & _ASCE_TYPE_MASK; unsigned long *table; if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4)) return NULL; if (gmap_is_shadow(gmap) && gmap->removed) return NULL; - if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11))) + + if (asce_type != _ASCE_TYPE_REGION1 && + gaddr & (-1UL << (31 + (asce_type >> 2) * 11))) return NULL; + table = gmap->table; switch (gmap->asce & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: -- GitLab From fc6da9e3901a37c53b22c4edc6a3114e324b70f0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 3 Apr 2020 17:30:47 +0200 Subject: [PATCH 0938/1278] KVM: s390: vsie: Fix delivery of addressing exceptions commit 4d4cee96fb7a3cc53702a9be8299bf525be4ee98 upstream. Whenever we get an -EFAULT, we failed to read in guest 2 physical address space. Such addressing exceptions are reported via a program intercept to the nested hypervisor. We faked the intercept, we have to return to guest 2. Instead, right now we would be returning -EFAULT from the intercept handler, eventually crashing the VM. 
The correct thing to do is to return 1 as rc == 1 is the internal representation of "we have to go back into g2". Addressing exceptions can only happen if the g2->g3 page tables reference invalid g2 addresses (say, either a table or the final page is not accessible) - so something that basically never happens in sane environments. Identified by manual code inspection. Fixes: a3508fbe9dc6 ("KVM: s390: vsie: initial support for nested virtualization") Cc: # v4.8+ Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20200403153050.20569-3-david@redhat.com Reviewed-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger [borntraeger@de.ibm.com: fix patch description] Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/vsie.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 061906f98dc5..0120383219c0 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1027,6 +1027,7 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->iprcc = PGM_ADDRESSING; scb_s->pgmilc = 4; scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4); + rc = 1; } return rc; } -- GitLab From 57d45fd95665d2bb0cc1ac026efbee5fcc19f103 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:15 -0800 Subject: [PATCH 0939/1278] KVM: x86: Allocate new rmap and large page tracking when moving memslot commit edd4fa37baa6ee8e44dc65523b27bd6fe44c94de upstream. Reallocate a rmap array and recalculate large page compatibility when moving an existing memslot to correctly handle the alignment properties of the new memslot. The number of rmap entries required at each level is dependent on the alignment of the memslot's base gfn with respect to that level, e.g. moving a large-page aligned memslot so that it becomes unaligned will increase the number of rmap entries needed at the now unaligned level.
Not updating the rmap array is the most obvious bug, as KVM accesses garbage data beyond the end of the rmap. KVM interprets the bad data as pointers, leading to non-canonical #GPs, unexpected #PFs, etc... general protection fault: 0000 [#1] SMP CPU: 0 PID: 1909 Comm: move_memory_reg Not tainted 5.4.0-rc7+ #139 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:rmap_get_first+0x37/0x50 [kvm] Code: <48> 8b 3b 48 85 ff 74 ec e8 6c f4 ff ff 85 c0 74 e3 48 89 d8 5b c3 RSP: 0018:ffffc9000021bbc8 EFLAGS: 00010246 RAX: ffff00617461642e RBX: ffff00617461642e RCX: 0000000000000012 RDX: ffff88827400f568 RSI: ffffc9000021bbe0 RDI: ffff88827400f570 RBP: 0010000000000000 R08: ffffc9000021bd00 R09: ffffc9000021bda8 R10: ffffc9000021bc48 R11: 0000000000000000 R12: 0030000000000000 R13: 0000000000000000 R14: ffff88827427d700 R15: ffffc9000021bce8 FS: 00007f7eda014700(0000) GS:ffff888277a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7ed9216ff8 CR3: 0000000274391003 CR4: 0000000000162eb0 Call Trace: kvm_mmu_slot_set_dirty+0xa1/0x150 [kvm] __kvm_set_memory_region.part.64+0x559/0x960 [kvm] kvm_set_memory_region+0x45/0x60 [kvm] kvm_vm_ioctl+0x30f/0x920 [kvm] do_vfs_ioctl+0xa1/0x620 ksys_ioctl+0x66/0x70 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x4c/0x170 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f7ed9911f47 Code: <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 21 6f 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffc00937498 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000001ab0010 RCX: 00007f7ed9911f47 RDX: 0000000001ab1350 RSI: 000000004020ae46 RDI: 0000000000000004 RBP: 000000000000000a R08: 0000000000000000 R09: 00007f7ed9214700 R10: 00007f7ed92149d0 R11: 0000000000000246 R12: 00000000bffff000 R13: 0000000000000003 R14: 00007f7ed9215000 R15: 0000000000000000 Modules linked in: kvm_intel kvm irqbypass ---[ end trace 0c5f570b3358ca89 ]--- The disallow_lpage tracking is more subtle. 
Failure to update results in KVM creating large pages when it shouldn't, either due to stale data or again due to indexing beyond the end of the metadata arrays, which can lead to memory corruption and/or leaking data to guest/userspace. Note, the arrays for the old memslot are freed by the unconditional call to kvm_free_memslot() in __kvm_set_memory_region(). Fixes: 05da45583de9b ("KVM: MMU: large page support") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Reviewed-by: Peter Xu Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6375beaf2af4..5f44827e4962 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8584,6 +8584,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, { int i; + /* + * Clear out the previous array pointers for the KVM_MR_MOVE case. The + * old arrays will be freed by __kvm_set_memory_region() if installing + * the new memslot is successful. + */ + memset(&slot->arch, 0, sizeof(slot->arch)); + for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { struct kvm_lpage_info *linfo; unsigned long ugfn; @@ -8657,6 +8664,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { + if (change == KVM_MR_MOVE) + return kvm_arch_create_memslot(kvm, memslot, + mem->memory_size >> PAGE_SHIFT); + return 0; } -- GitLab From 7ce9bf3a75b7365120432dd48fa141b20104877f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 21 Mar 2020 12:37:49 -0700 Subject: [PATCH 0940/1278] KVM: VMX: Always VMCLEAR in-use VMCSes during crash with kexec support commit 31603d4fc2bb4f0815245d496cb970b27b4f636a upstream. VMCLEAR all in-use VMCSes during a crash, even if kdump's NMI shootdown interrupted a KVM update of the percpu in-use VMCS list. 
Because NMIs are not blocked by disabling IRQs, it's possible that crash_vmclear_local_loaded_vmcss() could be called while the percpu list of VMCSes is being modified, e.g. in the middle of list_add() in vmx_vcpu_load_vmcs(). This potential corner case was called out in the original commit[*], but the analysis of its impact was wrong. Skipping the VMCLEARs is wrong because it all but guarantees that a loaded, and therefore cached, VMCS will live across kexec and corrupt memory in the new kernel. Corruption will occur because the CPU's VMCS cache is non-coherent, i.e. not snooped, and so the writeback of VMCS memory on its eviction will overwrite random memory in the new kernel. The VMCS will live because the NMI shootdown also disables VMX, i.e. the in-progress VMCLEAR will #UD, and existing Intel CPUs do not flush the VMCS cache on VMXOFF. Furthermore, interrupting list_add() and list_del() is safe due to crash_vmclear_local_loaded_vmcss() using forward iteration. list_add() ensures the new entry is not visible to forward iteration unless the entire add completes, via WRITE_ONCE(prev->next, new). A bad "prev" pointer could be observed if the NMI shootdown interrupted list_del() or list_add(), but list_for_each_entry() does not consume ->prev. In addition to removing the temporary disabling of VMCLEAR, open code loaded_vmcs_init() in __loaded_vmcs_clear() and reorder VMCLEAR so that the VMCS is deleted from the list only after it's been VMCLEAR'd. Deleting the VMCS before VMCLEAR would allow a race where the NMI shootdown could arrive between list_del() and vmcs_clear() and thus neither flow would execute a successful VMCLEAR. Alternatively, more code could be moved into loaded_vmcs_init(), but that gets rather silly as the only other user, alloc_loaded_vmcs(), doesn't need the smp_wmb() and would need to work around the list_del(). Update the smp_*() comments related to the list manipulation, and opportunistically reword them to improve clarity. 
[*] https://patchwork.kernel.org/patch/1675731/#3720461 Fixes: 8f536b7697a0 ("KVM: VMX: provide the vmclear function and a bitmap to support VMCLEAR in kdump") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20200321193751.24985-2-sean.j.christopherson@intel.com> Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 67 +++++++++++----------------------------------- 1 file changed, 16 insertions(+), 51 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8704623304d3..5e76ff10f7f5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1674,43 +1674,15 @@ static void vmcs_load(struct vmcs *vmcs) } #ifdef CONFIG_KEXEC_CORE -/* - * This bitmap is used to indicate whether the vmclear - * operation is enabled on all cpus. All disabled by - * default. - */ -static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE; - -static inline void crash_enable_local_vmclear(int cpu) -{ - cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static inline void crash_disable_local_vmclear(int cpu) -{ - cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static inline int crash_local_vmclear_enabled(int cpu) -{ - return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - static void crash_vmclear_local_loaded_vmcss(void) { int cpu = raw_smp_processor_id(); struct loaded_vmcs *v; - if (!crash_local_vmclear_enabled(cpu)) - return; - list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), loaded_vmcss_on_cpu_link) vmcs_clear(v->vmcs); } -#else -static inline void crash_enable_local_vmclear(int cpu) { } -static inline void crash_disable_local_vmclear(int cpu) { } #endif /* CONFIG_KEXEC_CORE */ static void __loaded_vmcs_clear(void *arg) @@ -1722,19 +1694,24 @@ static void __loaded_vmcs_clear(void *arg) return; /* vcpu migration can race with cpu offline */ if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) per_cpu(current_vmcs, cpu) = NULL; - 
crash_disable_local_vmclear(cpu); + + vmcs_clear(loaded_vmcs->vmcs); + if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) + vmcs_clear(loaded_vmcs->shadow_vmcs); + list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); /* - * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link - * is before setting loaded_vmcs->vcpu to -1 which is done in - * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist - * then adds the vmcs into percpu list before it is deleted. + * Ensure all writes to loaded_vmcs, including deleting it from its + * current percpu list, complete before setting loaded_vmcs->vcpu to + * -1, otherwise a different cpu can see vcpu == -1 first and add + * loaded_vmcs to its percpu list before it's deleted from this cpu's + * list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs(). */ smp_wmb(); - loaded_vmcs_init(loaded_vmcs); - crash_enable_local_vmclear(cpu); + loaded_vmcs->cpu = -1; + loaded_vmcs->launched = 0; } static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) @@ -2497,18 +2474,17 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (!already_loaded) { loaded_vmcs_clear(vmx->loaded_vmcs); local_irq_disable(); - crash_disable_local_vmclear(cpu); /* - * Read loaded_vmcs->cpu should be before fetching - * loaded_vmcs->loaded_vmcss_on_cpu_link. - * See the comments in __loaded_vmcs_clear(). + * Ensure loaded_vmcs->cpu is read before adding loaded_vmcs to + * this cpu's percpu list, otherwise it may not yet be deleted + * from its previous cpu's percpu list. Pairs with the + * smb_wmb() in __loaded_vmcs_clear(). 
*/ smp_rmb(); list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, &per_cpu(loaded_vmcss_on_cpu, cpu)); - crash_enable_local_vmclear(cpu); local_irq_enable(); } @@ -3804,17 +3780,6 @@ static int hardware_enable(void) INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); - /* - * Now we can enable the vmclear operation in kdump - * since the loaded_vmcss_on_cpu list on this cpu - * has been initialized. - * - * Though the cpu is not in VMX operation now, there - * is no problem to enable the vmclear operation - * for the loaded_vmcss_on_cpu list is empty! - */ - crash_enable_local_vmclear(cpu); - rdmsrl(MSR_IA32_FEATURE_CONTROL, old); test_bits = FEATURE_CONTROL_LOCKED; -- GitLab From 2c0bf2560ca18abe1451db23b8ef8700c67cc44f Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 1 Apr 2020 10:13:48 +0200 Subject: [PATCH 0941/1278] KVM: VMX: fix crash cleanup when KVM wasn't used commit dbef2808af6c594922fe32833b30f55f35e9da6d upstream. If KVM wasn't used at all before we crash the cleanup procedure fails with BUG: unable to handle page fault for address: ffffffffffffffc8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 23215067 P4D 23215067 PUD 23217067 PMD 0 Oops: 0000 [#8] SMP PTI CPU: 0 PID: 3542 Comm: bash Kdump: loaded Tainted: G D 5.6.0-rc2+ #823 RIP: 0010:crash_vmclear_local_loaded_vmcss.cold+0x19/0x51 [kvm_intel] The root cause is that loaded_vmcss_on_cpu list is not yet initialized, we initialize it in hardware_enable() but this only happens when we start a VM. Previously, we used to have a bitmap with enabled CPUs and that was preventing [masking] the issue. Initialize the loaded_vmcss_on_cpu list earlier, right before we assign crash_vmclear_loaded_vmcss pointer. blocked_vcpu_on_cpu list and blocked_vcpu_on_cpu_lock are moved along with it for consistency.
Fixes: 31603d4fc2bb ("KVM: VMX: Always VMCLEAR in-use VMCSes during crash with kexec support") Signed-off-by: Vitaly Kuznetsov Message-Id: <20200401081348.1345307-1-vkuznets@redhat.com> Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5e76ff10f7f5..f8e3f3c48283 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3776,10 +3776,6 @@ static int hardware_enable(void) if (cr4_read_shadow() & X86_CR4_VMXE) return -EBUSY; - INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); - INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); - spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); - rdmsrl(MSR_IA32_FEATURE_CONTROL, old); test_bits = FEATURE_CONTROL_LOCKED; @@ -12900,7 +12896,7 @@ module_exit(vmx_exit) static int __init vmx_init(void) { - int r; + int r, cpu; r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx), THIS_MODULE); @@ -12922,6 +12918,12 @@ static int __init vmx_init(void) } } + for_each_possible_cpu(cpu) { + INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); + INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); + spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + } + #ifdef CONFIG_KEXEC_CORE rcu_assign_pointer(crash_vmclear_loaded_vmcss, crash_vmclear_local_loaded_vmcss); -- GitLab From 620b7966d4b8674e55c42342f731e5c17ec069a0 Mon Sep 17 00:00:00 2001 From: Yilu Lin Date: Wed, 18 Mar 2020 11:59:19 +0800 Subject: [PATCH 0942/1278] CIFS: Fix bug which the return value by asynchronous read is error commit 97adda8b3ab703de8e4c8d27646ddd54fe22879c upstream. This patch is used to fix the bug in collect_uncached_read_data() that rc is automatically converted from a signed number to an unsigned number when the CIFS asynchronous read fails. This causes ctx->rc to hold an incorrect value.
Example: Share a directory and create a file on the Windows OS. Mount the directory to the Linux OS using CIFS. On the CIFS client of the Linux OS, invoke the pread interface to deliver the read request. The size of the read length plus offset of the read request is greater than the maximum file size. In this case, the CIFS server on the Windows OS returns a failure message (for example, the return value of smb2.nt_status is STATUS_INVALID_PARAMETER). After receiving the response message, the CIFS client parses smb2.nt_status to STATUS_INVALID_PARAMETER and converts it to the Linux error code (rdata->result=-22). Then the CIFS client invokes the collect_uncached_read_data function to assign the value of rdata->result to rc, that is, rc=rdata->result=-22. The type of the ctx->total_len variable is unsigned integer, the type of the rc variable is integer, and the type of the ctx->rc variable is ssize_t. Therefore, during the ternary operation, the value of rc is automatically converted to an unsigned number. The final result is ctx->rc=4294967274. However, the expected result is ctx->rc=-22. Signed-off-by: Yilu Lin Signed-off-by: Steve French CC: Stable Acked-by: Ronnie Sahlberg Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5e75c5f77f4c..662977b8d6ae 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3303,7 +3303,7 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx) if (rc == -ENODATA) rc = 0; - ctx->rc = (rc == 0) ? ctx->total_len : rc; + ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc; mutex_unlock(&ctx->aio_mutex); -- GitLab From 25f78b3ce3d5c2c0d08fe62187971f2036fb550e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 4 Mar 2020 11:18:23 -0500 Subject: [PATCH 0943/1278] btrfs: drop block from cache on error in relocation commit 8e19c9732ad1d127b5575a10f4fbcacf740500ff upstream. 
If we have an error while building the backref tree in relocation we'll process all the pending edges and then free the node. However if we integrated some edges into the cache we'll lose our link to those edges by simply freeing this node, which means we'll leak memory and references to any roots that we've found. Instead we need to use remove_backref_node(), which walks through all of the edges that are still linked to this node and frees them up and drops any root references we may be holding. CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 04db7c5f5ca4..f5f3408b32ea 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1194,7 +1194,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, free_backref_node(cache, lower); } - free_backref_node(cache, node); + remove_backref_node(cache, node); return ERR_PTR(err); } ASSERT(!node || !node->detached); -- GitLab From 1c7d62cf043816d0a1884e6961aa0b84ac067154 Mon Sep 17 00:00:00 2001 From: Rosioru Dragos Date: Tue, 25 Feb 2020 17:05:52 +0200 Subject: [PATCH 0944/1278] crypto: mxs-dcp - fix scatterlist linearization for hash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fa03481b6e2e82355c46644147b614f18c7a8161 upstream. The incorrect traversal of the scatterlist, during the linearization phase led to computing the hash value of the wrong input buffer. The new implementation uses scatterwalk_map_and_copy() to address this issue.
Cc: Fixes: 15b59e7c3733 ("crypto: mxs - Add Freescale MXS DCP driver") Signed-off-by: Rosioru Dragos Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/mxs-dcp.c | 58 +++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index e1e1e8110790..5a4b8aee22a8 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -25,6 +25,7 @@ #include #include #include +#include #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE @@ -621,49 +622,46 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq) struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm); struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req); struct hash_alg_common *halg = crypto_hash_alg_common(tfm); - const int nents = sg_nents(req->src); uint8_t *in_buf = sdcp->coh->sha_in_buf; uint8_t *out_buf = sdcp->coh->sha_out_buf; - uint8_t *src_buf; - struct scatterlist *src; - unsigned int i, len, clen; + unsigned int i, len, clen, oft = 0; int ret; int fin = rctx->fini; if (fin) rctx->fini = 0; - for_each_sg(req->src, src, nents, i) { - src_buf = sg_virt(src); - len = sg_dma_len(src); - - do { - if (actx->fill + len > DCP_BUF_SZ) - clen = DCP_BUF_SZ - actx->fill; - else - clen = len; - - memcpy(in_buf + actx->fill, src_buf, clen); - len -= clen; - src_buf += clen; - actx->fill += clen; + src = req->src; + len = req->nbytes; - /* - * If we filled the buffer and still have some - * more data, submit the buffer. 
- */ - if (len && actx->fill == DCP_BUF_SZ) { - ret = mxs_dcp_run_sha(req); - if (ret) - return ret; - actx->fill = 0; - rctx->init = 0; - } - } while (len); + while (len) { + if (actx->fill + len > DCP_BUF_SZ) + clen = DCP_BUF_SZ - actx->fill; + else + clen = len; + + scatterwalk_map_and_copy(in_buf + actx->fill, src, oft, clen, + 0); + + len -= clen; + oft += clen; + actx->fill += clen; + + /* + * If we filled the buffer and still have some + * more data, submit the buffer. + */ + if (len && actx->fill == DCP_BUF_SZ) { + ret = mxs_dcp_run_sha(req); + if (ret) + return ret; + actx->fill = 0; + rctx->init = 0; + } } if (fin) { -- GitLab From 731a69866a37a13de543028f05ce65e3d4861522 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 13 Apr 2019 10:04:49 +0200 Subject: [PATCH 0945/1278] ALSA: hda: Initialize power_state field properly commit 183ab39eb0ea9879bb68422a83e65f750f3192f0 upstream. The recent commit 98081ca62cba ("ALSA: hda - Record the current power state before suspend/resume calls") made the HD-audio driver to store the PM state in power_state field. This forgot, however, the initialization at power up. Although the codec drivers usually don't need to refer to this field in the normal operation, let's initialize it properly for consistency. 
Fixes: 98081ca62cba ("ALSA: hda - Record the current power state before suspend/resume calls") Signed-off-by: Takashi Iwai Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_codec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index a6f7561e7bb9..7d65fe31c825 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -942,6 +942,7 @@ int snd_hda_codec_new(struct hda_bus *bus, struct snd_card *card, /* power-up all before initialization */ hda_set_power_state(codec, AC_PWRST_D0); + codec->core.dev.power.power_state = PMSG_ON; snd_hda_codec_proc_new(codec); -- GitLab From 85503bc92e7e1b2dbbe5e5a9929fba25d7abf3ec Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 12 Sep 2018 13:21:48 -0700 Subject: [PATCH 0946/1278] net: rtnl_configure_link: fix dev flags changes arg to __dev_notify_flags commit 56a49d7048703f5ffdb84d3a0ee034108fba6850 upstream. This fix addresses https://bugzilla.kernel.org/show_bug.cgi?id=201071 Commit 5025f7f7d506 wrongly relied on __dev_change_flags to notify users of dev flag changes in the case when dev->rtnl_link_state = RTNL_LINK_INITIALIZED. Fix it by indicating flag changes explicitly to __dev_notify_flags. Fixes: 5025f7f7d506 ("rtnetlink: add rtnl_link_state check in rtnl_configure_link") Reported-By: Liam mcbirnie Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7c479c1ffd77..cb15338cfda4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2424,7 +2424,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) } if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) { - __dev_notify_flags(dev, old_flags, 0U); + __dev_notify_flags(dev, old_flags, (old_flags ^ dev->flags)); } else { dev->rtnl_link_state = RTNL_LINK_INITIALIZED; __dev_notify_flags(dev, old_flags, ~0U); -- GitLab From b97e6f384e817d4717e68526b7759e6aabd7372e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 18 Feb 2019 12:56:44 +0000 Subject: [PATCH 0947/1278] powerpc/pseries: Drop pointless static qualifier in vpa_debugfs_init() commit 11dd34f3eae5a468013bb161a1dcf1fecd2ca321 upstream. There is no need to have the 'struct dentry *vpa_dir' variable static since a new value is always assigned before it is used.
Fixes: c6c26fb55e8e ("powerpc/pseries: Export raw per-CPU VPA data via debugfs") Signed-off-by: YueHaibing Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190218125644.87448-1-yuehaibing@huawei.com Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/lpar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index c0ae3847b8db..215b14a373cb 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1060,7 +1060,7 @@ static int __init vpa_debugfs_init(void) { char name[16]; long i; - static struct dentry *vpa_dir; + struct dentry *vpa_dir; if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return 0; -- GitLab From 1733d2a94f6414ba905d91ff14322093fda7c398 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 17 Jan 2019 02:10:59 -0800 Subject: [PATCH 0948/1278] x86/speculation: Remove redundant arch_smt_update() invocation commit 34d66caf251df91ff27b24a3a786810d29989eca upstream. With commit a74cfffb03b7 ("x86/speculation: Rework SMT state change"), arch_smt_update() is invoked from each individual CPU hotplug function. Therefore the extra arch_smt_update() call in the sysfs SMT control is redundant. 
Fixes: a74cfffb03b7 ("x86/speculation: Rework SMT state change") Signed-off-by: Zhenzhong Duan Signed-off-by: Thomas Gleixner Cc: Cc: Cc: Cc: Cc: Cc: Link: https://lkml.kernel.org/r/e2e064f2-e8ef-42ca-bf4f-76b612964752@default Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 96c0a868232e..d8c77bfb6e7e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2089,10 +2089,8 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) */ cpuhp_offline_cpu_device(cpu); } - if (!ret) { + if (!ret) cpu_smt_control = ctrlval; - arch_smt_update(); - } cpu_maps_update_done(); return ret; } @@ -2103,7 +2101,6 @@ int cpuhp_smt_enable(void) cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; - arch_smt_update(); for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) -- GitLab From 0a4513400796d1ae0562c3f3783428e74c684c8b Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Wed, 25 Mar 2020 12:31:54 +0200 Subject: [PATCH 0949/1278] tools: gpio: Fix out-of-tree build regression commit 82f04bfe2aff428b063eefd234679b2d693228ed upstream. Commit 0161a94e2d1c7 ("tools: gpio: Correctly add make dependencies for gpio_utils") added a make rule for gpio-utils-in.o but used $(output) instead of the correct $(OUTPUT) for the output directory, breaking out-of-tree build (O=xx) with the following error: No rule to make target 'out/tools/gpio/gpio-utils-in.o', needed by 'out/tools/gpio/lsgpio-in.o'. Stop. Fix that. 
Fixes: 0161a94e2d1c ("tools: gpio: Correctly add make dependencies for gpio_utils") Cc: Cc: Laura Abbott Signed-off-by: Anssi Hannula Link: https://lore.kernel.org/r/20200325103154.32235-1-anssi.hannula@bitwise.fi Reviewed-by: Bartosz Golaszewski Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- tools/gpio/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index 6a73c06e069c..3dbf7e8b07a5 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -35,7 +35,7 @@ $(OUTPUT)include/linux/gpio.h: ../../include/uapi/linux/gpio.h prepare: $(OUTPUT)include/linux/gpio.h -GPIO_UTILS_IN := $(output)gpio-utils-in.o +GPIO_UTILS_IN := $(OUTPUT)gpio-utils-in.o $(GPIO_UTILS_IN): prepare FORCE $(Q)$(MAKE) $(build)=gpio-utils -- GitLab From f5808d6a6aef27f0d21f632ff1b4c0963a5fedfb Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 15 Feb 2019 14:44:12 -0800 Subject: [PATCH 0950/1278] mm: Use fixed constant in page_frag_alloc instead of size + 1 commit 8644772637deb121f7ac2df690cbf83fa63d3b70 upstream. This patch replaces the size + 1 value introduced with the recent fix for 1 byte allocs with a constant value. The idea here is to reduce code overhead as the previous logic would have to read size into a register, then increment it, and write it back to whatever field was being used. By using a constant we can avoid those memory reads and arithmetic operations in favor of just encoding the maximum value into the operation itself. Fixes: 2c2ade81741c ("mm: page_alloc: fix ref bias in page_frag_alloc() for 1-byte allocs") Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6f71518a4558..08af4e3de6fb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4325,11 +4325,11 @@ void *page_frag_alloc(struct page_frag_cache *nc, /* Even if we own the page, we do not use atomic_set(). * This would break get_page_unless_zero() users. */ - page_ref_add(page, size); + page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); /* reset page count bias and offset to start of new frag */ nc->pfmemalloc = page_is_pfmemalloc(page); - nc->pagecnt_bias = size + 1; + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; nc->offset = size; } @@ -4345,10 +4345,10 @@ void *page_frag_alloc(struct page_frag_cache *nc, size = nc->size; #endif /* OK, page count is 0, we can safely set it */ - set_page_count(page, size + 1); + set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); /* reset page count bias and offset to start of new frag */ - nc->pagecnt_bias = size + 1; + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; offset = size - fragsz; } -- GitLab From 7db01bc082e77c8c40bf8af6afbc21e4e8356919 Mon Sep 17 00:00:00 2001 From: "Shetty, Harshini X (EXT-Sony Mobile)" Date: Tue, 17 Mar 2020 09:15:45 +0000 Subject: [PATCH 0951/1278] dm verity fec: fix memory leak in verity_fec_dtr commit 75fa601934fda23d2f15bf44b09c2401942d8e15 upstream. Fix below kmemleak detected in verity_fec_ctr. output_pool is allocated for each dm-verity-fec device. But it is not freed when dm-table for the verity target is removed. Hence free the output mempool in destructor function verity_fec_dtr. unreferenced object 0xffffffffa574d000 (size 4096): comm "init", pid 1667, jiffies 4294894890 (age 307.168s) hex dump (first 32 bytes): 8e 36 00 98 66 a8 0b 9b 00 00 00 00 00 00 00 00 .6..f........... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
backtrace: [<0000000060e82407>] __kmalloc+0x2b4/0x340 [<00000000dd99488f>] mempool_kmalloc+0x18/0x20 [<000000002560172b>] mempool_init_node+0x98/0x118 [<000000006c3574d2>] mempool_init+0x14/0x20 [<0000000008cb266e>] verity_fec_ctr+0x388/0x3b0 [<000000000887261b>] verity_ctr+0x87c/0x8d0 [<000000002b1e1c62>] dm_table_add_target+0x174/0x348 [<000000002ad89eda>] table_load+0xe4/0x328 [<000000001f06f5e9>] dm_ctl_ioctl+0x3b4/0x5a0 [<00000000bee5fbb7>] do_vfs_ioctl+0x5dc/0x928 [<00000000b475b8f5>] __arm64_sys_ioctl+0x70/0x98 [<000000005361e2e8>] el0_svc_common+0xa0/0x158 [<000000001374818f>] el0_svc_handler+0x6c/0x88 [<000000003364e9f4>] el0_svc+0x8/0xc [<000000009d84cec9>] 0xffffffffffffffff Fixes: a739ff3f543af ("dm verity: add support for forward error correction") Depends-on: 6f1c819c219f7 ("dm: convert to bioset_init()/mempool_init()") Cc: stable@vger.kernel.org Signed-off-by: Harshini Shetty Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-fec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index e13f90832b6b..285148100cde 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -552,6 +552,7 @@ void verity_fec_dtr(struct dm_verity *v) mempool_destroy(f->rs_pool); mempool_destroy(f->prealloc_pool); mempool_destroy(f->extra_pool); + mempool_destroy(f->output_pool); kmem_cache_destroy(f->cache); if (f->data_bufio) -- GitLab From abf346bbe93974abeddff7a42e7dcff4ef6aa75d Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Thu, 12 Mar 2020 18:44:56 +0100 Subject: [PATCH 0952/1278] scsi: zfcp: fix missing erp_lock in port recovery trigger for point-to-point commit 819732be9fea728623e1ed84eba28def7384ad1f upstream. v2.6.27 commit cc8c282963bd ("[SCSI] zfcp: Automatically attach remote ports") introduced zfcp automatic port scan. 
Before that, the user had to use the sysfs attribute "port_add" of an FCP device (adapter) to add and open remote (target) ports, even for the remote peer port in point-to-point topology. That code path did a proper port open recovery trigger taking the erp_lock. Since above commit, a new helper function zfcp_erp_open_ptp_port() performed an UNlocked port open recovery trigger. This can race with other parallel recovery triggers. In zfcp_erp_action_enqueue() this could corrupt e.g. adapter->erp_total_count or adapter->erp_ready_head. As already found for fabric topology in v4.17 commit fa89adba1941 ("scsi: zfcp: fix infinite iteration on ERP ready list"), there was an endless loop during tracing of rport (un)block. A subsequent v4.18 commit 9e156c54ace3 ("scsi: zfcp: assert that the ERP lock is held when tracing a recovery trigger") introduced a lockdep assertion for that case. As a side effect, that lockdep assertion now uncovered the unlocked code path for PtP. It is from within an adapter ERP action: zfcp_erp_strategy[1479] intentionally DROPs erp lock around zfcp_erp_strategy_do_action() zfcp_erp_strategy_do_action[1441] NO erp lock zfcp_erp_adapter_strategy[876] NO erp lock zfcp_erp_adapter_strategy_open[855] NO erp lock zfcp_erp_adapter_strategy_open_fsf[806]NO erp lock zfcp_erp_adapter_strat_fsf_xconf[772] erp lock only around zfcp_erp_action_to_running(), BUT *_not_* around zfcp_erp_enqueue_ptp_port() zfcp_erp_enqueue_ptp_port[728] BUG: *_not_* taking erp lock _zfcp_erp_port_reopen[432] assumes to be called with erp lock zfcp_erp_action_enqueue[314] assumes to be called with erp lock zfcp_dbf_rec_trig[288] _checks_ to be called with erp lock: lockdep_assert_held(&adapter->erp_lock); It causes the following lockdep warning: WARNING: CPU: 2 PID: 775 at drivers/s390/scsi/zfcp_dbf.c:288 zfcp_dbf_rec_trig+0x16a/0x188 no locks held by zfcperp0.0.17c0/775. Fix this by using the proper locked recovery trigger helper function. 
Link: https://lore.kernel.org/r/20200312174505.51294-2-maier@linux.ibm.com Fixes: cc8c282963bd ("[SCSI] zfcp: Automatically attach remote ports") Cc: #v2.6.27+ Reviewed-by: Jens Remus Reviewed-by: Benjamin Block Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_erp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 8f90e4cea254..168f7c84edba 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -747,7 +747,7 @@ static void zfcp_erp_enqueue_ptp_port(struct zfcp_adapter *adapter) adapter->peer_d_id); if (IS_ERR(port)) /* error or port already attached */ return; - _zfcp_erp_port_reopen(port, 0, "ereptp1"); + zfcp_erp_port_reopen(port, 0, "ereptp1"); } static int zfcp_erp_adapter_strat_fsf_xconf(struct zfcp_erp_action *erp_action) -- GitLab From fb6acfefa6f0585f0082d63f858b0a73a1f92d07 Mon Sep 17 00:00:00 2001 From: Fredrik Strupe Date: Wed, 8 Apr 2020 13:29:41 +0200 Subject: [PATCH 0953/1278] arm64: armv8_deprecated: Fix undef_hook mask for thumb setend commit fc2266011accd5aeb8ebc335c381991f20e26e33 upstream. For thumb instructions, call_undef_hook() in traps.c first reads a u16, and if the u16 indicates a T32 instruction (u16 >= 0xe800), a second u16 is read, which then makes up the lower half-word of a T32 instruction. For T16 instructions, the second u16 is not read, which makes the resulting u32 opcode always have the upper half set to 0. However, having the upper half of instr_mask in the undef_hook set to 0 masks out the upper half of all thumb instructions - both T16 and T32. This results in trapped T32 instructions with the lower half-word equal to the T16 encoding of setend (b650) being matched, even though the upper half-word is not 0000 and thus indicates a T32 opcode.
An example of such a T32 instruction is eaa0b650, which should raise a SIGILL since T32 instructions with an eaa prefix are unallocated as per Arm ARM, but instead works as a SETEND because the second half-word is set to b650. This patch fixes the issue by extending instr_mask to include the upper u32 half, which will still match T16 instructions where the upper half is 0, but not T32 instructions. Fixes: 2d888f48e056 ("arm64: Emulate SETEND for AArch32 tasks") Cc: # 4.0.x- Reviewed-by: Suzuki K Poulose Signed-off-by: Fredrik Strupe Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/armv8_deprecated.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index a4dc115d7659..092046704cbc 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -607,7 +607,7 @@ static struct undef_hook setend_hooks[] = { }, { /* Thumb mode */ - .instr_mask = 0x0000fff7, + .instr_mask = 0xfffffff7, .instr_val = 0x0000b650, .pstate_mask = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_MASK), .pstate_val = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_USR), -- GitLab From 98587165f7ad0d91cc7f76d9ef591a2cb743209e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 31 Oct 2018 17:55:02 -0700 Subject: [PATCH 0954/1278] rtc: omap: Use define directive for PIN_CONFIG_ACTIVE_HIGH commit c50156526a2f7176b50134e3e5fb108ba09791b2 upstream. 
Clang warns when one enumerated type is implicitly converted to another: drivers/rtc/rtc-omap.c:574:21: warning: implicit conversion from enumeration type 'enum rtc_pin_config_param' to different enumeration type 'enum pin_config_param' [-Wenum-conversion] {"ti,active-high", PIN_CONFIG_ACTIVE_HIGH, 0}, ~ ^~~~~~~~~~~~~~~~~~~~~~ drivers/rtc/rtc-omap.c:579:12: warning: implicit conversion from enumeration type 'enum rtc_pin_config_param' to different enumeration type 'enum pin_config_param' [-Wenum-conversion] PCONFDUMP(PIN_CONFIG_ACTIVE_HIGH, "input active high", NULL, false), ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/pinctrl/pinconf-generic.h:163:11: note: expanded from macro 'PCONFDUMP' .param = a, .display = b, .format = c, .has_arg = d \ ^ 2 warnings generated. It is expected that pinctrl drivers can extend pin_config_param because of the gap between PIN_CONFIG_END and PIN_CONFIG_MAX so this conversion isn't an issue. Most drivers that take advantage of this define the PIN_CONFIG variables as constants, rather than enumerated values. Do the same thing here so that Clang no longer warns. 
Link: https://github.com/ClangBuiltLinux/linux/issues/144 Signed-off-by: Nathan Chancellor Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-omap.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index ae6506a8b4f5..b25a2ba5ac48 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -559,9 +559,7 @@ static const struct pinctrl_ops rtc_pinctrl_ops = { .dt_free_map = pinconf_generic_dt_free_map, }; -enum rtc_pin_config_param { - PIN_CONFIG_ACTIVE_HIGH = PIN_CONFIG_END + 1, -}; +#define PIN_CONFIG_ACTIVE_HIGH (PIN_CONFIG_END + 1) static const struct pinconf_generic_params rtc_params[] = { {"ti,active-high", PIN_CONFIG_ACTIVE_HIGH, 0}, -- GitLab From c60aee180ba11443df2630f3e23e2915e3aacc17 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 Apr 2020 10:07:16 -0400 Subject: [PATCH 0955/1278] NFS: Fix a page leak in nfs_destroy_unlinked_subrequests() commit add42de31721fa29ed77a7ce388674d69f9d31a4 upstream. When we detach a subrequest from the list, we must also release the reference it holds to the parent. 
Fixes: 5b2b5187fa85 ("NFS: Fix nfs_page_group_destroy() and nfs_lock_and_join_requests() race cases") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 89f36040adf6..7b6bda68aa86 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -422,6 +422,7 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, } subreq->wb_head = subreq; + nfs_release_request(old_head); if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) { nfs_release_request(subreq); -- GitLab From 0ee19b5a3ed490889259979370e89f081890cb83 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 21 Feb 2020 23:32:58 -0500 Subject: [PATCH 0956/1278] ext4: fix a data race at inode->i_blocks commit 28936b62e71e41600bab319f262ea9f9b1027629 upstream. inode->i_blocks could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in ext4_do_update_inode [ext4] / inode_add_bytes write to 0xffff9a00d4b982d0 of 8 bytes by task 22100 on cpu 118: inode_add_bytes+0x65/0xf0 __inode_add_bytes at fs/stat.c:689 (inlined by) inode_add_bytes at fs/stat.c:702 ext4_mb_new_blocks+0x418/0xca0 [ext4] ext4_ext_map_blocks+0x1a6b/0x27b0 [ext4] ext4_map_blocks+0x1a9/0x950 [ext4] _ext4_get_block+0xfc/0x270 [ext4] ext4_get_block_unwritten+0x33/0x50 [ext4] __block_write_begin_int+0x22e/0xae0 __block_write_begin+0x39/0x50 ext4_write_begin+0x388/0xb50 [ext4] ext4_da_write_begin+0x35f/0x8f0 [ext4] generic_perform_write+0x15d/0x290 ext4_buffered_write_iter+0x11f/0x210 [ext4] ext4_file_write_iter+0xce/0x9e0 [ext4] new_sync_write+0x29c/0x3b0 __vfs_write+0x92/0xa0 vfs_write+0x103/0x260 ksys_write+0x9d/0x130 __x64_sys_write+0x4c/0x60 do_syscall_64+0x91/0xb05 entry_SYSCALL_64_after_hwframe+0x49/0xbe read to 0xffff9a00d4b982d0 of 8 bytes by task 8 on cpu 65: ext4_do_update_inode+0x4a0/0xf60 [ext4] ext4_inode_blocks_set at fs/ext4/inode.c:4815 ext4_mark_iloc_dirty+0xaf/0x160 
[ext4] ext4_mark_inode_dirty+0x129/0x3e0 [ext4] ext4_convert_unwritten_extents+0x253/0x2d0 [ext4] ext4_convert_unwritten_io_end_vec+0xc5/0x150 [ext4] ext4_end_io_rsv_work+0x22c/0x350 [ext4] process_one_work+0x54f/0xb90 worker_thread+0x80/0x5f0 kthread+0x1cd/0x1f0 ret_from_fork+0x27/0x50 4 locks held by kworker/u256:0/8: #0: ffff9a025abc4328 ((wq_completion)ext4-rsv-conversion){+.+.}, at: process_one_work+0x443/0xb90 #1: ffffab5a862dbe20 ((work_completion)(&ei->i_rsv_conversion_work)){+.+.}, at: process_one_work+0x443/0xb90 #2: ffff9a025a9d0f58 (jbd2_handle){++++}, at: start_this_handle+0x1c1/0x9d0 [jbd2] #3: ffff9a00d4b985d8 (&(&ei->i_raw_lock)->rlock){+.+.}, at: ext4_do_update_inode+0xaa/0xf60 [ext4] irq event stamp: 3009267 hardirqs last enabled at (3009267): [] __find_get_block+0x107/0x790 hardirqs last disabled at (3009266): [] __find_get_block+0x49/0x790 softirqs last enabled at (3009230): [] __do_softirq+0x34c/0x57c softirqs last disabled at (3009223): [] irq_exit+0xa2/0xc0 Reported by Kernel Concurrency Sanitizer on: CPU: 65 PID: 8 Comm: kworker/u256:0 Tainted: G L 5.6.0-rc2-next-20200221+ #7 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 Workqueue: ext4-rsv-conversion ext4_end_io_rsv_work [ext4] The plain read is outside of inode->i_lock critical section which results in a data race. Fix it by adding READ_ONCE() there. 
Link: https://lore.kernel.org/r/20200222043258.2279-1-cai@lca.pw Signed-off-by: Qian Cai Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1e2edebd0929..eafe4a41eb50 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4981,7 +4981,7 @@ static int ext4_inode_blocks_set(handle_t *handle, struct ext4_inode_info *ei) { struct inode *inode = &(ei->vfs_inode); - u64 i_blocks = inode->i_blocks; + u64 i_blocks = READ_ONCE(inode->i_blocks); struct super_block *sb = inode->i_sb; if (i_blocks <= ~0U) { -- GitLab From 8fbb1f423298090dfb58e8eb323367e1f66211a3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:47 -0700 Subject: [PATCH 0957/1278] fs/filesystems.c: downgrade user-reachable WARN_ONCE() to pr_warn_once() commit 26c5d78c976ca298e59a56f6101a97b618ba3539 upstream. After request_module(), nothing is stopping the module from being unloaded until someone takes a reference to it via try_get_module(). The WARN_ONCE() in get_fs_type() is thus user-reachable, via userspace running 'rmmod' concurrently. Since WARN_ONCE() is for kernel bugs only, not for user-reachable situations, downgrade this warning to pr_warn_once(). Keep it printed once only, since the intent of this warning is to detect a bug in modprobe at boot time. Printing the warning more than once wouldn't really provide any useful extra information. 
Fixes: 41124db869b7 ("fs: warn in case userspace lied about modprobe return") Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Reviewed-by: Jessica Yu Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: Luis Chamberlain Cc: NeilBrown Cc: [4.13+] Link: http://lkml.kernel.org/r/20200312202552.241885-3-ebiggers@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/filesystems.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/filesystems.c b/fs/filesystems.c index f2728a4a03a1..8fb7cda40997 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -279,7 +279,9 @@ struct file_system_type *get_fs_type(const char *name) fs = __get_fs_type(name, len); if (!fs && (request_module("fs-%.*s", len, name) == 0)) { fs = __get_fs_type(name, len); - WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name); + if (!fs) + pr_warn_once("request_module fs-%.*s succeeded, but still no fs?\n", + len, name); } if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) { -- GitLab From 0353bad3913e41ee766a74ff2d6159e148b357ba Mon Sep 17 00:00:00 2001 From: Changwei Ge Date: Fri, 10 Apr 2020 14:32:38 -0700 Subject: [PATCH 0958/1278] ocfs2: no need try to truncate file beyond i_size commit 783fda856e1034dee90a873f7654c418212d12d7 upstream. Linux fallocate(2) with FALLOC_FL_PUNCH_HOLE mode set, its offset can exceed the inode size. Ocfs2 now doesn't allow that offset beyond inode size. This restriction is not necessary and violates fallocate(2) semantics. If fallocate(2) offset is beyond inode size, just return success and do nothing further. Otherwise, ocfs2 will crash the kernel. kernel BUG at fs/ocfs2//alloc.c:7264! 
ocfs2_truncate_inline+0x20f/0x360 [ocfs2] ocfs2_remove_inode_range+0x23c/0xcb0 [ocfs2] __ocfs2_change_file_space+0x4a5/0x650 [ocfs2] ocfs2_fallocate+0x83/0xa0 [ocfs2] vfs_fallocate+0x148/0x230 SyS_fallocate+0x48/0x80 do_syscall_64+0x79/0x170 Signed-off-by: Changwei Ge Signed-off-by: Andrew Morton Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Link: http://lkml.kernel.org/r/20200407082754.17565-1-chge@linux.alibaba.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/alloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index addd7c5f2d3e..bed54e8adcf9 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7240,6 +7240,10 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_inline_data *idata = &di->id2.i_data; + /* No need to punch hole beyond i_size. */ + if (start >= i_size_read(inode)) + return 0; + if (end > i_size_read(inode)) end = i_size_read(inode); -- GitLab From d1c7651d8b49d1e3a8ad824c2f191d9e9dffb441 Mon Sep 17 00:00:00 2001 From: Sam Lunt Date: Fri, 31 Jan 2020 12:11:23 -0600 Subject: [PATCH 0959/1278] perf tools: Support Python 3.8+ in Makefile commit b9c9ce4e598e012ca7c1813fae2f4d02395807de upstream. Python 3.8 changed the output of 'python-config --ldflags' to no longer include the '-lpythonX.Y' flag (this apparently fixed an issue loading modules with a statically linked Python executable). The libpython feature check in linux/build/feature fails if the Python library is not included in FEATURE_CHECK_LDFLAGS-libpython variable. This adds a check in the Makefile to determine if PYTHON_CONFIG accepts the '--embed' flag and passes that flag alongside '--ldflags' if so. tools/perf is the only place the libpython feature check is used. 
Signed-off-by: Sam Lunt Tested-by: He Zhe Link: http://lore.kernel.org/lkml/c56be2e1-8111-9dfe-8298-f7d0f9ab7431@windriver.com Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: trivial@kernel.org Cc: stable@kernel.org Link: http://lore.kernel.org/lkml/20200131181123.tmamivhq4b7uqasr@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/Makefile.config | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index b97e31498ff7..8baaf9797101 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -179,8 +179,17 @@ strip-libs = $(filter-out -l%,$(1)) PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) +# Python 3.8 changed the output of `python-config --ldflags` to not include the +# '-lpythonX.Y' flag unless '--embed' is also passed. The feature check for +# libpython fails if that flag is not included in LDFLAGS +ifeq ($(shell $(PYTHON_CONFIG_SQ) --ldflags --embed 2>&1 1>/dev/null; echo $$?), 0) + PYTHON_CONFIG_LDFLAGS := --ldflags --embed +else + PYTHON_CONFIG_LDFLAGS := --ldflags +endif + ifdef PYTHON_CONFIG - PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) + PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null) PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) -- GitLab From 63579acd788ec978591b345b5eaa55937116116b Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Tue, 3 Mar 2020 16:42:01 +0100 Subject: [PATCH 0960/1278] s390/diag: fix display of diagnose call statistics commit 6c7c851f1b666a8a455678a0b480b9162de86052 upstream. Show the full diag statistic table and not just parts of it. 
The issue surfaced in a KVM guest with a number of vcpus defined smaller than NR_DIAG_STAT. Fixes: 1ec2772e0c3c ("s390/diag: add a statistic for diagnose calls") Cc: stable@vger.kernel.org Signed-off-by: Michael Mueller Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 53a5316cc4b7..35c842aa8705 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -79,7 +79,7 @@ static int show_diag_stat(struct seq_file *m, void *v) static void *show_diag_stat_start(struct seq_file *m, loff_t *pos) { - return *pos <= nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; + return *pos <= NR_DIAG_STAT ? (void *)((unsigned long) *pos + 1) : NULL; } static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos) -- GitLab From 5ea982de5957542160f5e965e36e2cbb434151dc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Apr 2020 13:23:06 -0700 Subject: [PATCH 0961/1278] Input: i8042 - add Acer Aspire 5738z to nomux list commit ebc68cedec4aead47d8d11623d013cca9bf8e825 upstream. The Acer Aspire 5738z has a button to disable (and re-enable) the touchpad next to the touchpad. When this button is pressed a LED underneath indicates that the touchpad is disabled (and an event is sent to userspace and GNOME shows its touchpad enabled / disable OSD thingie). So far so good, but after re-enabling the touchpad it no longer works. The laptop does not have an external ps2 port, so mux mode is not needed and disabling mux mode fixes the touchpad no longer working after toggling it off and back on again, so let's add this laptop model to the nomux list.
Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200331123947.318908-1-hdegoede@redhat.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-x86ia64io.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 136f6e7bf797..0d0f977a2f39 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -534,6 +534,17 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo LaVie Z"), }, }, + { + /* + * Acer Aspire 5738z + * Touchpad stops working in mux mode when dis- + re-enabled + * with the touchpad enable/disable toggle hotkey + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5738"), + }, + }, { } }; -- GitLab From ad2259b81d4f032d1a28bbcfbb3bab360336acfa Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:43 -0700 Subject: [PATCH 0962/1278] kmod: make request_module() return an error when autoloading is disabled commit d7d27cfc5cf0766a26a8f56868c5ad5434735126 upstream. Patch series "module autoloading fixes and cleanups", v5. This series fixes a bug where request_module() was reporting success to kernel code when module autoloading had been completely disabled via 'echo > /proc/sys/kernel/modprobe'. It also addresses the issues raised on the original thread (https://lkml.kernel.org/lkml/20200310223731.126894-1-ebiggers@kernel.org/T/#u) by documenting the modprobe sysctl, adding a self-test for the empty path case, and downgrading a user-reachable WARN_ONCE(). This patch (of 4): It's long been possible to disable kernel module autoloading completely (while still allowing manual module insertion) by setting /proc/sys/kernel/modprobe to the empty string.
This can be preferable to setting it to a nonexistent file since it avoids the overhead of an attempted execve(), avoids potential deadlocks, and avoids the call to security_kernel_module_request() and thus on SELinux-based systems eliminates the need to write SELinux rules to dontaudit module_request. However, when module autoloading is disabled in this way, request_module() returns 0. This is broken because callers expect 0 to mean that the module was successfully loaded. Apparently this was never noticed because this method of disabling module autoloading isn't used much, and also most callers don't use the return value of request_module() since it's always necessary to check whether the module registered its functionality or not anyway. But improperly returning 0 can indeed confuse a few callers, for example get_fs_type() in fs/filesystems.c where it causes a WARNING to be hit: if (!fs && (request_module("fs-%.*s", len, name) == 0)) { fs = __get_fs_type(name, len); WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name); } This is easily reproduced with: echo > /proc/sys/kernel/modprobe mount -t NONEXISTENT none / It causes: request_module fs-NONEXISTENT succeeded, but still no fs? WARNING: CPU: 1 PID: 1106 at fs/filesystems.c:275 get_fs_type+0xd6/0xf0 [...] This should actually use pr_warn_once() rather than WARN_ONCE(), since it's also user-reachable if userspace immediately unloads the module. Regardless, request_module() should correctly return an error when it fails. So let's make it return -ENOENT, which matches the error when the modprobe binary doesn't exist. I've also sent patches to document and test this case. 
Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Reviewed-by: Jessica Yu Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Ben Hutchings Cc: Josh Triplett Cc: Link: http://lkml.kernel.org/r/20200310223731.126894-1-ebiggers@kernel.org Link: http://lkml.kernel.org/r/20200312202552.241885-1-ebiggers@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/kmod.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kmod.c b/kernel/kmod.c index bc6addd9152b..a2de58de6ab6 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -120,7 +120,7 @@ static int call_modprobe(char *module_name, int wait) * invoke it. * * If module auto-loading support is disabled then this function - * becomes a no-operation. + * simply returns -ENOENT. */ int __request_module(bool wait, const char *fmt, ...) { @@ -137,7 +137,7 @@ int __request_module(bool wait, const char *fmt, ...) WARN_ON_ONCE(wait && current_is_async()); if (!modprobe_path[0]) - return 0; + return -ENOENT; va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); -- GitLab From 3bf2149f029d4c2f92b7f80a37e7c8c90ca89493 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Thu, 6 Feb 2020 17:26:21 +1100 Subject: [PATCH 0963/1278] cpufreq: powernv: Fix use-after-free commit d0a72efac89d1c35ac55197895201b7b94c5e6ef upstream. The cpufreq driver has a use-after-free that we can hit if: a) There's an OCC message pending when the notifier is registered, and b) The cpufreq driver fails to register with the core. When a) occurs the notifier schedules a workqueue item to handle the message. The backing work_struct is located on chips[].throttle and when b) happens we clean up by freeing the array. Once we get to the (now free) queued item and the kernel crashes. 
Fixes: c5e29ea7ac14 ("cpufreq: powernv: Fix bugs in powernv_cpufreq_{init/exit}") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Oliver O'Halloran Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200206062622.28235-1-oohall@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/powernv-cpufreq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 33854bf127f9..25c9a6cdd861 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -1041,6 +1041,12 @@ static int init_chip_info(void) static inline void clean_chip_info(void) { + int i; + + /* flush any pending work items */ + if (chips) + for (i = 0; i < nr_chips; i++) + cancel_work_sync(&chips[i].throttle); kfree(chips); } -- GitLab From df54882393ca5fa7ad98e53ce32411a77fecd055 Mon Sep 17 00:00:00 2001 From: Simon Gander Date: Fri, 10 Apr 2020 14:32:16 -0700 Subject: [PATCH 0964/1278] hfsplus: fix crash and filesystem corruption when deleting files commit 25efb2ffdf991177e740b2f63e92b4ec7d310a92 upstream. When removing files containing extended attributes, the hfsplus driver may remove the wrong entries from the attributes b-tree, causing major filesystem damage and in some cases even kernel crashes. To remove a file, all its extended attributes have to be removed as well. The driver does this by looking up all keys in the attributes b-tree with the cnid of the file. Each of these entries then gets deleted using the key used for searching, which doesn't contain the attribute's name when it should. Since the key doesn't contain the name, the deletion routine will not find the correct entry and instead remove the one in front of it. If parent nodes have to be modified, these become corrupt as well. This causes invalid links and unsorted entries that not even macOS's fsck_hfs is able to fix. 
To fix this, modify the search key before an entry is deleted from the attributes b-tree by copying the found entry's key into the search key, therefore ensuring that the correct entry gets removed from the tree. Signed-off-by: Simon Gander Signed-off-by: Andrew Morton Reviewed-by: Anton Altaparmakov Cc: Link: http://lkml.kernel.org/r/20200327155541.1521-1-simon@tuxera.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/hfsplus/attributes.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c index e6d554476db4..eeebe80c6be4 100644 --- a/fs/hfsplus/attributes.c +++ b/fs/hfsplus/attributes.c @@ -292,6 +292,10 @@ static int __hfsplus_delete_attr(struct inode *inode, u32 cnid, return -ENOENT; } + /* Avoid btree corruption */ + hfs_bnode_read(fd->bnode, fd->search_key, + fd->keyoffset, fd->keylength); + err = hfs_brec_remove(fd); if (err) return err; -- GitLab From 5a294c1f8b1278886bd42273d6211dc75323cb80 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 27 Mar 2019 17:02:54 +0800 Subject: [PATCH 0965/1278] libata: Return correct status in sata_pmp_eh_recover_pm() when ATA_DFLAG_DETACH is set commit 8305f72f952cff21ce8109dc1ea4b321c8efc5af upstream. During system resume from suspend, this can be observed on ASM1062 PMP controller: ata10.01: SATA link down (SStatus 0 SControl 330) ata10.02: hard resetting link ata10.02: SATA link down (SStatus 0 SControl 330) ata10.00: configured for UDMA/133 Kernel panic - not syncing: stack-protector: Kernel in: sata_pmp_eh_recover+0xa2b/0xa40 CPU: 2 PID: 230 Comm: scsi_eh_9 Tainted: P OE #49-Ubuntu Hardware name: System manufacturer System Product 1001 12/10/2017 Call Trace: dump_stack+0x63/0x8b panic+0xe4/0x244 ? sata_pmp_eh_recover+0xa2b/0xa40 __stack_chk_fail+0x19/0x20 sata_pmp_eh_recover+0xa2b/0xa40 ? ahci_do_softreset+0x260/0x260 [libahci] ? ahci_do_hardreset+0x140/0x140 [libahci] ? ata_phys_link_offline+0x60/0x60 ? 
ahci_stop_engine+0xc0/0xc0 [libahci] sata_pmp_error_handler+0x22/0x30 ahci_error_handler+0x45/0x80 [libahci] ata_scsi_port_error_handler+0x29b/0x770 ? ata_scsi_cmd_error_handler+0x101/0x140 ata_scsi_error+0x95/0xd0 ? scsi_try_target_reset+0x90/0x90 scsi_error_handler+0xd0/0x5b0 kthread+0x121/0x140 ? scsi_eh_get_sense+0x200/0x200 ? kthread_create_worker_on_cpu+0x70/0x70 ret_from_fork+0x22/0x40 Kernel Offset: 0xcc00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Since sata_pmp_eh_recover_pmp() doesn't set rc when ATA_DFLAG_DETACH is set, sata_pmp_eh_recover() continues to run. During retry it triggers the stack protector. Set correct rc in sata_pmp_eh_recover_pmp() to let sata_pmp_eh_recover() jump to pmp_fail directly. BugLink: https://bugs.launchpad.net/bugs/1821434 Cc: stable@vger.kernel.org Signed-off-by: Kai-Heng Feng Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-pmp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index 85aa76116a30..7924d0635718 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -764,6 +764,7 @@ static int sata_pmp_eh_recover_pmp(struct ata_port *ap, if (dev->flags & ATA_DFLAG_DETACH) { detach = 1; + rc = -ENODEV; goto fail; } -- GitLab From 66a93f57330d3e172bf2c3da29f0501634bfac7a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 18 Apr 2019 16:51:17 +1000 Subject: [PATCH 0966/1278] powerpc/powernv/idle: Restore AMR/UAMOR/AMOR after idle commit 53a712bae5dd919521a58d7bad773b949358add0 upstream. In order to implement KUAP (Kernel Userspace Access Protection) on Power9 we will be using the AMR, and therefore indirectly the UAMOR/AMOR. So save/restore these regs in the idle code.
Signed-off-by: Michael Ellerman [ajd: Backport to 4.14 tree, CVE-2020-11669] Signed-off-by: Andrew Donnellan Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/idle_book3s.S | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 74fc20431082..01b823bdb49c 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -163,8 +163,11 @@ core_idle_lock_held: bne- core_idle_lock_held blr -/* Reuse an unused pt_regs slot for IAMR */ +/* Reuse some unused pt_regs slots for AMR/IAMR/UAMOR/UAMOR */ +#define PNV_POWERSAVE_AMR _TRAP #define PNV_POWERSAVE_IAMR _DAR +#define PNV_POWERSAVE_UAMOR _DSISR +#define PNV_POWERSAVE_AMOR RESULT /* * Pass requested state in r3: @@ -198,8 +201,16 @@ pnv_powersave_common: SAVE_NVGPRS(r1) BEGIN_FTR_SECTION + mfspr r4, SPRN_AMR mfspr r5, SPRN_IAMR + mfspr r6, SPRN_UAMOR + std r4, PNV_POWERSAVE_AMR(r1) std r5, PNV_POWERSAVE_IAMR(r1) + std r6, PNV_POWERSAVE_UAMOR(r1) +BEGIN_FTR_SECTION_NESTED(42) + mfspr r7, SPRN_AMOR + std r7, PNV_POWERSAVE_AMOR(r1) +END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfcr r5 @@ -951,12 +962,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) REST_GPR(2, r1) BEGIN_FTR_SECTION - /* IAMR was saved in pnv_powersave_common() */ + /* These regs were saved in pnv_powersave_common() */ + ld r4, PNV_POWERSAVE_AMR(r1) ld r5, PNV_POWERSAVE_IAMR(r1) + ld r6, PNV_POWERSAVE_UAMOR(r1) + mtspr SPRN_AMR, r4 mtspr SPRN_IAMR, r5 + mtspr SPRN_UAMOR, r6 +BEGIN_FTR_SECTION_NESTED(42) + ld r7, PNV_POWERSAVE_AMOR(r1) + mtspr SPRN_AMOR, r7 +END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42) /* - * We don't need an isync here because the upcoming mtmsrd is - * execution synchronizing. + * We don't need an isync here after restoring IAMR because the upcoming + * mtmsrd is execution synchronizing. 
*/ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) -- GitLab From f4ebfe21e1433700884f71996341724a69395ef1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 31 Mar 2020 22:47:19 +1100 Subject: [PATCH 0967/1278] powerpc/64/tm: Don't let userspace set regs->trap via sigreturn commit c7def7fbdeaa25feaa19caf4a27c5d10bd8789e4 upstream. In restore_tm_sigcontexts() we take the trap value directly from the user sigcontext with no checking: err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]); This means we can be in the kernel with an arbitrary regs->trap value. Although that's not immediately problematic, there is a risk we could trigger one of the uses of CHECK_FULL_REGS(): #define CHECK_FULL_REGS(regs) BUG_ON(regs->trap & 1) It can also cause us to unnecessarily save non-volatile GPRs again in save_nvgprs(), which shouldn't be problematic but is still wrong. It's also possible it could trick the syscall restart machinery, which relies on regs->trap not being == 0xc00 (see 9a81c16b5275 ("powerpc: fix double syscall restarts")), though I haven't been able to make that happen. Finally it doesn't match the behaviour of the non-TM case, in restore_sigcontext() which zeroes regs->trap. So change restore_tm_sigcontexts() to zero regs->trap. This was discovered while testing Nick's upcoming rewrite of the syscall entry path. In that series the call to save_nvgprs() prior to signal handling (do_notify_resume()) is removed, which leaves the low-bit of regs->trap uncleared which can then trigger the FULL_REGS() WARNs in setup_tm_sigcontexts(). 
Fixes: 2b0a576d15e0 ("powerpc: Add new transactional memory state to the signal context") Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200401023836.3286664-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/signal_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index b75bf6e74209..3e8edb1387cc 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -469,8 +469,10 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, err |= __get_user(tsk->thread.ckpt_regs.ccr, &sc->gp_regs[PT_CCR]); + /* Don't allow userspace to set the trap value */ + regs->trap = 0; + /* These regs are not checkpointed; they can go in 'regs'. */ - err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]); err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); -- GitLab From 891b0bdceb8845536b9a4df86aa3842f968aff0d Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 13 Mar 2020 15:18:42 +0530 Subject: [PATCH 0968/1278] powerpc/hash64/devmap: Use H_PAGE_THP_HUGE when setting up huge devmap PTE entries commit 36b78402d97a3b9aeab136feb9b00d8647ec2c20 upstream. H_PAGE_THP_HUGE is used to differentiate between a THP hugepage and hugetlb hugepage entries. The difference is WRT how we handle hash fault on these addresses. THP address enables MPSS in segments. We want to manage devmap hugepage entries similar to THP pt entries. Hence use H_PAGE_THP_HUGE for devmap huge PTE entries. With current code while handling hash PTE fault, we do set is_thp = true when finding devmap PTE huge PTE entries. Current code also does the below sequence when setting up huge devmap entries.
entry = pmd_mkhuge(pfn_t_pmd(pfn, prot)); if (pfn_t_devmap(pfn)) entry = pmd_mkdevmap(entry); In that case we would find both H_PAGE_THP_HUGE and PAGE_DEVMAP set for huge devmap PTE entries. This results in false positive error like below. kernel BUG at /home/kvaneesh/src/linux/mm/memory.c:4321! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 56 PID: 67996 Comm: t_mmap_dio Not tainted 5.6.0-rc4-59640-g371c804dedbc #128 .... NIP [c00000000044c9e4] __follow_pte_pmd+0x264/0x900 LR [c0000000005d45f8] dax_writeback_one+0x1a8/0x740 Call Trace: str_spec.74809+0x22ffb4/0x2d116c (unreliable) dax_writeback_one+0x1a8/0x740 dax_writeback_mapping_range+0x26c/0x700 ext4_dax_writepages+0x150/0x5a0 do_writepages+0x68/0x180 __filemap_fdatawrite_range+0x138/0x180 file_write_and_wait_range+0xa4/0x110 ext4_sync_file+0x370/0x6e0 vfs_fsync_range+0x70/0xf0 sys_msync+0x220/0x2e0 system_call+0x5c/0x68 This is because our pmd_trans_huge check doesn't exclude _PAGE_DEVMAP. To make this all consistent, update pmd_mkdevmap to set H_PAGE_THP_HUGE and pmd_trans_huge check now excludes _PAGE_DEVMAP correctly. 
Fixes: ebd31197931d ("powerpc/mm: Add devmap support for ppc64") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200313094842.351830-1-aneesh.kumar@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 6 ++++++ arch/powerpc/include/asm/book3s/64/hash-64k.h | 8 +++++++- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 +++- arch/powerpc/include/asm/book3s/64/radix.h | 5 +++++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 197ced1eaaa0..4a16115b47eb 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -108,6 +108,12 @@ extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, extern int hash__has_transparent_hugepage(void); #endif +static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd) +{ + BUG(); + return pmd; +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_4K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 8d40cf03cb67..2194866225f8 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -181,7 +181,7 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, */ static inline int hash__pmd_trans_huge(pmd_t pmd) { - return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) == + return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)) == (_PAGE_PTE | H_PAGE_THP_HUGE)); } @@ -209,6 +209,12 @@ extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); extern int hash__has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE | 
_PAGE_DEVMAP)); +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 4dd13b503dbb..bcb79a96a6c8 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1179,7 +1179,9 @@ extern void serialize_against_pte_lookup(struct mm_struct *mm); static inline pmd_t pmd_mkdevmap(pmd_t pmd) { - return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP)); + if (radix_enabled()) + return radix__pmd_mkdevmap(pmd); + return hash__pmd_mkdevmap(pmd); } static inline int pmd_devmap(pmd_t pmd) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 19c44e1495ae..7a1fc49aaf99 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -289,6 +289,11 @@ extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, extern int radix__has_transparent_hugepage(void); #endif +static inline pmd_t radix__pmd_mkdevmap(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP)); +} + extern int __meminit radix__vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); -- GitLab From 46d7381f78c069dded3135a970c3488478903ba9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Fri, 6 Mar 2020 16:01:40 +0100 Subject: [PATCH 0969/1278] powerpc/xive: Use XIVE_BAD_IRQ instead of zero to catch non configured IPIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b1a504a6500df50e83b701b7946b34fce27ad8a3 upstream. When a CPU is brought up, an IPI number is allocated and recorded under the XIVE CPU structure. Invalid IPI numbers are tracked with interrupt number 0x0. On the PowerNV platform, the interrupt number space starts at 0x10 and this works fine. 
However, on the sPAPR platform, it is possible to allocate the interrupt number 0x0 and this raises an issue when CPU 0 is unplugged. The XIVE spapr driver tracks allocated interrupt numbers in a bitmask and it is not correctly updated when interrupt number 0x0 is freed. It stays allocated and it is then impossible to reallocate. Fix by using the XIVE_BAD_IRQ value instead of zero on both platforms. Reported-by: David Gibson Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Tested-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200306150143.5551-2-clg@kaod.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/sysdev/xive/common.c | 12 +++--------- arch/powerpc/sysdev/xive/native.c | 4 ++-- arch/powerpc/sysdev/xive/spapr.c | 4 ++-- arch/powerpc/sysdev/xive/xive-internal.h | 7 +++++++ 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a820370883d9..b7ae5a027714 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -72,13 +72,6 @@ static u32 xive_ipi_irq; /* Xive state for each CPU */ static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu); -/* - * A "disabled" interrupt should never fire, to catch problems - * we set its logical number to this - */ -#define XIVE_BAD_IRQ 0x7fffffff -#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) - /* An invalid CPU target */ #define XIVE_INVALID_TARGET (-1) @@ -1073,7 +1066,7 @@ static int xive_setup_cpu_ipi(unsigned int cpu) xc = per_cpu(xive_cpu, cpu); /* Check if we are already setup */ - if (xc->hw_ipi != 0) + if (xc->hw_ipi != XIVE_BAD_IRQ) return 0; /* Grab an IPI from the backend, this will populate xc->hw_ipi */ @@ -1110,7 +1103,7 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) /* Disable the IPI and free the IRQ data */ 
/* Already cleaned up ? */ - if (xc->hw_ipi == 0) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; /* Mask the IPI */ @@ -1266,6 +1259,7 @@ static int xive_prepare_cpu(unsigned int cpu) if (np) xc->chip_id = of_get_ibm_chip_id(np); of_node_put(np); + xc->hw_ipi = XIVE_BAD_IRQ; per_cpu(xive_cpu, cpu) = xc; } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index aac61374afeb..30cdcbfa1c04 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -310,7 +310,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) s64 rc; /* Free the IPI */ - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; for (;;) { rc = opal_xive_free_irq(xc->hw_ipi); @@ -318,7 +318,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) msleep(1); continue; } - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; break; } } diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 7fc41bf30fd5..10235098a726 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -443,11 +443,11 @@ static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc) static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc) { - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; xive_irq_bitmap_free(xc->hw_ipi); - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index f34abed0c05f..48808dbb25dc 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -9,6 +9,13 @@ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H +/* + * A "disabled" interrupt should never fire, to catch problems + * we set its logical number to this + */ +#define XIVE_BAD_IRQ 0x7fffffff +#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) + /* Each CPU carry one of these with various per-CPU state */ struct xive_cpu { 
#ifdef CONFIG_SMP -- GitLab From 840d3adab3a8751c0aefae553a82835a60821ca3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Feb 2020 19:38:27 +0000 Subject: [PATCH 0970/1278] powerpc/kprobes: Ignore traps that happened in real mode commit 21f8b2fa3ca5b01f7a2b51b89ce97a3705a15aa0 upstream. When a program check exception happens while MMU translation is disabled, following Oops happens in kprobe_handler() in the following code: } else if (*addr != BREAKPOINT_INSTRUCTION) { BUG: Unable to handle kernel data access on read at 0x0000e268 Faulting instruction address: 0xc000ec34 Oops: Kernel access of bad area, sig: 11 [#1] BE PAGE_SIZE=16K PREEMPT CMPC885 Modules linked in: CPU: 0 PID: 429 Comm: cat Not tainted 5.6.0-rc1-s3k-dev-00824-g84195dc6c58a #3267 NIP: c000ec34 LR: c000ecd8 CTR: c019cab8 REGS: ca4d3b58 TRAP: 0300 Not tainted (5.6.0-rc1-s3k-dev-00824-g84195dc6c58a) MSR: 00001032 CR: 2a4d3c52 XER: 00000000 DAR: 0000e268 DSISR: c0000000 GPR00: c000b09c ca4d3c10 c66d0620 00000000 ca4d3c60 00000000 00009032 00000000 GPR08: 00020000 00000000 c087de44 c000afe0 c66d0ad0 100d3dd6 fffffff3 00000000 GPR16: 00000000 00000041 00000000 ca4d3d70 00000000 00000000 0000416d 00000000 GPR24: 00000004 c53b6128 00000000 0000e268 00000000 c07c0000 c07bb6fc ca4d3c60 NIP [c000ec34] kprobe_handler+0x128/0x290 LR [c000ecd8] kprobe_handler+0x1cc/0x290 Call Trace: [ca4d3c30] [c000b09c] program_check_exception+0xbc/0x6fc [ca4d3c50] [c000e43c] ret_from_except_full+0x0/0x4 --- interrupt: 700 at 0xe268 Instruction dump: 913e0008 81220000 38600001 3929ffff 91220000 80010024 bb410008 7c0803a6 38210020 4e800020 38600000 4e800020 <813b0000> 6d2a7fe0 2f8a0008 419e0154 ---[ end trace 5b9152d4cdadd06d ]--- kprobe is not prepared to handle events in real mode and functions running in real mode should have been blacklisted, so kprobe_handler() can safely bail out telling 'this trap is not mine' for any trap that happened while in real-mode. 
If the trap happened with MSR_IR or MSR_DR cleared, return 0 immediately. Reported-by: Larry Finger Fixes: 6cc89bad60a6 ("powerpc/kprobes: Invoke handlers directly") Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Christophe Leroy Reviewed-by: Masami Hiramatsu Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/424331e2006e7291a1bfe40e7f3fa58825f565e1.1582054578.git.christophe.leroy@c-s.fr Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/kprobes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 10b46b35c059..07d3f3b40246 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -279,6 +279,9 @@ int kprobe_handler(struct pt_regs *regs) if (user_mode(regs)) return 0; + if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)) + return 0; + /* * We don't want to be preempted for the entire * duration of kprobe processing -- GitLab From 13368c1ff61c9bdf6725f4a9f48df6404d55fc76 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Fri, 27 Mar 2020 05:52:43 -0400 Subject: [PATCH 0971/1278] scsi: mpt3sas: Fix kernel panic observed on soft HBA unplug commit cc41f11a21a51d6869d71e525a7264c748d7c0d7 upstream. Generic protection fault type kernel panic is observed when user performs soft (ordered) HBA unplug operation while IOs are running on drives connected to HBA. When user performs ordered HBA removal operation, the kernel calls PCI device's .remove() call back function where driver is flushing out all the outstanding SCSI IO commands with DID_NO_CONNECT host byte and also unmaps sg buffers allocated for these IO commands. However, in the ordered HBA removal case (unlike of real HBA hot removal), HBA device is still alive and hence HBA hardware is performing the DMA operations to those buffers on the system memory which are already unmapped while flushing out the outstanding SCSI IO commands and this leads to kernel panic. 
Don't flush out the outstanding IOs from .remove() path in case of ordered removal since HBA will be still alive in this case and it can complete the outstanding IOs. Flush out the outstanding IOs only in case of 'physical HBA hot unplug' where there won't be any communication with the HBA. During shutdown also it is possible that HBA hardware can perform DMA operations on those outstanding IO buffers which are completed with DID_NO_CONNECT by the driver from .shutdown(). So same above fix is applied in shutdown path as well. It is safe to drop the outstanding commands when HBA is inaccessible such as when permanent PCI failure happens, when HBA is in non-operational state, or when someone does a real HBA hot unplug operation. Since driver knows that HBA is inaccessible during these cases, it is safe to drop the outstanding commands instead of waiting for SCSI error recovery to kick in and clear these outstanding commands. Link: https://lore.kernel.org/r/1585302763-23007-1-git-send-email-sreekanth.reddy@broadcom.com Fixes: c666d3be99c0 ("scsi: mpt3sas: wait for and flush running commands on shutdown/unload") Cc: stable@vger.kernel.org #v4.14.174+ Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. 
Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 9ef0c6265cd2..400c055167b0 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -8280,8 +8280,8 @@ static void scsih_remove(struct pci_dev *pdev) ioc->remove_host = 1; - mpt3sas_wait_for_commands_to_complete(ioc); - _scsih_flush_running_cmds(ioc); + if (!pci_device_is_present(pdev)) + _scsih_flush_running_cmds(ioc); _scsih_fw_event_cleanup_queue(ioc); @@ -8354,8 +8354,8 @@ scsih_shutdown(struct pci_dev *pdev) ioc->remove_host = 1; - mpt3sas_wait_for_commands_to_complete(ioc); - _scsih_flush_running_cmds(ioc); + if (!pci_device_is_present(pdev)) + _scsih_flush_running_cmds(ioc); _scsih_fw_event_cleanup_queue(ioc); -- GitLab From ef2999853ab520842a7bf6d5a50de71fdcdedc7c Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Wed, 4 Sep 2019 14:11:07 +0000 Subject: [PATCH 0972/1278] powerpc: Add attributes for setjmp/longjmp commit aa497d4352414aad22e792b35d0aaaa12bbc37c5 upstream. The setjmp function should be declared as "returns_twice", or bad things can happen[1]. This does not actually change generated code in my testing. The longjmp function should be declared as "noreturn", so that the compiler can optimise calls to it better. This makes the generated code a little shorter. 
1: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-returns_005ftwice-function-attribute Signed-off-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c02ce4a573f3bac907e2c70957a2d1275f910013.1567605586.git.segher@kernel.crashing.org Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/setjmp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/setjmp.h b/arch/powerpc/include/asm/setjmp.h index 279d03a1eec6..d930f5607ef2 100644 --- a/arch/powerpc/include/asm/setjmp.h +++ b/arch/powerpc/include/asm/setjmp.h @@ -12,7 +12,7 @@ #define JMP_BUF_LEN 23 -extern long setjmp(long *); -extern void longjmp(long *, long); +extern long setjmp(long *) __attribute__((returns_twice)); +extern void longjmp(long *, long) __attribute__((noreturn)); #endif /* _ASM_POWERPC_SETJMP_H */ -- GitLab From 7934eafe52a4f9b1e31a18ab6d20ac122f3ed3a0 Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Mon, 30 Mar 2020 10:03:56 +0200 Subject: [PATCH 0973/1278] powerpc: Make setjmp/longjmp signature standard commit c17eb4dca5a353a9dbbb8ad6934fe57af7165e91 upstream. Declaring setjmp()/longjmp() as taking longs makes the signature non-standard, and makes clang complain. In the past, this has been worked around by adding -ffreestanding to the compile flags. The implementation looks like it only ever propagates the value (in longjmp) or sets it to 1 (in setjmp), and we only call longjmp with integer parameters. This allows removing -ffreestanding from the compilation flags. 
Fixes: c9029ef9c957 ("powerpc: Avoid clang warnings around setjmp and longjmp") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Clement Courbet Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200330080400.124803-1-courbet@google.com Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/setjmp.h | 6 ++++-- arch/powerpc/kernel/Makefile | 3 --- arch/powerpc/xmon/Makefile | 3 --- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/setjmp.h b/arch/powerpc/include/asm/setjmp.h index d930f5607ef2..6941fe202bc8 100644 --- a/arch/powerpc/include/asm/setjmp.h +++ b/arch/powerpc/include/asm/setjmp.h @@ -12,7 +12,9 @@ #define JMP_BUF_LEN 23 -extern long setjmp(long *) __attribute__((returns_twice)); -extern void longjmp(long *, long) __attribute__((noreturn)); +typedef long jmp_buf[JMP_BUF_LEN]; + +extern int setjmp(jmp_buf env) __attribute__((returns_twice)); +extern void longjmp(jmp_buf env, int val) __attribute__((noreturn)); #endif /* _ASM_POWERPC_SETJMP_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 5607ce67d178..681f966b7211 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -5,9 +5,6 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -# Avoid clang warnings around longjmp/setjmp declarations -CFLAGS_crash.o += -ffreestanding - subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ifeq ($(CONFIG_PPC64),y) diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index a60c44b4a3e5..93974b0a5a99 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -1,9 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for xmon -# Avoid clang warnings around longjmp/setjmp declarations -subdir-ccflags-y := -ffreestanding - subdir-ccflags-$(CONFIG_PPC_WERROR) += -Werror GCOV_PROFILE := n -- GitLab From 
049be58476ade49dc3798e7ae62f4ca7f195b004 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 28 Feb 2020 13:04:36 +0000 Subject: [PATCH 0974/1278] Btrfs: fix crash during unmount due to race with delayed inode workers [ Upstream commit f0cc2cd70164efe8f75c5d99560f0f69969c72e4 ] During unmount we can have a job from the delayed inode items work queue still running, that can lead to at least two bad things: 1) A crash, because the worker can try to create a transaction just after the fs roots were freed; 2) A transaction leak, because the worker can create a transaction before the fs roots are freed and just after we committed the last transaction and after we stopped the transaction kthread. A stack trace example of the crash: [79011.691214] kernel BUG at lib/radix-tree.c:982! [79011.692056] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI [79011.693180] CPU: 3 PID: 1394 Comm: kworker/u8:2 Tainted: G W 5.6.0-rc2-btrfs-next-54 #2 (...) [79011.696789] Workqueue: btrfs-delayed-meta btrfs_work_helper [btrfs] [79011.697904] RIP: 0010:radix_tree_tag_set+0xe7/0x170 (...) 
[79011.702014] RSP: 0018:ffffb3c84a317ca0 EFLAGS: 00010293 [79011.702949] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [79011.704202] RDX: ffffb3c84a317cb0 RSI: ffffb3c84a317ca8 RDI: ffff8db3931340a0 [79011.705463] RBP: 0000000000000005 R08: 0000000000000005 R09: ffffffff974629d0 [79011.706756] R10: ffffb3c84a317bc0 R11: 0000000000000001 R12: ffff8db393134000 [79011.708010] R13: ffff8db3931340a0 R14: ffff8db393134068 R15: 0000000000000001 [79011.709270] FS: 0000000000000000(0000) GS:ffff8db3b6a00000(0000) knlGS:0000000000000000 [79011.710699] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [79011.711710] CR2: 00007f22c2a0a000 CR3: 0000000232ad4005 CR4: 00000000003606e0 [79011.712958] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [79011.714205] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [79011.715448] Call Trace: [79011.715925] record_root_in_trans+0x72/0xf0 [btrfs] [79011.716819] btrfs_record_root_in_trans+0x4b/0x70 [btrfs] [79011.717925] start_transaction+0xdd/0x5c0 [btrfs] [79011.718829] btrfs_async_run_delayed_root+0x17e/0x2b0 [btrfs] [79011.719915] btrfs_work_helper+0xaa/0x720 [btrfs] [79011.720773] process_one_work+0x26d/0x6a0 [79011.721497] worker_thread+0x4f/0x3e0 [79011.722153] ? process_one_work+0x6a0/0x6a0 [79011.722901] kthread+0x103/0x140 [79011.723481] ? kthread_create_worker_on_cpu+0x70/0x70 [79011.724379] ret_from_fork+0x3a/0x50 (...) 
The following diagram shows a sequence of steps that lead to the crash during unmount of the filesystem: CPU 1 CPU 2 CPU 3 btrfs_punch_hole() btrfs_btree_balance_dirty() btrfs_balance_delayed_items() --> sees fs_info->delayed_root->items with value 200, which is greater than BTRFS_DELAYED_BACKGROUND (128) and smaller than BTRFS_DELAYED_WRITEBACK (512) btrfs_wq_run_delayed_node() --> queues a job for fs_info->delayed_workers to run btrfs_async_run_delayed_root() btrfs_async_run_delayed_root() --> job queued by CPU 1 --> starts picking and running delayed nodes from the prepare_list list close_ctree() btrfs_delete_unused_bgs() btrfs_commit_super() btrfs_join_transaction() --> gets transaction N btrfs_commit_transaction(N) --> set transaction state to TRANS_STATE_COMMIT_START btrfs_first_prepared_delayed_node() --> picks delayed node X through the prepare_list list btrfs_run_delayed_items() btrfs_first_delayed_node() --> also picks delayed node X but through the node_list list __btrfs_commit_inode_delayed_items() --> runs all delayed items from this node and drops the node's item count to 0 through call to btrfs_release_delayed_inode() --> finishes running any remaining delayed nodes --> finishes transaction commit --> stops cleaner and transaction threads btrfs_free_fs_roots() --> frees all roots and removes them from the radix tree fs_info->fs_roots_radix btrfs_join_transaction() start_transaction() btrfs_record_root_in_trans() record_root_in_trans() radix_tree_tag_set() --> crashes because the root is not in the radix tree anymore If the worker is able to call btrfs_join_transaction() before the unmount task frees the fs roots, we end up leaking a transaction and all its resources, since after the call to btrfs_commit_super() and stopping the transaction kthread, we don't expect to have any transaction open anymore.
When this situation happens the worker has a delayed node that has no more items to run, since the task calling btrfs_run_delayed_items(), which is doing a transaction commit, picks the same node and runs all its items first. We cannot wait for the worker to complete when running delayed items through btrfs_run_delayed_items(), because we call that function in several phases of a transaction commit, and that could cause a deadlock because the worker calls btrfs_join_transaction() and the task doing the transaction commit may have already set the transaction state to TRANS_STATE_COMMIT_DOING. Also it's not possible to get into a situation where only some of the items of a delayed node are added to the fs/subvolume tree in the current transaction and the remaining ones in the next transaction, because when running the items of a delayed inode we lock its mutex, effectively waiting for the worker if the worker is running the items of the delayed node already. Since this can only cause issues when unmounting a filesystem, fix it in a simple way by waiting for any jobs on the delayed workers queue before calling btrfs_commit_super() at close_ctree(). This works because at this point no one can call btrfs_btree_balance_dirty() or btrfs_balance_delayed_items(), and if we end up waiting for any worker to complete, btrfs_commit_super() will commit the transaction created by the worker.
CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/async-thread.c | 8 ++++++++ fs/btrfs/async-thread.h | 2 ++ fs/btrfs/disk-io.c | 13 +++++++++++++ 3 files changed, 23 insertions(+) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 72d7589072f5..92615badc173 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -447,3 +447,11 @@ void btrfs_set_work_high_priority(struct btrfs_work *work) { set_bit(WORK_HIGH_PRIO_BIT, &work->flags); } + +void btrfs_flush_workqueue(struct btrfs_workqueue *wq) +{ + if (wq->high) + flush_workqueue(wq->high->normal_wq); + + flush_workqueue(wq->normal->normal_wq); +} diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index fc957e00cef1..2a25aef6ef2a 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -85,4 +85,6 @@ void btrfs_set_work_high_priority(struct btrfs_work *work); struct btrfs_fs_info *btrfs_work_owner(const struct btrfs_work *work); struct btrfs_fs_info *btrfs_workqueue_owner(const struct __btrfs_workqueue *wq); bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq); +void btrfs_flush_workqueue(struct btrfs_workqueue *wq); + #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6b4fee5c79f9..096c015b22a4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3725,6 +3725,19 @@ void close_ctree(struct btrfs_fs_info *fs_info) */ btrfs_delete_unused_bgs(fs_info); + /* + * There might be existing delayed inode workers still running + * and holding an empty delayed inode item. We must wait for + * them to complete first because they can create a transaction. + * This happens when someone calls btrfs_balance_delayed_items() + * and then a transaction commit runs the same delayed nodes + * before any delayed worker has done something with the nodes. 
+ * We must wait for any worker here and not at transaction + * commit time since that could cause a deadlock. + * This is a very rare case. + */ + btrfs_flush_workqueue(fs_info->delayed_workers); + ret = btrfs_commit_super(fs_info); if (ret) btrfs_err(fs_info, "commit super ret %d", ret); -- GitLab From 8807401c6fef447364959d58512a15834d841d1c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 19 Mar 2020 10:11:32 -0400 Subject: [PATCH 0975/1278] btrfs: use nofs allocations for running delayed items [ Upstream commit 351cbf6e4410e7ece05e35d0a07320538f2418b4 ] Zygo reported the following lockdep splat while testing the balance patches ====================================================== WARNING: possible circular locking dependency detected 5.6.0-c6f0579d496a+ #53 Not tainted ------------------------------------------------------ kswapd0/1133 is trying to acquire lock: ffff888092f622c0 (&delayed_node->mutex){+.+.}, at: __btrfs_release_delayed_node+0x7c/0x5b0 but task is already holding lock: ffffffff8fc5f860 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (fs_reclaim){+.+.}: fs_reclaim_acquire.part.91+0x29/0x30 fs_reclaim_acquire+0x19/0x20 kmem_cache_alloc_trace+0x32/0x740 add_block_entry+0x45/0x260 btrfs_ref_tree_mod+0x6e2/0x8b0 btrfs_alloc_tree_block+0x789/0x880 alloc_tree_block_no_bg_flush+0xc6/0xf0 __btrfs_cow_block+0x270/0x940 btrfs_cow_block+0x1ba/0x3a0 btrfs_search_slot+0x999/0x1030 btrfs_insert_empty_items+0x81/0xe0 btrfs_insert_delayed_items+0x128/0x7d0 __btrfs_run_delayed_items+0xf4/0x2a0 btrfs_run_delayed_items+0x13/0x20 btrfs_commit_transaction+0x5cc/0x1390 insert_balance_item.isra.39+0x6b2/0x6e0 btrfs_balance+0x72d/0x18d0 btrfs_ioctl_balance+0x3de/0x4c0 btrfs_ioctl+0x30ab/0x44a0 ksys_ioctl+0xa1/0xe0 __x64_sys_ioctl+0x43/0x50 do_syscall_64+0x77/0x2c0 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #0 (&delayed_node->mutex){+.+.}: __lock_acquire+0x197e/0x2550 lock_acquire+0x103/0x220 __mutex_lock+0x13d/0xce0 mutex_lock_nested+0x1b/0x20 __btrfs_release_delayed_node+0x7c/0x5b0 btrfs_remove_delayed_node+0x49/0x50 btrfs_evict_inode+0x6fc/0x900 evict+0x19a/0x2c0 dispose_list+0xa0/0xe0 prune_icache_sb+0xbd/0xf0 super_cache_scan+0x1b5/0x250 do_shrink_slab+0x1f6/0x530 shrink_slab+0x32e/0x410 shrink_node+0x2a5/0xba0 balance_pgdat+0x4bd/0x8a0 kswapd+0x35a/0x800 kthread+0x1e9/0x210 ret_from_fork+0x3a/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); lock(&delayed_node->mutex); lock(fs_reclaim); lock(&delayed_node->mutex); *** DEADLOCK *** 3 locks held by kswapd0/1133: #0: ffffffff8fc5f860 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30 #1: ffffffff8fc380d8 (shrinker_rwsem){++++}, at: shrink_slab+0x1e8/0x410 #2: ffff8881e0e6c0e8 (&type->s_umount_key#42){++++}, at: trylock_super+0x1b/0x70 stack backtrace: CPU: 2 PID: 1133 Comm: kswapd0 Not tainted 5.6.0-c6f0579d496a+ #53 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Call Trace: dump_stack+0xc1/0x11a 
print_circular_bug.isra.38.cold.57+0x145/0x14a check_noncircular+0x2a9/0x2f0 ? print_circular_bug.isra.38+0x130/0x130 ? stack_trace_consume_entry+0x90/0x90 ? save_trace+0x3cc/0x420 __lock_acquire+0x197e/0x2550 ? btrfs_inode_clear_file_extent_range+0x9b/0xb0 ? register_lock_class+0x960/0x960 lock_acquire+0x103/0x220 ? __btrfs_release_delayed_node+0x7c/0x5b0 __mutex_lock+0x13d/0xce0 ? __btrfs_release_delayed_node+0x7c/0x5b0 ? __asan_loadN+0xf/0x20 ? pvclock_clocksource_read+0xeb/0x190 ? __btrfs_release_delayed_node+0x7c/0x5b0 ? mutex_lock_io_nested+0xc20/0xc20 ? __kasan_check_read+0x11/0x20 ? check_chain_key+0x1e6/0x2e0 mutex_lock_nested+0x1b/0x20 ? mutex_lock_nested+0x1b/0x20 __btrfs_release_delayed_node+0x7c/0x5b0 btrfs_remove_delayed_node+0x49/0x50 btrfs_evict_inode+0x6fc/0x900 ? btrfs_setattr+0x840/0x840 ? do_raw_spin_unlock+0xa8/0x140 evict+0x19a/0x2c0 dispose_list+0xa0/0xe0 prune_icache_sb+0xbd/0xf0 ? invalidate_inodes+0x310/0x310 super_cache_scan+0x1b5/0x250 do_shrink_slab+0x1f6/0x530 shrink_slab+0x32e/0x410 ? do_shrink_slab+0x530/0x530 ? do_shrink_slab+0x530/0x530 ? __kasan_check_read+0x11/0x20 ? mem_cgroup_protected+0x13d/0x260 shrink_node+0x2a5/0xba0 balance_pgdat+0x4bd/0x8a0 ? mem_cgroup_shrink_node+0x490/0x490 ? _raw_spin_unlock_irq+0x27/0x40 ? finish_task_switch+0xce/0x390 ? rcu_read_lock_bh_held+0xb0/0xb0 kswapd+0x35a/0x800 ? _raw_spin_unlock_irqrestore+0x4c/0x60 ? balance_pgdat+0x8a0/0x8a0 ? finish_wait+0x110/0x110 ? __kasan_check_read+0x11/0x20 ? __kthread_parkme+0xc6/0xe0 ? balance_pgdat+0x8a0/0x8a0 kthread+0x1e9/0x210 ? kthread_create_worker_on_cpu+0xc0/0xc0 ret_from_fork+0x3a/0x50 This is because we hold that delayed node's mutex while doing tree operations. Fix this by just wrapping the searches in nofs. 
CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/delayed-inode.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 87414fc9e268..416fb50a5378 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -18,6 +18,7 @@ */ #include +#include #include "delayed-inode.h" #include "disk-io.h" #include "transaction.h" @@ -833,11 +834,14 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *leaf; + unsigned int nofs_flag; char *ptr; int ret; + nofs_flag = memalloc_nofs_save(); ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key, delayed_item->data_len); + memalloc_nofs_restore(nofs_flag); if (ret < 0 && ret != -EEXIST) return ret; @@ -966,6 +970,7 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans, struct btrfs_delayed_node *node) { struct btrfs_delayed_item *curr, *prev; + unsigned int nofs_flag; int ret = 0; do_again: @@ -974,7 +979,9 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans, if (!curr) goto delete_fail; + nofs_flag = memalloc_nofs_save(); ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1); + memalloc_nofs_restore(nofs_flag); if (ret < 0) goto delete_fail; else if (ret > 0) { @@ -1041,6 +1048,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; + unsigned int nofs_flag; int mod; int ret; @@ -1053,7 +1061,9 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, else mod = 1; + nofs_flag = memalloc_nofs_save(); ret = btrfs_lookup_inode(trans, root, path, &key, mod); + memalloc_nofs_restore(nofs_flag); if (ret > 0) { btrfs_release_path(path); return -ENOENT; @@ -1104,7 +1114,10 @@ 
static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, key.type = BTRFS_INODE_EXTREF_KEY; key.offset = -1; + + nofs_flag = memalloc_nofs_save(); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + memalloc_nofs_restore(nofs_flag); if (ret < 0) goto err_out; ASSERT(ret); -- GitLab From 814a0291c0e09a4b7d13a53c8884b05f9d1adf85 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Tue, 24 Mar 2020 21:22:45 +0800 Subject: [PATCH 0976/1278] dm zoned: remove duplicate nr_rnd_zones increase in dmz_init_zone() [ Upstream commit b8fdd090376a7a46d17db316638fe54b965c2fb0 ] zmd->nr_rnd_zones was increased twice by mistake. The other place it is increased in dmz_init_zone() is the only one needed: 1131 zmd->nr_useable_zones++; 1132 if (dmz_is_rnd(zone)) { 1133 zmd->nr_rnd_zones++; ^^^ Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Bob Liu Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-zoned-metadata.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index e3b67b145027..4d658a0c6025 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1105,7 +1105,6 @@ static int dmz_init_zone(struct dmz_metadata *zmd, struct dm_zone *zone, if (blkz->type == BLK_ZONE_TYPE_CONVENTIONAL) { set_bit(DMZ_RND, &zone->flags); - zmd->nr_rnd_zones++; } else if (blkz->type == BLK_ZONE_TYPE_SEQWRITE_REQ || blkz->type == BLK_ZONE_TYPE_SEQWRITE_PREF) { set_bit(DMZ_SEQ, &zone->flags); -- GitLab From cbf12bd2856cffade67191af36115ea9760ff2f8 Mon Sep 17 00:00:00 2001 From: Andrei Botila Date: Fri, 28 Feb 2020 12:46:48 +0200 Subject: [PATCH 0977/1278] crypto: caam - update xts sector size for large input length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3f142b6a7b573bde6cff926f246da05652c61eb4 ] Since in the software 
implementation of XTS-AES there is no notion of sector every input length is processed the same way. CAAM implementation has the notion of sector which causes different results between the software implementation and the one in CAAM for input lengths bigger than 512 bytes. Increase sector size to maximum value on 16 bits. Fixes: c6415a6016bf ("crypto: caam - add support for acipher xts(aes)") Cc: # v4.12+ Signed-off-by: Andrei Botila Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/caam/caamalg_desc.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c index b23c7b72525c..a3d507fb9ea5 100644 --- a/drivers/crypto/caam/caamalg_desc.c +++ b/drivers/crypto/caam/caamalg_desc.c @@ -1280,7 +1280,13 @@ EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_givencap); */ void cnstr_shdsc_xts_ablkcipher_encap(u32 * const desc, struct alginfo *cdata) { - __be64 sector_size = cpu_to_be64(512); + /* + * Set sector size to a big value, practically disabling + * sector size segmentation in xts implementation. We cannot + * take full advantage of this HW feature with existing + * crypto API / dm-crypt SW architecture. + */ + __be64 sector_size = cpu_to_be64(BIT(15)); u32 *key_jump_cmd; init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); @@ -1332,7 +1338,13 @@ EXPORT_SYMBOL(cnstr_shdsc_xts_ablkcipher_encap); */ void cnstr_shdsc_xts_ablkcipher_decap(u32 * const desc, struct alginfo *cdata) { - __be64 sector_size = cpu_to_be64(512); + /* + * Set sector size to a big value, practically disabling + * sector size segmentation in xts implementation. We cannot + * take full advantage of this HW feature with existing + * crypto API / dm-crypt SW architecture. 
+ */ + __be64 sector_size = cpu_to_be64(BIT(15)); u32 *key_jump_cmd; init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); -- GitLab From 135a45d56f958528c2f9415fb058eb0536582fb4 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 22 Jan 2020 14:43:20 -0500 Subject: [PATCH 0978/1278] drm/dp_mst: Fix clearing payload state on topology disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8732fe46b20c951493bfc4dba0ad08efdf41de81 ] The issues caused by: commit 64e62bdf04ab ("drm/dp_mst: Remove VCPI while disabling topology mgr") Prompted me to take a closer look at how we clear the payload state in general when disabling the topology, and it turns out there's actually two subtle issues here. The first is that we're not grabbing &mgr.payload_lock when clearing the payloads in drm_dp_mst_topology_mgr_set_mst(). Seeing as the canonical lock order is &mgr.payload_lock -> &mgr.lock (because we always want &mgr.lock to be the inner-most lock so topology validation always works), this makes perfect sense. It also means that -technically- there could be racing between someone calling drm_dp_mst_topology_mgr_set_mst() to disable the topology, along with a modeset occurring that's modifying the payload state at the same time. The second is the more obvious issue that Wayne Lin discovered, that we're not clearing proposed_payloads when disabling the topology. I actually can't see any obvious places where the racing caused by the first issue would break something, and it could be that some of our higher-level locks already prevent this by happenstance, but better safe than sorry. So, let's make it so that drm_dp_mst_topology_mgr_set_mst() first grabs &mgr.payload_lock followed by &mgr.lock so that we never race when modifying the payload state. Then, we also clear proposed_payloads to fix the original issue of enabling a new topology with a dirty payload state. 
This doesn't clear any of the drm_dp_vcpi structures, but those are getting destroyed along with the ports anyway. Changes since v1: * Use sizeof(mgr->payloads[0])/sizeof(mgr->proposed_vcpis[0]) instead - vsyrjala Cc: Sean Paul Cc: Wayne Lin Cc: Ville Syrjälä Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Lyude Paul Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200122194321.14953-1-lyude@redhat.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_mst_topology.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index f0d819fc16cd..db0e9ce57e29 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2038,6 +2038,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms int ret = 0; struct drm_dp_mst_branch *mstb = NULL; + mutex_lock(&mgr->payload_lock); mutex_lock(&mgr->lock); if (mst_state == mgr->mst_state) goto out_unlock; @@ -2096,7 +2097,10 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms /* this can fail if the device is gone */ drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, 0); ret = 0; - memset(mgr->payloads, 0, mgr->max_payloads * sizeof(struct drm_dp_payload)); + memset(mgr->payloads, 0, + mgr->max_payloads * sizeof(mgr->payloads[0])); + memset(mgr->proposed_vcpis, 0, + mgr->max_payloads * sizeof(mgr->proposed_vcpis[0])); mgr->payload_mask = 0; set_bit(0, &mgr->payload_mask); mgr->vcpi_mask = 0; @@ -2104,6 +2108,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms out_unlock: mutex_unlock(&mgr->lock); + mutex_unlock(&mgr->payload_lock); if (mstb) drm_dp_put_mst_branch_device(mstb); return ret; -- GitLab From 099aab3d422839d5ee7b6b376b16a8583d3b050a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 2 Feb 2020 17:16:31 +0000 Subject: [PATCH 0979/1278] drm: Remove PageReserved 
manipulation from drm_pci_alloc [ Upstream commit ea36ec8623f56791c6ff6738d0509b7920f85220 ] drm_pci_alloc/drm_pci_free are very thin wrappers around the core dma facilities, and we have no special reason within the drm layer to behave differently. In particular, since commit de09d31dd38a50fdce106c15abd68432eebbd014 Author: Kirill A. Shutemov Date: Fri Jan 15 16:51:42 2016 -0800 page-flags: define PG_reserved behavior on compound pages As far as I can see there's no users of PG_reserved on compound pages. Let's use PF_NO_COMPOUND here. it has been illegal to combine GFP_COMP with SetPageReserved, so lets stop doing both and leave the dma layer to its own devices. Reported-by: Taketo Kabe Bug: https://gitlab.freedesktop.org/drm/intel/issues/1027 Fixes: de09d31dd38a ("page-flags: define PG_reserved behavior on compound pages") Signed-off-by: Chris Wilson Cc: # v4.5+ Reviewed-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20200202171635.4039044-1-chris@chris-wilson.co.uk Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_pci.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 1235c9877d6f..2078d7706a67 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -46,8 +46,6 @@ drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t align) { drm_dma_handle_t *dmah; - unsigned long addr; - size_t sz; /* pci_alloc_consistent only guarantees alignment to the smallest * PAGE_SIZE order which is greater than or equal to the requested size. 
@@ -61,22 +59,13 @@ drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t ali return NULL; dmah->size = size; - dmah->vaddr = dma_alloc_coherent(&dev->pdev->dev, size, &dmah->busaddr, GFP_KERNEL | __GFP_COMP); + dmah->vaddr = dma_alloc_coherent(&dev->pdev->dev, size, &dmah->busaddr, GFP_KERNEL); if (dmah->vaddr == NULL) { kfree(dmah); return NULL; } - memset(dmah->vaddr, 0, size); - - /* XXX - Is virt_to_page() legal for consistent mem? */ - /* Reserve */ - for (addr = (unsigned long)dmah->vaddr, sz = size; - sz > 0; addr += PAGE_SIZE, sz -= PAGE_SIZE) { - SetPageReserved(virt_to_page((void *)addr)); - } - return dmah; } @@ -89,19 +78,9 @@ EXPORT_SYMBOL(drm_pci_alloc); */ void __drm_legacy_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah) { - unsigned long addr; - size_t sz; - - if (dmah->vaddr) { - /* XXX - Is virt_to_page() legal for consistent mem? */ - /* Unreserve */ - for (addr = (unsigned long)dmah->vaddr, sz = dmah->size; - sz > 0; addr += PAGE_SIZE, sz -= PAGE_SIZE) { - ClearPageReserved(virt_to_page((void *)addr)); - } + if (dmah->vaddr) dma_free_coherent(&dev->pdev->dev, dmah->size, dmah->vaddr, dmah->busaddr); - } } /** -- GitLab From abd1348c09724ebfec03067b723343dc15157bf4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Mar 2020 16:34:48 +0900 Subject: [PATCH 0980/1278] ftrace/kprobe: Show the maxactive number on kprobe_events [ Upstream commit 6a13a0d7b4d1171ef9b80ad69abc37e1daa941b3 ] Show maxactive parameter on kprobe_events. This allows user to save the current configuration and restore it without losing maxactive parameter. 
Link: http://lkml.kernel.org/r/4762764a-6df7-bc93-ed60-e336146dce1f@gmail.com Link: http://lkml.kernel.org/r/158503528846.22706.5549974121212526020.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 696ced4fb1d76 ("tracing/kprobes: expose maxactive for kretprobe in kprobe_events") Reported-by: Taeung Song Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/trace_kprobe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index ea20274a105a..d66aed6e9c75 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -877,6 +877,8 @@ static int probes_seq_show(struct seq_file *m, void *v) int i; seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p'); + if (trace_kprobe_is_return(tk) && tk->rp.maxactive) + seq_printf(m, "%d", tk->rp.maxactive); seq_printf(m, ":%s/%s", tk->tp.call.class->system, trace_event_name(&tk->tp.call)); -- GitLab From 63e715f351d254a7bb0926bc9738dfb154fe2abf Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 3 Apr 2020 17:04:08 +0800 Subject: [PATCH 0981/1278] ipmi: fix hung processes in __get_guid() [ Upstream commit 32830a0534700f86366f371b150b17f0f0d140d7 ] The wait_event() function is used to detect command completion. When send_guid_cmd() returns an error, smi_send() has not been called to send data. Therefore, wait_event() should not be used on the error path, otherwise it will cause the following warning: [ 1361.588808] systemd-udevd D 0 1501 1436 0x00000004 [ 1361.588813] ffff883f4b1298c0 0000000000000000 ffff883f4b188000 ffff887f7e3d9f40 [ 1361.677952] ffff887f64bd4280 ffffc90037297a68 ffffffff8173ca3b ffffc90000000010 [ 1361.767077] 00ffc90037297ad0 ffff887f7e3d9f40 0000000000000286 ffff883f4b188000 [ 1361.856199] Call Trace: [ 1361.885578] [] ? __schedule+0x23b/0x780 [ 1361.951406] [] schedule+0x36/0x80 [ 1362.010979] [] get_guid+0x118/0x150 [ipmi_msghandler] [ 1362.091281] [] ? 
prepare_to_wait_event+0x100/0x100 [ 1362.168533] [] ipmi_register_smi+0x405/0x940 [ipmi_msghandler] [ 1362.258337] [] try_smi_init+0x529/0x950 [ipmi_si] [ 1362.334521] [] ? std_irq_setup+0xd0/0xd0 [ipmi_si] [ 1362.411701] [] init_ipmi_si+0x492/0x9e0 [ipmi_si] [ 1362.487917] [] ? ipmi_pci_probe+0x280/0x280 [ipmi_si] [ 1362.568219] [] do_one_initcall+0x50/0x180 [ 1362.636109] [] ? kmem_cache_alloc_trace+0x142/0x190 [ 1362.714330] [] do_init_module+0x5f/0x200 [ 1362.781208] [] load_module+0x1898/0x1de0 [ 1362.848069] [] ? __symbol_put+0x60/0x60 [ 1362.913886] [] ? security_kernel_post_read_file+0x6b/0x80 [ 1362.998514] [] SYSC_finit_module+0xe5/0x120 [ 1363.068463] [] ? SYSC_finit_module+0xe5/0x120 [ 1363.140513] [] SyS_finit_module+0xe/0x10 [ 1363.207364] [] do_syscall_64+0x74/0x180 Fixes: 50c812b2b951 ("[PATCH] ipmi: add full sysfs support") Signed-off-by: Wen Yang Cc: Corey Minyard Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: openipmi-developer@lists.sourceforge.net Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 2.6.17- Message-Id: <20200403090408.58745-1-wenyang@linux.alibaba.com> Signed-off-by: Corey Minyard Signed-off-by: Sasha Levin --- drivers/char/ipmi/ipmi_msghandler.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index c82d9fd2f05a..f72a272eeb9b 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2647,7 +2647,9 @@ get_guid(ipmi_smi_t intf) if (rv) /* Send failed, no GUID available. 
*/ intf->bmc->guid_set = 0; - wait_event(intf->waitq, intf->bmc->guid_set != 2); + else + wait_event(intf->waitq, intf->bmc->guid_set != 2); + intf->null_user_handler = NULL; } -- GitLab From 995f8a96818a951f84878e4d0f18703ad90486d7 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Thu, 23 Jan 2020 11:19:25 +0000 Subject: [PATCH 0982/1278] powerpc/fsl_booke: Avoid creating duplicate tlb1 entry [ Upstream commit aa4113340ae6c2811e046f08c2bc21011d20a072 ] In the current implementation, the call to loadcam_multi() is wrapped between switch_to_as1() and restore_to_as0() calls so, when it tries to create its own temporary AS=1 TLB1 entry, it ends up duplicating the existing one created by switch_to_as1(). Add a check to skip creating the temporary entry if already running in AS=1. Fixes: d9e1831a4202 ("powerpc/85xx: Load all early TLB entries at once") Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Laurentiu Tudor Acked-by: Scott Wood Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200123111914.2565-1-laurentiu.tudor@nxp.com Signed-off-by: Sasha Levin --- arch/powerpc/mm/tlb_nohash_low.S | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index 048b8e9f4492..63964af9a162 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -400,7 +400,7 @@ _GLOBAL(set_context) * extern void loadcam_entry(unsigned int index) * * Load TLBCAM[index] entry in to the L2 CAM MMU - * Must preserve r7, r8, r9, and r10 + * Must preserve r7, r8, r9, r10 and r11 */ _GLOBAL(loadcam_entry) mflr r5 @@ -436,6 +436,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) */ _GLOBAL(loadcam_multi) mflr r8 + /* Don't switch to AS=1 if already there */ + mfmsr r11 + andi. 
r11,r11,MSR_IS + bne 10f /* * Set up temporary TLB entry that is the same as what we're @@ -461,6 +465,7 @@ _GLOBAL(loadcam_multi) mtmsr r6 isync +10: mr r9,r3 add r10,r3,r4 2: bl loadcam_entry @@ -469,6 +474,10 @@ _GLOBAL(loadcam_multi) mr r3,r9 blt 2b + /* Don't return to AS=0 if we were in AS=1 at function start */ + andi. r11,r11,MSR_IS + bne 3f + /* Return to AS=0 and clear the temporary entry */ mfmsr r6 rlwinm. r6,r6,0,~(MSR_IS|MSR_DS) @@ -484,6 +493,7 @@ _GLOBAL(loadcam_multi) tlbwe isync +3: mtlr r8 blr #endif -- GitLab From 026c7b1515e430e4bf7e085f6f7dba0018a07c67 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 14 Sep 2018 23:43:37 -0700 Subject: [PATCH 0983/1278] misc: echo: Remove unnecessary parentheses and simplify check for zero [ Upstream commit 85dc2c65e6c975baaf36ea30f2ccc0a36a8c8add ] Clang warns when multiple pairs of parentheses are used for a single conditional statement. drivers/misc/echo/echo.c:384:27: warning: equality comparison with extraneous parentheses [-Wparentheses-equality] if ((ec->nonupdate_dwell == 0)) { ~~~~~~~~~~~~~~~~~~~~^~~~ drivers/misc/echo/echo.c:384:27: note: remove extraneous parentheses around the comparison to silence this warning if ((ec->nonupdate_dwell == 0)) { ~ ^ ~ drivers/misc/echo/echo.c:384:27: note: use '=' to turn this equality comparison into an assignment if ((ec->nonupdate_dwell == 0)) { ^~ = 1 warning generated. Remove them and while we're at it, simplify the zero check as '!var' is used more than 'var == 0'. 
Reported-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/echo/echo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/echo/echo.c b/drivers/misc/echo/echo.c index 9597e9523cac..fff13176f9b8 100644 --- a/drivers/misc/echo/echo.c +++ b/drivers/misc/echo/echo.c @@ -454,7 +454,7 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx) */ ec->factor = 0; ec->shift = 0; - if ((ec->nonupdate_dwell == 0)) { + if (!ec->nonupdate_dwell) { int p, logp, shift; /* Determine: -- GitLab From c28ab0252d82190f485435feba008cc83971dc59 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 26 Feb 2020 16:51:58 +0200 Subject: [PATCH 0984/1278] mfd: dln2: Fix sanity checking for endpoints [ Upstream commit fb945c95a482200876993977008b67ea658bd938 ] While the commit 2b8bd606b1e6 ("mfd: dln2: More sanity checking for endpoints") tries to harden the sanity checks it made at the same time a regression, i.e. mixed in and out endpoints. Obviously it should have been not tested on real hardware at that time, but unluckily it didn't happen. So, fix above mentioned typo and make device being enumerated again. While here, introduce an enumerator for magic values to prevent similar issue to happen in the future. 
Fixes: 2b8bd606b1e6 ("mfd: dln2: More sanity checking for endpoints") Cc: Oliver Neukum Cc: Greg Kroah-Hartman Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/dln2.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/dln2.c b/drivers/mfd/dln2.c index 95d0f2df0ad4..672831d5ee32 100644 --- a/drivers/mfd/dln2.c +++ b/drivers/mfd/dln2.c @@ -93,6 +93,11 @@ struct dln2_mod_rx_slots { spinlock_t lock; }; +enum dln2_endpoint { + DLN2_EP_OUT = 0, + DLN2_EP_IN = 1, +}; + struct dln2_dev { struct usb_device *usb_dev; struct usb_interface *interface; @@ -740,10 +745,10 @@ static int dln2_probe(struct usb_interface *interface, hostif->desc.bNumEndpoints < 2) return -ENODEV; - epin = &hostif->endpoint[0].desc; - epout = &hostif->endpoint[1].desc; + epout = &hostif->endpoint[DLN2_EP_OUT].desc; if (!usb_endpoint_is_bulk_out(epout)) return -ENODEV; + epin = &hostif->endpoint[DLN2_EP_IN].desc; if (!usb_endpoint_is_bulk_in(epin)) return -ENODEV; -- GitLab From 697c8df388e41b3143a8abf159b96acfdc2e9f37 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 16 Apr 2020 17:57:40 +0200 Subject: [PATCH 0985/1278] amd-xgbe: Use __napi_schedule() in BH context [ Upstream commit d518691cbd3be3dae218e05cca3f3fc9b2f1aa77 ] The driver uses __napi_schedule_irqoff() which is fine as long as it is invoked with disabled interrupts by everybody. Since the commit mentioned below the driver may invoke xgbe_isr_task() in tasklet/softirq context. This may lead to list corruption if another driver uses __napi_schedule_irqoff() in IRQ context. Use __napi_schedule() which is safe to use from IRQ and softirq context. Fixes: 85b85c853401d ("amd-xgbe: Re-issue interrupt if interrupt status not cleared") Signed-off-by: Sebastian Andrzej Siewior Acked-by: Tom Lendacky Cc: Tom Lendacky Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index c65d2cdcc7cf..8556962e6824 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -515,7 +515,7 @@ static void xgbe_isr_task(unsigned long data) xgbe_disable_rx_tx_ints(pdata); /* Turn on polling */ - __napi_schedule_irqoff(&pdata->napi); + __napi_schedule(&pdata->napi); } } else { /* Don't clear Rx/Tx status if doing per channel DMA -- GitLab From 3e5eab7f08af84834d40082eb644bb9dd1960779 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 7 Apr 2020 13:23:21 +0000 Subject: [PATCH 0986/1278] hsr: check protocol version in hsr_newlink() [ Upstream commit 4faab8c446def7667adf1f722456c2f4c304069c ] In the current hsr code, only 0 and 1 protocol versions are valid. But current hsr code doesn't check the version, which is received by userspace. Test commands: ip link add dummy0 type dummy ip link add dummy1 type dummy ip link add hsr0 type hsr slave1 dummy0 slave2 dummy1 version 4 In the test commands, version 4 is invalid. So, the command should be failed. After this patch, following error will occur. "Error: hsr: Only versions 0..1 are supported." Fixes: ee1c27977284 ("net/hsr: Added support for HSR v1") Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/hsr/hsr_netlink.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 37708dabebd1..606bc7fe5cc7 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -64,10 +64,16 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, else multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]); - if (!data[IFLA_HSR_VERSION]) + if (!data[IFLA_HSR_VERSION]) { hsr_version = 0; - else + } else { hsr_version = nla_get_u8(data[IFLA_HSR_VERSION]); + if (hsr_version > 1) { + NL_SET_ERR_MSG_MOD(extack, + "Only versions 0..1 are supported"); + return -EINVAL; + } + } return hsr_dev_finalize(dev, link, multicast_spec, hsr_version); } -- GitLab From cce0d1bd3fbae5595b16a7b9e4b07b6e7c525e50 Mon Sep 17 00:00:00 2001 From: Taras Chornyi Date: Thu, 9 Apr 2020 20:25:24 +0300 Subject: [PATCH 0987/1278] net: ipv4: devinet: Fix crash when add/del multicast IP with autojoin [ Upstream commit 690cc86321eb9bcee371710252742fb16fe96824 ] When CONFIG_IP_MULTICAST is not set and multicast ip is added to the device with autojoin flag or when multicast ip is deleted kernel will crash. 
steps to reproduce: ip addr add 224.0.0.0/32 dev eth0 ip addr del 224.0.0.0/32 dev eth0 or ip addr add 224.0.0.0/32 dev eth0 autojoin Unable to handle kernel NULL pointer dereference at virtual address 0000000000000088 pc : _raw_write_lock_irqsave+0x1e0/0x2ac lr : lock_sock_nested+0x1c/0x60 Call trace: _raw_write_lock_irqsave+0x1e0/0x2ac lock_sock_nested+0x1c/0x60 ip_mc_config.isra.28+0x50/0xe0 inet_rtm_deladdr+0x1a8/0x1f0 rtnetlink_rcv_msg+0x120/0x350 netlink_rcv_skb+0x58/0x120 rtnetlink_rcv+0x14/0x20 netlink_unicast+0x1b8/0x270 netlink_sendmsg+0x1a0/0x3b0 ____sys_sendmsg+0x248/0x290 ___sys_sendmsg+0x80/0xc0 __sys_sendmsg+0x68/0xc0 __arm64_sys_sendmsg+0x20/0x30 el0_svc_common.constprop.2+0x88/0x150 do_el0_svc+0x20/0x80 el0_sync_handler+0x118/0x190 el0_sync+0x140/0x180 Fixes: 93a714d6b53d ("multicast: Extend ip address command to enable multicast group join/leave on") Signed-off-by: Taras Chornyi Signed-off-by: Vadym Kochan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/devinet.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5f020c051af9..096a28f9720d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -579,12 +579,15 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, return NULL; } -static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) +static int ip_mc_autojoin_config(struct net *net, bool join, + const struct in_ifaddr *ifa) { +#if defined(CONFIG_IP_MULTICAST) struct ip_mreqn mreq = { .imr_multiaddr.s_addr = ifa->ifa_address, .imr_ifindex = ifa->ifa_dev->dev->ifindex, }; + struct sock *sk = net->ipv4.mc_autojoin_sk; int ret; ASSERT_RTNL(); @@ -597,6 +600,9 @@ static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) release_sock(sk); return ret; +#else + return -EOPNOTSUPP; +#endif } static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -638,7 
+644,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, continue; if (ipv4_is_multicast(ifa->ifa_address)) - ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa); + ip_mc_autojoin_config(net, false, ifa); __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); return 0; } @@ -896,8 +902,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, */ set_ifa_lifetime(ifa, valid_lft, prefered_lft); if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { - int ret = ip_mc_config(net->ipv4.mc_autojoin_sk, - true, ifa); + int ret = ip_mc_autojoin_config(net, true, ifa); if (ret < 0) { inet_free_ifa(ifa); -- GitLab From 41b8beae660f6635782d8052f4fcb65dd4bb0f47 Mon Sep 17 00:00:00 2001 From: Tim Stallard Date: Fri, 3 Apr 2020 21:26:21 +0100 Subject: [PATCH 0988/1278] net: ipv6: do not consider routes via gateways for anycast address check [ Upstream commit 03e2a984b6165621f287fadf5f4b5cd8b58dcaba ] The behaviour for what is considered an anycast address changed in commit 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu exception"). This now considers the first address in a subnet where there is a route via a gateway to be an anycast address. This breaks path MTU discovery and traceroutes when a host in a remote network uses the address at the start of a prefix (eg 2600:: advertised as 2600::/48 in the DFZ) as ICMP errors will not be sent to anycast addresses. This patch excludes any routes with a gateway, or via point to point links, like the behaviour previously from rt6_is_gw_or_nonexthop in net/ipv6/route.c. 
This can be tested with: ip link add v1 type veth peer name v2 ip netns add test ip netns exec test ip link set lo up ip link set v2 netns test ip link set v1 up ip netns exec test ip link set v2 up ip addr add 2001:db8::1/64 dev v1 nodad ip addr add 2001:db8:100:: dev lo nodad ip netns exec test ip addr add 2001:db8::2/64 dev v2 nodad ip netns exec test ip route add unreachable 2001:db8:1::1 ip netns exec test ip route add 2001:db8:100::/64 via 2001:db8::1 ip netns exec test sysctl net.ipv6.conf.all.forwarding=1 ip route add 2001:db8:1::1 via 2001:db8::2 ping -I 2001:db8::1 2001:db8:1::1 -c1 ping -I 2001:db8:100:: 2001:db8:1::1 -c1 ip addr delete 2001:db8:100:: dev lo ip netns delete test Currently the first ping will get back a destination unreachable ICMP error, but the second will never get a response, with "icmp6_send: acast source" logged. After this patch, both get destination unreachable ICMP replies. Fixes: 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu exception") Signed-off-by: Tim Stallard Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip6_route.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index bee528135cf1..9f7f81117434 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -207,6 +207,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, return rt->rt6i_flags & RTF_ANYCAST || (rt->rt6i_dst.plen < 127 && + !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } -- GitLab From 33fe397c18f4788232793f3fbf5d3156f3100b6f Mon Sep 17 00:00:00 2001 From: Wang Wenhu Date: Wed, 8 Apr 2020 19:53:53 -0700 Subject: [PATCH 0989/1278] net: qrtr: send msgs from local of same id as broadcast [ Upstream commit 6dbf02acef69b0742c238574583b3068afbd227c ] If the local node id(qrtr_local_nid) is not modified after its initialization, it is equal to the broadcast node id(QRTR_NODE_BCAST). So the messages from local node should not be taken as broadcast and keep the process going to send them out anyway. The definitions are as follows: static unsigned int qrtr_local_nid = NUMA_NO_NODE; Fixes: fdf5fd397566 ("net: qrtr: Broadcast messages only from control port") Signed-off-by: Wang Wenhu Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/qrtr/qrtr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 8d9a244f4534..944ce686bfe5 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -710,20 +710,21 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) node = NULL; if (addr->sq_node == QRTR_NODE_BCAST) { - enqueue_fn = qrtr_bcast_enqueue; - if (addr->sq_port != QRTR_PORT_CTRL) { + if (addr->sq_port != QRTR_PORT_CTRL && + qrtr_local_nid != QRTR_NODE_BCAST) { release_sock(sk); return -ENOTCONN; } + enqueue_fn = qrtr_bcast_enqueue; } else if (addr->sq_node == ipc->us.sq_node) { enqueue_fn = qrtr_local_enqueue; } else { - enqueue_fn = qrtr_node_enqueue; node = qrtr_node_lookup(addr->sq_node); if (!node) { release_sock(sk); return -ECONNRESET; } + enqueue_fn = qrtr_node_enqueue; } plen = (len + 3) & ~3; -- GitLab From 6d1c665386c7bfb74ba409487d6e48f4d7db71fe Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 6 Apr 2020 14:39:32 +0300 Subject: [PATCH 0990/1278] net: revert default NAPI poll timeout to 2 jiffies [ Upstream commit a4837980fd9fa4c70a821d11831698901baef56b ] For HZ < 1000 timeout 2000us rounds up to 1 jiffy but expires randomly because next timer interrupt could come shortly after starting softirq. For commonly used CONFIG_HZ=1000 nothing changes. Fixes: 7acf8a1e8a28 ("Replace 2 jiffies with sysctl netdev_budget_usecs to enable softirq tuning") Reported-by: Dmitry Yakunin Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 36d926d2d5f0..4f32a3251b64 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3575,7 +3575,8 @@ EXPORT_SYMBOL(netdev_max_backlog); int netdev_tstamp_prequeue __read_mostly = 1; int netdev_budget __read_mostly = 300; -unsigned int __read_mostly netdev_budget_usecs = 2000; +/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */ +unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ; int weight_p __read_mostly = 64; /* old backlog weight */ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ -- GitLab From e7358097b643b97651c98ff8d75366710aabcf19 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 14 Apr 2020 15:39:52 -0700 Subject: [PATCH 0991/1278] net: stmmac: dwmac-sunxi: Provide TX and RX fifo sizes [ Upstream commit 806fd188ce2a4f8b587e83e73c478e6484fbfa55 ] After commit bfcb813203e619a8960a819bf533ad2a108d8105 ("net: dsa: configure the MTU for switch ports") my Lamobo R1 platform which uses an allwinner,sun7i-a20-gmac compatible Ethernet MAC started to fail by rejecting a MTU of 1536. The reason for that is that the DMA capabilities are not readable on this version of the IP, and there is also no 'tx-fifo-depth' property being provided in Device Tree. The property is documented as optional, and is not provided. Chen-Yu indicated that the FIFO sizes are 4KB for TX and 16KB for RX, so provide these values through platform data as an immediate fix until various Device Tree sources get updated accordingly. Fixes: eaf4fac47807 ("net: stmmac: Do not accept invalid MTU values") Suggested-by: Chen-Yu Tsai Signed-off-by: Florian Fainelli Acked-by: Chen-Yu Tsai Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index fc1fa0f9f338..57694eada995 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -155,6 +155,8 @@ static int sun7i_gmac_probe(struct platform_device *pdev) plat_dat->init = sun7i_gmac_init; plat_dat->exit = sun7i_gmac_exit; plat_dat->fix_mac_speed = sun7i_fix_speed; + plat_dat->tx_fifo_size = 4096; + plat_dat->rx_fifo_size = 16384; ret = sun7i_gmac_init(pdev, plat_dat->bsp_priv); if (ret) -- GitLab From 78ef0452f798f0c798e2db80da9adaa6d9f24edb Mon Sep 17 00:00:00 2001 From: Can Guo Date: Mon, 10 Feb 2020 19:40:48 -0800 Subject: [PATCH 0992/1278] scsi: ufs: Fix ufshcd_hold() caused scheduling while atomic commit c63d6099a7959ecc919b2549dc6b71f53521f819 upstream. The async version of ufshcd_hold(async == true), which is only called in queuecommand path as for now, is expected to work in atomic context, thus it should not sleep or schedule out. When it runs into the condition that clocks are ON but link is still in hibern8 state, it should bail out without flushing the clock ungate work. Fixes: f2a785ac2312 ("scsi: ufshcd: Fix race between clk scaling and ungate work") Link: https://lore.kernel.org/r/1581392451-28743-6-git-send-email-cang@codeaurora.org Reviewed-by: Hongwu Su Reviewed-by: Asutosh Das Reviewed-by: Bean Huo Reviewed-by: Stanley Chu Signed-off-by: Can Guo Signed-off-by: Martin K. 
Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufshcd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index c35045324695..6dbdddffa0b3 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -1448,6 +1448,11 @@ int ufshcd_hold(struct ufs_hba *hba, bool async) */ if (ufshcd_can_hibern8_during_gating(hba) && ufshcd_is_link_hibern8(hba)) { + if (async) { + rc = -EAGAIN; + hba->clk_gating.active_reqs--; + break; + } spin_unlock_irqrestore(hba->host->host_lock, flags); flush_work(&hba->clk_gating.ungate_work); spin_lock_irqsave(hba->host->host_lock, flags); -- GitLab From 07fcda0a2a10b0a956f0b54f5ff809c345b2c512 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Mon, 17 Feb 2020 19:27:06 +0800 Subject: [PATCH 0993/1278] jbd2: improve comments about freeing data buffers whose page mapping is NULL commit 780f66e59231fcf882f36c63f287252ee47cc75a upstream. Improve comments in jbd2_journal_commit_transaction() to describe why we don't need to clear the buffer_mapped bit for freeing file mapping buffers whose page mapping is NULL. Link: https://lore.kernel.org/r/20200217112706.20085-1-yi.zhang@huawei.com Fixes: c96dceeabf76 ("jbd2: do not clear the BH_Mapped flag when forgetting a metadata buffer") Suggested-by: Jan Kara Reviewed-by: Jan Kara Signed-off-by: zhangyi (F) Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/commit.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 1a4bd8d9636e..6870103a0f59 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -994,9 +994,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) * journalled data) we need to unmap buffer and clear * more bits. 
We also need to be careful about the check * because the data page mapping can get cleared under - * out hands, which alse need not to clear more bits - * because the page and buffers will be freed and can - * never be reused once we are done with them. + * our hands. Note that if mapping == NULL, we don't + * need to make buffer unmapped because the page is + * already detached from the mapping and buffers cannot + * get reused. */ mapping = READ_ONCE(bh->b_page->mapping); if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) { -- GitLab From 642e97e586f9803c339527fd569fae74d4c5b00f Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Wed, 1 Apr 2020 19:01:06 +0200 Subject: [PATCH 0994/1278] pwm: pca9685: Fix PWM/GPIO inter-operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9cc5f232a4b6a0ef6e9b57876d61b88f61bdd7c2 upstream. This driver allows pwms to be requested as gpios via gpiolib. Obviously, it should not be allowed to request a GPIO when its corresponding PWM is already requested (and vice versa). So it requires some exclusion code. Given that the PWM and GPIO cores are not synchronized with respect to each other, this exclusion code will also require proper synchronization. Such a mechanism was in place, but was inadvertently removed by Uwe's clean-up in commit e926b12c611c ("pwm: Clear chip_data in pwm_put()"). Upon revisiting the synchronization mechanism, we found that theoretically, it could allow two threads to successfully request conflicting PWMs/GPIOs. Replace with a bitmap which tracks PWMs in-use, plus a mutex. As long as PWM and GPIO's respective request/free functions modify the in-use bitmap while holding the mutex, proper synchronization will be guaranteed. 
Reported-by: YueHaibing Fixes: e926b12c611c ("pwm: Clear chip_data in pwm_put()") Cc: Mika Westerberg Cc: Uwe Kleine-König Cc: YueHaibing Link: https://lkml.org/lkml/2019/5/31/963 Signed-off-by: Sven Van Asbroeck Reviewed-by: Mika Westerberg [cg: Tested on an i.MX6Q board with two NXP PCA9685 chips] Tested-by: Clemens Gruber Reviewed-by: Sven Van Asbroeck # cg's rebase Link: https://lore.kernel.org/lkml/20200330160238.GD2817345@ulmo/ Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/pwm-pca9685.c | 85 ++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index e1e5dfcb16f3..259fd58812ae 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * Because the PCA9685 has only one prescaler per chip, changing the period of @@ -85,6 +86,7 @@ struct pca9685 { #if IS_ENABLED(CONFIG_GPIOLIB) struct mutex lock; struct gpio_chip gpio; + DECLARE_BITMAP(pwms_inuse, PCA9685_MAXCHAN + 1); #endif }; @@ -94,51 +96,51 @@ static inline struct pca9685 *to_pca(struct pwm_chip *chip) } #if IS_ENABLED(CONFIG_GPIOLIB) -static int pca9685_pwm_gpio_request(struct gpio_chip *gpio, unsigned int offset) +static bool pca9685_pwm_test_and_set_inuse(struct pca9685 *pca, int pwm_idx) { - struct pca9685 *pca = gpiochip_get_data(gpio); - struct pwm_device *pwm; + bool is_inuse; mutex_lock(&pca->lock); - - pwm = &pca->chip.pwms[offset]; - - if (pwm->flags & (PWMF_REQUESTED | PWMF_EXPORTED)) { - mutex_unlock(&pca->lock); - return -EBUSY; + if (pwm_idx >= PCA9685_MAXCHAN) { + /* + * "all LEDs" channel: + * pretend already in use if any of the PWMs are requested + */ + if (!bitmap_empty(pca->pwms_inuse, PCA9685_MAXCHAN)) { + is_inuse = true; + goto out; + } + } else { + /* + * regular channel: + * pretend already in use if the "all LEDs" channel is requested + */ + if (test_bit(PCA9685_MAXCHAN, 
pca->pwms_inuse)) { + is_inuse = true; + goto out; + } } - - pwm_set_chip_data(pwm, (void *)1); - + is_inuse = test_and_set_bit(pwm_idx, pca->pwms_inuse); +out: mutex_unlock(&pca->lock); - pm_runtime_get_sync(pca->chip.dev); - return 0; + return is_inuse; } -static bool pca9685_pwm_is_gpio(struct pca9685 *pca, struct pwm_device *pwm) +static void pca9685_pwm_clear_inuse(struct pca9685 *pca, int pwm_idx) { - bool is_gpio = false; - mutex_lock(&pca->lock); + clear_bit(pwm_idx, pca->pwms_inuse); + mutex_unlock(&pca->lock); +} - if (pwm->hwpwm >= PCA9685_MAXCHAN) { - unsigned int i; - - /* - * Check if any of the GPIOs are requested and in that case - * prevent using the "all LEDs" channel. - */ - for (i = 0; i < pca->gpio.ngpio; i++) - if (gpiochip_is_requested(&pca->gpio, i)) { - is_gpio = true; - break; - } - } else if (pwm_get_chip_data(pwm)) { - is_gpio = true; - } +static int pca9685_pwm_gpio_request(struct gpio_chip *gpio, unsigned int offset) +{ + struct pca9685 *pca = gpiochip_get_data(gpio); - mutex_unlock(&pca->lock); - return is_gpio; + if (pca9685_pwm_test_and_set_inuse(pca, offset)) + return -EBUSY; + pm_runtime_get_sync(pca->chip.dev); + return 0; } static int pca9685_pwm_gpio_get(struct gpio_chip *gpio, unsigned int offset) @@ -173,6 +175,7 @@ static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset) pca9685_pwm_gpio_set(gpio, offset, 0); pm_runtime_put(pca->chip.dev); + pca9685_pwm_clear_inuse(pca, offset); } static int pca9685_pwm_gpio_get_direction(struct gpio_chip *chip, @@ -224,12 +227,17 @@ static int pca9685_pwm_gpio_probe(struct pca9685 *pca) return devm_gpiochip_add_data(dev, &pca->gpio, pca); } #else -static inline bool pca9685_pwm_is_gpio(struct pca9685 *pca, - struct pwm_device *pwm) +static inline bool pca9685_pwm_test_and_set_inuse(struct pca9685 *pca, + int pwm_idx) { return false; } +static inline void +pca9685_pwm_clear_inuse(struct pca9685 *pca, int pwm_idx) +{ +} + static inline int pca9685_pwm_gpio_probe(struct 
pca9685 *pca) { return 0; @@ -413,7 +421,7 @@ static int pca9685_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) { struct pca9685 *pca = to_pca(chip); - if (pca9685_pwm_is_gpio(pca, pwm)) + if (pca9685_pwm_test_and_set_inuse(pca, pwm->hwpwm)) return -EBUSY; pm_runtime_get_sync(chip->dev); @@ -422,8 +430,11 @@ static int pca9685_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) static void pca9685_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { + struct pca9685 *pca = to_pca(chip); + pca9685_pwm_disable(chip, pwm); pm_runtime_put(chip->dev); + pca9685_pwm_clear_inuse(pca, pwm->hwpwm); } static const struct pwm_ops pca9685_pwm_ops = { -- GitLab From abe9688ab02de8b26fbacd95bbcc9477ba754ab2 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sat, 28 Mar 2020 14:54:01 -0700 Subject: [PATCH 0995/1278] ext4: fix incorrect group count in ext4_fill_super error message commit df41460a21b06a76437af040d90ccee03888e8e5 upstream. ext4_fill_super doublechecks the number of groups before mounting; if that check fails, the resulting error message prints the group count from the ext4_sb_info sbi, which hasn't been set yet. Print the freshly computed group count instead (which at that point has just been computed in "blocks_count"). 
Signed-off-by: Josh Triplett Fixes: 4ec1102813798 ("ext4: Add sanity checks for the superblock before mounting the filesystem") Link: https://lore.kernel.org/r/8b957cd1513fcc4550fe675c10bcce2175c33a49.1585431964.git.josh@joshtriplett.org Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f5646bcad770..f5b8675f9b55 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4100,9 +4100,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_BLOCKS_PER_GROUP(sb) - 1); do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { - ext4_msg(sb, KERN_WARNING, "groups count too large: %u " + ext4_msg(sb, KERN_WARNING, "groups count too large: %llu " "(block count %llu, first data block %u, " - "blocks per group %lu)", sbi->s_groups_count, + "blocks per group %lu)", blocks_count, ext4_blocks_count(es), le32_to_cpu(es->s_first_data_block), EXT4_BLOCKS_PER_GROUP(sb)); -- GitLab From 8eaf44acf197051e20edeb9c7673c626e732c618 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sat, 28 Mar 2020 15:34:15 -0700 Subject: [PATCH 0996/1278] ext4: fix incorrect inodes per group in error message commit b9c538da4e52a7b79dfcf4cfa487c46125066dfb upstream. If ext4_fill_super detects an invalid number of inodes per group, the resulting error message printed the number of blocks per group, rather than the number of inodes per group. Fix it to print the correct value. 
Fixes: cd6bb35bf7f6d ("ext4: use more strict checks for inodes_per_block on mount") Link: https://lore.kernel.org/r/8be03355983a08e5d4eed480944613454d7e2550.1585434649.git.josh@joshtriplett.org Reviewed-by: Andreas Dilger Signed-off-by: Josh Triplett Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f5b8675f9b55..875032fe75c4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3969,7 +3969,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || sbi->s_inodes_per_group > blocksize * 8) { ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n", - sbi->s_blocks_per_group); + sbi->s_inodes_per_group); goto failed_mount; } sbi->s_itb_per_group = sbi->s_inodes_per_group / -- GitLab From cc578c68afa0a98a4a4664a8a3fa18b6c92ed4ff Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Nov 2019 11:36:40 +0000 Subject: [PATCH 0997/1278] ASoC: Intel: mrfld: fix incorrect check on p->sink commit f5e056e1e46fcbb5f74ce560792aeb7d57ce79e6 upstream. The check on p->sink looks bogus, I believe it should be p->source since the following code blocks are related to p->source. Fix this by replacing p->sink with p->source. 
Fixes: 24c8d14192cc ("ASoC: Intel: mrfld: add DSP core controls") Signed-off-by: Colin Ian King Addresses-Coverity: ("Copy-paste error") Link: https://lore.kernel.org/r/20191119113640.166940-1-colin.king@canonical.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/atom/sst-atom-controls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/atom/sst-atom-controls.c b/sound/soc/intel/atom/sst-atom-controls.c index 0f3604b55942..6044b3bbb121 100644 --- a/sound/soc/intel/atom/sst-atom-controls.c +++ b/sound/soc/intel/atom/sst-atom-controls.c @@ -1341,7 +1341,7 @@ int sst_send_pipe_gains(struct snd_soc_dai *dai, int stream, int mute) dai->capture_widget->name); w = dai->capture_widget; snd_soc_dapm_widget_for_each_source_path(w, p) { - if (p->connected && !p->connected(w, p->sink)) + if (p->connected && !p->connected(w, p->source)) continue; if (p->connect && p->source->power && -- GitLab From e5c53e095318810e5c58b3f7dfb8f39a862e34c5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 8 Feb 2020 22:07:20 +0000 Subject: [PATCH 0998/1278] ASoC: Intel: mrfld: return error codes when an error occurs commit 3025571edd9df653e1ad649f0638368a39d1bbb5 upstream. Currently function sst_platform_get_resources always returns zero and error return codes set by the function are never returned. Fix this by returning the error return code in variable ret rather than the hard coded zero. 
Addresses-Coverity: ("Unused value") Fixes: f533a035e4da ("ASoC: Intel: mrfld - create separate module for pci part") Signed-off-by: Colin Ian King Acked-by: Cezary Rojewski Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200208220720.36657-1-colin.king@canonical.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/atom/sst/sst_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/atom/sst/sst_pci.c b/sound/soc/intel/atom/sst/sst_pci.c index 6906ee624cf6..438c7bcd8c4c 100644 --- a/sound/soc/intel/atom/sst/sst_pci.c +++ b/sound/soc/intel/atom/sst/sst_pci.c @@ -107,7 +107,7 @@ static int sst_platform_get_resources(struct intel_sst_drv *ctx) dev_dbg(ctx->dev, "DRAM Ptr %p\n", ctx->dram); do_release_regions: pci_release_regions(pci); - return 0; + return ret; } /* -- GitLab From 47019436aecd6641cf6bc47277e11234180217f0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 12 Apr 2020 10:13:29 +0200 Subject: [PATCH 0999/1278] ALSA: usb-audio: Don't override ignore_ctl_error value from the map commit 3507245b82b4362dc9721cbc328644905a3efa22 upstream. The mapping table may contain also ignore_ctl_error flag for devices that are known to behave wild. Since this flag always writes the card's own ignore_ctl_error flag, it overrides the value already set by the module option, so it doesn't follow user's expectation. Let's fix the code not to clear the flag that has been set by user. 
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=206873 Cc: Link: https://lore.kernel.org/r/20200412081331.4742-3-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index e6e4c3b9d9d3..7b75208d5cea 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2342,7 +2342,7 @@ static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer) if (map->id == state.chip->usb_id) { state.map = map->map; state.selector_map = map->selector_map; - mixer->ignore_ctl_error = map->ignore_ctl_error; + mixer->ignore_ctl_error |= map->ignore_ctl_error; break; } } -- GitLab From 7054f86f268c0d9d62b52a4497dd0e8c10a7e5c7 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Tue, 14 Apr 2020 09:51:45 +0800 Subject: [PATCH 1000/1278] tracing: Fix the race between registering 'snapshot' event trigger and triggering 'snapshot' operation commit 0bbe7f719985efd9adb3454679ecef0984cb6800 upstream. Traced event can trigger 'snapshot' operation(i.e. calls snapshot_trigger() or snapshot_count_trigger()) when register_snapshot_trigger() has completed registration but doesn't allocate buffer for 'snapshot' event trigger. In the rare case, 'snapshot' operation always detects the lack of allocated buffer so make register_snapshot_trigger() allocate buffer first. trigger-snapshot.tc in kselftest reproduces the issue on slow vm: ----------------------------------------------------------- cat trace ... ftracetest-3028 [002] .... 236.784290: sched_process_fork: comm=ftracetest pid=3028 child_comm=ftracetest child_pid=3036 <...>-2875 [003] .... 240.460335: tracing_snapshot_instance_cond: *** SNAPSHOT NOT ALLOCATED *** <...>-2875 [003] .... 240.460338: tracing_snapshot_instance_cond: *** stopping trace here! 
*** ----------------------------------------------------------- Link: http://lkml.kernel.org/r/20200414015145.66236-1-yangx.jy@cn.fujitsu.com Cc: stable@vger.kernel.org Fixes: 93e31ffbf417a ("tracing: Add 'snapshot' event trigger command") Signed-off-by: Xiao Yang Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_trigger.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 31e91efe243e..6fb5eb7b57dc 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1075,14 +1075,10 @@ register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file) { - int ret = register_trigger(glob, ops, data, file); - - if (ret > 0 && tracing_alloc_snapshot_instance(file->tr) != 0) { - unregister_trigger(glob, ops, data, file); - ret = 0; - } + if (tracing_alloc_snapshot_instance(file->tr) != 0) + return 0; - return ret; + return register_trigger(glob, ops, data, file); } static int -- GitLab From 1d518b486efe67998bfc1764a1840aa2728e2d7c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 2 Apr 2020 15:51:18 -0400 Subject: [PATCH 1001/1278] btrfs: check commit root generation in should_ignore_root commit 4d4225fc228e46948486d8b8207955f0c031b92e upstream. Previously we would set the reloc root's last snapshot to transid - 1. However there was a problem with doing this, and we changed it to setting the last snapshot to the generation of the commit node of the fs root. This however broke should_ignore_root(). The assumption is that if we are in a generation newer than when the reloc root was created, then we would find the reloc root through normal backref lookups, and thus can ignore any fs roots we find with an old enough reloc root. 
Now that the last snapshot could be considerably further in the past than before, we'd end up incorrectly ignoring an fs root. Thus we'd find no nodes for the bytenr we were searching for, and we'd fail to relocate anything. We'd loop through the relocate code again and see that there were still used space in that block group, attempt to relocate those bytenr's again, fail in the same way, and just loop like this forever. This is tricky in that we have to not modify the fs root at all during this time, so we need to have a block group that has data in this fs root that is not shared by any other root, which is why this has been difficult to reproduce. Fixes: 054570a1dc94 ("Btrfs: fix relocation incorrectly dropping data references") CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/relocation.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f5f3408b32ea..f4397dd19583 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -538,8 +538,8 @@ static int should_ignore_root(struct btrfs_root *root) if (!reloc_root) return 0; - if (btrfs_root_last_snapshot(&reloc_root->root_item) == - root->fs_info->running_transaction->transid - 1) + if (btrfs_header_generation(reloc_root->commit_root) == + root->fs_info->running_transaction->transid) return 0; /* * if there is reloc tree and it was created in previous -- GitLab From 33e7e10efecd53361df55f7a56b07c79a3e933f5 Mon Sep 17 00:00:00 2001 From: Tuomas Tynkkynen Date: Fri, 10 Apr 2020 15:32:57 +0300 Subject: [PATCH 1002/1278] mac80211_hwsim: Use kstrndup() in place of kasprintf() commit 7ea862048317aa76d0f22334202779a25530980c upstream. 
syzbot reports a warning: precision 33020 too large WARNING: CPU: 0 PID: 9618 at lib/vsprintf.c:2471 set_precision+0x150/0x180 lib/vsprintf.c:2471 vsnprintf+0xa7b/0x19a0 lib/vsprintf.c:2547 kvasprintf+0xb2/0x170 lib/kasprintf.c:22 kasprintf+0xbb/0xf0 lib/kasprintf.c:59 hwsim_del_radio_nl+0x63a/0x7e0 drivers/net/wireless/mac80211_hwsim.c:3625 genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline] ... entry_SYSCALL_64_after_hwframe+0x49/0xbe Thus it seems that kasprintf() with "%.*s" format can not be used for duplicating a string with arbitrary length. Replace it with kstrndup(). Note that later this string is limited to NL80211_WIPHY_NAME_MAXLEN == 64, but the code is simpler this way. Reported-by: syzbot+6693adf1698864d21734@syzkaller.appspotmail.com Reported-by: syzbot+a4aee3f42d7584d76761@syzkaller.appspotmail.com Cc: stable@kernel.org Signed-off-by: Tuomas Tynkkynen Link: https://lore.kernel.org/r/20200410123257.14559-1-tuomas.tynkkynen@iki.fi [johannes: add note about length limit] Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mac80211_hwsim.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 27224dc26413..a8ec5b2c5abb 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3134,9 +3134,9 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) param.no_vif = true; if (info->attrs[HWSIM_ATTR_RADIO_NAME]) { - hwname = kasprintf(GFP_KERNEL, "%.*s", - nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), - (char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME])); + hwname = kstrndup((char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME]), + nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), + GFP_KERNEL); if (!hwname) return -ENOMEM; param.hwname = hwname; @@ -3175,9 +3175,9 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) if 
(info->attrs[HWSIM_ATTR_RADIO_ID]) { idx = nla_get_u32(info->attrs[HWSIM_ATTR_RADIO_ID]); } else if (info->attrs[HWSIM_ATTR_RADIO_NAME]) { - hwname = kasprintf(GFP_KERNEL, "%.*s", - nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), - (char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME])); + hwname = kstrndup((char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME]), + nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), + GFP_KERNEL); if (!hwname) return -ENOMEM; } else -- GitLab From e4598037e0d31676c98a9d91062c2e37fc2905e0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 31 Mar 2020 12:50:16 +0200 Subject: [PATCH 1003/1278] ext4: do not zeroout extents beyond i_disksize commit 801674f34ecfed033b062a0f217506b93c8d5e8a upstream. We do not want to create initialized extents beyond end of file because for e2fsck it is impossible to distinguish them from a case of corrupted file size / extent tree and so it complains like: Inode 12, i_size is 147456, should be 163840. Fix? no Code in ext4_ext_convert_to_initialized() and ext4_split_convert_extents() try to make sure it does not create initialized extents beyond inode size however they check against inode->i_size which is wrong. They should instead check against EXT4_I(inode)->i_disksize which is the current inode size on disk. That's what e2fsck is going to see in case of crash before all dirty data is written. This bug manifests as generic/456 test failure (with recent enough fstests where fsx got fixed to properly pass FALLOC_KEEP_SIZE_FL flags to the kernel) when run with dioread_lock mount option. 
CC: stable@vger.kernel.org Fixes: 21ca087a3891 ("ext4: Do not zero out uninitialized extents beyond i_size") Reviewed-by: Lukas Czerner Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Link: https://lore.kernel.org/r/20200331105016.8674-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 20d68554680f..fa6ae9014e8f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3446,8 +3446,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, (unsigned long long)map->m_lblk, map_len); sbi = EXT4_SB(inode->i_sb); - eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; + eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) + >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map_len) eof_block = map->m_lblk + map_len; @@ -3702,8 +3702,8 @@ static int ext4_split_convert_extents(handle_t *handle, __func__, inode->i_ino, (unsigned long long)map->m_lblk, map->m_len); - eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; + eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) + >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map->m_len) eof_block = map->m_lblk + map->m_len; /* -- GitLab From 02677f42036a8728d8e6dcaf633e04976c7ce51f Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Sun, 3 Dec 2017 21:14:12 -0600 Subject: [PATCH 1004/1278] dm flakey: check for null arg_name in parse_features() [ Upstream commit 7690e25302dc7d0cd42b349e746fe44b44a94f2b ] One can crash dm-flakey by specifying more feature arguments than the number of features supplied. Checking for null in arg_name avoids this. 
dmsetup create flakey-test --table "0 66076080 flakey /dev/sdb9 0 0 180 2 drop_writes" Signed-off-by: Goldwyn Rodrigues Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-flakey.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index b1b68e01b889..53cd31199f21 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -70,6 +70,11 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, arg_name = dm_shift_arg(as); argc--; + if (!arg_name) { + ti->error = "Insufficient feature arguments"; + return -EINVAL; + } + /* * drop_writes */ -- GitLab From b9e0151c008a6970f3302571debe5b86043c8630 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 13 Dec 2019 16:15:15 -0800 Subject: [PATCH 1005/1278] kvm: x86: Host feature SSBD doesn't imply guest feature SPEC_CTRL_SSBD commit 396d2e878f92ec108e4293f1c77ea3bc90b414ff upstream. The host reports support for the synthetic feature X86_FEATURE_SSBD when any of the three following hardware features are set: CPUID.(EAX=7,ECX=0):EDX.SSBD[bit 31] CPUID.80000008H:EBX.AMD_SSBD[bit 24] CPUID.80000008H:EBX.VIRT_SSBD[bit 25] Either of the first two hardware features implies the existence of the IA32_SPEC_CTRL MSR, but CPUID.80000008H:EBX.VIRT_SSBD[bit 25] does not. Therefore, CPUID.(EAX=7,ECX=0):EDX.SSBD[bit 31] should only be set in the guest if CPUID.(EAX=7,ECX=0):EDX.SSBD[bit 31] or CPUID.80000008H:EBX.AMD_SSBD[bit 24] is set on the host. 
Fixes: 0c54914d0c52a ("KVM: x86: use Intel speculation bugs and features as derived in generic x86 code") Signed-off-by: Jim Mattson Reviewed-by: Jacob Xu Reviewed-by: Peter Shier Cc: Paolo Bonzini Reported-by: Eric Biggers Signed-off-by: Paolo Bonzini [bwh: Backported to 4.x: adjust indentation] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- arch/x86/kvm/cpuid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 1152afad524f..6ec1cfd0addd 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -494,7 +494,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->edx |= F(SPEC_CTRL); if (boot_cpu_has(X86_FEATURE_STIBP)) entry->edx |= F(INTEL_STIBP); - if (boot_cpu_has(X86_FEATURE_SSBD)) + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + boot_cpu_has(X86_FEATURE_AMD_SSBD)) entry->edx |= F(SPEC_CTRL_SSBD); /* * We emulate ARCH_CAPABILITIES in software even -- GitLab From 1140ef72c9fdd08715cba3119e380c55cc6c1b86 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 13 Mar 2020 18:06:54 +0100 Subject: [PATCH 1006/1278] scsi: target: remove boilerplate code [ Upstream commit e49a7d994379278d3353d7ffc7994672752fb0ad ] iscsit_free_session() is equivalent to iscsit_stop_session() followed by a call to iscsit_close_session(). Link: https://lore.kernel.org/r/20200313170656.9716-2-mlombard@redhat.com Tested-by: Rahul Kundu Signed-off-by: Maurizio Lombardi Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/target/iscsi/iscsi_target.c | 46 ++--------------------------- drivers/target/iscsi/iscsi_target.h | 1 - 2 files changed, 2 insertions(+), 45 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index fb7bd422e2e1..40993c575017 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4583,49 +4583,6 @@ void iscsit_fail_session(struct iscsi_session *sess) sess->session_state = TARG_SESS_STATE_FAILED; } -int iscsit_free_session(struct iscsi_session *sess) -{ - u16 conn_count = atomic_read(&sess->nconn); - struct iscsi_conn *conn, *conn_tmp = NULL; - int is_last; - - spin_lock_bh(&sess->conn_lock); - atomic_set(&sess->sleep_on_sess_wait_comp, 1); - - list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list, - conn_list) { - if (conn_count == 0) - break; - - if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) { - is_last = 1; - } else { - iscsit_inc_conn_usage_count(conn_tmp); - is_last = 0; - } - iscsit_inc_conn_usage_count(conn); - - spin_unlock_bh(&sess->conn_lock); - iscsit_cause_connection_reinstatement(conn, 1); - spin_lock_bh(&sess->conn_lock); - - iscsit_dec_conn_usage_count(conn); - if (is_last == 0) - iscsit_dec_conn_usage_count(conn_tmp); - - conn_count--; - } - - if (atomic_read(&sess->nconn)) { - spin_unlock_bh(&sess->conn_lock); - wait_for_completion(&sess->session_wait_comp); - } else - spin_unlock_bh(&sess->conn_lock); - - iscsit_close_session(sess); - return 0; -} - void iscsit_stop_session( struct iscsi_session *sess, int session_sleep, @@ -4710,7 +4667,8 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) list_for_each_entry_safe(se_sess, se_sess_tmp, &free_list, sess_list) { sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; - iscsit_free_session(sess); + iscsit_stop_session(sess, 1, 1); + iscsit_close_session(sess); session_count++; } diff --git 
a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h index 42de1843aa40..f0d2cbf594c9 100644 --- a/drivers/target/iscsi/iscsi_target.h +++ b/drivers/target/iscsi/iscsi_target.h @@ -43,7 +43,6 @@ extern int iscsi_target_rx_thread(void *); extern int iscsit_close_connection(struct iscsi_conn *); extern int iscsit_close_session(struct iscsi_session *); extern void iscsit_fail_session(struct iscsi_session *); -extern int iscsit_free_session(struct iscsi_session *); extern void iscsit_stop_session(struct iscsi_session *, int, int); extern int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *, int); -- GitLab From 4d7e37a4040e1189e721e805180e093848510b09 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 13 Mar 2020 18:06:55 +0100 Subject: [PATCH 1007/1278] scsi: target: fix hang when multiple threads try to destroy the same iscsi session [ Upstream commit 57c46e9f33da530a2485fa01aa27b6d18c28c796 ] A number of hangs have been reported against the target driver; they are due to the fact that multiple threads may try to destroy the iscsi session at the same time. This may be reproduced for example when a "targetcli iscsi/iqn.../tpg1 disable" command is executed while a logout operation is underway. When this happens, two or more threads may end up sleeping and waiting for iscsit_close_connection() to execute "complete(session_wait_comp)". Only one of the threads will wake up and proceed to destroy the session structure, the remaining threads will hang forever. Note that if the blocked threads are somehow forced to wake up with complete_all(), they will try to free the same iscsi session structure destroyed by the first thread, causing double frees, memory corruptions etc... With this patch, the threads that want to destroy the iscsi session will increase the session refcount and will set the "session_close" flag to 1; then they wait for the driver to close the remaining active connections. 
When the last connection is closed, iscsit_close_connection() will wake up all the threads and will wait for the session's refcount to reach zero; when this happens, iscsit_close_connection() will destroy the session structure because no one is referencing it anymore. INFO: task targetcli:5971 blocked for more than 120 seconds. Tainted: P OE 4.15.0-72-generic #81~16.04.1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. targetcli D 0 5971 1 0x00000080 Call Trace: __schedule+0x3d6/0x8b0 ? vprintk_func+0x44/0xe0 schedule+0x36/0x80 schedule_timeout+0x1db/0x370 ? __dynamic_pr_debug+0x8a/0xb0 wait_for_completion+0xb4/0x140 ? wake_up_q+0x70/0x70 iscsit_free_session+0x13d/0x1a0 [iscsi_target_mod] iscsit_release_sessions_for_tpg+0x16b/0x1e0 [iscsi_target_mod] iscsit_tpg_disable_portal_group+0xca/0x1c0 [iscsi_target_mod] lio_target_tpg_enable_store+0x66/0xe0 [iscsi_target_mod] configfs_write_file+0xb9/0x120 __vfs_write+0x1b/0x40 vfs_write+0xb8/0x1b0 SyS_write+0x5c/0xe0 do_syscall_64+0x73/0x130 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Link: https://lore.kernel.org/r/20200313170656.9716-3-mlombard@redhat.com Reported-by: Matt Coleman Tested-by: Matt Coleman Tested-by: Rahul Kundu Signed-off-by: Maurizio Lombardi Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/target/iscsi/iscsi_target.c | 35 ++++++++++++-------- drivers/target/iscsi/iscsi_target_configfs.c | 5 ++- drivers/target/iscsi/iscsi_target_login.c | 5 +-- include/target/iscsi/iscsi_target_core.h | 2 +- 4 files changed, 30 insertions(+), 17 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 40993c575017..ee49b227dc12 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4314,30 +4314,37 @@ int iscsit_close_connection( if (!atomic_read(&sess->session_reinstatement) && atomic_read(&sess->session_fall_back_to_erl0)) { spin_unlock_bh(&sess->conn_lock); + complete_all(&sess->session_wait_comp); iscsit_close_session(sess); return 0; } else if (atomic_read(&sess->session_logout)) { pr_debug("Moving to TARG_SESS_STATE_FREE.\n"); sess->session_state = TARG_SESS_STATE_FREE; - spin_unlock_bh(&sess->conn_lock); - if (atomic_read(&sess->sleep_on_sess_wait_comp)) - complete(&sess->session_wait_comp); + if (atomic_read(&sess->session_close)) { + spin_unlock_bh(&sess->conn_lock); + complete_all(&sess->session_wait_comp); + iscsit_close_session(sess); + } else { + spin_unlock_bh(&sess->conn_lock); + } return 0; } else { pr_debug("Moving to TARG_SESS_STATE_FAILED.\n"); sess->session_state = TARG_SESS_STATE_FAILED; - if (!atomic_read(&sess->session_continuation)) { - spin_unlock_bh(&sess->conn_lock); + if (!atomic_read(&sess->session_continuation)) iscsit_start_time2retain_handler(sess); - } else - spin_unlock_bh(&sess->conn_lock); - if (atomic_read(&sess->sleep_on_sess_wait_comp)) - complete(&sess->session_wait_comp); + if (atomic_read(&sess->session_close)) { + spin_unlock_bh(&sess->conn_lock); + complete_all(&sess->session_wait_comp); + iscsit_close_session(sess); + } else { + spin_unlock_bh(&sess->conn_lock); + } return 0; } @@ -4446,9 +4453,9 @@ static void iscsit_logout_post_handler_closesession( complete(&conn->conn_logout_comp); 
iscsit_dec_conn_usage_count(conn); + atomic_set(&sess->session_close, 1); iscsit_stop_session(sess, sleep, sleep); iscsit_dec_session_usage_count(sess); - iscsit_close_session(sess); } static void iscsit_logout_post_handler_samecid( @@ -4593,8 +4600,6 @@ void iscsit_stop_session( int is_last; spin_lock_bh(&sess->conn_lock); - if (session_sleep) - atomic_set(&sess->sleep_on_sess_wait_comp, 1); if (connection_sleep) { list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list, @@ -4652,12 +4657,15 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) spin_lock(&sess->conn_lock); if (atomic_read(&sess->session_fall_back_to_erl0) || atomic_read(&sess->session_logout) || + atomic_read(&sess->session_close) || (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { spin_unlock(&sess->conn_lock); continue; } + iscsit_inc_session_usage_count(sess); atomic_set(&sess->session_reinstatement, 1); atomic_set(&sess->session_fall_back_to_erl0, 1); + atomic_set(&sess->session_close, 1); spin_unlock(&sess->conn_lock); list_move_tail(&se_sess->sess_list, &free_list); @@ -4667,8 +4675,9 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) list_for_each_entry_safe(se_sess, se_sess_tmp, &free_list, sess_list) { sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + list_del_init(&se_sess->sess_list); iscsit_stop_session(sess, 1, 1); - iscsit_close_session(sess); + iscsit_dec_session_usage_count(sess); session_count++; } diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 0ebc4818e132..4191e4a8a9ed 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1503,20 +1503,23 @@ static void lio_tpg_close_session(struct se_session *se_sess) spin_lock(&sess->conn_lock); if (atomic_read(&sess->session_fall_back_to_erl0) || atomic_read(&sess->session_logout) || + atomic_read(&sess->session_close) || 
(sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { spin_unlock(&sess->conn_lock); spin_unlock_bh(&se_tpg->session_lock); return; } + iscsit_inc_session_usage_count(sess); atomic_set(&sess->session_reinstatement, 1); atomic_set(&sess->session_fall_back_to_erl0, 1); + atomic_set(&sess->session_close, 1); spin_unlock(&sess->conn_lock); iscsit_stop_time2retain_timer(sess); spin_unlock_bh(&se_tpg->session_lock); iscsit_stop_session(sess, 1, 1); - iscsit_close_session(sess); + iscsit_dec_session_usage_count(sess); } static u32 lio_tpg_get_inst_index(struct se_portal_group *se_tpg) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 27893d90c4ef..55df6f99e669 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -199,6 +199,7 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) spin_lock(&sess_p->conn_lock); if (atomic_read(&sess_p->session_fall_back_to_erl0) || atomic_read(&sess_p->session_logout) || + atomic_read(&sess_p->session_close) || (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { spin_unlock(&sess_p->conn_lock); continue; @@ -209,6 +210,7 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) (sess_p->sess_ops->SessionType == sessiontype))) { atomic_set(&sess_p->session_reinstatement, 1); atomic_set(&sess_p->session_fall_back_to_erl0, 1); + atomic_set(&sess_p->session_close, 1); spin_unlock(&sess_p->conn_lock); iscsit_inc_session_usage_count(sess_p); iscsit_stop_time2retain_timer(sess_p); @@ -233,7 +235,6 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) if (sess->session_state == TARG_SESS_STATE_FAILED) { spin_unlock_bh(&sess->conn_lock); iscsit_dec_session_usage_count(sess); - iscsit_close_session(sess); return 0; } spin_unlock_bh(&sess->conn_lock); @@ -241,7 +242,6 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) iscsit_stop_session(sess, 1, 1); 
iscsit_dec_session_usage_count(sess); - iscsit_close_session(sess); return 0; } @@ -534,6 +534,7 @@ static int iscsi_login_non_zero_tsih_s2( sess_p = (struct iscsi_session *)se_sess->fabric_sess_ptr; if (atomic_read(&sess_p->session_fall_back_to_erl0) || atomic_read(&sess_p->session_logout) || + atomic_read(&sess_p->session_close) || (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED)) continue; if (!memcmp(sess_p->isid, pdu->isid, 6) && diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index cf5f3fff1f1a..fd7e4d1df9a1 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -673,7 +673,7 @@ struct iscsi_session { atomic_t session_logout; atomic_t session_reinstatement; atomic_t session_stop_active; - atomic_t sleep_on_sess_wait_comp; + atomic_t session_close; /* connection list */ struct list_head sess_conn_list; struct list_head cr_active_list; -- GitLab From fdb1ccf027ecf6a10b8704ec1e546cff0b10a57d Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 9 Apr 2020 10:34:29 -0500 Subject: [PATCH 1008/1278] x86/microcode/AMD: Increase microcode PATCH_MAX_SIZE commit bdf89df3c54518eed879d8fac7577fcfb220c67e upstream. Future AMD CPUs will have microcode patches that exceed the default 4K patch size. Raise our limit. Signed-off-by: John Allen Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # v4.14.. 
Link: https://lkml.kernel.org/r/20200409152931.GA685273@mojo.amd.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/microcode_amd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 209492849566..5c524d4f71cd 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -41,7 +41,7 @@ struct microcode_amd { unsigned int mpb[0]; }; -#define PATCH_MAX_SIZE PAGE_SIZE +#define PATCH_MAX_SIZE (3 * PAGE_SIZE) #ifdef CONFIG_MICROCODE_AMD extern void __init load_ucode_amd_bsp(unsigned int family); -- GitLab From 3756b8b746c5d63c6fec0bb2349508d0fa0f3707 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 20 Dec 2017 14:57:21 -0800 Subject: [PATCH 1009/1278] x86/intel_rdt: Enumerate L2 Code and Data Prioritization (CDP) feature commit a511e7935378ef1f321456a90beae2a2632d3d83 upstream. L2 Code and Data Prioritization (CDP) is enumerated in CPUID(EAX=0x10, ECX=0x2):ECX.bit2 Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: Vikas" Cc: Sai Praneeth" Cc: Reinette" Link: https://lkml.kernel.org/r/1513810644-78015-4-git-send-email-fenghua.yu@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/scattered.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b4bef819d5d5..157cfaf1064c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -205,7 +205,7 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ - +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ 
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index ed7ce5184a77..0b9c7150cb23 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, + { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 }, { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 }, { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, -- GitLab From 73e3d7823560741d3473a62c7b1c150c4641f8a4 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 20 Dec 2017 14:57:22 -0800 Subject: [PATCH 1010/1278] x86/intel_rdt: Add two new resources for L2 Code and Data Prioritization (CDP) commit def10853930a82456ab862a3a8292a3a16c386e7 upstream. L2 data and L2 code are added as new resources in rdt_resources_all[] and data in the resources are configured. When L2 CDP is enabled, the schemata will have the two resources in this format: L2DATA:l2id0=xxxx;l2id1=xxxx;.... L2CODE:l2id0=xxxx;l2id1=xxxx;.... xxxx represent CBM (Cache Bit Mask) values in the schemata, similar to all others (L2 CAT/L3 CAT/L3 CDP). 
Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: Vikas" Cc: Sai Praneeth" Cc: Reinette" Link: https://lkml.kernel.org/r/1513810644-78015-5-git-send-email-fenghua.yu@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel_rdt.c | 66 ++++++++++++++++++++++++++++----- arch/x86/kernel/cpu/intel_rdt.h | 2 + 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 3f731d7f04bf..5d81ff1028af 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -135,6 +135,40 @@ struct rdt_resource rdt_resources_all[] = { .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, }, + [RDT_RESOURCE_L2DATA] = + { + .rid = RDT_RESOURCE_L2DATA, + .name = "L2DATA", + .domains = domain_init(RDT_RESOURCE_L2DATA), + .msr_base = IA32_L2_CBM_BASE, + .msr_update = cat_wrmsr, + .cache_level = 2, + .cache = { + .min_cbm_bits = 1, + .cbm_idx_mult = 2, + .cbm_idx_offset = 0, + }, + .parse_ctrlval = parse_cbm, + .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, + }, + [RDT_RESOURCE_L2CODE] = + { + .rid = RDT_RESOURCE_L2CODE, + .name = "L2CODE", + .domains = domain_init(RDT_RESOURCE_L2CODE), + .msr_base = IA32_L2_CBM_BASE, + .msr_update = cat_wrmsr, + .cache_level = 2, + .cache = { + .min_cbm_bits = 1, + .cbm_idx_mult = 2, + .cbm_idx_offset = 1, + }, + .parse_ctrlval = parse_cbm, + .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, + }, [RDT_RESOURCE_MBA] = { .rid = RDT_RESOURCE_MBA, @@ -259,15 +293,15 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) r->alloc_enabled = true; } -static void rdt_get_cdp_l3_config(int type) +static void rdt_get_cdp_config(int level, int type) { - struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_resource *r_l = &rdt_resources_all[level]; struct rdt_resource *r = &rdt_resources_all[type]; - r->num_closid = r_l3->num_closid / 2; - r->cache.cbm_len 
= r_l3->cache.cbm_len; - r->default_ctrl = r_l3->default_ctrl; - r->cache.shareable_bits = r_l3->cache.shareable_bits; + r->num_closid = r_l->num_closid / 2; + r->cache.cbm_len = r_l->cache.cbm_len; + r->default_ctrl = r_l->default_ctrl; + r->cache.shareable_bits = r_l->cache.shareable_bits; r->data_width = (r->cache.cbm_len + 3) / 4; r->alloc_capable = true; /* @@ -277,6 +311,18 @@ static void rdt_get_cdp_l3_config(int type) r->alloc_enabled = false; } +static void rdt_get_cdp_l3_config(void) +{ + rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA); + rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3CODE); +} + +static void rdt_get_cdp_l2_config(void) +{ + rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA); + rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE); +} + static int get_cache_id(int cpu, int level) { struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); @@ -729,15 +775,15 @@ static __init bool get_rdt_alloc_resources(void) if (rdt_cpu_has(X86_FEATURE_CAT_L3)) { rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]); - if (rdt_cpu_has(X86_FEATURE_CDP_L3)) { - rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA); - rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE); - } + if (rdt_cpu_has(X86_FEATURE_CDP_L3)) + rdt_get_cdp_l3_config(); ret = true; } if (rdt_cpu_has(X86_FEATURE_CAT_L2)) { /* CPUID 0x10.2 fields are same format at 0x10.1 */ rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]); + if (rdt_cpu_has(X86_FEATURE_CDP_L2)) + rdt_get_cdp_l2_config(); ret = true; } diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index a43a72d8e88e..854f3f5f995b 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -354,6 +354,8 @@ enum { RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE, RDT_RESOURCE_L2, + RDT_RESOURCE_L2DATA, + RDT_RESOURCE_L2CODE, RDT_RESOURCE_MBA, /* Must be the last */ -- GitLab From d03e74f0c68d89731daba72124d0201848a2e6f8 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 
20 Dec 2017 14:57:23 -0800 Subject: [PATCH 1011/1278] x86/intel_rdt: Enable L2 CDP in MSR IA32_L2_QOS_CFG commit 99adde9b370de8e07ef76630c6f60dbf586cdf0e upstream. Bit 0 in MSR IA32_L2_QOS_CFG (0xc82) is L2 CDP enable bit. By default, the bit is zero, i.e. L2 CAT is enabled, and L2 CDP is disabled. When the resctrl mount parameter "cdpl2" is given, the bit is set to 1 and L2 CDP is enabled. In L2 CDP mode, the L2 CAT mask MSRs are re-mapped into interleaved pairs of mask MSRs for code (referenced by an odd CLOSID) and data (referenced by an even CLOSID). Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: Vikas" Cc: Sai Praneeth" Cc: Reinette" Link: https://lkml.kernel.org/r/1513810644-78015-6-git-send-email-fenghua.yu@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel_rdt.h | 3 + arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 117 ++++++++++++++++++----- 2 files changed, 94 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 854f3f5f995b..1b4b51e146e1 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -7,12 +7,15 @@ #include #define IA32_L3_QOS_CFG 0xc81 +#define IA32_L2_QOS_CFG 0xc82 #define IA32_L3_CBM_BASE 0xc90 #define IA32_L2_CBM_BASE 0xd10 #define IA32_MBA_THRTL_BASE 0xd50 #define L3_QOS_CDP_ENABLE 0x01ULL +#define L2_QOS_CDP_ENABLE 0x01ULL + /* * Event IDs are used to program IA32_QM_EVTSEL before reading event * counter from IA32_QM_CTR diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 0ec30b2384c0..f6275c84e740 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -922,6 +922,7 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, kernfs_remove(kn); return ret; } + static void l3_qos_cfg_update(void *arg) { bool *enable = arg; @@ -929,8 +930,17 @@ static void 
l3_qos_cfg_update(void *arg) wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); } -static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) +static void l2_qos_cfg_update(void *arg) { + bool *enable = arg; + + wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); +} + +static int set_cache_qos_cfg(int level, bool enable) +{ + void (*update)(void *arg); + struct rdt_resource *r_l; cpumask_var_t cpu_mask; struct rdt_domain *d; int cpu; @@ -938,16 +948,24 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) return -ENOMEM; - list_for_each_entry(d, &r->domains, list) { + if (level == RDT_RESOURCE_L3) + update = l3_qos_cfg_update; + else if (level == RDT_RESOURCE_L2) + update = l2_qos_cfg_update; + else + return -EINVAL; + + r_l = &rdt_resources_all[level]; + list_for_each_entry(d, &r_l->domains, list) { /* Pick one CPU from each domain instance to update MSR */ cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); } cpu = get_cpu(); /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ if (cpumask_test_cpu(cpu, cpu_mask)) - l3_qos_cfg_update(&enable); + update(&enable); /* Update QOS_CFG MSR on all other cpus in cpu_mask. 
*/ - smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1); + smp_call_function_many(cpu_mask, update, &enable, 1); put_cpu(); free_cpumask_var(cpu_mask); @@ -955,52 +973,99 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) return 0; } -static int cdp_enable(void) +static int cdp_enable(int level, int data_type, int code_type) { - struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA]; - struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE]; - struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_resource *r_ldata = &rdt_resources_all[data_type]; + struct rdt_resource *r_lcode = &rdt_resources_all[code_type]; + struct rdt_resource *r_l = &rdt_resources_all[level]; int ret; - if (!r_l3->alloc_capable || !r_l3data->alloc_capable || - !r_l3code->alloc_capable) + if (!r_l->alloc_capable || !r_ldata->alloc_capable || + !r_lcode->alloc_capable) return -EINVAL; - ret = set_l3_qos_cfg(r_l3, true); + ret = set_cache_qos_cfg(level, true); if (!ret) { - r_l3->alloc_enabled = false; - r_l3data->alloc_enabled = true; - r_l3code->alloc_enabled = true; + r_l->alloc_enabled = false; + r_ldata->alloc_enabled = true; + r_lcode->alloc_enabled = true; } return ret; } -static void cdp_disable(void) +static int cdpl3_enable(void) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; + return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, + RDT_RESOURCE_L3CODE); +} + +static int cdpl2_enable(void) +{ + return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, + RDT_RESOURCE_L2CODE); +} + +static void cdp_disable(int level, int data_type, int code_type) +{ + struct rdt_resource *r = &rdt_resources_all[level]; r->alloc_enabled = r->alloc_capable; - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) { - rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled = false; - rdt_resources_all[RDT_RESOURCE_L3CODE].alloc_enabled = false; - set_l3_qos_cfg(r, false); + if 
(rdt_resources_all[data_type].alloc_enabled) { + rdt_resources_all[data_type].alloc_enabled = false; + rdt_resources_all[code_type].alloc_enabled = false; + set_cache_qos_cfg(level, false); } } +static void cdpl3_disable(void) +{ + cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE); +} + +static void cdpl2_disable(void) +{ + cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE); +} + +static void cdp_disable_all(void) +{ + if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) + cdpl3_disable(); + if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) + cdpl2_disable(); +} + static int parse_rdtgroupfs_options(char *data) { char *token, *o = data; int ret = 0; while ((token = strsep(&o, ",")) != NULL) { - if (!*token) - return -EINVAL; + if (!*token) { + ret = -EINVAL; + goto out; + } - if (!strcmp(token, "cdp")) - ret = cdp_enable(); + if (!strcmp(token, "cdp")) { + ret = cdpl3_enable(); + if (ret) + goto out; + } else if (!strcmp(token, "cdpl2")) { + ret = cdpl2_enable(); + if (ret) + goto out; + } else { + ret = -EINVAL; + goto out; + } } + return 0; + +out: + pr_err("Invalid mount option \"%s\"\n", token); + return ret; } @@ -1155,7 +1220,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, out_info: kernfs_remove(kn_info); out_cdp: - cdp_disable(); + cdp_disable_all(); out: mutex_unlock(&rdtgroup_mutex); cpus_read_unlock(); @@ -1322,7 +1387,7 @@ static void rdt_kill_sb(struct super_block *sb) /*Put everything back to default values. 
*/ for_each_alloc_enabled_rdt_resource(r) reset_all_ctrls(r); - cdp_disable(); + cdp_disable_all(); rmdir_all_sub(); static_branch_disable_cpuslocked(&rdt_alloc_enable_key); static_branch_disable_cpuslocked(&rdt_mon_enable_key); -- GitLab From 5246c08f70b4d721a48b9a98417cb178fa0531cf Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 21 Feb 2020 16:21:05 +0000 Subject: [PATCH 1012/1278] x86/resctrl: Preserve CDP enable over CPU hotplug commit 9fe0450785abbc04b0ed5d3cf61fcdb8ab656b4b upstream. Resctrl assumes that all CPUs are online when the filesystem is mounted, and that CPUs remember their CDP-enabled state over CPU hotplug. This goes wrong when resctrl's CDP-enabled state changes while all the CPUs in a domain are offline. When a domain comes online, enable (or disable!) CDP to match resctrl's current setting. Fixes: 5ff193fbde20 ("x86/intel_rdt: Add basic resctrl filesystem support") Suggested-by: Reinette Chatre Signed-off-by: James Morse Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20200221162105.154163-1-james.morse@arm.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel_rdt.c | 2 ++ arch/x86/kernel/cpu/intel_rdt.h | 1 + arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 13 +++++++++++++ 3 files changed, 16 insertions(+) diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 5d81ff1028af..07742b69d914 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -532,6 +532,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) d->id = id; cpumask_set_cpu(cpu, &d->cpu_mask); + rdt_domain_reconfigure_cdp(r); + if (r->alloc_capable && domain_setup_ctrlval(r, d)) { kfree(d); return; diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 1b4b51e146e1..b43a786ec15f 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -442,5 +442,6 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long 
delay_ms); void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); +void rdt_domain_reconfigure_cdp(struct rdt_resource *r); #endif /* _ASM_X86_INTEL_RDT_H */ diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index f6275c84e740..587ec7da09aa 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1757,6 +1757,19 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, return ret; } +/* Restore the qos cfg state when a domain comes online */ +void rdt_domain_reconfigure_cdp(struct rdt_resource *r) +{ + if (!r->alloc_capable) + return; + + if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA]) + l2_qos_cfg_update(&r->alloc_enabled); + + if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA]) + l3_qos_cfg_update(&r->alloc_enabled); +} + /* * We allow creating mon groups only with in a directory called "mon_groups" * which is present in every ctrl_mon group. Check if this is a valid -- GitLab From 4de034e30d1e14ede9a3fb3737d756c0a8a0697b Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 17 Mar 2020 09:26:45 -0700 Subject: [PATCH 1013/1278] x86/resctrl: Fix invalid attempt at removing the default resource group commit b0151da52a6d4f3951ea24c083e7a95977621436 upstream. The default resource group ("rdtgroup_default") is associated with the root of the resctrl filesystem and should never be removed. New resource groups can be created as subdirectories of the resctrl filesystem and they can be removed from user space. There exists a safeguard in the directory removal code (rdtgroup_rmdir()) that ensures that only subdirectories can be removed by testing that the directory to be removed has to be a child of the root directory. A possible deadlock was recently fixed with 334b0f4e9b1b ("x86/resctrl: Fix a deadlock due to inaccurate reference"). 
This fix involved associating the private data of the "mon_groups" and "mon_data" directories to the resource group to which they belong instead of NULL as before. A consequence of this change was that the original safeguard code preventing removal of "mon_groups" and "mon_data" found in the root directory failed resulting in attempts to remove the default resource group that ends in a BUG: kernel BUG at mm/slub.c:3969! invalid opcode: 0000 [#1] SMP PTI Call Trace: rdtgroup_rmdir+0x16b/0x2c0 kernfs_iop_rmdir+0x5c/0x90 vfs_rmdir+0x7a/0x160 do_rmdir+0x17d/0x1e0 do_syscall_64+0x55/0x1d0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this by improving the directory removal safeguard to ensure that subdirectories of the resctrl root directory can only be removed if they are a child of the resctrl filesystem's root _and_ not associated with the default resource group. Fixes: 334b0f4e9b1b ("x86/resctrl: Fix a deadlock due to inaccurate reference") Reported-by: Sai Praneeth Prakhya Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Tested-by: Sai Praneeth Prakhya Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/884cbe1773496b5dbec1b6bd11bb50cffa83603d.1584461853.git.reinette.chatre@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 587ec7da09aa..60c63b23e3ba 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1918,7 +1918,8 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) * If the rdtgroup is a mon group and parent directory * is a valid "mon_groups" directory, remove the mon group. 
*/ - if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) + if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && + rdtgrp != &rdtgroup_default) ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask); else if (rdtgrp->type == RDTMON_GROUP && is_mon_groups(parent_kn, kn->name)) -- GitLab From a9ef63657e45a2a243ac1cc788e5710adb39c798 Mon Sep 17 00:00:00 2001 From: Austin Kim Date: Mon, 23 Sep 2019 15:36:42 -0700 Subject: [PATCH 1014/1278] mm/vmalloc.c: move 'area->pages' after if statement commit 7ea362427c170061b8822dd41bafaa72b3bcb9ad upstream. If !area->pages statement is true where memory allocation fails, area is freed. In this case 'area->pages = pages' should not executed. So move 'area->pages = pages' after if statement. [akpm@linux-foundation.org: give area->pages the same treatment] Link: http://lkml.kernel.org/r/20190830035716.GA190684@LGEARND20B15 Signed-off-by: Austin Kim Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Uladzislau Rezki (Sony) Cc: Roman Gushchin Cc: Roman Penyaev Cc: Rick Edgecombe Cc: Mike Rapoport Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- mm/vmalloc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d00961ba0c42..88091fd704f4 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1682,7 +1682,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; array_size = (nr_pages * sizeof(struct page *)); - area->nr_pages = nr_pages; /* Please note that the recursion is strictly bounded. 
*/ if (array_size > PAGE_SIZE) { pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask, @@ -1690,13 +1689,16 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, } else { pages = kmalloc_node(array_size, nested_gfp, node); } - area->pages = pages; - if (!area->pages) { + + if (!pages) { remove_vm_area(area->addr); kfree(area); return NULL; } + area->pages = pages; + area->nr_pages = nr_pages; + for (i = 0; i < area->nr_pages; i++) { struct page *page; -- GitLab From 3365079c22ef4305044bd6d9d35808e6a8d8ba3f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 1 Apr 2020 13:23:28 -0500 Subject: [PATCH 1015/1278] objtool: Fix switch table detection in .text.unlikely commit b401efc120a399dfda1f4d2858a4de365c9b08ef upstream. If a switch jump table's indirect branch is in a ".cold" subfunction in .text.unlikely, objtool doesn't detect it, and instead prints a false warning: drivers/media/v4l2-core/v4l2-ioctl.o: warning: objtool: v4l_print_format.cold()+0xd6: sibling call from callable instruction with modified stack frame drivers/hwmon/max6650.o: warning: objtool: max6650_probe.cold()+0xa5: sibling call from callable instruction with modified stack frame drivers/media/dvb-frontends/drxk_hard.o: warning: objtool: init_drxk.cold()+0x16f: sibling call from callable instruction with modified stack frame Fix it by comparing the function, instead of the section and offset. 
Fixes: 13810435b9a7 ("objtool: Support GCC 8's cold subfunctions") Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/157c35d42ca9b6354bbb1604fe9ad7d1153ccb21.1585761021.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5422543faff8..ccd5319d1284 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -915,10 +915,7 @@ static struct rela *find_switch_table(struct objtool_file *file, * it. */ for (; - &insn->list != &file->insn_list && - insn->sec == func->sec && - insn->offset >= func->offset; - + &insn->list != &file->insn_list && insn->func && insn->func->pfunc == func; insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) -- GitLab From c1ac17ab54ee38924670911d800ddfc8b1d610a1 Mon Sep 17 00:00:00 2001 From: Li Bin Date: Mon, 13 Apr 2020 19:29:21 +0800 Subject: [PATCH 1016/1278] scsi: sg: add sg_remove_request in sg_common_write commit 849f8583e955dbe3a1806e03ecacd5e71cce0a08 upstream. If the dxfer_len is greater than 256M then the request is invalid and we need to call sg_remove_request in sg_common_write. Link: https://lore.kernel.org/r/1586777361-17339-1-git-send-email-huawei.libin@huawei.com Fixes: f930c7043663 ("scsi: sg: only check for dxfer_len greater than 256M") Acked-by: Douglas Gilbert Signed-off-by: Li Bin Signed-off-by: Martin K. 
Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 3a406b40f150..b5f589b7b43d 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -809,8 +809,10 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, "sg_common_write: scsi opcode=0x%02x, cmd_size=%d\n", (int) cmnd[0], (int) hp->cmd_len)); - if (hp->dxfer_len >= SZ_256M) + if (hp->dxfer_len >= SZ_256M) { + sg_remove_request(sfp, srp); return -EINVAL; + } k = sg_start_req(srp, cmnd); if (k) { -- GitLab From 73a55c0a66a5bb60297c1c98fcbc62b0507878dd Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Feb 2020 16:14:11 -0800 Subject: [PATCH 1017/1278] ext4: use non-movable memory for superblock readahead commit d87f639258a6a5980183f11876c884931ad93da2 upstream. Since commit a8ac900b8163 ("ext4: use non-movable memory for the superblock") buffers for ext4 superblock were allocated using the sb_bread_unmovable() helper which allocated buffer heads out of non-movable memory blocks. It was necessarily to not block page migrations and do not cause cma allocation failures. However commit 85c8f176a611 ("ext4: preload block group descriptors") broke this by introducing pre-reading of the ext4 superblock. The problem is that __breadahead() is using __getblk() underneath, which allocates buffer heads out of movable memory. It resulted in page migration failures I've seen on a machine with an ext4 partition and a preallocated cma area. Fix this by introducing sb_breadahead_unmovable() and __breadahead_gfp() helpers which use non-movable memory for buffer head allocations and use them for the ext4 superblock readahead. 
Reviewed-by: Andreas Dilger Fixes: 85c8f176a611 ("ext4: preload block group descriptors") Signed-off-by: Roman Gushchin Link: https://lore.kernel.org/r/20200229001411.128010-1-guro@fb.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/buffer.c | 11 +++++++++++ fs/ext4/inode.c | 2 +- fs/ext4/super.c | 2 +- include/linux/buffer_head.h | 8 ++++++++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index bdca7b10e239..cae7f24a0410 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1398,6 +1398,17 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) } EXPORT_SYMBOL(__breadahead); +void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size, + gfp_t gfp) +{ + struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); + if (likely(bh)) { + ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh); + brelse(bh); + } +} +EXPORT_SYMBOL(__breadahead_gfp); + /** * __bread_gfp() - reads a specified block and returns the bh * @bdev: the block_device to read from diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index eafe4a41eb50..5b0d5ca2c2b2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4593,7 +4593,7 @@ static int __ext4_get_inode_loc(struct inode *inode, if (end > table) end = table; while (b <= end) - sb_breadahead(sb, b++); + sb_breadahead_unmovable(sb, b++); } /* diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 875032fe75c4..98e27432c859 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4145,7 +4145,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Pre-read the descriptors into the buffer cache */ for (i = 0; i < db_count; i++) { block = descriptor_loc(sb, logical_sb_block, i); - sb_breadahead(sb, block); + sb_breadahead_unmovable(sb, block); } for (i = 0; i < db_count; i++) { diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index afa37f807f12..2e1077ea77db 100644 --- a/include/linux/buffer_head.h +++ 
b/include/linux/buffer_head.h @@ -187,6 +187,8 @@ struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); +void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size, + gfp_t gfp); struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); @@ -319,6 +321,12 @@ sb_breadahead(struct super_block *sb, sector_t block) __breadahead(sb->s_bdev, block, sb->s_blocksize); } +static inline void +sb_breadahead_unmovable(struct super_block *sb, sector_t block) +{ + __breadahead_gfp(sb->s_bdev, block, sb->s_blocksize, 0); +} + static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { -- GitLab From 2d05ff93d3e164239f6e981694bf590a1fc0d6d6 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Wed, 8 Apr 2020 18:12:29 +0000 Subject: [PATCH 1018/1278] arm, bpf: Fix bugs with ALU64 {RSH, ARSH} BPF_K shift by 0 commit bb9562cf5c67813034c96afb50bd21130a504441 upstream. The current arm BPF JIT does not correctly compile RSH or ARSH when the immediate shift amount is 0. This causes the "rsh64 by 0 imm" and "arsh64 by 0 imm" BPF selftests to hang the kernel by reaching an instruction the verifier determines to be unreachable. The root cause is in how immediate right shifts are encoded on arm. For LSR and ASR (logical and arithmetic right shift), a bit-pattern of 00000 in the immediate encodes a shift amount of 32. When the BPF immediate is 0, the generated code shifts by 32 instead of the expected behavior (a no-op). This patch fixes the bugs by adding an additional check if the BPF immediate is 0. After the change, the above mentioned BPF selftests pass. 
Fixes: 39c13c204bb11 ("arm: eBPF JIT compiler") Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200408181229.10909-1-luke.r.nels@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/arm/net/bpf_jit_32.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index b18fb70c5dcf..e13aca6e6d4b 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -796,7 +796,11 @@ static inline void emit_a32_rsh_i64(const u8 dst[], bool dstk, } /* Do LSR operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for LSR. To shift by 0, don't do anything. + */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx); @@ -829,7 +833,11 @@ static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk, } /* Do ARSH operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for ASR. To shift by 0, don't do anything. + */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, val), ctx); -- GitLab From eb4274d043b19e90b7532a84955ad40852205058 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 7 Apr 2020 14:10:11 +0200 Subject: [PATCH 1019/1278] netfilter: nf_tables: report EOPNOTSUPP on unsupported flags/object type commit d9583cdf2f38d0f526d9a8c8564dd2e35e649bc7 upstream. EINVAL should be used for malformed netlink messages. New userspace utility and old kernels might easily result in EINVAL when exercising new set features, which is misleading. 
Fixes: 8aeff920dcc9 ("netfilter: nf_tables: add stateful object reference to set elements") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 91490446ebb4..5b8d5bfeb7ac 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3129,7 +3129,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, NFT_SET_INTERVAL | NFT_SET_TIMEOUT | NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) - return -EINVAL; + return -EOPNOTSUPP; /* Only one of these operations is supported */ if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) == (NFT_SET_MAP | NFT_SET_OBJECT)) @@ -3167,7 +3167,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); if (objtype == NFT_OBJECT_UNSPEC || objtype > NFT_OBJECT_MAX) - return -EINVAL; + return -EOPNOTSUPP; } else if (flags & NFT_SET_OBJECT) return -EINVAL; else -- GitLab From c01aac99cab39d52dab725dde3dd8bce60fde5df Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 8 Apr 2020 19:43:52 +0800 Subject: [PATCH 1020/1278] irqchip/mbigen: Free msi_desc on device teardown commit edfc23f6f9fdbd7825d50ac1f380243cde19b679 upstream. Using irq_domain_free_irqs_common() on the irqdomain free path will leave the MSI descriptor unfreed when platform devices get removed. Properly free it by MSI domain free function. 
Fixes: 9650c60ebfec0 ("irqchip/mbigen: Create irq domain for each mbigen device") Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200408114352.1604-1-yuzenghui@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-mbigen.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index f7fdbf5d183b..c98358be0bc8 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -231,10 +231,16 @@ static int mbigen_irq_domain_alloc(struct irq_domain *domain, return 0; } +static void mbigen_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + platform_msi_domain_free(domain, virq, nr_irqs); +} + static const struct irq_domain_ops mbigen_domain_ops = { .translate = mbigen_domain_translate, .alloc = mbigen_irq_domain_alloc, - .free = irq_domain_free_irqs_common, + .free = mbigen_irq_domain_free, }; static int mbigen_of_create_domain(struct platform_device *pdev, -- GitLab From 464e814a8e32a7644ab1b2642865d3189225de62 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 13 Apr 2020 10:20:29 +0200 Subject: [PATCH 1021/1278] ALSA: hda: Don't release card at firmware loading error commit 25faa4bd37c10f19e4b848b9032a17a3d44c6f09 upstream. At the error path of the firmware loading error, the driver tries to release the card object and set NULL to drvdata. This may be referred badly at the possible PM action, as the driver itself is still bound and the PM callbacks read the card object. Instead, we continue the probing as if it were no option set. This is often a better choice than the forced abort, too. 
Fixes: 5cb543dba986 ("ALSA: hda - Deferred probing with request_firmware_nowait()") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=207043 Link: https://lore.kernel.org/r/20200413082034.25166-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 5729e1fe69e9..d392c1ec0b28 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2034,24 +2034,15 @@ static void azx_firmware_cb(const struct firmware *fw, void *context) { struct snd_card *card = context; struct azx *chip = card->private_data; - struct pci_dev *pci = chip->pci; - - if (!fw) { - dev_err(card->dev, "Cannot load firmware, aborting\n"); - goto error; - } - chip->fw = fw; + if (fw) + chip->fw = fw; + else + dev_err(card->dev, "Cannot load firmware, continue without patching\n"); if (!chip->disabled) { /* continue probing */ - if (azx_probe_continue(chip)) - goto error; + azx_probe_continue(chip); } - return; /* OK */ - - error: - snd_card_free(card); - pci_set_drvdata(pci, NULL); } #endif -- GitLab From 5efb8f94df58b51781baf334803249e099662045 Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" Date: Mon, 25 Feb 2019 20:03:42 -0800 Subject: [PATCH 1022/1278] lib/raid6: use vdupq_n_u8 to avoid endianness warnings commit 1ad3935b39da78a403e7df7a3813f866c731bc64 upstream. Clang warns: vector initializers are not compatible with NEON intrinsics in big endian mode [-Wnonportable-vector-initialization] While this is usually the case, it's not an issue for this case since we're initializing the uint8x16_t (16x uint8_t's) with the same value. Instead, use vdupq_n_u8 which both compilers lower into a single movi instruction: https://godbolt.org/z/vBrgzt This avoids the static storage for a constant value. 
Link: https://github.com/ClangBuiltLinux/linux/issues/214 Suggested-by: Nathan Chancellor Reviewed-by: Ard Biesheuvel Signed-off-by: Nick Desaulniers Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- lib/raid6/neon.uc | 5 ++--- lib/raid6/recov_neon_inner.c | 7 ++----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc index d5242f544551..b7c68030da4f 100644 --- a/lib/raid6/neon.uc +++ b/lib/raid6/neon.uc @@ -28,7 +28,6 @@ typedef uint8x16_t unative_t; -#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) #define NSIZE sizeof(unative_t) /* @@ -61,7 +60,7 @@ void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) int d, z, z0; register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d); z0 = disks - 3; /* Highest data disk */ p = dptr[z0+1]; /* XOR parity */ @@ -92,7 +91,7 @@ void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop, int d, z, z0; register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d); z0 = stop; /* P/Q right side optimization */ p = dptr[disks-2]; /* XOR parity */ diff --git a/lib/raid6/recov_neon_inner.c b/lib/raid6/recov_neon_inner.c index 8cd20c9f834a..7d00c31a6547 100644 --- a/lib/raid6/recov_neon_inner.c +++ b/lib/raid6/recov_neon_inner.c @@ -10,11 +10,6 @@ #include -static const uint8x16_t x0f = { - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, -}; - #ifdef CONFIG_ARM /* * AArch32 does not provide this intrinsic natively because it does not @@ -41,6 +36,7 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp, uint8x16_t pm1 = vld1q_u8(pbmul + 16); uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f); /* * while ( bytes-- ) { @@ -87,6 +83,7 @@ void 
__raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq, { uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f); /* * while (bytes--) { -- GitLab From 24fe595d29bd9d07af75e18cd2f30fdf9f52fb77 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 8 Oct 2018 12:57:36 +0200 Subject: [PATCH 1023/1278] video: fbdev: sis: Remove unnecessary parentheses and commented code commit 864eb1afc60cb43e7df879b97f8ca0d719bbb735 upstream. Clang warns when multiple pairs of parentheses are used for a single conditional statement. drivers/video/fbdev/sis/init301.c:851:42: warning: equality comparison with extraneous parentheses [-Wparentheses-equality] } else if((SiS_Pr->SiS_IF_DEF_LVDS == 1) /* || ~~~~~~~~~~~~~~~~~~~~~~~~^~~~ drivers/video/fbdev/sis/init301.c:851:42: note: remove extraneous parentheses around the comparison to silence this warning } else if((SiS_Pr->SiS_IF_DEF_LVDS == 1) /* || ~ ^ ~ drivers/video/fbdev/sis/init301.c:851:42: note: use '=' to turn this equality comparison into an assignment } else if((SiS_Pr->SiS_IF_DEF_LVDS == 1) /* || ^~ = 1 warning generated. Remove the parentheses and while we're at it, clean up the commented code, which has been here since the beginning of git history. 
Link: https://github.com/ClangBuiltLinux/linux/issues/118 Signed-off-by: Nathan Chancellor Cc: Thomas Winischhofer Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/sis/init301.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/video/fbdev/sis/init301.c b/drivers/video/fbdev/sis/init301.c index 1ec9c3e0e1d8..f23a381442d3 100644 --- a/drivers/video/fbdev/sis/init301.c +++ b/drivers/video/fbdev/sis/init301.c @@ -522,9 +522,7 @@ SiS_PanelDelay(struct SiS_Private *SiS_Pr, unsigned short DelayTime) SiS_DDC2Delay(SiS_Pr, 0x4000); } - } else if((SiS_Pr->SiS_IF_DEF_LVDS == 1) /* || - (SiS_Pr->SiS_CustomT == CUT_COMPAQ1280) || - (SiS_Pr->SiS_CustomT == CUT_CLEVO1400) */ ) { /* 315 series, LVDS; Special */ + } else if (SiS_Pr->SiS_IF_DEF_LVDS == 1) { /* 315 series, LVDS; Special */ if(SiS_Pr->SiS_IF_DEF_CH70xx == 0) { PanelID = SiS_GetReg(SiS_Pr->SiS_P3d4,0x36); -- GitLab From db7a49488068101d0d87ee87adf94ae820f459d2 Mon Sep 17 00:00:00 2001 From: Joe Moriarty Date: Mon, 12 Feb 2018 14:51:42 -0500 Subject: [PATCH 1024/1278] drm: NULL pointer dereference [null-pointer-deref] (CWE 476) problem commit 22a07038c0eaf4d1315a493ce66dcd255accba19 upstream. The Parfait (version 2.1.0) static code analysis tool found the following NULL pointer derefernce problem. - drivers/gpu/drm/drm_dp_mst_topology.c The call to drm_dp_calculate_rad() in function drm_dp_port_setup_pdt() could result in a NULL pointer being returned to port->mstb due to a failure to allocate memory for port->mstb. 
Signed-off-by: Joe Moriarty Reviewed-by: Steven Sistare Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180212195144.98323-3-joe.moriarty@oracle.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index db0e9ce57e29..42eaeae86bb8 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1043,10 +1043,12 @@ static bool drm_dp_port_setup_pdt(struct drm_dp_mst_port *port) lct = drm_dp_calculate_rad(port, rad); port->mstb = drm_dp_add_mst_branch_device(lct, rad); - port->mstb->mgr = port->mgr; - port->mstb->port_parent = port; + if (port->mstb) { + port->mstb->mgr = port->mgr; + port->mstb->port_parent = port; - send_link = true; + send_link = true; + } break; } return send_link; -- GitLab From e2f44233faf89e208bf9af0064eb4677dd7cda6b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 3 Jan 2018 12:06:15 +0100 Subject: [PATCH 1025/1278] clk: Fix debugfs_create_*() usage commit 4c8326d5ebb0de3191e98980c80ab644026728d0 upstream. When exposing data access through debugfs, the correct debugfs_create_*() functions must be used, matching the data types. Remove all casts from data pointers passed to debugfs_create_*() functions, as such casts prevent the compiler from flagging bugs. clk_core.rate and .accuracy are "unsigned long", hence casting their addresses to "u32 *" exposed the wrong halves on big-endian 64-bit systems. Fix this by using debugfs_create_ulong() instead. Octal permissions are preferred, as they are easier to read than symbolic permissions. Hence replace "S_IRUGO" by "0444" throughout. 
Signed-off-by: Geert Uytterhoeven [sboyd@codeaurora.org: Squash the octal change in too] Signed-off-by: Stephen Boyd Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 8341a128dab1..44b6f23cc851 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2172,18 +2172,16 @@ static int clk_debug_create_one(struct clk_core *core, struct dentry *pdentry) core->dentry = d; - d = debugfs_create_u32("clk_rate", S_IRUGO, core->dentry, - (u32 *)&core->rate); + d = debugfs_create_ulong("clk_rate", 0444, core->dentry, &core->rate); if (!d) goto err_out; - d = debugfs_create_u32("clk_accuracy", S_IRUGO, core->dentry, - (u32 *)&core->accuracy); + d = debugfs_create_ulong("clk_accuracy", 0444, core->dentry, + &core->accuracy); if (!d) goto err_out; - d = debugfs_create_u32("clk_phase", S_IRUGO, core->dentry, - (u32 *)&core->phase); + d = debugfs_create_u32("clk_phase", 0444, core->dentry, &core->phase); if (!d) goto err_out; @@ -2192,23 +2190,23 @@ static int clk_debug_create_one(struct clk_core *core, struct dentry *pdentry) if (!d) goto err_out; - d = debugfs_create_u32("clk_prepare_count", S_IRUGO, core->dentry, - (u32 *)&core->prepare_count); + d = debugfs_create_u32("clk_prepare_count", 0444, core->dentry, + &core->prepare_count); if (!d) goto err_out; - d = debugfs_create_u32("clk_enable_count", S_IRUGO, core->dentry, - (u32 *)&core->enable_count); + d = debugfs_create_u32("clk_enable_count", 0444, core->dentry, + &core->enable_count); if (!d) goto err_out; - d = debugfs_create_u32("clk_notifier_count", S_IRUGO, core->dentry, - (u32 *)&core->notifier_count); + d = debugfs_create_u32("clk_notifier_count", 0444, core->dentry, + &core->notifier_count); if (!d) goto err_out; if (core->num_parents > 1) { - d = debugfs_create_file("clk_possible_parents", S_IRUGO, + d = 
debugfs_create_file("clk_possible_parents", 0444, core->dentry, core, &possible_parents_fops); if (!d) goto err_out; @@ -2304,22 +2302,22 @@ static int __init clk_debug_init(void) if (!rootdir) return -ENOMEM; - d = debugfs_create_file("clk_summary", S_IRUGO, rootdir, &all_lists, + d = debugfs_create_file("clk_summary", 0444, rootdir, &all_lists, &clk_summary_fops); if (!d) return -ENOMEM; - d = debugfs_create_file("clk_dump", S_IRUGO, rootdir, &all_lists, + d = debugfs_create_file("clk_dump", 0444, rootdir, &all_lists, &clk_dump_fops); if (!d) return -ENOMEM; - d = debugfs_create_file("clk_orphan_summary", S_IRUGO, rootdir, + d = debugfs_create_file("clk_orphan_summary", 0444, rootdir, &orphan_list, &clk_summary_fops); if (!d) return -ENOMEM; - d = debugfs_create_file("clk_orphan_dump", S_IRUGO, rootdir, + d = debugfs_create_file("clk_orphan_dump", 0444, rootdir, &orphan_list, &clk_dump_fops); if (!d) return -ENOMEM; -- GitLab From 42d394b07ccd09907ceced8e689903c30ab3bec9 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 21 Apr 2020 13:39:56 +0100 Subject: [PATCH 1026/1278] Revert "gpio: set up initial state from .get_direction()" [ Upstream commit 1ca2a92b2a99323f666f1b669b7484df4bda05e4 ] This reverts commit 72d3200061776264941be1b5a9bb8e926b3b30a5. We cannot blindly query the direction of all GPIOs when the pins are first registered. The get_direction callback normally triggers a read/write to hardware, but we shouldn't be touching the hardware for an individual GPIO until after it's been properly claimed. 
Signed-off-by: Timur Tabi Reviewed-by: Stephen Boyd Signed-off-by: Linus Walleij Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index f0777a7a4305..d5b42cc86d71 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1245,31 +1245,14 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) struct gpio_desc *desc = &gdev->descs[i]; desc->gdev = gdev; - /* - * REVISIT: most hardware initializes GPIOs as inputs - * (often with pullups enabled) so power usage is - * minimized. Linux code should set the gpio direction - * first thing; but until it does, and in case - * chip->get_direction is not set, we may expose the - * wrong direction in sysfs. - */ - - if (chip->get_direction) { - /* - * If we have .get_direction, set up the initial - * direction flag from the hardware. - */ - int dir = chip->get_direction(chip, i); - if (!dir) - set_bit(FLAG_IS_OUT, &desc->flags); - } else if (!chip->direction_input) { - /* - * If the chip lacks the .direction_input callback - * we logically assume all lines are outputs. - */ - set_bit(FLAG_IS_OUT, &desc->flags); - } + /* REVISIT: most hardware initializes GPIOs as inputs (often + * with pullups enabled) so power usage is minimized. Linux + * code should set the gpio direction first thing; but until + * it does, and in case chip->get_direction is not set, we may + * expose the wrong direction in sysfs. + */ + desc->flags = !chip->direction_input ? 
(1 << FLAG_IS_OUT) : 0; } #ifdef CONFIG_PINCTRL -- GitLab From 6a0e901f9f742a99219293e994e380ee8de1237a Mon Sep 17 00:00:00 2001 From: Xu YiPing Date: Tue, 21 Apr 2020 13:39:57 +0100 Subject: [PATCH 1027/1278] arm64: perf: remove unsupported events for Cortex-A73 [ Upstream commit f8ada189550984ee21f27be736042b74a7da1d68 ] bus access read/write events are not supported in A73, based on the Cortex-A73 TRM r0p2, section 11.9 Events (pages 11-457 to 11-460). Fixes: 5561b6c5e981 "arm64: perf: add support for Cortex-A73" Acked-by: Julien Thierry Signed-off-by: Xu YiPing Signed-off-by: Will Deacon Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/perf_event.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 05fdae70e9f6..53df84b2a07f 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] -- GitLab From 6dc0256f802be6bc783fb9542affb48d267f592c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 21 Apr 2020 13:39:58 +0100 Subject: [PATCH 1028/1278] arm64: traps: Don't print stack or raw PC/LR values in backtraces [ Upstream commit a25ffd3a6302a67814280274d8f1aa4ae2ea4b59 ] Printing raw pointer values in backtraces has potential security implications and are of questionable value anyway. 
This patch follows x86's lead and removes the "Exception stack:" dump from kernel backtraces, as well as converting PC/LR values to symbols such as "sysrq_handle_crash+0x20/0x30". Tested-by: Laura Abbott Signed-off-by: Will Deacon Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/process.c | 8 ++--- arch/arm64/kernel/traps.c | 65 ++----------------------------------- 2 files changed, 6 insertions(+), 67 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ee5ce03c9315..2ff327651ebe 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -186,11 +186,9 @@ void __show_regs(struct pt_regs *regs) } show_regs_print_info(KERN_DEFAULT); - print_symbol("PC is at %s\n", instruction_pointer(regs)); - print_symbol("LR is at %s\n", lr); - printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", - regs->pc, lr, regs->pstate); - printk("sp : %016llx\n", sp); + print_symbol("pc : %s\n", regs->pc); + print_symbol("lr : %s\n", lr); + printk("sp : %016llx pstate : %08llx\n", sp, regs->pstate); i = top_reg; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5ae9c86c30d1..b30d23431fe1 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -59,55 +59,9 @@ static const char *handler[]= { int show_unhandled_signals = 0; -/* - * Dump out the contents of some kernel memory nicely... - */ -static void dump_mem(const char *lvl, const char *str, unsigned long bottom, - unsigned long top) -{ - unsigned long first; - mm_segment_t fs; - int i; - - /* - * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. 
- */ - fs = get_fs(); - set_fs(KERNEL_DS); - - printk("%s%s(0x%016lx to 0x%016lx)\n", lvl, str, bottom, top); - - for (first = bottom & ~31; first < top; first += 32) { - unsigned long p; - char str[sizeof(" 12345678") * 8 + 1]; - - memset(str, ' ', sizeof(str)); - str[sizeof(str) - 1] = '\0'; - - for (p = first, i = 0; i < (32 / 8) - && p < top; i++, p += 8) { - if (p >= bottom && p < top) { - unsigned long val; - - if (__get_user(val, (unsigned long *)p) == 0) - sprintf(str + i * 17, " %016lx", val); - else - sprintf(str + i * 17, " ????????????????"); - } - } - printk("%s%04lx:%s\n", lvl, first & 0xffff, str); - } - - set_fs(fs); -} - static void dump_backtrace_entry(unsigned long where) { - /* - * Note that 'where' can have a physical address, but it's not handled. - */ - print_ip_sym(where); + printk(" %pS\n", (void *)where); } static void __dump_instr(const char *lvl, struct pt_regs *regs) @@ -177,10 +131,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) #endif printk("Call trace:\n"); - while (1) { - unsigned long stack; - int ret; - + do { /* skip until specified stack frame */ if (!skip) { dump_backtrace_entry(frame.pc); @@ -195,17 +146,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) */ dump_backtrace_entry(regs->pc); } - ret = unwind_frame(tsk, &frame); - if (ret < 0) - break; - if (in_entry_text(frame.pc)) { - stack = frame.fp - offsetof(struct pt_regs, stackframe); - - if (on_accessible_stack(tsk, stack)) - dump_mem("", "Exception stack", stack, - stack + sizeof(struct pt_regs)); - } - } + } while (!unwind_frame(tsk, &frame)); put_task_stack(tsk); } -- GitLab From af96128494a29761547cf43b67a989a7f74f66f2 Mon Sep 17 00:00:00 2001 From: Prasad Sodagudi Date: Tue, 21 Apr 2020 13:40:00 +0100 Subject: [PATCH 1029/1278] arch_topology: Fix section miss match warning due to free_raw_capacity() [ Upstream commit 82d8ba717ccb54dd803624db044f351b2a54d000 ] Remove the __init annotation from free_raw_capacity() to avoid 
the following warning. The function init_cpu_capacity_callback() references the function __init free_raw_capacity(). WARNING: vmlinux.o(.text+0x425cc0): Section mismatch in reference from the function init_cpu_capacity_callback() to the function .init.text:free_raw_capacity(). Signed-off-by: Prasad Sodagudi Acked-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/base/arch_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 41be9ff7d70a..3da53cc6cf2b 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -96,7 +96,7 @@ subsys_initcall(register_cpu_capacity_sysctl); static u32 capacity_scale; static u32 *raw_capacity; -static int __init free_raw_capacity(void) +static int free_raw_capacity(void) { kfree(raw_capacity); raw_capacity = NULL; -- GitLab From a556431df23e98e0c0ac9126b721367df183e6fc Mon Sep 17 00:00:00 2001 From: Hamad Kadmany Date: Tue, 21 Apr 2020 13:40:01 +0100 Subject: [PATCH 1030/1278] wil6210: increase firmware ready timeout [ Upstream commit 6ccae584014ef7074359eb4151086beef66ecfa9 ] Firmware ready event may take longer than current timeout in some scenarios, for example with multiple RFs connected where each requires an initial calibration. Increase the timeout to support these scenarios. 
Signed-off-by: Hamad Kadmany Signed-off-by: Maya Erez Signed-off-by: Kalle Valo Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/wil6210/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index bac829aa950d..a3dc42841526 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -871,7 +871,7 @@ static void wil_bl_crash_info(struct wil6210_priv *wil, bool is_err) static int wil_wait_for_fw_ready(struct wil6210_priv *wil) { - ulong to = msecs_to_jiffies(1000); + ulong to = msecs_to_jiffies(2000); ulong left = wait_for_completion_timeout(&wil->wmi_ready, to); if (0 == left) { -- GitLab From 64ba980bbfd29e08156b3e5936b36933a739ab63 Mon Sep 17 00:00:00 2001 From: Dedy Lansky Date: Tue, 21 Apr 2020 13:40:02 +0100 Subject: [PATCH 1031/1278] wil6210: fix temperature debugfs [ Upstream commit 6d9eb7ebae3d7e951bc0999235ae7028eb4cae4f ] For negative temperatures, "temp" debugfs is showing wrong values. Use signed types so proper calculations is done for sub zero temperatures. 
Signed-off-by: Dedy Lansky Signed-off-by: Maya Erez Signed-off-by: Kalle Valo Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/wil6210/debugfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c index 6db00c167d2e..3a98f75c5d7e 100644 --- a/drivers/net/wireless/ath/wil6210/debugfs.c +++ b/drivers/net/wireless/ath/wil6210/debugfs.c @@ -1093,7 +1093,7 @@ static const struct file_operations fops_ssid = { }; /*---------temp------------*/ -static void print_temp(struct seq_file *s, const char *prefix, u32 t) +static void print_temp(struct seq_file *s, const char *prefix, s32 t) { switch (t) { case 0: @@ -1101,7 +1101,8 @@ static void print_temp(struct seq_file *s, const char *prefix, u32 t) seq_printf(s, "%s N/A\n", prefix); break; default: - seq_printf(s, "%s %d.%03d\n", prefix, t / 1000, t % 1000); + seq_printf(s, "%s %s%d.%03d\n", prefix, (t < 0 ? "-" : ""), + abs(t / 1000), abs(t % 1000)); break; } } @@ -1109,7 +1110,7 @@ static void print_temp(struct seq_file *s, const char *prefix, u32 t) static int wil_temp_debugfs_show(struct seq_file *s, void *data) { struct wil6210_priv *wil = s->private; - u32 t_m, t_r; + s32 t_m, t_r; int rc = wmi_get_temperature(wil, &t_m, &t_r); if (rc) { -- GitLab From 4719923f9b9bffa23b2389c3cf80cc0bd9338b20 Mon Sep 17 00:00:00 2001 From: Venkat Gopalakrishnan Date: Tue, 21 Apr 2020 13:40:03 +0100 Subject: [PATCH 1032/1278] scsi: ufs: make sure all interrupts are processed [ Upstream commit 7f6ba4f12e6cbfdefbb95cfd8fc67ece6c15d799 ] As multiple requests are submitted to the ufs host controller in parallel there could be instances where the command completion interrupt arrives later for a request that is already processed earlier as the corresponding doorbell was cleared when handling the previous interrupt. 
Read the interrupt status in a loop after processing the received interrupt to catch such interrupts and handle it. Signed-off-by: Venkat Gopalakrishnan Signed-off-by: Asutosh Das Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufshcd.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 6dbdddffa0b3..78d9c2c48236 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5371,19 +5371,30 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba) u32 intr_status, enabled_intr_status; irqreturn_t retval = IRQ_NONE; struct ufs_hba *hba = __hba; + int retries = hba->nutrs; spin_lock(hba->host->host_lock); intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS); - enabled_intr_status = - intr_status & ufshcd_readl(hba, REG_INTERRUPT_ENABLE); - if (intr_status) - ufshcd_writel(hba, intr_status, REG_INTERRUPT_STATUS); + /* + * There could be max of hba->nutrs reqs in flight and in worst case + * if the reqs get finished 1 by 1 after the interrupt status is + * read, make sure we handle them by checking the interrupt status + * again in a loop until we process all of the reqs before returning. 
+ */ + do { + enabled_intr_status = + intr_status & ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + if (intr_status) + ufshcd_writel(hba, intr_status, REG_INTERRUPT_STATUS); + if (enabled_intr_status) { + ufshcd_sl_intr(hba, enabled_intr_status); + retval = IRQ_HANDLED; + } + + intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS); + } while (intr_status && --retries); - if (enabled_intr_status) { - ufshcd_sl_intr(hba, enabled_intr_status); - retval = IRQ_HANDLED; - } spin_unlock(hba->host->host_lock); return retval; } -- GitLab From 310a92c1720e87de261f481048e19b606aa04495 Mon Sep 17 00:00:00 2001 From: Subhash Jadavani Date: Tue, 21 Apr 2020 13:40:04 +0100 Subject: [PATCH 1033/1278] scsi: ufs: ufs-qcom: remove broken hci version quirk [ Upstream commit 69a6fff068567469c0ef1156ae5ac8d3d71701f0 ] UFSHCD_QUIRK_BROKEN_UFS_HCI_VERSION is only applicable for QCOM UFS host controller version 2.x.y and this has been fixed from version 3.x.y onwards, hence this change removes this quirk for version 3.x.y onwards. [mkp: applied by hand] Signed-off-by: Subhash Jadavani Signed-off-by: Asutosh Das Signed-off-by: Martin K. 
Petersen Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufs-qcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index c87d770b519a..f2b8de195d8a 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -1094,7 +1094,7 @@ static void ufs_qcom_advertise_quirks(struct ufs_hba *hba) hba->quirks |= UFSHCD_QUIRK_BROKEN_LCC; } - if (host->hw_ver.major >= 0x2) { + if (host->hw_ver.major == 0x2) { hba->quirks |= UFSHCD_QUIRK_BROKEN_UFS_HCI_VERSION; if (!ufs_qcom_cap_qunipro(host)) -- GitLab From a1c2069246b69d594f69176b67662cf43010fb8e Mon Sep 17 00:00:00 2001 From: Dedy Lansky Date: Tue, 21 Apr 2020 13:40:05 +0100 Subject: [PATCH 1034/1278] wil6210: rate limit wil_rx_refill error [ Upstream commit 3d6b72729cc2933906de8d2c602ae05e920b2122 ] wil_err inside wil_rx_refill can flood the log buffer. Replace it with wil_err_ratelimited. Signed-off-by: Dedy Lansky Signed-off-by: Maya Erez Signed-off-by: Kalle Valo Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/wil6210/txrx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c index 16750056b8b5..b483c4266097 100644 --- a/drivers/net/wireless/ath/wil6210/txrx.c +++ b/drivers/net/wireless/ath/wil6210/txrx.c @@ -636,8 +636,8 @@ static int wil_rx_refill(struct wil6210_priv *wil, int count) v->swtail = next_tail) { rc = wil_vring_alloc_skb(wil, v, v->swtail, headroom); if (unlikely(rc)) { - wil_err(wil, "Error %d in wil_rx_refill[%d]\n", - rc, v->swtail); + wil_err_ratelimited(wil, "Error %d in rx refill[%d]\n", + rc, v->swtail); break; } } -- GitLab From 054808c3d848d9f2d011b2a87b98a945a9b4bedd Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 21 Apr 2020 13:40:06 +0100 Subject: [PATCH 1035/1278] rpmsg: glink: use put_device() if device_register fail [ Upstream 
commit a9011726c4bb37e5d6a7279bf47fcc19cd9d3e1a ] if device_register() returned an error! Always use put_device() to give up the reference initialized. And unregister device for other return error. Signed-off-by: Arvind Yadav Signed-off-by: Bjorn Andersson Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_glink_smem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rpmsg/qcom_glink_smem.c b/drivers/rpmsg/qcom_glink_smem.c index 2b54e71886d9..69a14041ef1f 100644 --- a/drivers/rpmsg/qcom_glink_smem.c +++ b/drivers/rpmsg/qcom_glink_smem.c @@ -212,6 +212,7 @@ struct qcom_glink *qcom_glink_smem_register(struct device *parent, ret = device_register(dev); if (ret) { pr_err("failed to register glink edge\n"); + put_device(dev); return ERR_PTR(ret); } @@ -294,7 +295,7 @@ struct qcom_glink *qcom_glink_smem_register(struct device *parent, return glink; err_put_dev: - put_device(dev); + device_unregister(dev); return ERR_PTR(ret); } -- GitLab From 971702e14c9cc5fe4aa0f6d442d6fa7c6109380c Mon Sep 17 00:00:00 2001 From: Mohit Aggarwal Date: Tue, 21 Apr 2020 13:40:07 +0100 Subject: [PATCH 1036/1278] rtc: pm8xxx: Fix issue in RTC write path [ Upstream commit 83220bf38b77a830f8e62ab1a0d0408304f9b966 ] In order to set time in rtc, need to disable rtc hw before writing into rtc registers. Also fixes disabling of alarm while setting rtc time. Signed-off-by: Mohit Aggarwal Signed-off-by: Alexandre Belloni Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-pm8xxx.c | 49 +++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index a1b4b0ed1f19..3b619b7b2c53 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -74,16 +74,18 @@ struct pm8xxx_rtc { /* * Steps to write the RTC registers. * 1. Disable alarm if enabled. - * 2. Write 0x00 to LSB. - * 3. 
Write Byte[1], Byte[2], Byte[3] then Byte[0]. - * 4. Enable alarm if disabled in step 1. + * 2. Disable rtc if enabled. + * 3. Write 0x00 to LSB. + * 4. Write Byte[1], Byte[2], Byte[3] then Byte[0]. + * 5. Enable rtc if disabled in step 2. + * 6. Enable alarm if disabled in step 1. */ static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm) { int rc, i; unsigned long secs, irq_flags; - u8 value[NUM_8_BIT_RTC_REGS], alarm_enabled = 0; - unsigned int ctrl_reg; + u8 value[NUM_8_BIT_RTC_REGS], alarm_enabled = 0, rtc_disabled = 0; + unsigned int ctrl_reg, rtc_ctrl_reg; struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev); const struct pm8xxx_rtc_regs *regs = rtc_dd->regs; @@ -92,23 +94,38 @@ static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm) rtc_tm_to_time(tm, &secs); + dev_dbg(dev, "Seconds value to be written to RTC = %lu\n", secs); + for (i = 0; i < NUM_8_BIT_RTC_REGS; i++) { value[i] = secs & 0xFF; secs >>= 8; } - dev_dbg(dev, "Seconds value to be written to RTC = %lu\n", secs); - spin_lock_irqsave(&rtc_dd->ctrl_reg_lock, irq_flags); - rc = regmap_read(rtc_dd->regmap, regs->ctrl, &ctrl_reg); + rc = regmap_read(rtc_dd->regmap, regs->alarm_ctrl, &ctrl_reg); if (rc) goto rtc_rw_fail; if (ctrl_reg & regs->alarm_en) { alarm_enabled = 1; ctrl_reg &= ~regs->alarm_en; - rc = regmap_write(rtc_dd->regmap, regs->ctrl, ctrl_reg); + rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg); + if (rc) { + dev_err(dev, "Write to RTC Alarm control register failed\n"); + goto rtc_rw_fail; + } + } + + /* Disable RTC H/w before writing on RTC register */ + rc = regmap_read(rtc_dd->regmap, regs->ctrl, &rtc_ctrl_reg); + if (rc) + goto rtc_rw_fail; + + if (rtc_ctrl_reg & PM8xxx_RTC_ENABLE) { + rtc_disabled = 1; + rtc_ctrl_reg &= ~PM8xxx_RTC_ENABLE; + rc = regmap_write(rtc_dd->regmap, regs->ctrl, rtc_ctrl_reg); if (rc) { dev_err(dev, "Write to RTC control register failed\n"); goto rtc_rw_fail; @@ -137,11 +154,21 @@ static int pm8xxx_rtc_set_time(struct 
device *dev, struct rtc_time *tm) goto rtc_rw_fail; } + /* Enable RTC H/w after writing on RTC register */ + if (rtc_disabled) { + rtc_ctrl_reg |= PM8xxx_RTC_ENABLE; + rc = regmap_write(rtc_dd->regmap, regs->ctrl, rtc_ctrl_reg); + if (rc) { + dev_err(dev, "Write to RTC control register failed\n"); + goto rtc_rw_fail; + } + } + if (alarm_enabled) { ctrl_reg |= regs->alarm_en; - rc = regmap_write(rtc_dd->regmap, regs->ctrl, ctrl_reg); + rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg); if (rc) { - dev_err(dev, "Write to RTC control register failed\n"); + dev_err(dev, "Write to RTC Alarm control register failed\n"); goto rtc_rw_fail; } } -- GitLab From bb8ca3d39feca1c07cc138ef16675a3ce690b469 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 21 Apr 2020 13:40:09 +0100 Subject: [PATCH 1037/1278] rpmsg: glink: Fix missing mutex_init() in qcom_glink_alloc_channel() [ Upstream commit fb416f69900773d5a6030c909114099f92d07ab9 ] qcom_glink_alloc_channel() allocates the mutex but not initialize it. Use mutex_init() on it to initialize it correctly. This is detected by Coccinelle semantic patch. 
Signed-off-by: Wei Yongjun Signed-off-by: Bjorn Andersson Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_glink_native.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index 114481c9fba1..7802663efe33 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -221,6 +221,7 @@ static struct glink_channel *qcom_glink_alloc_channel(struct qcom_glink *glink, /* Setup glink internal glink_channel data */ spin_lock_init(&channel->recv_lock); spin_lock_init(&channel->intent_lock); + mutex_init(&channel->intent_req_lock); channel->glink = glink; channel->name = kstrdup(name, GFP_KERNEL); -- GitLab From deb1682b6f049bf5e6c496b1b2d45395ee971f32 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 21 Apr 2020 13:40:10 +0100 Subject: [PATCH 1038/1278] rpmsg: glink: smem: Ensure ordering during tx [ Upstream commit 9d32497361ff89d2fc8306407de6f04b2bfb2836 ] Ensure the ordering of the fifo write and the update of the write index, so that the index is not updated before the data has landed in the fifo. 
Acked-By: Chris Lew Reported-by: Arun Kumar Neelakantam Signed-off-by: Bjorn Andersson Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_glink_smem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/rpmsg/qcom_glink_smem.c b/drivers/rpmsg/qcom_glink_smem.c index 69a14041ef1f..ed616b0bc563 100644 --- a/drivers/rpmsg/qcom_glink_smem.c +++ b/drivers/rpmsg/qcom_glink_smem.c @@ -181,6 +181,9 @@ static void glink_smem_tx_write(struct qcom_glink_pipe *glink_pipe, if (head >= pipe->native.length) head -= pipe->native.length; + /* Ensure ordering of fifo and head update */ + wmb(); + *pipe->head = cpu_to_le32(head); } -- GitLab From 5ccc2c89e67c3311eb43c52c40388bfc48253ac5 Mon Sep 17 00:00:00 2001 From: Lazar Alexei Date: Tue, 21 Apr 2020 13:40:11 +0100 Subject: [PATCH 1039/1278] wil6210: fix PCIe bus mastering in case of interface down [ Upstream commit 680c242dc25e036265793edc7d755cfc15afd231 ] In case of interface down, radio is turned off but PCIe mastering is not cleared. This can cause unexpected PCIe access to the shutdown device. 
Fix this by clearing PCIe mastering also in case interface is down Signed-off-by: Lazar Alexei Signed-off-by: Maya Erez Signed-off-by: Kalle Valo Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/wil6210/pcie_bus.c | 24 +++++++++++++-------- drivers/net/wireless/ath/wil6210/pm.c | 10 ++------- drivers/net/wireless/ath/wil6210/wil6210.h | 4 ++-- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c index 6a3ab4bf916d..b2c3cf6db881 100644 --- a/drivers/net/wireless/ath/wil6210/pcie_bus.c +++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c @@ -393,6 +393,9 @@ static int wil6210_suspend(struct device *dev, bool is_runtime) int rc = 0; struct pci_dev *pdev = to_pci_dev(dev); struct wil6210_priv *wil = pci_get_drvdata(pdev); + struct net_device *ndev = wil_to_ndev(wil); + bool keep_radio_on = ndev->flags & IFF_UP && + wil->keep_radio_on_during_sleep; wil_dbg_pm(wil, "suspend: %s\n", is_runtime ? "runtime" : "system"); @@ -400,14 +403,14 @@ static int wil6210_suspend(struct device *dev, bool is_runtime) if (rc) goto out; - rc = wil_suspend(wil, is_runtime); + rc = wil_suspend(wil, is_runtime, keep_radio_on); if (!rc) { wil->suspend_stats.successful_suspends++; - /* If platform device supports keep_radio_on_during_sleep - * it will control PCIe master + /* In case radio stays on, platform device will control + * PCIe master */ - if (!wil->keep_radio_on_during_sleep) + if (!keep_radio_on) /* disable bus mastering */ pci_clear_master(pdev); } @@ -420,20 +423,23 @@ static int wil6210_resume(struct device *dev, bool is_runtime) int rc = 0; struct pci_dev *pdev = to_pci_dev(dev); struct wil6210_priv *wil = pci_get_drvdata(pdev); + struct net_device *ndev = wil_to_ndev(wil); + bool keep_radio_on = ndev->flags & IFF_UP && + wil->keep_radio_on_during_sleep; wil_dbg_pm(wil, "resume: %s\n", is_runtime ? 
"runtime" : "system"); - /* If platform device supports keep_radio_on_during_sleep it will - * control PCIe master + /* In case radio stays on, platform device will control + * PCIe master */ - if (!wil->keep_radio_on_during_sleep) + if (!keep_radio_on) /* allow master */ pci_set_master(pdev); - rc = wil_resume(wil, is_runtime); + rc = wil_resume(wil, is_runtime, keep_radio_on); if (rc) { wil_err(wil, "device failed to resume (%d)\n", rc); wil->suspend_stats.failed_resumes++; - if (!wil->keep_radio_on_during_sleep) + if (!keep_radio_on) pci_clear_master(pdev); } else { wil->suspend_stats.successful_resumes++; diff --git a/drivers/net/wireless/ath/wil6210/pm.c b/drivers/net/wireless/ath/wil6210/pm.c index 8f5d1b447aaa..8378742ecd49 100644 --- a/drivers/net/wireless/ath/wil6210/pm.c +++ b/drivers/net/wireless/ath/wil6210/pm.c @@ -279,12 +279,9 @@ static int wil_resume_radio_off(struct wil6210_priv *wil) return rc; } -int wil_suspend(struct wil6210_priv *wil, bool is_runtime) +int wil_suspend(struct wil6210_priv *wil, bool is_runtime, bool keep_radio_on) { int rc = 0; - struct net_device *ndev = wil_to_ndev(wil); - bool keep_radio_on = ndev->flags & IFF_UP && - wil->keep_radio_on_during_sleep; wil_dbg_pm(wil, "suspend: %s\n", is_runtime ? "runtime" : "system"); @@ -307,12 +304,9 @@ int wil_suspend(struct wil6210_priv *wil, bool is_runtime) return rc; } -int wil_resume(struct wil6210_priv *wil, bool is_runtime) +int wil_resume(struct wil6210_priv *wil, bool is_runtime, bool keep_radio_on) { int rc = 0; - struct net_device *ndev = wil_to_ndev(wil); - bool keep_radio_on = ndev->flags & IFF_UP && - wil->keep_radio_on_during_sleep; unsigned long long suspend_time_usec = 0; wil_dbg_pm(wil, "resume: %s\n", is_runtime ? 
"runtime" : "system"); diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index 315ec8b59662..c5b6b783100a 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -1000,8 +1000,8 @@ int wil_request_firmware(struct wil6210_priv *wil, const char *name, bool wil_fw_verify_file_exists(struct wil6210_priv *wil, const char *name); int wil_can_suspend(struct wil6210_priv *wil, bool is_runtime); -int wil_suspend(struct wil6210_priv *wil, bool is_runtime); -int wil_resume(struct wil6210_priv *wil, bool is_runtime); +int wil_suspend(struct wil6210_priv *wil, bool is_runtime, bool keep_radio_on); +int wil_resume(struct wil6210_priv *wil, bool is_runtime, bool keep_radio_on); bool wil_is_wmi_idle(struct wil6210_priv *wil); int wmi_resume(struct wil6210_priv *wil); int wmi_suspend(struct wil6210_priv *wil); -- GitLab From f786a17bbe27d447cfc5fed0aefcb3e05a3f0aaf Mon Sep 17 00:00:00 2001 From: Lior David Date: Tue, 21 Apr 2020 13:40:12 +0100 Subject: [PATCH 1040/1278] wil6210: add block size checks during FW load [ Upstream commit 705d2fde94b23cd76efbeedde643ffa7c32fac7f ] When loading FW from file add block size checks to ensure a corrupted FW file will not cause the driver to write outside the device memory. 
Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/wil6210/fw_inc.c | 58 ++++++++++++++-------- drivers/net/wireless/ath/wil6210/wil6210.h | 1 + drivers/net/wireless/ath/wil6210/wmi.c | 11 +++- 3 files changed, 49 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/fw_inc.c b/drivers/net/wireless/ath/wil6210/fw_inc.c index e01acac88825..7d090150187c 100644 --- a/drivers/net/wireless/ath/wil6210/fw_inc.c +++ b/drivers/net/wireless/ath/wil6210/fw_inc.c @@ -26,14 +26,17 @@ prefix_type, rowsize, \ groupsize, buf, len, ascii) -#define FW_ADDR_CHECK(ioaddr, val, msg) do { \ - ioaddr = wmi_buffer(wil, val); \ - if (!ioaddr) { \ - wil_err_fw(wil, "bad " msg ": 0x%08x\n", \ - le32_to_cpu(val)); \ - return -EINVAL; \ - } \ - } while (0) +static bool wil_fw_addr_check(struct wil6210_priv *wil, + void __iomem **ioaddr, __le32 val, + u32 size, const char *msg) +{ + *ioaddr = wmi_buffer_block(wil, val, size); + if (!(*ioaddr)) { + wil_err_fw(wil, "bad %s: 0x%08x\n", msg, le32_to_cpu(val)); + return false; + } + return true; +} /** * wil_fw_verify - verify firmware file validity @@ -165,7 +168,8 @@ static int fw_handle_data(struct wil6210_priv *wil, const void *data, return -EINVAL; } - FW_ADDR_CHECK(dst, d->addr, "address"); + if (!wil_fw_addr_check(wil, &dst, d->addr, s, "address")) + return -EINVAL; wil_dbg_fw(wil, "write [0x%08x] <== %zu bytes\n", le32_to_cpu(d->addr), s); wil_memcpy_toio_32(dst, d->data, s); @@ -197,7 +201,8 @@ static int fw_handle_fill(struct wil6210_priv *wil, const void *data, return -EINVAL; } - FW_ADDR_CHECK(dst, d->addr, "address"); + if (!wil_fw_addr_check(wil, &dst, d->addr, s, "address")) + return -EINVAL; v = le32_to_cpu(d->value); wil_dbg_fw(wil, "fill [0x%08x] <== 0x%08x, %zu bytes\n", @@ -253,7 +258,8 @@ static int fw_handle_direct_write(struct wil6210_priv *wil, const void *data, u32 v = 
le32_to_cpu(block[i].value); u32 x, y; - FW_ADDR_CHECK(dst, block[i].addr, "address"); + if (!wil_fw_addr_check(wil, &dst, block[i].addr, 0, "address")) + return -EINVAL; x = readl(dst); y = (x & m) | (v & ~m); @@ -319,10 +325,15 @@ static int fw_handle_gateway_data(struct wil6210_priv *wil, const void *data, wil_dbg_fw(wil, "gw write record [%3d] blocks, cmd 0x%08x\n", n, gw_cmd); - FW_ADDR_CHECK(gwa_addr, d->gateway_addr_addr, "gateway_addr_addr"); - FW_ADDR_CHECK(gwa_val, d->gateway_value_addr, "gateway_value_addr"); - FW_ADDR_CHECK(gwa_cmd, d->gateway_cmd_addr, "gateway_cmd_addr"); - FW_ADDR_CHECK(gwa_ctl, d->gateway_ctrl_address, "gateway_ctrl_address"); + if (!wil_fw_addr_check(wil, &gwa_addr, d->gateway_addr_addr, 0, + "gateway_addr_addr") || + !wil_fw_addr_check(wil, &gwa_val, d->gateway_value_addr, 0, + "gateway_value_addr") || + !wil_fw_addr_check(wil, &gwa_cmd, d->gateway_cmd_addr, 0, + "gateway_cmd_addr") || + !wil_fw_addr_check(wil, &gwa_ctl, d->gateway_ctrl_address, 0, + "gateway_ctrl_address")) + return -EINVAL; wil_dbg_fw(wil, "gw addresses: addr 0x%08x val 0x%08x" " cmd 0x%08x ctl 0x%08x\n", @@ -378,12 +389,19 @@ static int fw_handle_gateway_data4(struct wil6210_priv *wil, const void *data, wil_dbg_fw(wil, "gw4 write record [%3d] blocks, cmd 0x%08x\n", n, gw_cmd); - FW_ADDR_CHECK(gwa_addr, d->gateway_addr_addr, "gateway_addr_addr"); + if (!wil_fw_addr_check(wil, &gwa_addr, d->gateway_addr_addr, 0, + "gateway_addr_addr")) + return -EINVAL; for (k = 0; k < ARRAY_SIZE(block->value); k++) - FW_ADDR_CHECK(gwa_val[k], d->gateway_value_addr[k], - "gateway_value_addr"); - FW_ADDR_CHECK(gwa_cmd, d->gateway_cmd_addr, "gateway_cmd_addr"); - FW_ADDR_CHECK(gwa_ctl, d->gateway_ctrl_address, "gateway_ctrl_address"); + if (!wil_fw_addr_check(wil, &gwa_val[k], + d->gateway_value_addr[k], + 0, "gateway_value_addr")) + return -EINVAL; + if (!wil_fw_addr_check(wil, &gwa_cmd, d->gateway_cmd_addr, 0, + "gateway_cmd_addr") || + !wil_fw_addr_check(wil, &gwa_ctl, 
d->gateway_ctrl_address, 0, + "gateway_ctrl_address")) + return -EINVAL; wil_dbg_fw(wil, "gw4 addresses: addr 0x%08x cmd 0x%08x ctl 0x%08x\n", le32_to_cpu(d->gateway_addr_addr), diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index c5b6b783100a..0bfd51adcc81 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -865,6 +865,7 @@ void wil_mbox_ring_le2cpus(struct wil6210_mbox_ring *r); int wil_find_cid(struct wil6210_priv *wil, const u8 *mac); void wil_set_ethtoolops(struct net_device *ndev); +void __iomem *wmi_buffer_block(struct wil6210_priv *wil, __le32 ptr, u32 size); void __iomem *wmi_buffer(struct wil6210_priv *wil, __le32 ptr); void __iomem *wmi_addr(struct wil6210_priv *wil, u32 ptr); int wmi_read_hdr(struct wil6210_priv *wil, __le32 ptr, diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 798516f42f2f..6cfb820caa3e 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -140,13 +140,15 @@ static u32 wmi_addr_remap(u32 x) /** * Check address validity for WMI buffer; remap if needed * @ptr - internal (linker) fw/ucode address + * @size - if non zero, validate the block does not + * exceed the device memory (bar) * * Valid buffer should be DWORD aligned * * return address for accessing buffer from the host; * if buffer is not valid, return NULL. 
*/ -void __iomem *wmi_buffer(struct wil6210_priv *wil, __le32 ptr_) +void __iomem *wmi_buffer_block(struct wil6210_priv *wil, __le32 ptr_, u32 size) { u32 off; u32 ptr = le32_to_cpu(ptr_); @@ -161,10 +163,17 @@ void __iomem *wmi_buffer(struct wil6210_priv *wil, __le32 ptr_) off = HOSTADDR(ptr); if (off > wil->bar_size - 4) return NULL; + if (size && ((off + size > wil->bar_size) || (off + size < off))) + return NULL; return wil->csr + off; } +void __iomem *wmi_buffer(struct wil6210_priv *wil, __le32 ptr_) +{ + return wmi_buffer_block(wil, ptr_, 0); +} + /** * Check address validity */ -- GitLab From 1881ed10abc22cfbb4c8e82d03cacf5ba8249862 Mon Sep 17 00:00:00 2001 From: Lior David Date: Tue, 21 Apr 2020 13:40:13 +0100 Subject: [PATCH 1041/1278] wil6210: fix length check in __wmi_send [ Upstream commit 26a6d5274865532502c682ff378ac8ebe2886238 ] The current length check: sizeof(cmd) + len > r->entry_size will allow very large values of len (> U16_MAX - sizeof(cmd)) and can cause a buffer overflow. Fix the check to cover this case. In addition, ensure the mailbox entry_size is not too small, since this can also bypass the above check. 
Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/wil6210/interrupt.c | 22 +++++++++++++++++++- drivers/net/wireless/ath/wil6210/wmi.c | 2 +- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c index 59def4f3fcf3..5cf341702dc1 100644 --- a/drivers/net/wireless/ath/wil6210/interrupt.c +++ b/drivers/net/wireless/ath/wil6210/interrupt.c @@ -358,6 +358,25 @@ static void wil_cache_mbox_regs(struct wil6210_priv *wil) wil_mbox_ring_le2cpus(&wil->mbox_ctl.tx); } +static bool wil_validate_mbox_regs(struct wil6210_priv *wil) +{ + size_t min_size = sizeof(struct wil6210_mbox_hdr) + + sizeof(struct wmi_cmd_hdr); + + if (wil->mbox_ctl.rx.entry_size < min_size) { + wil_err(wil, "rx mbox entry too small (%d)\n", + wil->mbox_ctl.rx.entry_size); + return false; + } + if (wil->mbox_ctl.tx.entry_size < min_size) { + wil_err(wil, "tx mbox entry too small (%d)\n", + wil->mbox_ctl.tx.entry_size); + return false; + } + + return true; +} + static irqreturn_t wil6210_irq_misc(int irq, void *cookie) { struct wil6210_priv *wil = cookie; @@ -393,7 +412,8 @@ static irqreturn_t wil6210_irq_misc(int irq, void *cookie) if (isr & ISR_MISC_FW_READY) { wil_dbg_irq(wil, "IRQ: FW ready\n"); wil_cache_mbox_regs(wil); - set_bit(wil_status_mbox_ready, wil->status); + if (wil_validate_mbox_regs(wil)) + set_bit(wil_status_mbox_ready, wil->status); /** * Actual FW ready indicated by the * WMI_FW_READY_EVENTID diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 6cfb820caa3e..22bfa10ea827 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -231,7 +231,7 @@ static int __wmi_send(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len) uint retry; int rc = 0; - if (sizeof(cmd) + len > r->entry_size) { + 
if (len > r->entry_size - sizeof(cmd)) { wil_err(wil, "WMI size too large: %d bytes, max is %d\n", (int)(sizeof(cmd) + len), r->entry_size); return -ERANGE; -- GitLab From cf4857e107acf1a9ff46084743248c28f150038f Mon Sep 17 00:00:00 2001 From: Hamad Kadmany Date: Tue, 21 Apr 2020 13:40:14 +0100 Subject: [PATCH 1042/1278] wil6210: abort properly in cfg suspend [ Upstream commit 144a12a6d83f3ca34ddefce5dee4d502afd2fc5b ] On-going operations were not aborted properly and required locks were not taken. Signed-off-by: Hamad Kadmany Signed-off-by: Maya Erez Signed-off-by: Kalle Valo Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/wil6210/cfg80211.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index c374ed311520..58784e77e215 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -1735,9 +1735,12 @@ static int wil_cfg80211_suspend(struct wiphy *wiphy, wil_dbg_pm(wil, "suspending\n"); - wil_p2p_stop_discovery(wil); - + mutex_lock(&wil->mutex); + mutex_lock(&wil->p2p_wdev_mutex); + wil_p2p_stop_radio_operations(wil); wil_abort_scan(wil, true); + mutex_unlock(&wil->p2p_wdev_mutex); + mutex_unlock(&wil->mutex); out: return rc; -- GitLab From ef024894687611d7180fd8cbe6474583731f47d2 Mon Sep 17 00:00:00 2001 From: Chris Lew Date: Tue, 21 Apr 2020 13:40:15 +0100 Subject: [PATCH 1043/1278] soc: qcom: smem: Use le32_to_cpu for comparison [ Upstream commit a216000f0140f415cec96129f777b5234c9d142f ] Endianness can vary in the system, add le32_to_cpu when comparing partition sizes from smem. 
Signed-off-by: Chris Lew Acked-by: Bjorn Andersson Signed-off-by: Andy Gross Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/soc/qcom/smem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/qcom/smem.c b/drivers/soc/qcom/smem.c index 18ec52f2078a..89dd50fa404f 100644 --- a/drivers/soc/qcom/smem.c +++ b/drivers/soc/qcom/smem.c @@ -646,7 +646,7 @@ static int qcom_smem_enumerate_partitions(struct qcom_smem *smem, return -EINVAL; } - if (header->size != entry->size) { + if (le32_to_cpu(header->size) != le32_to_cpu(entry->size)) { dev_err(smem->dev, "Partition %d has invalid size\n", i); return -EINVAL; -- GitLab From c6c4c74a0fe1bbe0141311f1bcec9e175897fd99 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 21 Apr 2020 13:40:16 +0100 Subject: [PATCH 1044/1278] of: fix missing kobject init for !SYSFS && OF_DYNAMIC config [ Upstream commit bd82bbf38cbe27f2c65660da801900d71bcc5cc8 ] The ref counting is broken for OF_DYNAMIC when sysfs is disabled because the kobject initialization is skipped. Only the properties add/remove/update should be skipped for !SYSFS config. 
Tested-by: Nicolas Pitre Reviewed-by: Frank Rowand Acked-by: Grant Likely Signed-off-by: Rob Herring Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/of/base.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 41b254be0295..c0281be8e061 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -169,9 +169,6 @@ int __of_attach_node_sysfs(struct device_node *np) struct property *pp; int rc; - if (!IS_ENABLED(CONFIG_SYSFS)) - return 0; - if (!of_kset) return 0; -- GitLab From fce4bd5793775570e461fc5d2da9b47e12a62c74 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 13 Mar 2020 11:20:51 +0100 Subject: [PATCH 1045/1278] rbd: avoid a deadlock on header_rwsem when flushing notifies [ Upstream commit 0e4e1de5b63fa423b13593337a27fd2d2b0bcf77 ] rbd_unregister_watch() flushes notifies and therefore cannot be called under header_rwsem because a header update notify takes header_rwsem to synchronize with "rbd map". If mapping an image fails after the watch is established and a header update notify sneaks in, we deadlock when erroring out from rbd_dev_image_probe(). Move watch registration and unregistration out of the critical section. The only reason they were put there was to make header_rwsem management slightly more obvious. Fixes: 811c66887746 ("rbd: fix rbd map vs notify races") Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Signed-off-by: Sasha Levin --- drivers/block/rbd.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f2b1994d58a0..fb1b9b8946f0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3847,6 +3847,10 @@ static void cancel_tasks_sync(struct rbd_device *rbd_dev) cancel_work_sync(&rbd_dev->unlock_work); } +/* + * header_rwsem must not be held to avoid a deadlock with + * rbd_dev_refresh() when flushing notifies. 
+ */ static void rbd_unregister_watch(struct rbd_device *rbd_dev) { WARN_ON(waitqueue_active(&rbd_dev->lock_waitq)); @@ -6057,6 +6061,9 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev) * device. If this image is the one being mapped (i.e., not a * parent), initiate a watch on its header object before using that * object to get detailed information about the rbd image. + * + * On success, returns with header_rwsem held for write if called + * with @depth == 0. */ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) { @@ -6087,6 +6094,9 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) } } + if (!depth) + down_write(&rbd_dev->header_rwsem); + ret = rbd_dev_header_info(rbd_dev); if (ret) goto err_out_watch; @@ -6135,6 +6145,8 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) err_out_probe: rbd_dev_unprobe(rbd_dev); err_out_watch: + if (!depth) + up_write(&rbd_dev->header_rwsem); if (!depth) rbd_unregister_watch(rbd_dev); err_out_format: @@ -6194,12 +6206,9 @@ static ssize_t do_rbd_add(struct bus_type *bus, goto err_out_rbd_dev; } - down_write(&rbd_dev->header_rwsem); rc = rbd_dev_image_probe(rbd_dev, 0); - if (rc < 0) { - up_write(&rbd_dev->header_rwsem); + if (rc < 0) goto err_out_rbd_dev; - } /* If we are mapping a snapshot it must be marked read-only */ -- GitLab From f463b1273df7624f9a8a5d4de53e42207061b90c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 Mar 2020 15:52:54 +0100 Subject: [PATCH 1046/1278] rbd: call rbd_dev_unprobe() after unwatching and flushing notifies [ Upstream commit 952c48b0ed18919bff7528501e9a3fff8a24f8cd ] rbd_dev_unprobe() is supposed to undo most of rbd_dev_image_probe(), including rbd_dev_header_info(), which means that rbd_dev_header_info() isn't supposed to be called after rbd_dev_unprobe(). However, rbd_dev_image_release() calls rbd_dev_unprobe() before rbd_unregister_watch(). 
This is racy because a header update notify can sneak in: "rbd unmap" thread ceph-watch-notify worker rbd_dev_image_release() rbd_dev_unprobe() free and zero out header rbd_watch_cb() rbd_dev_refresh() rbd_dev_header_info() read in header The same goes for "rbd map" because rbd_dev_image_probe() calls rbd_dev_unprobe() on errors. In both cases this results in a memory leak. Fixes: fd22aef8b47c ("rbd: move rbd_unregister_watch() call into rbd_dev_image_release()") Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Signed-off-by: Sasha Levin --- drivers/block/rbd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index fb1b9b8946f0..557cf52f674b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -6048,9 +6048,10 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev) static void rbd_dev_image_release(struct rbd_device *rbd_dev) { - rbd_dev_unprobe(rbd_dev); if (rbd_dev->opts) rbd_unregister_watch(rbd_dev); + + rbd_dev_unprobe(rbd_dev); rbd_dev->image_format = 0; kfree(rbd_dev->spec->image_id); rbd_dev->spec->image_id = NULL; @@ -6099,7 +6100,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) ret = rbd_dev_header_info(rbd_dev); if (ret) - goto err_out_watch; + goto err_out_probe; /* * If this image is the one being mapped, we have pool name and @@ -6143,12 +6144,11 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) return 0; err_out_probe: - rbd_dev_unprobe(rbd_dev); -err_out_watch: if (!depth) up_write(&rbd_dev->header_rwsem); if (!depth) rbd_unregister_watch(rbd_dev); + rbd_dev_unprobe(rbd_dev); err_out_format: rbd_dev->image_format = 0; kfree(rbd_dev->spec->image_id); -- GitLab From b712f00a98c10b6bca4f89cbe5cd04189b1dae7a Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Thu, 16 Apr 2020 16:42:47 -0500 Subject: [PATCH 1047/1278] of: unittest: kmemleak in of_unittest_platform_populate() [ Upstream commit 
216830d2413cc61be3f76bc02ffd905e47d2439e ] kmemleak reports several memory leaks from devicetree unittest. This is the fix for problem 2 of 5. of_unittest_platform_populate() left an elevated reference count for grandchild nodes (which are platform devices). Fix the platform device reference counts so that the memory will be freed. Fixes: fb2caa50fbac ("of/selftest: add testcase for nodes with same name and address") Reported-by: Erhard F. Signed-off-by: Frank Rowand Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/unittest.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 4bf6a9db6ac0..55c98f119df2 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -887,10 +887,13 @@ static void __init of_unittest_platform_populate(void) of_platform_populate(np, match, NULL, &test_bus->dev); for_each_child_of_node(np, child) { - for_each_child_of_node(child, grandchild) - unittest(of_find_device_by_node(grandchild), + for_each_child_of_node(child, grandchild) { + pdev = of_find_device_by_node(grandchild); + unittest(pdev, "Could not create device for node '%s'\n", grandchild->name); + of_dev_put(pdev); + } } of_platform_depopulate(&test_bus->dev); -- GitLab From eac4b10ec13112acc34bee8e02a8071857497a5b Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 17 Jan 2020 13:36:46 +0200 Subject: [PATCH 1048/1278] clk: at91: usb: continue if clk_hw_round_rate() return zero [ Upstream commit b0ecf1c6c6e82da4847900fad0272abfd014666d ] clk_hw_round_rate() may call round rate function of its parents. In case of SAM9X60 two of USB parents are PLLA and UPLL. These clocks are controlled by clk-sam9x60-pll.c driver. The round rate function for this driver is sam9x60_pll_round_rate() which in turn calls sam9x60_pll_get_best_div_mul().
In case the requested rate is not in the proper range (rate < characteristics->output[0].min || rate > characteristics->output[0].max) the sam9x60_pll_round_rate() will return a negative number to its caller (called by clk_core_round_rate_nolock()). clk_hw_round_rate() will return zero in case a negative number is returned by clk_core_round_rate_nolock(). With this, the USB clock will continue its rate computation even caller of clk_hw_round_rate() returned an error. With this, the USB clock on SAM9X60 may not choose the best parent. I detected this after a suspend/resume cycle on SAM9X60. Signed-off-by: Claudiu Beznea Link: https://lkml.kernel.org/r/1579261009-4573-2-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/at91/clk-usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/at91/clk-usb.c b/drivers/clk/at91/clk-usb.c index 791770a563fc..6fac6383d024 100644 --- a/drivers/clk/at91/clk-usb.c +++ b/drivers/clk/at91/clk-usb.c @@ -78,6 +78,9 @@ static int at91sam9x5_clk_usb_determine_rate(struct clk_hw *hw, tmp_parent_rate = req->rate * div; tmp_parent_rate = clk_hw_round_rate(parent, tmp_parent_rate); + if (!tmp_parent_rate) + continue; + tmp_rate = DIV_ROUND_CLOSEST(tmp_parent_rate, div); if (tmp_rate < req->rate) tmp_diff = req->rate - tmp_rate; -- GitLab From c73c3e1d04e77daf7f89397b8f68f6c7b548aaf0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 9 Mar 2020 00:51:43 +0300 Subject: [PATCH 1049/1278] power: supply: bq27xxx_battery: Silence deferred-probe error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 583b53ece0b0268c542a1eafadb62e3d4b0aab8c ] The driver fails to probe with -EPROBE_DEFER if battery's power supply (charger driver) isn't ready yet and this results in a bit noisy error message in KMSG during kernel's boot up. Let's silence the harmless error message.
Signed-off-by: Dmitry Osipenko Reviewed-by: Andrew F. Davis Reviewed-by: Pali Rohár Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/bq27xxx_battery.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index 51f0961ecf3e..a7d8cadf172c 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -1842,7 +1842,10 @@ int bq27xxx_battery_setup(struct bq27xxx_device_info *di) di->bat = power_supply_register_no_ws(di->dev, psy_desc, &psy_cfg); if (IS_ERR(di->bat)) { - dev_err(di->dev, "failed to register battery\n"); + if (PTR_ERR(di->bat) == -EPROBE_DEFER) + dev_dbg(di->dev, "failed to register battery, deferring probe\n"); + else + dev_err(di->dev, "failed to register battery\n"); return PTR_ERR(di->bat); } -- GitLab From 1fec728da800320099a7c6fd10e5e766379b043c Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 13 Jan 2020 23:24:09 -0800 Subject: [PATCH 1050/1278] clk: tegra: Fix Tegra PMC clock out parents [ Upstream commit 6fe38aa8cac3a5db38154331742835a4d9740788 ] Tegra PMC clocks clk_out_1, clk_out_2, and clk_out_3 supported parents are osc, osc_div2, osc_div4 and extern clock. Clock driver is using incorrect parents clk_m, clk_m_div2, clk_m_div4 for PMC clocks. This patch fixes this. 
Tested-by: Dmitry Osipenko Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/clk/tegra/clk-tegra-pmc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/clk/tegra/clk-tegra-pmc.c b/drivers/clk/tegra/clk-tegra-pmc.c index a35579a3f884..476dab494c44 100644 --- a/drivers/clk/tegra/clk-tegra-pmc.c +++ b/drivers/clk/tegra/clk-tegra-pmc.c @@ -60,16 +60,16 @@ struct pmc_clk_init_data { static DEFINE_SPINLOCK(clk_out_lock); -static const char *clk_out1_parents[] = { "clk_m", "clk_m_div2", - "clk_m_div4", "extern1", +static const char *clk_out1_parents[] = { "osc", "osc_div2", + "osc_div4", "extern1", }; -static const char *clk_out2_parents[] = { "clk_m", "clk_m_div2", - "clk_m_div4", "extern2", +static const char *clk_out2_parents[] = { "osc", "osc_div2", + "osc_div4", "extern2", }; -static const char *clk_out3_parents[] = { "clk_m", "clk_m_div2", - "clk_m_div4", "extern3", +static const char *clk_out3_parents[] = { "osc", "osc_div2", + "osc_div4", "extern3", }; static struct pmc_clk_init_data pmc_clks[] = { -- GitLab From 617dcdd3d27d0f5059bafe88d44750b13efc4e5c Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 13 Mar 2020 11:09:12 +0100 Subject: [PATCH 1051/1278] soc: imx: gpc: fix power up sequencing [ Upstream commit e0ea2d11f8a08ba7066ff897e16c5217215d1e68 ] Currently we wait only until the PGC inverts the isolation setting before disabling the peripheral clocks. This doesn't ensure that the reset is properly propagated through the peripheral devices in the power domain. Wait until the PGC signals that the power up request is done and wait a bit for resets to propagate before disabling the clocks. 
Signed-off-by: Lucas Stach Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- drivers/soc/imx/gpc.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c index 3a12123de466..0e083fe8b893 100644 --- a/drivers/soc/imx/gpc.c +++ b/drivers/soc/imx/gpc.c @@ -97,8 +97,8 @@ static int imx6_pm_domain_power_off(struct generic_pm_domain *genpd) static int imx6_pm_domain_power_on(struct generic_pm_domain *genpd) { struct imx_pm_domain *pd = to_imx_pm_domain(genpd); - int i, ret, sw, sw2iso; - u32 val; + int i, ret; + u32 val, req; if (pd->supply) { ret = regulator_enable(pd->supply); @@ -117,17 +117,18 @@ static int imx6_pm_domain_power_on(struct generic_pm_domain *genpd) regmap_update_bits(pd->regmap, pd->reg_offs + GPC_PGC_CTRL_OFFS, 0x1, 0x1); - /* Read ISO and ISO2SW power up delays */ - regmap_read(pd->regmap, pd->reg_offs + GPC_PGC_PUPSCR_OFFS, &val); - sw = val & 0x3f; - sw2iso = (val >> 8) & 0x3f; - /* Request GPC to power up domain */ - val = BIT(pd->cntr_pdn_bit + 1); - regmap_update_bits(pd->regmap, GPC_CNTR, val, val); + req = BIT(pd->cntr_pdn_bit + 1); + regmap_update_bits(pd->regmap, GPC_CNTR, req, req); - /* Wait ISO + ISO2SW IPG clock cycles */ - udelay(DIV_ROUND_UP(sw + sw2iso, pd->ipg_rate_mhz)); + /* Wait for the PGC to handle the request */ + ret = regmap_read_poll_timeout(pd->regmap, GPC_CNTR, val, !(val & req), + 1, 50); + if (ret) + pr_err("powerup request on domain %s timed out\n", genpd->name); + + /* Wait for reset to propagate through peripherals */ + usleep_range(5, 10); /* Disable reset clocks for all devices in the domain */ for (i = 0; i < pd->num_clks; i++) @@ -329,6 +330,7 @@ static const struct regmap_config imx_gpc_regmap_config = { .rd_table = &access_table, .wr_table = &access_table, .max_register = 0x2ac, + .fast_io = true, }; static struct generic_pm_domain *imx_gpc_onecell_domains[] = { -- GitLab From 3eecbde9cbe23a1943feb935b82ba7f8fbb6a5a7 Mon 
Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 11 Mar 2020 23:39:51 +0100 Subject: [PATCH 1052/1278] rtc: 88pm860x: fix possible race condition [ Upstream commit 9cf4789e6e4673d0b2c96fa6bb0c35e81b43111a ] The RTC IRQ is requested before the struct rtc_device is allocated, this may lead to a NULL pointer dereference in the IRQ handler. To fix this issue, allocating the rtc_device struct before requesting the RTC IRQ using devm_rtc_allocate_device, and use rtc_register_device to register the RTC device. Also remove the unnecessary error message as the core already prints the info. Link: https://lore.kernel.org/r/20200311223956.51352-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-88pm860x.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c index 7d3e5168fcef..efbbde7379f1 100644 --- a/drivers/rtc/rtc-88pm860x.c +++ b/drivers/rtc/rtc-88pm860x.c @@ -341,6 +341,10 @@ static int pm860x_rtc_probe(struct platform_device *pdev) info->dev = &pdev->dev; dev_set_drvdata(&pdev->dev, info); + info->rtc_dev = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(info->rtc_dev)) + return PTR_ERR(info->rtc_dev); + ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL, rtc_update_handler, IRQF_ONESHOT, "rtc", info); @@ -382,13 +386,11 @@ static int pm860x_rtc_probe(struct platform_device *pdev) } } - info->rtc_dev = devm_rtc_device_register(&pdev->dev, "88pm860x-rtc", - &pm860x_rtc_ops, THIS_MODULE); - ret = PTR_ERR(info->rtc_dev); - if (IS_ERR(info->rtc_dev)) { - dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret); + info->rtc_dev->ops = &pm860x_rtc_ops; + + ret = rtc_register_device(info->rtc_dev); + if (ret) return ret; - } /* * enable internal XO instead of internal 3.25MHz clock since it can -- GitLab From 0fd0b83ac7bc47678663a79c67394d558cc598f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 
Feb 2020 11:01:12 -0500 Subject: [PATCH 1053/1278] NFSv4/pnfs: Return valid stateids in nfs_layout_find_inode_by_stateid() [ Upstream commit d911c57a19551c6bef116a3b55c6b089901aacb0 ] Make sure to test the stateid for validity so that we catch instances where the server may have been reusing stateids in nfs_layout_find_inode_by_stateid(). Fixes: 7b410d9ce460 ("pNFS: Delay getting the layout header in CB_LAYOUTRECALL handlers") Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/callback_proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index b8d55da2f04d..440ff8e7082b 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -127,6 +127,8 @@ static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp, restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (!pnfs_layout_is_valid(lo)) + continue; if (stateid != NULL && !nfs4_stateid_match_other(stateid, &lo->plh_stateid)) continue; -- GitLab From da186a08b3d1165ea482dcbe4ba5343c46ca20a8 Mon Sep 17 00:00:00 2001 From: Misono Tomohiro Date: Wed, 28 Aug 2019 17:01:22 +0900 Subject: [PATCH 1054/1278] NFS: direct.c: Fix memory leak of dreq when nfs_get_lock_context fails [ Upstream commit 8605cf0e852af3b2c771c18417499dc4ceed03d5 ] When dreq is allocated by nfs_direct_req_alloc(), dreq->kref is initialized to 2. Therefore we need to call nfs_direct_req_release() twice to release the allocated dreq. Usually it is called in nfs_file_direct_{read, write}() and nfs_direct_complete(). However, current code only calls nfs_direct_req_release() once if nfs_get_lock_context() fails in nfs_file_direct_{read, write}(). So, that case would result in memory leak. Fix this by adding the missing call.
Signed-off-by: Misono Tomohiro Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/direct.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9d07b53e1647..e6ea4511c41c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -600,6 +600,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { result = PTR_ERR(l_ctx); + nfs_direct_req_release(dreq); goto out_release; } dreq->l_ctx = l_ctx; @@ -1023,6 +1024,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { result = PTR_ERR(l_ctx); + nfs_direct_req_release(dreq); goto out_release; } dreq->l_ctx = l_ctx; -- GitLab From 77ca252e4cfbd21a46ee930b65cffa1ac265078b Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 16 Mar 2020 12:39:55 +0100 Subject: [PATCH 1055/1278] s390/cpuinfo: fix wrong output when CPU0 is offline [ Upstream commit 872f27103874a73783aeff2aac2b41a489f67d7c ] /proc/cpuinfo should not print information about CPU 0 when it is offline. 
Fixes: 281eaa8cb67c ("s390/cpuinfo: simplify locking and skip offline cpus early") Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens [heiko.carstens@de.ibm.com: shortened commit message] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/processor.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 6fe2e1875058..675d4be0c2b7 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -157,8 +157,9 @@ static void show_cpu_mhz(struct seq_file *m, unsigned long n) static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long n = (unsigned long) v - 1; + unsigned long first = cpumask_first(cpu_online_mask); - if (!n) + if (n == first) show_cpu_summary(m, v); if (!machine_has_cpu_mhz) return 0; @@ -171,6 +172,8 @@ static inline void *c_update(loff_t *pos) { if (*pos) *pos = cpumask_next(*pos - 1, cpu_online_mask); + else + *pos = cpumask_first(cpu_online_mask); return *pos < nr_cpu_ids ? (void *)*pos + 1 : NULL; } -- GitLab From 66385f1113b9ecf282c625b79c26dd513ab36988 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 23 Mar 2020 15:27:29 -0700 Subject: [PATCH 1056/1278] powerpc/maple: Fix declaration made after definition [ Upstream commit af6cf95c4d003fccd6c2ecc99a598fb854b537e7 ] When building ppc64 defconfig, Clang errors (trimmed for brevity): arch/powerpc/platforms/maple/setup.c:365:1: error: attribute declaration must precede definition [-Werror,-Wignored-attributes] machine_device_initcall(maple, maple_cpc925_edac_setup); ^ machine_device_initcall expands to __define_machine_initcall, which in turn has the macro machine_is used in it, which declares mach_##name with an __attribute__((weak)). define_machine actually defines mach_##name, which in this file happens before the declaration, hence the warning. 
To fix this, move define_machine after machine_device_initcall so that the declaration occurs before the definition, which matches how machine_device_initcall and define_machine work throughout arch/powerpc. While we're here, remove some spaces before tabs. Fixes: 8f101a051ef0 ("edac: cpc925 MC platform device setup") Reported-by: Nick Desaulniers Suggested-by: Ilie Halip Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200323222729.15365-1-natechancellor@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/maple/setup.c | 34 ++++++++++++++-------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index b7f937563827..d1fee2d35b49 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -299,23 +299,6 @@ static int __init maple_probe(void) return 1; } -define_machine(maple) { - .name = "Maple", - .probe = maple_probe, - .setup_arch = maple_setup_arch, - .init_IRQ = maple_init_IRQ, - .pci_irq_fixup = maple_pci_irq_fixup, - .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, - .restart = maple_restart, - .halt = maple_halt, - .get_boot_time = maple_get_boot_time, - .set_rtc_time = maple_set_rtc_time, - .get_rtc_time = maple_get_rtc_time, - .calibrate_decr = generic_calibrate_decr, - .progress = maple_progress, - .power_save = power4_idle, -}; - #ifdef CONFIG_EDAC /* * Register a platform device for CPC925 memory controller on @@ -372,3 +355,20 @@ static int __init maple_cpc925_edac_setup(void) } machine_device_initcall(maple, maple_cpc925_edac_setup); #endif + +define_machine(maple) { + .name = "Maple", + .probe = maple_probe, + .setup_arch = maple_setup_arch, + .init_IRQ = maple_init_IRQ, + .pci_irq_fixup = maple_pci_irq_fixup, + .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, + .restart = maple_restart, + .halt = maple_halt, + .get_boot_time = 
maple_get_boot_time, + .set_rtc_time = maple_set_rtc_time, + .get_rtc_time = maple_get_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = maple_progress, + .power_save = power4_idle, +}; -- GitLab From f8c6f4104f44a885237e56b8d6a4d1ee0c417b76 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 18 Mar 2020 14:19:38 -0500 Subject: [PATCH 1057/1278] ext4: do not commit super on read-only bdev [ Upstream commit c96e2b8564adfb8ac14469ebc51ddc1bfecb3ae2 ] Under some circumstances we may encounter a filesystem error on a read-only block device, and if we try to save the error info to the superblock and commit it, we'll wind up with a noisy error and backtrace, i.e.: [ 3337.146838] EXT4-fs error (device pmem1p2): ext4_get_journal_inode:4634: comm mount: inode #0: comm mount: iget: illegal inode # ------------[ cut here ]------------ generic_make_request: Trying to write to read-only block-device pmem1p2 (partno 2) WARNING: CPU: 107 PID: 115347 at block/blk-core.c:788 generic_make_request_checks+0x6b4/0x7d0 ... To avoid this, commit the error info in the superblock only if the block device is writable. 
Reported-by: Ritesh Harjani Signed-off-by: Eric Sandeen Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/4b6e774d-cc00-3469-7abb-108eb151071a@sandeen.net Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 98e27432c859..0ced133a36ec 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -369,7 +369,8 @@ static void save_error_info(struct super_block *sb, const char *func, unsigned int line) { __save_error_info(sb, func, line); - ext4_commit_super(sb, 1); + if (!bdev_read_only(sb->s_bdev)) + ext4_commit_super(sb, 1); } /* -- GitLab From 17336dd3138049aaba7e152c7a65c6dc966e820c Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 6 Apr 2020 20:08:43 -0700 Subject: [PATCH 1058/1278] include/linux/swapops.h: correct guards for non_swap_entry() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3f3673d7d324d872d9d8ddb73b3e5e47fbf12e0d ] If CONFIG_DEVICE_PRIVATE is defined, but neither CONFIG_MEMORY_FAILURE nor CONFIG_MIGRATION, then non_swap_entry() will return 0, meaning that the condition (non_swap_entry(entry) && is_device_private_entry(entry)) in zap_pte_range() will never be true even if the entry is a device private one. Equally any other code depending on non_swap_entry() will not function as expected. I originally spotted this just by looking at the code, I haven't actually observed any problems. 
Looking a bit more closely it appears that actually this situation (currently at least) cannot occur: DEVICE_PRIVATE depends on ZONE_DEVICE ZONE_DEVICE depends on MEMORY_HOTREMOVE MEMORY_HOTREMOVE depends on MIGRATION Fixes: 5042db43cc26 ("mm/ZONE_DEVICE: new type of ZONE_DEVICE for unaddressable memory") Signed-off-by: Steven Price Signed-off-by: Andrew Morton Cc: Jérôme Glisse Cc: Arnd Bergmann Cc: Dan Williams Cc: John Hubbard Link: http://lkml.kernel.org/r/20200305130550.22693-1-steven.price@arm.com Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/swapops.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 1d3877c39a00..0b8c86096752 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -377,7 +377,8 @@ static inline void num_poisoned_pages_inc(void) } #endif -#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) +#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) || \ + defined(CONFIG_DEVICE_PRIVATE) static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES; -- GitLab From 6b9da6edc015cd5c858dac00aabc6e6fe33cf0fc Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Mon, 6 Apr 2020 20:10:25 -0700 Subject: [PATCH 1059/1278] percpu_counter: fix a data race at vm_committed_as [ Upstream commit 7e2345200262e4a6056580f0231cccdaffc825f3 ] "vm_committed_as.count" could be accessed concurrently as reported by KCSAN, BUG: KCSAN: data-race in __vm_enough_memory / percpu_counter_add_batch write to 0xffffffff9451c538 of 8 bytes by task 65879 on cpu 35: percpu_counter_add_batch+0x83/0xd0 percpu_counter_add_batch at lib/percpu_counter.c:91 __vm_enough_memory+0xb9/0x260 dup_mm+0x3a4/0x8f0 copy_process+0x2458/0x3240 _do_fork+0xaa/0x9f0 __do_sys_clone+0x125/0x160 __x64_sys_clone+0x70/0x90 do_syscall_64+0x91/0xb05 entry_SYSCALL_64_after_hwframe+0x49/0xbe read to 0xffffffff9451c538 of 8 bytes by task 66773 on cpu 
19: __vm_enough_memory+0x199/0x260 percpu_counter_read_positive at include/linux/percpu_counter.h:81 (inlined by) __vm_enough_memory at mm/util.c:839 mmap_region+0x1b2/0xa10 do_mmap+0x45c/0x700 vm_mmap_pgoff+0xc0/0x130 ksys_mmap_pgoff+0x6e/0x300 __x64_sys_mmap+0x33/0x40 do_syscall_64+0x91/0xb05 entry_SYSCALL_64_after_hwframe+0x49/0xbe The read is outside percpu_counter::lock critical section which results in a data race. Fix it by adding a READ_ONCE() in percpu_counter_read_positive() which could also serve as the existing compiler memory barrier. Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Acked-by: Marco Elver Link: http://lkml.kernel.org/r/1582302724-2804-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/percpu_counter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 73a7bf30fe9a..3f3cece31148 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -78,9 +78,9 @@ static inline s64 percpu_counter_read(struct percpu_counter *fbc) */ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) { - s64 ret = fbc->count; + /* Prevent reloads of fbc->count */ + s64 ret = READ_ONCE(fbc->count); - barrier(); /* Prevent reloads of fbc->count */ if (ret >= 0) return ret; return 0; -- GitLab From b3ee5ef07eccf70b8817a9763d2ae3d728701d27 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 6 Apr 2020 20:09:37 -0700 Subject: [PATCH 1060/1278] compiler.h: fix error in BUILD_BUG_ON() reporting [ Upstream commit af9c5d2e3b355854ff0e4acfbfbfadcd5198a349 ] compiletime_assert() uses __LINE__ to create a unique function name. This means that if you have more than one BUILD_BUG_ON() in the same source line (which can happen if they appear e.g. in a macro), then the error message from the compiler might output the wrong condition.
For this source file: #include <linux/build_bug.h> #define macro() \ BUILD_BUG_ON(1); \ BUILD_BUG_ON(0); void foo() { macro(); } gcc would output: ./include/linux/compiler.h:350:38: error: call to `__compiletime_assert_9' declared with attribute error: BUILD_BUG_ON failed: 0 _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) However, it was not the BUILD_BUG_ON(0) that failed, so it should say 1 instead of 0. With this patch, we use __COUNTER__ instead of __LINE__, so each BUILD_BUG_ON() gets a different function name and the correct condition is printed: ./include/linux/compiler.h:350:38: error: call to `__compiletime_assert_0' declared with attribute error: BUILD_BUG_ON failed: 1 _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) Signed-off-by: Vegard Nossum Signed-off-by: Andrew Morton Reviewed-by: Masahiro Yamada Reviewed-by: Daniel Santos Cc: Rasmus Villemoes Cc: Ian Abbott Cc: Joe Perches Link: http://lkml.kernel.org/r/20200331112637.25047-1-vegard.nossum@oracle.com Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index f84d332085c3..3ffe3f3f7903 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -331,7 +331,7 @@ unsigned long read_word_at_a_time(const void *addr) * compiler has support to do so.
*/ #define compiletime_assert(condition, msg) \ - _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) + _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) #define compiletime_assert_atomic_type(t) \ compiletime_assert(__native_word(t), \ -- GitLab From 19e4e3ddc8d1c762c8dd45f31ca1fb7ed305842b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 3 Apr 2020 17:30:48 +0200 Subject: [PATCH 1061/1278] KVM: s390: vsie: Fix possible race when shadowing region 3 tables [ Upstream commit 1493e0f944f3c319d11e067c185c904d01c17ae5 ] We have to properly retry again by returning -EAGAIN immediately in case somebody else instantiated the table concurrently. We missed adding the goto in this function only. The code now matches the other, similar shadowing functions. We are overwriting an existing region 2 table entry. All allocated pages are added to the crst_list to be freed later, so they are not lost forever. However, when unshadowing the region 2 table, we wouldn't trigger unshadowing of the original shadowed region 3 table that we replaced. It would get unshadowed when the original region 3 table is modified. As it's not connected to the page table hierarchy anymore, it's not going to get used anymore. However, for a limited time, this page table will stick around, so it's in some sense a temporary memory leak. Identified by manual code inspection. I don't think this classifies as stable material.
Fixes: 998f637cc4b9 ("s390/mm: avoid races on region/segment/page table shadowing") Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20200403153050.20569-4-david@redhat.com Reviewed-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger Signed-off-by: Sasha Levin --- arch/s390/mm/gmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index e297efa6e648..a29d2e88b00e 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1687,6 +1687,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, goto out_free; } else if (*table & _REGION_ENTRY_ORIGIN) { rc = -EAGAIN; /* Race with shadow */ + goto out_free; } crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY); /* mark as invalid as long as the parent table is not protected */ -- GitLab From 8f0f235c52494fa43305aa659fe309bd6bc29767 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 3 Apr 2020 10:03:45 -0400 Subject: [PATCH 1062/1278] x86: ACPI: fix CPU hotplug deadlock [ Upstream commit 696ac2e3bf267f5a2b2ed7d34e64131f2287d0ad ] Similar to commit 0266d81e9bf5 ("acpi/processor: Prevent cpu hotplug deadlock") except this is for acpi_processor_ffh_cstate_probe(): "The problem is that the work is scheduled on the current CPU from the hotplug thread associated with that CPU. It's not required to invoke these functions via the workqueue because the hotplug thread runs on the target CPU already. Check whether current is a per cpu thread pinned on the target CPU and invoke the function directly to avoid the workqueue." WARNING: possible circular locking dependency detected ------------------------------------------------------ cpuhp/1/15 is trying to acquire lock: ffffc90003447a28 ((work_completion)(&wfc.work)){+.+.}-{0:0}, at: __flush_work+0x4c6/0x630 but task is already holding lock: ffffffffafa1c0e8 (cpuidle_lock){+.+.}-{3:3}, at: cpuidle_pause_and_lock+0x17/0x20 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (cpu_hotplug_lock){++++}-{0:0}: cpus_read_lock+0x3e/0xc0 irq_calc_affinity_vectors+0x5f/0x91 __pci_enable_msix_range+0x10f/0x9a0 pci_alloc_irq_vectors_affinity+0x13e/0x1f0 pci_alloc_irq_vectors_affinity at drivers/pci/msi.c:1208 pqi_ctrl_init+0x72f/0x1618 [smartpqi] pqi_pci_probe.cold.63+0x882/0x892 [smartpqi] local_pci_probe+0x7a/0xc0 work_for_cpu_fn+0x2e/0x50 process_one_work+0x57e/0xb90 worker_thread+0x363/0x5b0 kthread+0x1f4/0x220 ret_from_fork+0x27/0x50 -> #0 ((work_completion)(&wfc.work)){+.+.}-{0:0}: __lock_acquire+0x2244/0x32a0 lock_acquire+0x1a2/0x680 __flush_work+0x4e6/0x630 work_on_cpu+0x114/0x160 acpi_processor_ffh_cstate_probe+0x129/0x250 acpi_processor_evaluate_cst+0x4c8/0x580 acpi_processor_get_power_info+0x86/0x740 acpi_processor_hotplug+0xc3/0x140 acpi_soft_cpu_online+0x102/0x1d0 cpuhp_invoke_callback+0x197/0x1120 cpuhp_thread_fun+0x252/0x2f0 smpboot_thread_fn+0x255/0x440 kthread+0x1f4/0x220 ret_from_fork+0x27/0x50 other info that might help us debug this: Chain exists of: (work_completion)(&wfc.work) --> cpuhp_state-up --> cpuidle_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(cpuidle_lock); lock(cpuhp_state-up); lock(cpuidle_lock); lock((work_completion)(&wfc.work)); *** DEADLOCK *** 3 locks held by cpuhp/1/15: #0: ffffffffaf51ab10 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun+0x69/0x2f0 #1: ffffffffaf51ad40 (cpuhp_state-up){+.+.}-{0:0}, at: cpuhp_thread_fun+0x69/0x2f0 #2: ffffffffafa1c0e8 (cpuidle_lock){+.+.}-{3:3}, at: cpuidle_pause_and_lock+0x17/0x20 Call Trace: dump_stack+0xa0/0xea print_circular_bug.cold.52+0x147/0x14c check_noncircular+0x295/0x2d0 __lock_acquire+0x2244/0x32a0 lock_acquire+0x1a2/0x680 __flush_work+0x4e6/0x630 work_on_cpu+0x114/0x160 acpi_processor_ffh_cstate_probe+0x129/0x250 acpi_processor_evaluate_cst+0x4c8/0x580 acpi_processor_get_power_info+0x86/0x740 acpi_processor_hotplug+0xc3/0x140 acpi_soft_cpu_online+0x102/0x1d0 
cpuhp_invoke_callback+0x197/0x1120 cpuhp_thread_fun+0x252/0x2f0 smpboot_thread_fn+0x255/0x440 kthread+0x1f4/0x220 ret_from_fork+0x27/0x50 Signed-off-by: Qian Cai Tested-by: Borislav Petkov [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- arch/x86/kernel/acpi/cstate.c | 3 ++- drivers/acpi/processor_throttling.c | 7 ------- include/acpi/processor.h | 8 ++++++++ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index dde437f5d14f..596e7640d895 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -133,7 +133,8 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ - retval = work_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx); + retval = call_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx, + false); if (retval == 0) { /* Use the hint in CST */ percpu_entry->states[cx->index].eax = cx->address; diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index 7f9aff4b8d62..9fdc13a2f2d5 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -909,13 +909,6 @@ static long __acpi_processor_get_throttling(void *data) return pr->throttling.acpi_processor_get_throttling(pr); } -static int call_on_cpu(int cpu, long (*fn)(void *), void *arg, bool direct) -{ - if (direct || (is_percpu_thread() && cpu == smp_processor_id())) - return fn(arg); - return work_on_cpu(cpu, fn, arg); -} - static int acpi_processor_get_throttling(struct acpi_processor *pr) { if (!pr) diff --git a/include/acpi/processor.h b/include/acpi/processor.h index d591bb77f592..f4bff2313547 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -291,6 +291,14 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx } #endif +static inline int call_on_cpu(int cpu, long (*fn)(void *), void *arg, + bool direct) +{ + if 
(direct || (is_percpu_thread() && cpu == smp_processor_id())) + return fn(arg); + return work_on_cpu(cpu, fn, arg); +} + /* in processor_perflib.c */ #ifdef CONFIG_CPU_FREQ -- GitLab From cb8ce25d1f09809715886de491924507e516e0c7 Mon Sep 17 00:00:00 2001 From: Jack Zhang Date: Wed, 1 Apr 2020 20:06:58 +0800 Subject: [PATCH 1063/1278] drm/amdkfd: kfree the wrong pointer [ Upstream commit 3148a6a0ef3cf93570f30a477292768f7eb5d3c3 ] Originally, it kfrees the wrong pointer for mem_obj. It would cause memory leak under stress test. Signed-off-by: Jack Zhang Acked-by: Nirmoy Das Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 61fff25b4ce7..ecd4eba221c0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -550,9 +550,9 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, return 0; kfd_gtt_no_free_chunk: - pr_debug("Allocation failed with mem_obj = %p\n", mem_obj); + pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj); mutex_unlock(&kfd->gtt_sa_lock); - kfree(mem_obj); + kfree(*mem_obj); return -ENOMEM; } -- GitLab From af8a66de65efe29be42f0726f477fff2e7bc1560 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Mar 2020 20:06:45 -0400 Subject: [PATCH 1064/1278] NFS: Fix memory leaks in nfs_pageio_stop_mirroring() [ Upstream commit 862f35c94730c9270833f3ad05bd758a29f204ed ] If we just set the mirror count to 1 without first clearing out the mirrors, we can leak queued up requests. 
Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/pagelist.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ceb6892d9bbd..7c01936be7c7 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -864,15 +864,6 @@ static void nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, pgio->pg_mirror_count = mirror_count; } -/* - * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) - */ -void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) -{ - pgio->pg_mirror_count = 1; - pgio->pg_mirror_idx = 0; -} - static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) { pgio->pg_mirror_count = 1; @@ -1301,6 +1292,14 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) } } +/* + * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) + */ +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) +{ + nfs_pageio_complete(pgio); +} + int __init nfs_init_nfspagecache(void) { nfs_page_cachep = kmem_cache_create("nfs_page", -- GitLab From c49f66238e53f4b9f208ef6bf879b2688e28d6f7 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 19 Mar 2020 21:32:30 -0700 Subject: [PATCH 1065/1278] iommu/vt-d: Fix mm reference leak [ Upstream commit 902baf61adf6b187f0a6b789e70d788ea71ff5bc ] Move canonical address check before mmget_not_zero() to avoid mm reference leak. 
Fixes: 9d8c3af31607 ("iommu/vt-d: IOMMU Page Request needs to check if address is canonical.") Signed-off-by: Jacob Pan Acked-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel-svm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index f5573bb9f450..837459762eb3 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -613,14 +613,15 @@ static irqreturn_t prq_event_thread(int irq, void *d) * any faults on kernel addresses. */ if (!svm->mm) goto bad_req; - /* If the mm is already defunct, don't handle faults. */ - if (!mmget_not_zero(svm->mm)) - goto bad_req; /* If address is not canonical, return invalid response */ if (!is_canonical_address(address)) goto bad_req; + /* If the mm is already defunct, don't handle faults. */ + if (!mmget_not_zero(svm->mm)) + goto bad_req; + down_read(&svm->mm->mmap_sem); vma = find_extend_vma(svm->mm, address); if (!vma || address < vma->vm_start) -- GitLab From 7ca6f11ed16fa248b3e00d6a39470f9e4f5973d0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 22 Mar 2020 19:45:41 -0700 Subject: [PATCH 1066/1278] ext2: fix empty body warnings when -Wextra is used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 44a52022e7f15cbaab957df1c14f7a4f527ef7cf ] When EXT2_ATTR_DEBUG is not defined, modify the 2 debug macros to use the no_printk() macro instead of <nothing>.
This fixes gcc warnings when -Wextra is used: ../fs/ext2/xattr.c:252:42: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] ../fs/ext2/xattr.c:258:42: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] ../fs/ext2/xattr.c:330:42: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] ../fs/ext2/xattr.c:872:45: warning: suggest braces around empty body in an ‘else’ statement [-Wempty-body] I have verified that the only object code change (with gcc 7.5.0) is the reversal of some instructions from 'cmp a,b' to 'cmp b,a'. Link: https://lore.kernel.org/r/e18a7395-61fb-2093-18e8-ed4f8cf56248@infradead.org Signed-off-by: Randy Dunlap Cc: Jan Kara Cc: linux-ext4@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/ext2/xattr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index dd8f10db82e9..4439bfaf1c57 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -56,6 +56,7 @@ #include #include +#include #include #include #include @@ -84,8 +85,8 @@ printk("\n"); \ } while (0) #else -# define ea_idebug(f...) -# define ea_bdebug(f...) +# define ea_idebug(inode, f...) no_printk(f) +# define ea_bdebug(bh, f...) 
no_printk(f) #endif static int ext2_xattr_set2(struct inode *, struct buffer_head *, -- GitLab From b2c5f60ccc6f140879d4a763aaa6c54a46b3c37a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 17 Mar 2020 12:40:02 +0100 Subject: [PATCH 1067/1278] ext2: fix debug reference to ext2_xattr_cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 32302085a8d90859c40cf1a5e8313f575d06ec75 ] Fix a debug-only build error in ext2/xattr.c: When building without extra debugging (and with another patch that uses no_printk() instead of <empty> for the ext2-xattr debug-print macros), this build error happens: ../fs/ext2/xattr.c: In function ‘ext2_xattr_cache_insert’: ../fs/ext2/xattr.c:869:18: error: ‘ext2_xattr_cache’ undeclared (first use in this function); did you mean ‘ext2_xattr_list’? atomic_read(&ext2_xattr_cache->c_entry_count)); Fix the problem by removing cached entry count from the debug message since otherwise we'd have to export the mbcache structure just for that.
Fixes: be0726d33cb8 ("ext2: convert to mbcache2") Reported-by: Randy Dunlap Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/ext2/xattr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 4439bfaf1c57..bd1d68ff3a9f 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -839,8 +839,7 @@ ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh) error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr, 1); if (error) { if (error == -EBUSY) { - ea_bdebug(bh, "already in cache (%d cache entries)", - atomic_read(&ext2_xattr_cache->c_entry_count)); + ea_bdebug(bh, "already in cache"); error = 0; } } else -- GitLab From 483af85a4e71fa0bd7a41e9666683cc94b1ccf0f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 25 Feb 2020 19:20:56 +0300 Subject: [PATCH 1068/1278] libnvdimm: Out of bounds read in __nd_ioctl() [ Upstream commit f84afbdd3a9e5e10633695677b95422572f920dc ] The "cmd" comes from the user and it can be up to 255. If it's more than the number of bits in a long, it results in an out-of-bounds read when we check test_bit(cmd, &cmd_mask). The highest valid value for "cmd" is ND_CMD_CALL (10) so I added a compare against that.
Fixes: 62232e45f4a2 ("libnvdimm: control (ioctl) messages for nvdimm_bus and nvdimm devices") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20200225162055.amtosfy7m35aivxg@kili.mountain Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- drivers/nvdimm/bus.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 2f1b54fab399..83e18b367944 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -951,8 +951,10 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, return -EFAULT; } - if (!desc || (desc->out_num + desc->in_num == 0) || - !test_bit(cmd, &cmd_mask)) + if (!desc || + (desc->out_num + desc->in_num == 0) || + cmd > ND_CMD_CALL || + !test_bit(cmd, &cmd_mask)) return -ENOTTY; /* fail write commands (when read-only) */ -- GitLab From 3b098eb7486fa87445ac3f8332986b5a50bd0822 Mon Sep 17 00:00:00 2001 From: Adrian Huang Date: Fri, 14 Feb 2020 18:44:51 +0800 Subject: [PATCH 1069/1278] iommu/amd: Fix the configuration of GCR3 table root pointer [ Upstream commit c20f36534666e37858a14e591114d93cc1be0d34 ] The SPA of the GCR3 table root pointer[51:31] masks 20 bits. However, this requires 21 bits (please see the AMD IOMMU specification). This leads to a potential failure when bit 51 of the SPA of the GCR3 table root pointer is 1.
Signed-off-by: Adrian Huang Fixes: 52815b75682e2 ("iommu/amd: Add support for IOMMUv2 domain mode") Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 3054c0971759..74c8638aac2b 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -348,7 +348,7 @@ #define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL) #define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL) -#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0xfffffULL) +#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0x1fffffULL) #define DTE_GCR3_INDEX_A 0 #define DTE_GCR3_INDEX_B 1 -- GitLab From c6ca2a69114bd1daeb733d2b81ea8a248739fb58 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 30 Mar 2020 14:38:46 -0700 Subject: [PATCH 1070/1278] net: dsa: bcm_sf2: Fix overflow checks commit d0802dc411f469569a537283b6f3833af47aece9 upstream. Commit f949a12fd697 ("net: dsa: bcm_sf2: fix buffer overflow doing set_rxnfc") tried to fix some user-controlled buffer overflows in bcm_sf2_cfp_rule_set() and bcm_sf2_cfp_rule_del() but the fix was using CFP_NUM_RULES, which while it is correct not to overflow the bitmaps, is not representative of what the device actually supports. Correct that by using bcm_sf2_cfp_rule_size() instead. The latter subtracts 1 from the number of rules, so change the checks from greater-than-or-equal to greater-than accordingly. Fixes: f949a12fd697 ("net: dsa: bcm_sf2: fix buffer overflow doing set_rxnfc") Signed-off-by: Florian Fainelli Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2_cfp.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 7f8d269dd75a..814618c0b632 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -130,17 +130,14 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, (fs->m_ext.vlan_etype || fs->m_ext.data[1])) return -EINVAL; - if (fs->location != RX_CLS_LOC_ANY && fs->location >= CFP_NUM_RULES) + if (fs->location != RX_CLS_LOC_ANY && + fs->location > bcm_sf2_cfp_rule_size(priv)) return -EINVAL; if (fs->location != RX_CLS_LOC_ANY && test_bit(fs->location, priv->cfp.used)) return -EBUSY; - if (fs->location != RX_CLS_LOC_ANY && - fs->location > bcm_sf2_cfp_rule_size(priv)) - return -EINVAL; - ip_frag = be32_to_cpu(fs->m_ext.data[0]); /* We do not support discarding packets, check that the @@ -333,7 +330,7 @@ static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port, int ret; u32 reg; - if (loc >= CFP_NUM_RULES) + if (loc > bcm_sf2_cfp_rule_size(priv)) return -EINVAL; /* Refuse deletion of unused rules, and the default reserved rule */ -- GitLab From 4ea3913b19b6d99058f71f4f58c9b7f95e618087 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 13 Jan 2020 14:08:14 +0300 Subject: [PATCH 1071/1278] fbdev: potential information leak in do_fb_ioctl() commit d3d19d6fc5736a798b118971935ce274f7deaa82 upstream. The "fix" struct has a 2 byte hole after ->ywrapstep and the "fix = info->fix;" assignment doesn't necessarily clear it. It depends on the compiler. The solution is just to replace the assignment with an memcpy(). Fixes: 1f5e31d7e55a ("fbmem: don't call copy_from/to_user() with mutex held") Signed-off-by: Dan Carpenter Cc: Andrew Morton Cc: Arnd Bergmann Cc: "Eric W. 
Biederman" Cc: Andrea Righi Cc: Daniel Vetter Cc: Sam Ravnborg Cc: Maarten Lankhorst Cc: Daniel Thompson Cc: Peter Rosin Cc: Jani Nikula Cc: Gerd Hoffmann Signed-off-by: Bartlomiej Zolnierkiewicz Link: https://patchwork.freedesktop.org/patch/msgid/20200113100132.ixpaymordi24n3av@kili.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fbmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 83961a22bef1..07dcf687a52b 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1134,7 +1134,7 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, case FBIOGET_FSCREENINFO: if (!lock_fb_info(info)) return -ENODEV; - fix = info->fix; + memcpy(&fix, &info->fix, sizeof(fix)); unlock_fb_info(info); ret = copy_to_user(argp, &fix, sizeof(fix)) ? -EFAULT : 0; -- GitLab From bcc576e551a4af3d6b340fc2d96385798d5e3997 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 9 Jan 2020 18:39:12 +1100 Subject: [PATCH 1072/1278] tty: evh_bytechan: Fix out of bounds accesses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3670664b5da555a2a481449b3baafff113b0ac35 upstream. ev_byte_channel_send() assumes that its third argument is a 16 byte array. Some places where it is called it may not be (or we can't easily tell if it is). Newer compilers have started producing warnings about this, so make sure we actually pass a 16 byte array. There may be more elegant solutions to this, but the driver is quite old and hasn't been updated in many years. 
The warnings (from a powerpc allyesconfig build) are: In file included from include/linux/byteorder/big_endian.h:5, from arch/powerpc/include/uapi/asm/byteorder.h:14, from include/asm-generic/bitops/le.h:6, from arch/powerpc/include/asm/bitops.h:250, from include/linux/bitops.h:29, from include/linux/kernel.h:12, from include/asm-generic/bug.h:19, from arch/powerpc/include/asm/bug.h:109, from include/linux/bug.h:5, from include/linux/mmdebug.h:5, from include/linux/gfp.h:5, from include/linux/slab.h:15, from drivers/tty/ehv_bytechan.c:24: drivers/tty/ehv_bytechan.c: In function ‘ehv_bc_udbg_putc’: arch/powerpc/include/asm/epapr_hcalls.h:298:20: warning: array subscript 1 is outside array bounds of ‘const char[1]’ [-Warray-bounds] 298 | r6 = be32_to_cpu(p[1]); include/uapi/linux/byteorder/big_endian.h:40:51: note: in definition of macro ‘__be32_to_cpu’ 40 | #define __be32_to_cpu(x) ((__force __u32)(__be32)(x)) | ^ arch/powerpc/include/asm/epapr_hcalls.h:298:7: note: in expansion of macro ‘be32_to_cpu’ 298 | r6 = be32_to_cpu(p[1]); | ^~~~~~~~~~~ drivers/tty/ehv_bytechan.c:166:13: note: while referencing ‘data’ 166 | static void ehv_bc_udbg_putc(char c) | ^~~~~~~~~~~~~~~~ Fixes: dcd83aaff1c8 ("tty/powerpc: introduce the ePAPR embedded hypervisor byte channel driver") Signed-off-by: Stephen Rothwell Tested-by: Laurentiu Tudor [mpe: Trim warnings from change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200109183912.5fcb52aa@canb.auug.org.au Signed-off-by: Greg Kroah-Hartman --- drivers/tty/ehv_bytechan.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c index a1c7125cb968..5a348efb91ad 100644 --- a/drivers/tty/ehv_bytechan.c +++ b/drivers/tty/ehv_bytechan.c @@ -139,6 +139,21 @@ static int find_console_handle(void) return 1; } +static unsigned int local_ev_byte_channel_send(unsigned int handle, + unsigned int *count, + const char *p) +{ + char 
buffer[EV_BYTE_CHANNEL_MAX_BYTES]; + unsigned int c = *count; + + if (c < sizeof(buffer)) { + memcpy(buffer, p, c); + memset(&buffer[c], 0, sizeof(buffer) - c); + p = buffer; + } + return ev_byte_channel_send(handle, count, p); +} + /*************************** EARLY CONSOLE DRIVER ***************************/ #ifdef CONFIG_PPC_EARLY_DEBUG_EHV_BC @@ -157,7 +172,7 @@ static void byte_channel_spin_send(const char data) do { count = 1; - ret = ev_byte_channel_send(CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE, + ret = local_ev_byte_channel_send(CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE, &count, &data); } while (ret == EV_EAGAIN); } @@ -224,7 +239,7 @@ static int ehv_bc_console_byte_channel_send(unsigned int handle, const char *s, while (count) { len = min_t(unsigned int, count, EV_BYTE_CHANNEL_MAX_BYTES); do { - ret = ev_byte_channel_send(handle, &len, s); + ret = local_ev_byte_channel_send(handle, &len, s); } while (ret == EV_EAGAIN); count -= len; s += len; @@ -404,7 +419,7 @@ static void ehv_bc_tx_dequeue(struct ehv_bc_data *bc) CIRC_CNT_TO_END(bc->head, bc->tail, BUF_SIZE), EV_BYTE_CHANNEL_MAX_BYTES); - ret = ev_byte_channel_send(bc->handle, &len, bc->buf + bc->tail); + ret = local_ev_byte_channel_send(bc->handle, &len, bc->buf + bc->tail); /* 'len' is valid only if the return code is 0 or EV_EAGAIN */ if (!ret || (ret == EV_EAGAIN)) -- GitLab From 7c1449d4e89d9f78154c42693cc348e81145f258 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 23 Jan 2020 09:19:01 -0800 Subject: [PATCH 1073/1278] locktorture: Print ratio of acquisitions, not failures commit 80c503e0e68fbe271680ab48f0fe29bc034b01b7 upstream. The __torture_print_stats() function in locktorture.c carefully initializes local variable "min" to statp[0].n_lock_acquired, but then compares it to statp[i].n_lock_fail. 
Given that the .n_lock_fail field should normally be zero, and given the initialization, it seems reasonable to display the maximum and minimum number of acquisitions instead of miscomputing the maximum and minimum number of failures. This commit therefore switches from failures to acquisitions. And this turns out to be not only a day-zero bug, but entirely my own fault. I hate it when that happens! Fixes: 0af3fe1efa53 ("locktorture: Add a lock-torture kernel module") Reported-by: Will Deacon Signed-off-by: Paul E. McKenney Acked-by: Will Deacon Cc: Davidlohr Bueso Cc: Josh Triplett Cc: Peter Zijlstra Signed-off-by: Greg Kroah-Hartman --- kernel/locking/locktorture.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 6dca260eeccf..032868be3259 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -723,10 +723,10 @@ static void __torture_print_stats(char *page, if (statp[i].n_lock_fail) fail = true; sum += statp[i].n_lock_acquired; - if (max < statp[i].n_lock_fail) - max = statp[i].n_lock_fail; - if (min > statp[i].n_lock_fail) - min = statp[i].n_lock_fail; + if (max < statp[i].n_lock_acquired) + max = statp[i].n_lock_acquired; + if (min > statp[i].n_lock_acquired) + min = statp[i].n_lock_acquired; } page += sprintf(page, "%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", -- GitLab From 0280a9d9aa5bd64773793cb91fe3e62a11eda975 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Feb 2020 12:25:54 +0300 Subject: [PATCH 1074/1278] mtd: lpddr: Fix a double free in probe() commit 4da0ea71ea934af18db4c63396ba2af1a679ef02 upstream. This function is only called from lpddr_probe(). We free "lpddr" both here and in the caller, so it's a double free. The best place to free "lpddr" is in lpddr_probe() so let's delete this one.
Fixes: 8dc004395d5e ("[MTD] LPDDR qinfo probing.") Signed-off-by: Dan Carpenter Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200228092554.o57igp3nqhyvf66t@kili.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/lpddr/lpddr_cmds.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mtd/lpddr/lpddr_cmds.c b/drivers/mtd/lpddr/lpddr_cmds.c index 018c75faadb3..e1c283ccbbde 100644 --- a/drivers/mtd/lpddr/lpddr_cmds.c +++ b/drivers/mtd/lpddr/lpddr_cmds.c @@ -81,7 +81,6 @@ struct mtd_info *lpddr_cmdset(struct map_info *map) shared = kmalloc(sizeof(struct flchip_shared) * lpddr->numchips, GFP_KERNEL); if (!shared) { - kfree(lpddr); kfree(mtd); return NULL; } -- GitLab From bab44430dc603d04715e1d6a02e39cf547462e46 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Wed, 18 Mar 2020 23:31:56 +0800 Subject: [PATCH 1075/1278] mtd: phram: fix a double free issue in error path commit 49c64df880570034308e4a9a49c4bc95cf8cdb33 upstream. The variable 'name' is released multiple times in the error path, which may cause double free issues. This problem is avoided by adding a goto label to release the memory uniformly. And this change also makes the code a bit cleaner.
Fixes: 4f678a58d335 ("mtd: fix memory leaks in phram_setup") Signed-off-by: Wen Yang Cc: Joern Engel Cc: Miquel Raynal Cc: Richard Weinberger Cc: Vignesh Raghavendra Cc: linux-mtd@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200318153156.25612-1-wenyang@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/devices/phram.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c index 8b66e52ca3cc..9734e6903fe6 100644 --- a/drivers/mtd/devices/phram.c +++ b/drivers/mtd/devices/phram.c @@ -247,22 +247,25 @@ static int phram_setup(const char *val) ret = parse_num64(&start, token[1]); if (ret) { - kfree(name); parse_err("illegal start address\n"); + goto error; } ret = parse_num64(&len, token[2]); if (ret) { - kfree(name); parse_err("illegal device length\n"); + goto error; } ret = register_device(name, start, len); - if (!ret) - pr_info("%s device: %#llx at %#llx\n", name, len, start); - else - kfree(name); + if (ret) + goto error; + + pr_info("%s device: %#llx at %#llx\n", name, len, start); + return 0; +error: + kfree(name); return ret; } -- GitLab From bea47bf26d7e00ff59c25c2a579336cb5dfb2187 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Feb 2018 14:38:34 +0000 Subject: [PATCH 1076/1278] KEYS: Use individual pages in big_key for crypto buffers commit d9f4bb1a0f4db493efe6d7c58ffe696a57de7eb3 upstream. kmalloc() can't always allocate large enough buffers for big_key to use for crypto (1MB + some metadata) so we cannot use that to allocate the buffer. Further, vmalloc'd pages can't be passed to sg_init_one() and the aead crypto accessors cannot be called progressively and must be passed all the data in one go (which means we can't pass the data in one block at a time). 
Fix this by allocating the buffer pages individually and passing them through a multientry scatterlist to the crypto layer. This has the bonus advantage that we don't have to allocate a contiguous series of pages. We then vmap() the page list and pass that through to the VFS read/write routines. This can trigger a warning: WARNING: CPU: 0 PID: 60912 at mm/page_alloc.c:3883 __alloc_pages_nodemask+0xb7c/0x15f8 ([<00000000002acbb6>] __alloc_pages_nodemask+0x1ee/0x15f8) [<00000000002dd356>] kmalloc_order+0x46/0x90 [<00000000002dd3e0>] kmalloc_order_trace+0x40/0x1f8 [<0000000000326a10>] __kmalloc+0x430/0x4c0 [<00000000004343e4>] big_key_preparse+0x7c/0x210 [<000000000042c040>] key_create_or_update+0x128/0x420 [<000000000042e52c>] SyS_add_key+0x124/0x220 [<00000000007bba2c>] system_call+0xc4/0x2b0 from the keyctl/padd/useradd test of the keyutils testsuite on s390x. Note that it might be better to shovel data through in page-sized lumps instead as there's no particular need to use a monolithic buffer unless the kernel itself wants to access the data. Fixes: 13100a72f40f ("Security: Keys: Big keys stored encrypted") Reported-by: Paul Bunyan Signed-off-by: David Howells cc: Kirill Marinushkin Signed-off-by: Greg Kroah-Hartman --- security/keys/big_key.c | 110 +++++++++++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 23 deletions(-) diff --git a/security/keys/big_key.c b/security/keys/big_key.c index 929e14978c42..fa728f662a6f 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -22,6 +22,13 @@ #include #include +struct big_key_buf { + unsigned int nr_pages; + void *virt; + struct scatterlist *sg; + struct page *pages[]; +}; + /* * Layout of key payload words. 
*/ @@ -91,10 +98,9 @@ static DEFINE_MUTEX(big_key_aead_lock); /* * Encrypt/decrypt big_key data */ -static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) +static int big_key_crypt(enum big_key_op op, struct big_key_buf *buf, size_t datalen, u8 *key) { int ret; - struct scatterlist sgio; struct aead_request *aead_req; /* We always use a zero nonce. The reason we can get away with this is * because we're using a different randomly generated key for every @@ -109,8 +115,7 @@ static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) return -ENOMEM; memset(zero_nonce, 0, sizeof(zero_nonce)); - sg_init_one(&sgio, data, datalen + (op == BIG_KEY_ENC ? ENC_AUTHTAG_SIZE : 0)); - aead_request_set_crypt(aead_req, &sgio, &sgio, datalen, zero_nonce); + aead_request_set_crypt(aead_req, buf->sg, buf->sg, datalen, zero_nonce); aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); aead_request_set_ad(aead_req, 0); @@ -129,22 +134,82 @@ static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) return ret; } +/* + * Free up the buffer. + */ +static void big_key_free_buffer(struct big_key_buf *buf) +{ + unsigned int i; + + if (buf->virt) { + memset(buf->virt, 0, buf->nr_pages * PAGE_SIZE); + vunmap(buf->virt); + } + + for (i = 0; i < buf->nr_pages; i++) + if (buf->pages[i]) + __free_page(buf->pages[i]); + + kfree(buf); +} + +/* + * Allocate a buffer consisting of a set of pages with a virtual mapping + * applied over them. 
+ */ +static void *big_key_alloc_buffer(size_t len) +{ + struct big_key_buf *buf; + unsigned int npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned int i, l; + + buf = kzalloc(sizeof(struct big_key_buf) + + sizeof(struct page) * npg + + sizeof(struct scatterlist) * npg, + GFP_KERNEL); + if (!buf) + return NULL; + + buf->nr_pages = npg; + buf->sg = (void *)(buf->pages + npg); + sg_init_table(buf->sg, npg); + + for (i = 0; i < buf->nr_pages; i++) { + buf->pages[i] = alloc_page(GFP_KERNEL); + if (!buf->pages[i]) + goto nomem; + + l = min_t(size_t, len, PAGE_SIZE); + sg_set_page(&buf->sg[i], buf->pages[i], l, 0); + len -= l; + } + + buf->virt = vmap(buf->pages, buf->nr_pages, VM_MAP, PAGE_KERNEL); + if (!buf->virt) + goto nomem; + + return buf; + +nomem: + big_key_free_buffer(buf); + return NULL; +} + /* * Preparse a big key */ int big_key_preparse(struct key_preparsed_payload *prep) { + struct big_key_buf *buf; struct path *path = (struct path *)&prep->payload.data[big_key_path]; struct file *file; u8 *enckey; - u8 *data = NULL; ssize_t written; - size_t datalen = prep->datalen; + size_t datalen = prep->datalen, enclen = datalen + ENC_AUTHTAG_SIZE; int ret; - ret = -EINVAL; if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data) - goto error; + return -EINVAL; /* Set an arbitrary quota */ prep->quotalen = 16; @@ -157,13 +222,12 @@ int big_key_preparse(struct key_preparsed_payload *prep) * * File content is stored encrypted with randomly generated key. 
*/ - size_t enclen = datalen + ENC_AUTHTAG_SIZE; loff_t pos = 0; - data = kmalloc(enclen, GFP_KERNEL); - if (!data) + buf = big_key_alloc_buffer(enclen); + if (!buf) return -ENOMEM; - memcpy(data, prep->data, datalen); + memcpy(buf->virt, prep->data, datalen); /* generate random key */ enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL); @@ -176,7 +240,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) goto err_enckey; /* encrypt aligned data */ - ret = big_key_crypt(BIG_KEY_ENC, data, datalen, enckey); + ret = big_key_crypt(BIG_KEY_ENC, buf, datalen, enckey); if (ret) goto err_enckey; @@ -187,7 +251,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) goto err_enckey; } - written = kernel_write(file, data, enclen, &pos); + written = kernel_write(file, buf->virt, enclen, &pos); if (written != enclen) { ret = written; if (written >= 0) @@ -202,7 +266,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) *path = file->f_path; path_get(path); fput(file); - kzfree(data); + big_key_free_buffer(buf); } else { /* Just store the data in a buffer */ void *data = kmalloc(datalen, GFP_KERNEL); @@ -220,7 +284,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) err_enckey: kzfree(enckey); error: - kzfree(data); + big_key_free_buffer(buf); return ret; } @@ -298,15 +362,15 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) return datalen; if (datalen > BIG_KEY_FILE_THRESHOLD) { + struct big_key_buf *buf; struct path *path = (struct path *)&key->payload.data[big_key_path]; struct file *file; - u8 *data; u8 *enckey = (u8 *)key->payload.data[big_key_data]; size_t enclen = datalen + ENC_AUTHTAG_SIZE; loff_t pos = 0; - data = kmalloc(enclen, GFP_KERNEL); - if (!data) + buf = big_key_alloc_buffer(enclen); + if (!buf) return -ENOMEM; file = dentry_open(path, O_RDONLY, current_cred()); @@ -316,26 +380,26 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) } /* read file to kernel and decrypt */ - ret = 
kernel_read(file, data, enclen, &pos); + ret = kernel_read(file, buf->virt, enclen, &pos); if (ret >= 0 && ret != enclen) { ret = -EIO; goto err_fput; } - ret = big_key_crypt(BIG_KEY_DEC, data, enclen, enckey); + ret = big_key_crypt(BIG_KEY_DEC, buf, enclen, enckey); if (ret) goto err_fput; ret = datalen; /* copy decrypted data to user */ - if (copy_to_user(buffer, data, datalen) != 0) + if (copy_to_user(buffer, buf->virt, datalen) != 0) ret = -EFAULT; err_fput: fput(file); error: - kzfree(data); + big_key_free_buffer(buf); } else { ret = datalen; if (copy_to_user(buffer, key->payload.data[big_key_data], -- GitLab From 7fa67aa30509ad0da72ac0f757aa76e53c0ea3de Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Mar 2020 21:11:24 -0400 Subject: [PATCH 1077/1278] KEYS: Don't write out to userspace while holding key semaphore commit d3ec10aa95819bff18a0d936b18884c7816d0914 upstream. A lockdep circular locking dependency report was seen when running a keyutils test: [12537.027242] ====================================================== [12537.059309] WARNING: possible circular locking dependency detected [12537.088148] 4.18.0-147.7.1.el8_1.x86_64+debug #1 Tainted: G OE --------- - - [12537.125253] ------------------------------------------------------ [12537.153189] keyctl/25598 is trying to acquire lock: [12537.175087] 000000007c39f96c (&mm->mmap_sem){++++}, at: __might_fault+0xc4/0x1b0 [12537.208365] [12537.208365] but task is already holding lock: [12537.234507] 000000003de5b58d (&type->lock_class){++++}, at: keyctl_read_key+0x15a/0x220 [12537.270476] [12537.270476] which lock already depends on the new lock. 
[12537.270476] [12537.307209] [12537.307209] the existing dependency chain (in reverse order) is: [12537.340754] [12537.340754] -> #3 (&type->lock_class){++++}: [12537.367434] down_write+0x4d/0x110 [12537.385202] __key_link_begin+0x87/0x280 [12537.405232] request_key_and_link+0x483/0xf70 [12537.427221] request_key+0x3c/0x80 [12537.444839] dns_query+0x1db/0x5a5 [dns_resolver] [12537.468445] dns_resolve_server_name_to_ip+0x1e1/0x4d0 [cifs] [12537.496731] cifs_reconnect+0xe04/0x2500 [cifs] [12537.519418] cifs_readv_from_socket+0x461/0x690 [cifs] [12537.546263] cifs_read_from_socket+0xa0/0xe0 [cifs] [12537.573551] cifs_demultiplex_thread+0x311/0x2db0 [cifs] [12537.601045] kthread+0x30c/0x3d0 [12537.617906] ret_from_fork+0x3a/0x50 [12537.636225] [12537.636225] -> #2 (root_key_user.cons_lock){+.+.}: [12537.664525] __mutex_lock+0x105/0x11f0 [12537.683734] request_key_and_link+0x35a/0xf70 [12537.705640] request_key+0x3c/0x80 [12537.723304] dns_query+0x1db/0x5a5 [dns_resolver] [12537.746773] dns_resolve_server_name_to_ip+0x1e1/0x4d0 [cifs] [12537.775607] cifs_reconnect+0xe04/0x2500 [cifs] [12537.798322] cifs_readv_from_socket+0x461/0x690 [cifs] [12537.823369] cifs_read_from_socket+0xa0/0xe0 [cifs] [12537.847262] cifs_demultiplex_thread+0x311/0x2db0 [cifs] [12537.873477] kthread+0x30c/0x3d0 [12537.890281] ret_from_fork+0x3a/0x50 [12537.908649] [12537.908649] -> #1 (&tcp_ses->srv_mutex){+.+.}: [12537.935225] __mutex_lock+0x105/0x11f0 [12537.954450] cifs_call_async+0x102/0x7f0 [cifs] [12537.977250] smb2_async_readv+0x6c3/0xc90 [cifs] [12538.000659] cifs_readpages+0x120a/0x1e50 [cifs] [12538.023920] read_pages+0xf5/0x560 [12538.041583] __do_page_cache_readahead+0x41d/0x4b0 [12538.067047] ondemand_readahead+0x44c/0xc10 [12538.092069] filemap_fault+0xec1/0x1830 [12538.111637] __do_fault+0x82/0x260 [12538.129216] do_fault+0x419/0xfb0 [12538.146390] __handle_mm_fault+0x862/0xdf0 [12538.167408] handle_mm_fault+0x154/0x550 [12538.187401] __do_page_fault+0x42f/0xa60 [12538.207395] 
do_page_fault+0x38/0x5e0 [12538.225777] page_fault+0x1e/0x30 [12538.243010] [12538.243010] -> #0 (&mm->mmap_sem){++++}: [12538.267875] lock_acquire+0x14c/0x420 [12538.286848] __might_fault+0x119/0x1b0 [12538.306006] keyring_read_iterator+0x7e/0x170 [12538.327936] assoc_array_subtree_iterate+0x97/0x280 [12538.352154] keyring_read+0xe9/0x110 [12538.370558] keyctl_read_key+0x1b9/0x220 [12538.391470] do_syscall_64+0xa5/0x4b0 [12538.410511] entry_SYSCALL_64_after_hwframe+0x6a/0xdf [12538.435535] [12538.435535] other info that might help us debug this: [12538.435535] [12538.472829] Chain exists of: [12538.472829] &mm->mmap_sem --> root_key_user.cons_lock --> &type->lock_class [12538.472829] [12538.524820] Possible unsafe locking scenario: [12538.524820] [12538.551431] CPU0 CPU1 [12538.572654] ---- ---- [12538.595865] lock(&type->lock_class); [12538.613737] lock(root_key_user.cons_lock); [12538.644234] lock(&type->lock_class); [12538.672410] lock(&mm->mmap_sem); [12538.687758] [12538.687758] *** DEADLOCK *** [12538.687758] [12538.714455] 1 lock held by keyctl/25598: [12538.732097] #0: 000000003de5b58d (&type->lock_class){++++}, at: keyctl_read_key+0x15a/0x220 [12538.770573] [12538.770573] stack backtrace: [12538.790136] CPU: 2 PID: 25598 Comm: keyctl Kdump: loaded Tainted: G [12538.844855] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 12/27/2015 [12538.881963] Call Trace: [12538.892897] dump_stack+0x9a/0xf0 [12538.907908] print_circular_bug.isra.25.cold.50+0x1bc/0x279 [12538.932891] ? save_trace+0xd6/0x250 [12538.948979] check_prev_add.constprop.32+0xc36/0x14f0 [12538.971643] ? keyring_compare_object+0x104/0x190 [12538.992738] ? check_usage+0x550/0x550 [12539.009845] ? sched_clock+0x5/0x10 [12539.025484] ? sched_clock_cpu+0x18/0x1e0 [12539.043555] __lock_acquire+0x1f12/0x38d0 [12539.061551] ? trace_hardirqs_on+0x10/0x10 [12539.080554] lock_acquire+0x14c/0x420 [12539.100330] ? 
__might_fault+0xc4/0x1b0 [12539.119079] __might_fault+0x119/0x1b0 [12539.135869] ? __might_fault+0xc4/0x1b0 [12539.153234] keyring_read_iterator+0x7e/0x170 [12539.172787] ? keyring_read+0x110/0x110 [12539.190059] assoc_array_subtree_iterate+0x97/0x280 [12539.211526] keyring_read+0xe9/0x110 [12539.227561] ? keyring_gc_check_iterator+0xc0/0xc0 [12539.249076] keyctl_read_key+0x1b9/0x220 [12539.266660] do_syscall_64+0xa5/0x4b0 [12539.283091] entry_SYSCALL_64_after_hwframe+0x6a/0xdf One way to prevent this deadlock scenario from happening is to not allow writing to userspace while holding the key semaphore. Instead, an internal buffer is allocated for getting the keys out from the read method first before copying them out to userspace without holding the lock. That requires taking out the __user modifier from all the relevant read methods as well as additional changes to not use any userspace write helpers. That is, 1) The put_user() call is replaced by a direct copy. 2) The copy_to_user() call is replaced by memcpy(). 3) All the fault handling code is removed. Compiling on a x86-64 system, the size of the rxrpc_read() function is reduced from 3795 bytes to 2384 bytes with this patch. 
Fixes: ^1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Jarkko Sakkinen Signed-off-by: Waiman Long Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- include/keys/big_key-type.h | 2 +- include/keys/user-type.h | 3 +- include/linux/key-type.h | 2 +- net/dns_resolver/dns_key.c | 2 +- net/rxrpc/key.c | 27 +++------ security/keys/big_key.c | 11 ++-- security/keys/encrypted-keys/encrypted.c | 7 +-- security/keys/keyctl.c | 73 ++++++++++++++++++------ security/keys/keyring.c | 6 +- security/keys/request_key_auth.c | 7 +-- security/keys/trusted.c | 14 +---- security/keys/user_defined.c | 5 +- 12 files changed, 85 insertions(+), 74 deletions(-) diff --git a/include/keys/big_key-type.h b/include/keys/big_key-type.h index e0970a578188..a7207a965466 100644 --- a/include/keys/big_key-type.h +++ b/include/keys/big_key-type.h @@ -21,6 +21,6 @@ extern void big_key_free_preparse(struct key_preparsed_payload *prep); extern void big_key_revoke(struct key *key); extern void big_key_destroy(struct key *key); extern void big_key_describe(const struct key *big_key, struct seq_file *m); -extern long big_key_read(const struct key *key, char __user *buffer, size_t buflen); +extern long big_key_read(const struct key *key, char *buffer, size_t buflen); #endif /* _KEYS_BIG_KEY_TYPE_H */ diff --git a/include/keys/user-type.h b/include/keys/user-type.h index 12babe991594..0d8f3cd3056f 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -45,8 +45,7 @@ extern int user_update(struct key *key, struct key_preparsed_payload *prep); extern void user_revoke(struct key *key); extern void user_destroy(struct key *key); extern void user_describe(const struct key *user, struct seq_file *m); -extern long user_read(const struct key *key, - char __user *buffer, size_t buflen); +extern long user_read(const struct key *key, char *buffer, size_t buflen); static inline const struct user_key_payload *user_key_payload_rcu(const struct key *key) { diff --git 
a/include/linux/key-type.h b/include/linux/key-type.h index dfb3ba782d2c..535b310a4c3b 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -125,7 +125,7 @@ struct key_type { * much is copied into the buffer * - shouldn't do the copy if the buffer is NULL */ - long (*read)(const struct key *key, char __user *buffer, size_t buflen); + long (*read)(const struct key *key, char *buffer, size_t buflen); /* handle request_key() for this type instead of invoking * /sbin/request-key (optional) diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 5f5d9eafccf5..ea133857f19e 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -242,7 +242,7 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) * - the key's semaphore is read-locked */ static long dns_resolver_read(const struct key *key, - char __user *buffer, size_t buflen) + char *buffer, size_t buflen) { int err = PTR_ERR(key->payload.data[dns_key_error]); diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index e7f6b8823eb6..ad9d1b21cb0b 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -35,7 +35,7 @@ static void rxrpc_free_preparse_s(struct key_preparsed_payload *); static void rxrpc_destroy(struct key *); static void rxrpc_destroy_s(struct key *); static void rxrpc_describe(const struct key *, struct seq_file *); -static long rxrpc_read(const struct key *, char __user *, size_t); +static long rxrpc_read(const struct key *, char *, size_t); /* * rxrpc defined keys take an arbitrary string as the description and an @@ -1044,12 +1044,12 @@ EXPORT_SYMBOL(rxrpc_get_null_key); * - this returns the result in XDR form */ static long rxrpc_read(const struct key *key, - char __user *buffer, size_t buflen) + char *buffer, size_t buflen) { const struct rxrpc_key_token *token; const struct krb5_principal *princ; size_t size; - __be32 __user *xdr, *oldxdr; + __be32 *xdr, *oldxdr; u32 cnlen, toksize, ntoks, tok, zero; u16 toksizes[AFSTOKEN_MAX]; 
int loop; @@ -1126,30 +1126,25 @@ static long rxrpc_read(const struct key *key, if (!buffer || buflen < size) return size; - xdr = (__be32 __user *) buffer; + xdr = (__be32 *)buffer; zero = 0; #define ENCODE(x) \ do { \ - __be32 y = htonl(x); \ - if (put_user(y, xdr++) < 0) \ - goto fault; \ + *xdr++ = htonl(x); \ } while(0) #define ENCODE_DATA(l, s) \ do { \ u32 _l = (l); \ ENCODE(l); \ - if (copy_to_user(xdr, (s), _l) != 0) \ - goto fault; \ - if (_l & 3 && \ - copy_to_user((u8 __user *)xdr + _l, &zero, 4 - (_l & 3)) != 0) \ - goto fault; \ + memcpy(xdr, (s), _l); \ + if (_l & 3) \ + memcpy((u8 *)xdr + _l, &zero, 4 - (_l & 3)); \ xdr += (_l + 3) >> 2; \ } while(0) #define ENCODE64(x) \ do { \ __be64 y = cpu_to_be64(x); \ - if (copy_to_user(xdr, &y, 8) != 0) \ - goto fault; \ + memcpy(xdr, &y, 8); \ xdr += 8 >> 2; \ } while(0) #define ENCODE_STR(s) \ @@ -1240,8 +1235,4 @@ static long rxrpc_read(const struct key *key, ASSERTCMP((char __user *) xdr - buffer, ==, size); _leave(" = %zu", size); return size; - -fault: - _leave(" = -EFAULT"); - return -EFAULT; } diff --git a/security/keys/big_key.c b/security/keys/big_key.c index fa728f662a6f..1957275ad2af 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -353,7 +353,7 @@ void big_key_describe(const struct key *key, struct seq_file *m) * read the key data * - the key's semaphore is read-locked */ -long big_key_read(const struct key *key, char __user *buffer, size_t buflen) +long big_key_read(const struct key *key, char *buffer, size_t buflen) { size_t datalen = (size_t)key->payload.data[big_key_len]; long ret; @@ -392,9 +392,8 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) ret = datalen; - /* copy decrypted data to user */ - if (copy_to_user(buffer, buf->virt, datalen) != 0) - ret = -EFAULT; + /* copy out decrypted data */ + memcpy(buffer, buf->virt, datalen); err_fput: fput(file); @@ -402,9 +401,7 @@ long big_key_read(const struct key *key, char __user *buffer, 
size_t buflen) big_key_free_buffer(buf); } else { ret = datalen; - if (copy_to_user(buffer, key->payload.data[big_key_data], - datalen) != 0) - ret = -EFAULT; + memcpy(buffer, key->payload.data[big_key_data], datalen); } return ret; diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index d92cbf9687c3..571f6d486838 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -895,14 +895,14 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) } /* - * encrypted_read - format and copy the encrypted data to userspace + * encrypted_read - format and copy out the encrypted data * * The resulting datablob format is: * * * On success, return to userspace the encrypted key datablob size. */ -static long encrypted_read(const struct key *key, char __user *buffer, +static long encrypted_read(const struct key *key, char *buffer, size_t buflen) { struct encrypted_key_payload *epayload; @@ -950,8 +950,7 @@ static long encrypted_read(const struct key *key, char __user *buffer, key_put(mkey); memzero_explicit(derived_key, sizeof(derived_key)); - if (copy_to_user(buffer, ascii_buf, asciiblob_len) != 0) - ret = -EFAULT; + memcpy(buffer, ascii_buf, asciiblob_len); kzfree(ascii_buf); return asciiblob_len; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index e00e20204de0..4b6a084e323b 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -742,6 +742,21 @@ long keyctl_keyring_search(key_serial_t ringid, return ret; } +/* + * Call the read method + */ +static long __keyctl_read_key(struct key *key, char *buffer, size_t buflen) +{ + long ret; + + down_read(&key->sem); + ret = key_validate(key); + if (ret == 0) + ret = key->type->read(key, buffer, buflen); + up_read(&key->sem); + return ret; +} + /* * Read a key's payload. 
* @@ -757,26 +772,27 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) struct key *key; key_ref_t key_ref; long ret; + char *key_data; /* find the key first */ key_ref = lookup_user_key(keyid, 0, 0); if (IS_ERR(key_ref)) { ret = -ENOKEY; - goto error; + goto out; } key = key_ref_to_ptr(key_ref); ret = key_read_state(key); if (ret < 0) - goto error2; /* Negatively instantiated */ + goto key_put_out; /* Negatively instantiated */ /* see if we can read it directly */ ret = key_permission(key_ref, KEY_NEED_READ); if (ret == 0) goto can_read_key; if (ret != -EACCES) - goto error2; + goto key_put_out; /* we can't; see if it's searchable from this process's keyrings * - we automatically take account of the fact that it may be @@ -784,26 +800,51 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) */ if (!is_key_possessed(key_ref)) { ret = -EACCES; - goto error2; + goto key_put_out; } /* the key is probably readable - now try to read it */ can_read_key: - ret = -EOPNOTSUPP; - if (key->type->read) { - /* Read the data with the semaphore held (since we might sleep) - * to protect against the key being updated or revoked. - */ - down_read(&key->sem); - ret = key_validate(key); - if (ret == 0) - ret = key->type->read(key, buffer, buflen); - up_read(&key->sem); + if (!key->type->read) { + ret = -EOPNOTSUPP; + goto key_put_out; } -error2: + if (!buffer || !buflen) { + /* Get the key length from the read method */ + ret = __keyctl_read_key(key, NULL, 0); + goto key_put_out; + } + + /* + * Read the data with the semaphore held (since we might sleep) + * to protect against the key being updated or revoked. + * + * Allocating a temporary buffer to hold the keys before + * transferring them to user buffer to avoid potential + * deadlock involving page fault and mmap_sem. 
+ */ + key_data = kmalloc(buflen, GFP_KERNEL); + + if (!key_data) { + ret = -ENOMEM; + goto key_put_out; + } + ret = __keyctl_read_key(key, key_data, buflen); + + /* + * Read methods will just return the required length without + * any copying if the provided length isn't large enough. + */ + if (ret > 0 && ret <= buflen) { + if (copy_to_user(buffer, key_data, ret)) + ret = -EFAULT; + } + kzfree(key_data); + +key_put_out: key_put(key); -error: +out: return ret; } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 359b9cba3d0d..f7cf371bcd2a 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -432,7 +432,6 @@ static int keyring_read_iterator(const void *object, void *data) { struct keyring_read_iterator_context *ctx = data; const struct key *key = keyring_ptr_to_key(object); - int ret; kenter("{%s,%d},,{%zu/%zu}", key->type->name, key->serial, ctx->count, ctx->buflen); @@ -440,10 +439,7 @@ static int keyring_read_iterator(const void *object, void *data) if (ctx->count >= ctx->buflen) return 1; - ret = put_user(key->serial, ctx->buffer); - if (ret < 0) - return ret; - ctx->buffer++; + *ctx->buffer++ = key->serial; ctx->count += sizeof(key->serial); return 0; } diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 1d34b2a5f485..13ac3b1e57da 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -27,7 +27,7 @@ static int request_key_auth_instantiate(struct key *, static void request_key_auth_describe(const struct key *, struct seq_file *); static void request_key_auth_revoke(struct key *); static void request_key_auth_destroy(struct key *); -static long request_key_auth_read(const struct key *, char __user *, size_t); +static long request_key_auth_read(const struct key *, char *, size_t); /* * The request-key authorisation key type definition. 
@@ -85,7 +85,7 @@ static void request_key_auth_describe(const struct key *key, * - the key's semaphore is read-locked */ static long request_key_auth_read(const struct key *key, - char __user *buffer, size_t buflen) + char *buffer, size_t buflen) { struct request_key_auth *rka = get_request_key_auth(key); size_t datalen; @@ -102,8 +102,7 @@ static long request_key_auth_read(const struct key *key, if (buflen > datalen) buflen = datalen; - if (copy_to_user(buffer, rka->callout_info, buflen) != 0) - ret = -EFAULT; + memcpy(buffer, rka->callout_info, buflen); } return ret; diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 98aa89ff7bfd..01e8544f79a5 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -1136,11 +1136,10 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep) * trusted_read - copy the sealed blob data to userspace in hex. * On success, return to userspace the trusted key datablob size. */ -static long trusted_read(const struct key *key, char __user *buffer, +static long trusted_read(const struct key *key, char *buffer, size_t buflen) { const struct trusted_key_payload *p; - char *ascii_buf; char *bufp; int i; @@ -1149,18 +1148,9 @@ static long trusted_read(const struct key *key, char __user *buffer, return -EINVAL; if (buffer && buflen >= 2 * p->blob_len) { - ascii_buf = kmalloc(2 * p->blob_len, GFP_KERNEL); - if (!ascii_buf) - return -ENOMEM; - - bufp = ascii_buf; + bufp = buffer; for (i = 0; i < p->blob_len; i++) bufp = hex_byte_pack(bufp, p->blob[i]); - if (copy_to_user(buffer, ascii_buf, 2 * p->blob_len) != 0) { - kzfree(ascii_buf); - return -EFAULT; - } - kzfree(ascii_buf); } return 2 * p->blob_len; } diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index 9f558bedba23..0e723b676aef 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -172,7 +172,7 @@ EXPORT_SYMBOL_GPL(user_describe); * read the key data * - the key's semaphore is 
read-locked */ -long user_read(const struct key *key, char __user *buffer, size_t buflen) +long user_read(const struct key *key, char *buffer, size_t buflen) { const struct user_key_payload *upayload; long ret; @@ -185,8 +185,7 @@ long user_read(const struct key *key, char __user *buffer, size_t buflen) if (buflen > upayload->datalen) buflen = upayload->datalen; - if (copy_to_user(buffer, upayload->data, buflen) != 0) - ret = -EFAULT; + memcpy(buffer, upayload->data, buflen); } return ret; -- GitLab From 050272a0423e68207fd2367831ae610680129062 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Apr 2020 08:01:25 +0200 Subject: [PATCH 1078/1278] Linux 4.14.177 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9db2e7f90769..d81fb98737f7 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 176 +SUBLEVEL = 177 EXTRAVERSION = NAME = Petit Gorille -- GitLab From f93b88174e35d2295c59892c94c0f448d0291a96 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 27 Mar 2020 12:12:50 +0100 Subject: [PATCH 1079/1278] ANDROID: cuttlefish_defconfig: enable CONFIG_DEBUG_LIST For lots of good security reasons, this config option needs to be enabled Bug: 152470236 Signed-off-by: Greg Kroah-Hartman Change-Id: I96a88bbee9c4d17be97ed63262dbab2ef31fee79 --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 83f6f54d009a..45588a1f364d 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -463,6 +463,7 @@ CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_DETECT_HUNG_TASK is not set CONFIG_PANIC_TIMEOUT=5 CONFIG_SCHEDSTATS=y +CONFIG_DEBUG_LIST=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ENABLE_DEFAULT_TRACERS=y CONFIG_TEST_MEMINIT=y 
diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 9d2d0f051085..25689b73e594 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -489,6 +489,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_PANIC_TIMEOUT=5 CONFIG_SCHEDSTATS=y +CONFIG_DEBUG_LIST=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ENABLE_DEFAULT_TRACERS=y CONFIG_TEST_MEMINIT=y -- GitLab From 07903de79b17694f9ced7dd3439c449b539a3a4d Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Fri, 24 Apr 2020 10:02:48 -0700 Subject: [PATCH 1080/1278] ANDROID: Remove VLA from uid_sys_stats.c Remove one more VLA that was not caught. Bug: 154930031 Change-Id: I2c6c1f6d5e6ef23769c117a47689e1539b85c882 Signed-off-by: Alistair Delva --- drivers/misc/uid_sys_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c index d04faf312ed6..c82424ef3c7f 100644 --- a/drivers/misc/uid_sys_stats.c +++ b/drivers/misc/uid_sys_stats.c @@ -126,7 +126,7 @@ static void get_full_task_comm(struct task_entry *task_entry, int i = 0, offset = 0, len = 0; /* save one byte for terminating null character */ int unused_len = MAX_TASK_COMM_LEN - TASK_COMM_LEN - 1; - char buf[unused_len]; + char buf[MAX_TASK_COMM_LEN - TASK_COMM_LEN - 1]; struct mm_struct *mm = task->mm; /* fill the first TASK_COMM_LEN bytes with thread name */ -- GitLab From 171824aa922cf2406a746c0bab45f4ffeb250a97 Mon Sep 17 00:00:00 2001 From: "hyeongseok.kim" Date: Tue, 21 Apr 2020 08:42:08 +0900 Subject: [PATCH 1081/1278] ANDROID: dm-bow: Fix not to skip trim at fragmented range If free blocks hole is smaller than discard_granularity, TRIM to this range can be skipped. Fix this by changing the granularity to 4kb at dm-bow layer, not to skip TRIM to every tiny free blocks.
Bug: 154411183 Signed-off-by: hyeongseok.kim Cc: hyeongseok.kim Change-Id: Ic7c33d94a016d0ad5a75514eae1056c328c9c1ba (cherry picked from commit ca986e448cf2f92f1bb672fe9bc0b651fa6b76c5) Signed-off-by: Paul Lawrence --- drivers/md/dm-bow.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-bow.c b/drivers/md/dm-bow.c index 89913a23cc0c..0d1ddb2b6e61 100644 --- a/drivers/md/dm-bow.c +++ b/drivers/md/dm-bow.c @@ -658,6 +658,7 @@ static int dm_bow_ctr(struct dm_target *ti, unsigned int argc, char **argv) bc->dev->bdev->bd_queue->limits.max_discard_sectors = 1 << 15; bc->forward_trims = false; } else { + bc->dev->bdev->bd_queue->limits.discard_granularity = 1 << 12; bc->forward_trims = true; } -- GitLab From fdd560ec460793a02cf226cc4f2aac6d0e637e5c Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 22 Apr 2020 12:39:09 -0700 Subject: [PATCH 1082/1278] ANDROID: Incremental fs: Use simple compression in log buffer Bug: 154342202 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: Ibcc641dd92596018c9f10b5bc7bd0db2642a80c7 (cherry picked from commit b6b4a3a404ccd9c62347e27c4fc7883d776c2cbb) --- fs/incfs/data_mgmt.c | 310 ++++++++++-------- fs/incfs/data_mgmt.h | 76 +++-- fs/incfs/vfs.c | 2 +- .../selftests/filesystems/incfs/incfs_test.c | 89 +++-- 4 files changed, 291 insertions(+), 186 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 4ec08eb52461..d9c43d5cca19 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -34,8 +34,7 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, mutex_init(&mi->mi_pending_reads_mutex); init_waitqueue_head(&mi->mi_pending_reads_notif_wq); init_waitqueue_head(&mi->mi_log.ml_notif_wq); - rwlock_init(&mi->mi_log.rl_access_lock); - spin_lock_init(&mi->mi_log.rl_logging_lock); + spin_lock_init(&mi->mi_log.rl_lock); INIT_LIST_HEAD(&mi->mi_reads_list_head); error = incfs_realloc_mount_info(mi, options); @@ -53,6 +52,7 @@ int incfs_realloc_mount_info(struct mount_info 
*mi, struct mount_options *options) { void *new_buffer = NULL; + void *old_buffer; size_t new_buffer_size = 0; if (options->read_log_pages != mi->mi_options.read_log_pages) { @@ -71,16 +71,18 @@ int incfs_realloc_mount_info(struct mount_info *mi, return -ENOMEM; } - write_lock(&mi->mi_log.rl_access_lock); - kfree(mi->mi_log.rl_ring_buf); - WRITE_ONCE(mi->mi_log.rl_ring_buf, new_buffer); - WRITE_ONCE(mi->mi_log.rl_size, - new_buffer_size / sizeof(*mi->mi_log.rl_ring_buf)); - log_state = READ_ONCE(mi->mi_log.rl_state); - log_state.generation_id++; - log_state.next_index = log_state.current_pass_no = 0; - WRITE_ONCE(mi->mi_log.rl_state, log_state); - write_unlock(&mi->mi_log.rl_access_lock); + spin_lock(&mi->mi_log.rl_lock); + old_buffer = mi->mi_log.rl_ring_buf; + mi->mi_log.rl_ring_buf = new_buffer; + mi->mi_log.rl_size = new_buffer_size; + log_state = (struct read_log_state){ + .generation_id = mi->mi_log.rl_head.generation_id + 1, + }; + mi->mi_log.rl_head = log_state; + mi->mi_log.rl_tail = log_state; + spin_unlock(&mi->mi_log.rl_lock); + + kfree(old_buffer); } mi->mi_options = *options; @@ -246,37 +248,121 @@ static ssize_t decompress(struct mem_range src, struct mem_range dst) return result; } +static void log_read_one_record(struct read_log *rl, struct read_log_state *rs) +{ + union log_record *record = + (union log_record *)((u8 *)rl->rl_ring_buf + rs->next_offset); + size_t record_size; + + switch (record->full_record.type) { + case FULL: + rs->base_record = record->full_record; + record_size = sizeof(record->full_record); + break; + + case SAME_FILE: + rs->base_record.block_index = + record->same_file_record.block_index; + rs->base_record.absolute_ts_us += + record->same_file_record.relative_ts_us; + record_size = sizeof(record->same_file_record); + break; + + case SAME_FILE_NEXT_BLOCK: + ++rs->base_record.block_index; + rs->base_record.absolute_ts_us += + record->same_file_next_block.relative_ts_us; + record_size = sizeof(record->same_file_next_block); + 
break; + + case SAME_FILE_NEXT_BLOCK_SHORT: + ++rs->base_record.block_index; + rs->base_record.absolute_ts_us += + record->same_file_next_block_short.relative_ts_us; + record_size = sizeof(record->same_file_next_block_short); + break; + } + + rs->next_offset += record_size; + if (rs->next_offset > rl->rl_size - sizeof(*record)) { + rs->next_offset = 0; + ++rs->current_pass_no; + } + ++rs->current_record_no; +} + static void log_block_read(struct mount_info *mi, incfs_uuid_t *id, - int block_index, bool timed_out) + int block_index) { struct read_log *log = &mi->mi_log; - struct read_log_state state; + struct read_log_state *head, *tail; s64 now_us = ktime_to_us(ktime_get()); - int rl_size; - struct read_log_record record = { - .file_id = *id, - .block_index = block_index, - .timed_out = timed_out, - .timestamp_us = now_us - }; - - read_lock(&log->rl_access_lock); - rl_size = READ_ONCE(log->rl_size); - if (rl_size != 0) { - spin_lock(&log->rl_logging_lock); - state = READ_ONCE(log->rl_state); - log->rl_ring_buf[state.next_index] = record; - if (++state.next_index == rl_size) { - state.next_index = 0; - ++state.current_pass_no; - } - WRITE_ONCE(log->rl_state, state); - spin_unlock(&log->rl_logging_lock); + s64 relative_us; + union log_record record; + size_t record_size; + + spin_lock(&log->rl_lock); + if (log->rl_size == 0) { + spin_unlock(&log->rl_lock); + return; } - read_unlock(&log->rl_access_lock); - if (rl_size != 0) - wake_up_all(&log->ml_notif_wq); + head = &log->rl_head; + tail = &log->rl_tail; + relative_us = now_us - head->base_record.absolute_ts_us; + + if (memcmp(id, &head->base_record.file_id, sizeof(incfs_uuid_t)) || + relative_us >= 1ll << 32) { + record.full_record = (struct full_record){ + .type = FULL, + .block_index = block_index, + .file_id = *id, + .absolute_ts_us = now_us, + }; + record_size = sizeof(struct full_record); + } else if (block_index != head->base_record.block_index + 1 || + relative_us >= 1 << 30) { + record.same_file_record = 
(struct same_file_record){ + .type = SAME_FILE, + .block_index = block_index, + .relative_ts_us = relative_us, + }; + record_size = sizeof(struct same_file_record); + } else if (relative_us >= 1 << 14) { + record.same_file_next_block = (struct same_file_next_block){ + .type = SAME_FILE_NEXT_BLOCK, + .relative_ts_us = relative_us, + }; + record_size = sizeof(struct same_file_next_block); + } else { + record.same_file_next_block_short = + (struct same_file_next_block_short){ + .type = SAME_FILE_NEXT_BLOCK_SHORT, + .relative_ts_us = relative_us, + }; + record_size = sizeof(struct same_file_next_block_short); + } + + head->base_record.file_id = *id; + head->base_record.block_index = block_index; + head->base_record.absolute_ts_us = now_us; + + /* Advance tail beyond area we are going to overwrite */ + while (tail->current_pass_no < head->current_pass_no && + tail->next_offset < head->next_offset + record_size) + log_read_one_record(log, tail); + + memcpy(((u8 *)log->rl_ring_buf) + head->next_offset, &record, + record_size); + head->next_offset += record_size; + if (head->next_offset > log->rl_size - sizeof(record)) { + head->next_offset = 0; + ++head->current_pass_no; + } + ++head->current_record_no; + + spin_unlock(&log->rl_lock); + wake_up_all(&log->ml_notif_wq); } static int validate_hash_tree(struct file *bf, struct data_file *df, @@ -708,8 +794,7 @@ static int wait_for_data_block(struct data_file *df, int block_index, mi = df->df_mount_info; if (timeout_ms == 0) { - log_block_read(mi, &df->df_id, block_index, - true /*timed out*/); + log_block_read(mi, &df->df_id, block_index); return -ETIME; } @@ -728,8 +813,7 @@ static int wait_for_data_block(struct data_file *df, int block_index, if (wait_res == 0) { /* Wait has timed out */ - log_block_read(mi, &df->df_id, block_index, - true /*timed out*/); + log_block_read(mi, &df->df_id, block_index); return -ETIME; } if (wait_res < 0) { @@ -825,7 +909,7 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct 
data_file *df, } if (result >= 0) - log_block_read(mi, &df->df_id, index, false /*timed out*/); + log_block_read(mi, &df->df_id, index); out: return result; @@ -1195,49 +1279,29 @@ struct read_log_state incfs_get_log_state(struct mount_info *mi) struct read_log *log = &mi->mi_log; struct read_log_state result; - read_lock(&log->rl_access_lock); - spin_lock(&log->rl_logging_lock); - result = READ_ONCE(log->rl_state); - spin_unlock(&log->rl_logging_lock); - read_unlock(&log->rl_access_lock); + spin_lock(&log->rl_lock); + result = log->rl_head; + spin_unlock(&log->rl_lock); return result; } -static u64 calc_record_count(const struct read_log_state *state, int rl_size) -{ - return state->current_pass_no * (u64)rl_size + state->next_index; -} - int incfs_get_uncollected_logs_count(struct mount_info *mi, - struct read_log_state state) + const struct read_log_state *state) { struct read_log *log = &mi->mi_log; - struct read_log_state rl_state; - int rl_size; - u64 count; - - read_lock(&log->rl_access_lock); - rl_size = READ_ONCE(log->rl_size); - spin_lock(&log->rl_logging_lock); - rl_state = READ_ONCE(log->rl_state); - spin_unlock(&log->rl_logging_lock); - read_unlock(&log->rl_access_lock); - - count = calc_record_count(&rl_state, rl_size); - if (rl_state.generation_id == state.generation_id) - count -= calc_record_count(&state, rl_size); - return min_t(int, count, rl_size); -} - -static void fill_pending_read_from_log_record( - struct incfs_pending_read_info *dest, const struct read_log_record *src, - struct read_log_state *state, u64 log_size) -{ - dest->file_id = src->file_id; - dest->block_index = src->block_index; - dest->serial_number = - state->current_pass_no * log_size + state->next_index; - dest->timestamp_us = src->timestamp_us; + u32 generation; + u64 head_no, tail_no; + + spin_lock(&log->rl_lock); + tail_no = log->rl_tail.current_record_no; + head_no = log->rl_head.current_record_no; + generation = log->rl_head.generation_id; + spin_unlock(&log->rl_lock); + + 
if (generation != state->generation_id) + return head_no - tail_no; + else + return head_no - max_t(u64, tail_no, state->current_record_no); } int incfs_collect_logged_reads(struct mount_info *mi, @@ -1245,82 +1309,48 @@ int incfs_collect_logged_reads(struct mount_info *mi, struct incfs_pending_read_info *reads, int reads_size) { - struct read_log *log = &mi->mi_log; - struct read_log_state live_state; int dst_idx; - int rl_size; - int result = 0; - u64 read_count; - u64 written_count; - - read_lock(&log->rl_access_lock); - - rl_size = READ_ONCE(log->rl_size); - spin_lock(&log->rl_logging_lock); - live_state = READ_ONCE(log->rl_state); - spin_unlock(&log->rl_logging_lock); + struct read_log *log = &mi->mi_log; + struct read_log_state *head, *tail; - if (reader_state->generation_id != live_state.generation_id) { - reader_state->generation_id = live_state.generation_id; - reader_state->current_pass_no = reader_state->next_index = 0; - } + spin_lock(&log->rl_lock); + head = &log->rl_head; + tail = &log->rl_tail; - read_count = calc_record_count(reader_state, rl_size); - written_count = calc_record_count(&live_state, rl_size); - if (read_count == written_count) { - result = 0; - goto out; - } - if (reader_state->next_index >= rl_size) { - result = -ERANGE; - goto out; - } + if (reader_state->generation_id != head->generation_id) { + pr_debug("read ptr is wrong generation: %u/%u", + reader_state->generation_id, head->generation_id); - if (read_count > written_count) { - /* This reader is somehow ahead of the writer. */ - pr_debug("incfs: Log reader is ahead of writer\n"); - *reader_state = live_state; + *reader_state = (struct read_log_state){ + .generation_id = head->generation_id, + }; } - if (written_count - read_count > rl_size) { - /* - * Reading pointer is too far behind, - * start from the record following the write pointer. 
- */ - pr_debug( - "incfs: read pointer is behind, moving: %u/%u -> %u/%u / %u\n", - (u32)reader_state->next_index, - (u32)reader_state->current_pass_no, - (u32)live_state.next_index, - (u32)live_state.current_pass_no - 1, (u32)rl_size); + if (reader_state->current_record_no < tail->current_record_no) { + pr_debug("read ptr is behind, moving: %u/%u -> %u/%u\n", + (u32)reader_state->next_offset, + (u32)reader_state->current_pass_no, + (u32)tail->next_offset, (u32)tail->current_pass_no); - *reader_state = (struct read_log_state){ - .next_index = live_state.next_index, - .current_pass_no = live_state.current_pass_no - 1, - }; + *reader_state = *tail; } for (dst_idx = 0; dst_idx < reads_size; dst_idx++) { - if (reader_state->next_index == live_state.next_index && - reader_state->current_pass_no == live_state.current_pass_no) + if (reader_state->current_record_no == head->current_record_no) break; - fill_pending_read_from_log_record( - &reads[dst_idx], - &log->rl_ring_buf[reader_state->next_index], - reader_state, rl_size); + log_read_one_record(log, reader_state); - reader_state->next_index++; - if (reader_state->next_index == rl_size) { - reader_state->next_index = 0; - reader_state->current_pass_no++; - } + reads[dst_idx] = (struct incfs_pending_read_info){ + .file_id = reader_state->base_record.file_id, + .block_index = reader_state->base_record.block_index, + .serial_number = reader_state->current_record_no, + .timestamp_us = reader_state->base_record.absolute_ts_us + }; } - result = dst_idx; -out: - read_unlock(&log->rl_access_lock); - return result; + spin_unlock(&log->rl_lock); + return dst_idx; } bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs) diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index b860997d0bb6..b7aecdd5bf4a 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -20,50 +20,74 @@ #define SEGMENTS_PER_FILE 3 -struct read_log_record { - u32 block_index : 31; - - u32 timed_out : 1; - - u64 timestamp_us; +enum 
LOG_RECORD_TYPE { + FULL, + SAME_FILE, + SAME_FILE_NEXT_BLOCK, + SAME_FILE_NEXT_BLOCK_SHORT, +}; +struct full_record { + enum LOG_RECORD_TYPE type : 2; /* FULL */ + u32 block_index : 30; incfs_uuid_t file_id; -} __packed; + u64 absolute_ts_us; +} __packed; /* 28 bytes */ + +struct same_file_record { + enum LOG_RECORD_TYPE type : 2; /* SAME_FILE */ + u32 block_index : 30; + u32 relative_ts_us; /* max 2^32 us ~= 1 hour (1:11:30) */ +} __packed; /* 12 bytes */ + +struct same_file_next_block { + enum LOG_RECORD_TYPE type : 2; /* SAME_FILE_NEXT_BLOCK */ + u32 relative_ts_us : 30; /* max 2^30 us ~= 15 min (17:50) */ +} __packed; /* 4 bytes */ + +struct same_file_next_block_short { + enum LOG_RECORD_TYPE type : 2; /* SAME_FILE_NEXT_BLOCK_SHORT */ + u16 relative_ts_us : 14; /* max 2^14 us ~= 16 ms */ +} __packed; /* 2 bytes */ + +union log_record { + struct full_record full_record; + struct same_file_record same_file_record; + struct same_file_next_block same_file_next_block; + struct same_file_next_block_short same_file_next_block_short; +}; struct read_log_state { /* Log buffer generation id, incremented on configuration changes */ - u32 generation_id : 8; + u32 generation_id; - /* Next slot in rl_ring_buf to write into. */ - u32 next_index : 24; + /* Offset in rl_ring_buf to write into. */ + u32 next_offset; /* Current number of writer passes over rl_ring_buf */ u32 current_pass_no; + + /* Current full_record to diff against */ + struct full_record base_record; + + /* Current record number counting from configuration change */ + u64 current_record_no; }; /* A ring buffer to save records about data blocks which were recently read. */ struct read_log { - struct read_log_record *rl_ring_buf; + void *rl_ring_buf; int rl_size; - struct read_log_state rl_state; + struct read_log_state rl_head; - /* - * A lock for _all_ accesses to the struct, to protect against remounts. - * Taken for writing when resizing the buffer. 
- */ - rwlock_t rl_access_lock; + struct read_log_state rl_tail; - /* - * A lock to protect the actual logging - adding a new record. - * Note: ALWAYS taken after and under the |rl_access_lock|. - */ - spinlock_t rl_logging_lock; + /* A lock to protect the above fields */ + spinlock_t rl_lock; - /* - * A queue of waiters who want to be notified about reads. - */ + /* A queue of waiters who want to be notified about reads */ wait_queue_head_t ml_notif_wq; }; @@ -281,7 +305,7 @@ int incfs_collect_logged_reads(struct mount_info *mi, int reads_size); struct read_log_state incfs_get_log_state(struct mount_info *mi); int incfs_get_uncollected_logs_count(struct mount_info *mi, - struct read_log_state state); + const struct read_log_state *state); static inline struct inode_info *get_incfs_node(struct inode *inode) { diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 0a13821f5b59..ec028fc96303 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -643,7 +643,7 @@ static __poll_t log_poll(struct file *file, poll_table *wait) __poll_t ret = 0; poll_wait(file, &mi->mi_log.ml_notif_wq, wait); - count = incfs_get_uncollected_logs_count(mi, log_state->state); + count = incfs_get_uncollected_logs_count(mi, &log_state->state); if (count >= mi->mi_options.read_log_wakeup_count) ret = EPOLLIN | EPOLLRDNORM; diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 150bd41d33cb..0fc4c04c514a 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -1932,16 +1932,19 @@ static int hash_tree_test(char *mount_dir) return TEST_FAILURE; } +enum expected_log { FULL_LOG, NO_LOG, PARTIAL_LOG }; + static int validate_logs(char *mount_dir, int log_fd, struct test_file *file, - bool no_rlog) + enum expected_log expected_log) { uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; - struct incfs_pending_read_info prs[100] = {}; + struct incfs_pending_read_info 
prs[2048] = {}; int prs_size = ARRAY_SIZE(prs); int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int expected_read_block_cnt; int res; int read_count; - int i; + int i, j; char *filename = concat_file_name(mount_dir, file->name); int fd; @@ -1952,17 +1955,29 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file, if (block_cnt > prs_size) block_cnt = prs_size; + expected_read_block_cnt = block_cnt; for (i = 0; i < block_cnt; i++) { res = pread(fd, data, sizeof(data), INCFS_DATA_FILE_BLOCK_SIZE * i); + + /* Make some read logs of type SAME_FILE_NEXT_BLOCK */ + if (i % 10 == 0) + usleep(20000); + + /* Skip some blocks to make logs of type SAME_FILE */ + if (i % 10 == 5) { + ++i; + --expected_read_block_cnt; + } + if (res <= 0) goto failure; } - read_count = - wait_for_pending_reads(log_fd, no_rlog ? 10 : 0, prs, prs_size); - if (no_rlog) { + read_count = wait_for_pending_reads( + log_fd, expected_log == NO_LOG ? 10 : 0, prs, prs_size); + if (expected_log == NO_LOG) { if (read_count == 0) goto success; if (read_count < 0) @@ -1979,14 +1994,26 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file, goto failure; } - if (read_count != block_cnt) { + i = 0; + if (expected_log == PARTIAL_LOG) { + if (read_count == 0) { + ksft_print_msg("No logs %s.\n", file->name); + goto failure; + } + + for (i = 0, j = 0; j < expected_read_block_cnt - read_count; + i++, j++) + if (i % 10 == 5) + ++i; + + } else if (read_count != expected_read_block_cnt) { ksft_print_msg("Bad log read count %s %d %d.\n", file->name, - read_count, block_cnt); + read_count, expected_read_block_cnt); goto failure; } - for (i = 0; i < read_count; i++) { - struct incfs_pending_read_info *read = &prs[i]; + for (j = 0; j < read_count; i++, j++) { + struct incfs_pending_read_info *read = &prs[j]; if (!same_id(&read->file_id, &file->id)) { ksft_print_msg("Bad log read ino %s\n", file->name); @@ -1999,8 +2026,8 @@ static int validate_logs(char 
*mount_dir, int log_fd, struct test_file *file, goto failure; } - if (i != 0) { - unsigned long psn = prs[i - 1].serial_number; + if (j != 0) { + unsigned long psn = prs[j - 1].serial_number; if (read->serial_number != psn + 1) { ksft_print_msg("Bad log read sn %s %d %d.\n", @@ -2015,6 +2042,9 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file, file->name); goto failure; } + + if (i % 10 == 5) + ++i; } success: @@ -2065,7 +2095,7 @@ static int read_log_test(char *mount_dir) for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (validate_logs(mount_dir, log_fd, file, false)) + if (validate_logs(mount_dir, log_fd, file, FULL_LOG)) goto failure; } @@ -2093,7 +2123,7 @@ static int read_log_test(char *mount_dir) for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (validate_logs(mount_dir, log_fd, file, false)) + if (validate_logs(mount_dir, log_fd, file, FULL_LOG)) goto failure; } @@ -2120,7 +2150,30 @@ static int read_log_test(char *mount_dir) for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (validate_logs(mount_dir, log_fd, file, true)) + if (validate_logs(mount_dir, log_fd, file, NO_LOG)) + goto failure; + } + + /* + * Remount and check that logs start working again + */ + drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY | O_CLOEXEC); + if (drop_caches == -1) + goto failure; + i = write(drop_caches, "3", 1); + close(drop_caches); + if (i != 1) + goto failure; + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=1", + true) != 0) + goto failure; + + /* Validate data again */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (validate_logs(mount_dir, log_fd, file, PARTIAL_LOG)) goto failure; } @@ -2143,7 +2196,7 @@ static int read_log_test(char *mount_dir) for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (validate_logs(mount_dir, log_fd, file, false)) + if 
(validate_logs(mount_dir, log_fd, file, FULL_LOG)) goto failure; } @@ -2294,8 +2347,6 @@ static int validate_ranges(const char *mount_dir, struct test_file *file) if (fba.start_index >= block_cnt) { if (fba.index_out != fba.start_index) { - printf("Paul: %d, %d\n", (int)fba.index_out, - (int)fba.start_index); error = -EINVAL; goto out; } @@ -2647,7 +2698,7 @@ int main(int argc, char *argv[]) rmdir(mount_dir); if (fails > 0) - ksft_exit_pass(); + ksft_exit_fail(); else ksft_exit_pass(); return 0; -- GitLab From 8654841a61c359ac26cd3634ea2081436736d113 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Thu, 20 Sep 2018 10:12:53 -0700 Subject: [PATCH 1083/1278] UPSTREAM: sysrq: Use panic() to force a crash (Upstream commit 8341f2f222d729688014ce8306727fdb9798d37e.) sysrq_handle_crash() currently forces a crash by dereferencing a NULL pointer, which is undefined behavior in C. Just call panic() instead, which is simpler and doesn't depend on compiler specific handling of the undefined behavior. Remove the comment on why the RCU lock needs to be released, it isn't accurate anymore since the crash now isn't handled by the page fault handler (for reference: the comment was added by commit 984cf355aeaa ("sysrq: Fix warning in sysrq generated crash.")). Releasing the lock is still good practice though. 
Suggested-by: Greg Kroah-Hartman Signed-off-by: Matthias Kaehlcke Signed-off-by: Greg Kroah-Hartman Bug: 154769329 Change-Id: I04d57e4d84f59b04454fb77f4f62e9d9d6eee10d Signed-off-by: Andrey Konovalov --- drivers/tty/sysrq.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 4c716ddd6599..ffe78c36915e 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -134,17 +134,10 @@ static struct sysrq_key_op sysrq_unraw_op = { static void sysrq_handle_crash(int key) { - char *killer = NULL; - - /* we need to release the RCU read lock here, - * otherwise we get an annoying - * 'BUG: sleeping function called from invalid context' - * complaint from the kernel before the panic. - */ + /* release the RCU read lock before crashing */ rcu_read_unlock(); - panic_on_oops = 1; /* force panic */ - wmb(); - *killer = 1; + + panic("sysrq triggered crash\n"); } static struct sysrq_key_op sysrq_crash_op = { .handler = sysrq_handle_crash, -- GitLab From 69b005be6745d5779c33e2c93d0eb5f8fa3469b7 Mon Sep 17 00:00:00 2001 From: Gurchetan Singh Date: Thu, 23 Apr 2020 15:36:45 -0700 Subject: [PATCH 1084/1278] ANDROID: drm/virtio: rebase to latest virgl/drm-misc-next (take 2) So ToT + v4.14 can share same hypercall interface. 
TEST=compile Signed-off-by: Gurchetan Singh Signed-off-by: Lingfeng Yang Bug: 153580313 Change-Id: Ifd6d8d50983461380f24988cef5a0435a2ecc59b --- drivers/gpu/drm/virtio/virtgpu_drv.h | 10 +++---- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 38 ++++++++++++++++++------- drivers/gpu/drm/virtio/virtgpu_kms.c | 32 ++++++++++++--------- drivers/gpu/drm/virtio/virtgpu_object.c | 25 ++++++++-------- drivers/gpu/drm/virtio/virtgpu_ttm.c | 7 +++-- drivers/gpu/drm/virtio/virtgpu_vq.c | 11 +++---- include/uapi/drm/virtgpu_drm.h | 35 ++++++++++++----------- include/uapi/linux/virtio_gpu.h | 38 +++++++++++-------------- 8 files changed, 110 insertions(+), 86 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index cb4ec764b522..9db4d6381bf1 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -59,7 +59,7 @@ struct virtio_gpu_object_params { /* 3d */ bool virgl; bool blob; - uint32_t blob_flags; + uint32_t blob_mem; uint32_t target; uint32_t bind; uint32_t depth; @@ -90,7 +90,7 @@ struct virtio_gpu_object { struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; bool created; - uint32_t blob_flags; + uint32_t blob_mem; }; #define gem_to_virtio_gpu_obj(gobj) \ container_of((gobj), struct virtio_gpu_object, gem_base) @@ -369,9 +369,9 @@ virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, void virtio_gpu_cmd_resource_create_blob(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo, - uint32_t ctx_id, uint32_t flags, - uint64_t size, uint64_t memory_id, - uint32_t nents, + uint32_t ctx_id, uint32_t blob_mem, + uint32_t blob_flags, uint64_t blob_id, + uint64_t size, uint32_t nents, struct virtio_gpu_mem_entry *ents); void virtio_gpu_cmd_map(struct virtio_gpu_device *vgdev, diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index df0a86e3562c..b8daaebc5dd6 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ 
b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -585,6 +585,7 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { void *buf; + uint32_t device_blob_mem = 0; int ret, si, nents; uint32_t handle = 0; struct scatterlist *sg; @@ -596,13 +597,29 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, struct virtio_gpu_device *vgdev = dev->dev_private; struct virtio_gpu_fpriv *vfpriv = file->driver_priv; bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); - bool mappable = rc_blob->flags & VIRTGPU_RES_BLOB_USE_MAPPABLE; - bool guest = rc_blob->flags & VIRTGPU_RES_BLOB_GUEST_MASK; + bool mappable = rc_blob->blob_flags & VIRTGPU_BLOB_FLAG_MAPPABLE; + bool has_guest = (rc_blob->blob_mem == VIRTGPU_BLOB_MEM_GUEST || + rc_blob->blob_mem == VIRTGPU_BLOB_MEM_HOST_GUEST); params.size = rc_blob->size; - params.blob_flags = rc_blob->flags; + params.blob_mem = rc_blob->blob_mem; params.blob = true; + if (rc_blob->blob_mem == VIRTGPU_BLOB_MEM_GUEST) + device_blob_mem = VIRTIO_GPU_BLOB_MEM_GUEST; + + if (vgdev->has_virgl_3d) { + if (rc_blob->blob_mem == VIRTGPU_BLOB_MEM_HOST) + device_blob_mem = VIRTIO_GPU_BLOB_MEM_HOST3D; + else if (rc_blob->blob_mem == VIRTGPU_BLOB_MEM_HOST_GUEST) + device_blob_mem = VIRTIO_GPU_BLOB_MEM_HOST3D_GUEST; + } else { + if (rc_blob->blob_mem == VIRTGPU_BLOB_MEM_HOST) + device_blob_mem = VIRTIO_GPU_BLOB_MEM_HOSTSYS; + else if (rc_blob->blob_mem == VIRTGPU_BLOB_MEM_HOST_GUEST) + device_blob_mem = VIRTIO_GPU_BLOB_MEM_HOSTSYS_GUEST; + } + if (rc_blob->cmd_size && vfpriv) { void *buf; void __user *cmd = u64_to_user_ptr(rc_blob->cmd); @@ -630,7 +647,7 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, goto err_free_obj; } - if (!guest) { + if (!has_guest) { nents = 0; } else if (use_dma_api) { obj->mapped = dma_map_sg(vgdev->vdev->dev.parent, @@ -642,7 +659,7 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, } ents = kzalloc(nents 
* sizeof(struct virtio_gpu_mem_entry), GFP_KERNEL); - if (guest) { + if (has_guest) { for_each_sg(obj->pages->sgl, sg, nents, si) { ents[si].addr = cpu_to_le64(use_dma_api ? sg_dma_address(sg) @@ -659,17 +676,18 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, } virtio_gpu_cmd_resource_create_blob(vgdev, obj, vfpriv->ctx_id, - rc_blob->flags, rc_blob->size, - rc_blob->memory_id, nents, - ents); + device_blob_mem, + rc_blob->blob_flags, + rc_blob->blob_id, + rc_blob->size, + nents, ents); ret = drm_gem_handle_create(file, &obj->gem_base, &handle); if (ret) goto err_fence_put; - if (!guest && mappable) { + if (!has_guest && mappable) virtio_gpu_cmd_map(vgdev, obj, obj->tbo.offset, fence); - } /* * No need to call virtio_gpu_object_reserve since the buffer is not diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 22434e34b4d8..b96a19005fb5 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -185,20 +185,24 @@ int virtio_gpu_init(struct drm_device *dev) } if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_BLOB)) { - if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_HOST_VISIBLE)) { - vgdev->cbar = 4; - vgdev->caddr = pci_resource_start(dev->pdev, vgdev->cbar); - vgdev->csize = pci_resource_len(dev->pdev, vgdev->cbar); - ret = pci_request_region(dev->pdev, vgdev->cbar, "virtio-gpu-coherent"); - if (ret != 0) { - DRM_WARN("Cannot request coherent memory bar\n"); - } else { - DRM_INFO("coherent host resources enabled, using %s bar %d," - "at 0x%lx, size %ld MB", dev_name(&dev->pdev->dev), - vgdev->cbar, vgdev->caddr, vgdev->csize >> 20); - - vgdev->has_host_visible = true; - } + vgdev->cbar = 4; + vgdev->caddr = pci_resource_start(dev->pdev, vgdev->cbar); + vgdev->csize = pci_resource_len(dev->pdev, vgdev->cbar); + ret = pci_request_region( + dev->pdev, + vgdev->cbar, + "virtio-gpu-coherent"); + if (ret != 0) { + DRM_WARN("Cannot request coherent memory 
bar\n"); + } else { + DRM_INFO("coherent host resources enabled\n"); + DRM_INFO( + "using %s bar %d, at 0x%lx, size %ld MB\n", + dev_name(&dev->pdev->dev), + vgdev->cbar, + vgdev->caddr, + vgdev->csize >> 20); + vgdev->has_host_visible = true; } vgdev->has_resource_blob = true; diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 67d52189cfb8..e442928d2848 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -95,18 +95,19 @@ static void virtio_gpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) } // define internally for testing purposes -#define VIRTGPU_RESOURCE_CACHE_MASK 0xf000 -#define VIRTGPU_RESOURCE_CACHE_CACHED 0x1000 -#define VIRTGPU_RESOURCE_CACHE_UNCACHED 0x2000 -#define VIRTGPU_RESOURCE_CACHE_WC 0x3000 +#define VIRTGPU_BLOB_MEM_CACHE_MASK 0xf000 +#define VIRTGPU_BLOB_MEM_CACHE_CACHED 0x1000 +#define VIRTGPU_BLOB_MEM_CACHE_UNCACHED 0x2000 +#define VIRTGPU_BLOB_MEM_CACHE_WC 0x3000 static void virtio_gpu_init_ttm_placement(struct virtio_gpu_object *vgbo) { u32 c = 1; u32 ttm_caching_flags = 0; - u32 cache_type = (vgbo->blob_flags & VIRTGPU_RESOURCE_CACHE_MASK); - u32 guest = (vgbo->blob_flags & VIRTGPU_RES_BLOB_GUEST_MASK); + u32 cache_type = (vgbo->blob_mem & VIRTGPU_BLOB_MEM_CACHE_MASK); + bool has_guest = (vgbo->blob_mem == VIRTGPU_BLOB_MEM_GUEST || + vgbo->blob_mem == VIRTGPU_BLOB_MEM_HOST_GUEST); vgbo->placement.placement = &vgbo->placement_code; vgbo->placement.busy_placement = &vgbo->placement_code; @@ -114,20 +115,20 @@ static void virtio_gpu_init_ttm_placement(struct virtio_gpu_object *vgbo) vgbo->placement_code.lpfn = 0; switch (cache_type) { - case VIRTGPU_RESOURCE_CACHE_CACHED: + case VIRTGPU_BLOB_MEM_CACHE_CACHED: ttm_caching_flags = TTM_PL_FLAG_CACHED; break; - case VIRTGPU_RESOURCE_CACHE_WC: + case VIRTGPU_BLOB_MEM_CACHE_WC: ttm_caching_flags = TTM_PL_FLAG_WC; break; - case VIRTGPU_RESOURCE_CACHE_UNCACHED: + case VIRTGPU_BLOB_MEM_CACHE_UNCACHED: 
ttm_caching_flags = TTM_PL_FLAG_UNCACHED; break; default: ttm_caching_flags = TTM_PL_MASK_CACHING; } - if (!guest && vgbo->blob) { + if (!has_guest && vgbo->blob) { vgbo->placement_code.flags = ttm_caching_flags | TTM_PL_FLAG_VRAM | TTM_PL_FLAG_NO_EVICT; @@ -172,11 +173,11 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, } bo->dumb = params->dumb; bo->blob = params->blob; - bo->blob_flags = params->blob_flags; + bo->blob_mem = params->blob_mem; if (params->virgl) { virtio_gpu_cmd_resource_create_3d(vgdev, bo, params, fence); - } else { + } else if (params->dumb) { virtio_gpu_cmd_create_resource(vgdev, bo, params, fence); } diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c index 5202fc4f51cd..6c1f02b4f17c 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ttm.c +++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c @@ -349,7 +349,7 @@ static struct ttm_tt *virtio_gpu_ttm_tt_create2(struct ttm_buffer_object *bo, struct virtio_gpu_device *vgdev; struct virtio_gpu_object *obj; struct virtio_gpu_ttm_tt *gtt; - uint32_t guest; + uint32_t has_guest; vgdev = virtio_gpu_get_vgdev(bo->bdev); obj = container_of(bo, struct virtio_gpu_object, tbo); @@ -358,9 +358,10 @@ static struct ttm_tt *virtio_gpu_ttm_tt_create2(struct ttm_buffer_object *bo, if (gtt == NULL) return NULL; gtt->obj = obj; - guest = (obj->blob_flags & VIRTGPU_RES_BLOB_GUEST_MASK); + has_guest = (obj->blob_mem == VIRTGPU_BLOB_MEM_GUEST || + obj->blob_mem == VIRTGPU_BLOB_MEM_HOST_GUEST); - if (!guest && obj->blob) { + if (!has_guest && obj->blob) { gtt->ttm.ttm.func = &virtio_gpu_vram_func; if (ttm_tt_init(&gtt->ttm.ttm, bo->bdev, size, page_flags, dummy_read_page)) { diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index cb54e1ac5343..420db70018dc 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -1150,9 +1150,9 @@ void virtio_gpu_cmd_unmap(struct virtio_gpu_device *vgdev, void
virtio_gpu_cmd_resource_create_blob(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo, - uint32_t ctx_id, uint32_t flags, - uint64_t size, uint64_t memory_id, - uint32_t nents, + uint32_t ctx_id, uint32_t blob_mem, + uint32_t blob_flags, uint64_t blob_id, + uint64_t size, uint32_t nents, struct virtio_gpu_mem_entry *ents) { struct virtio_gpu_resource_create_blob *cmd_p; @@ -1164,9 +1164,10 @@ virtio_gpu_cmd_resource_create_blob(struct virtio_gpu_device *vgdev, cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB); cmd_p->hdr.ctx_id = cpu_to_le32(ctx_id); cmd_p->resource_id = cpu_to_le32(bo->hw_res_handle); - cmd_p->flags = cpu_to_le32(flags); + cmd_p->blob_mem = cpu_to_le32(blob_mem); + cmd_p->blob_flags = cpu_to_le32(blob_flags); + cmd_p->blob_id = cpu_to_le64(blob_id); cmd_p->size = cpu_to_le64(size); - cmd_p->memory_id = cpu_to_le64(memory_id); cmd_p->nr_entries = cpu_to_le32(nents); vbuf->data_buf = ents; diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index 6b9c8a5f87e7..00da2d183dca 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -70,6 +70,7 @@ struct drm_virtgpu_execbuffer { __s32 fence_fd; /* in/out fence fd (see VIRTGPU_EXECBUF_FENCE_FD_IN/OUT) */ }; + #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ #define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */ #define VIRTGPU_PARAM_RESOURCE_BLOB 3 /* DRM_VIRTGPU_RESOURCE_CREATE_BLOB */ @@ -150,26 +151,28 @@ struct drm_virtgpu_get_caps { }; struct drm_virtgpu_resource_create_blob { -#define VIRTGPU_RES_BLOB_GUEST_MASK 0x000f -#define VIRTGPU_RES_BLOB_GUEST_NONE 0x0000 -#define VIRTGPU_RES_BLOB_GUEST_SYSTEM 0x0001 - -#define VIRTGPU_RES_BLOB_HOST_MASK 0x00f0 -#define VIRTGPU_RES_BLOB_HOST_NONE 0x0000 -#define VIRTGPU_RES_BLOB_HOST 0x0010 - -#define VIRTGPU_RES_BLOB_USE_MASK 0x0f00 -#define VIRTGPU_RES_BLOB_USE_NONE 0x0000 -#define VIRTGPU_RES_BLOB_USE_MAPPABLE 0x0100 -#define 
VIRTGPU_RES_BLOB_USE_SHAREABLE 0x0200 -#define VIRTGPU_RES_BLOB_USE_CROSS_DEVICE 0x0400 - __u32 flags; +#define VIRTGPU_BLOB_MEM_GUEST 0x0001 +#define VIRTGPU_BLOB_MEM_HOST 0x0002 +#define VIRTGPU_BLOB_MEM_HOST_GUEST 0x0003 + +#define VIRTGPU_BLOB_FLAG_MAPPABLE 0x0001 +#define VIRTGPU_BLOB_FLAG_SHAREABLE 0x0002 +#define VIRTGPU_BLOB_FLAG_CROSS_DEVICE 0x0004 + /* zero is invalid blob_mem */ + __u32 blob_mem; + __u32 blob_flags; __u32 bo_handle; __u32 res_handle; + __u64 size; + + /* + * for 3D contexts with VIRTGPU_BLOB_MEM_HOSTGUEST and + * VIRTGPU_BLOB_MEM_HOST otherwise, must be zero. + */ + __u32 pad; __u32 cmd_size; __u64 cmd; - __u64 size; - __u64 memory_id; + __u64 blob_id; }; #define DRM_IOCTL_VIRTGPU_MAP \ diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index cd303076225e..f44af31f7990 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -84,6 +84,9 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_GET_CAPSET, VIRTIO_GPU_CMD_GET_EDID, VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID, + VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB, + VIRTIO_GPU_CMD_RESOURCE_MAP, + VIRTIO_GPU_CMD_RESOURCE_UNMAP, /* 3d commands */ VIRTIO_GPU_CMD_CTX_CREATE = 0x0200, @@ -94,9 +97,6 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D, VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D, VIRTIO_GPU_CMD_SUBMIT_3D, - VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB, - VIRTIO_GPU_CMD_RESOURCE_MAP, - VIRTIO_GPU_CMD_RESOURCE_UNMAP, /* cursor commands */ VIRTIO_GPU_CMD_UPDATE_CURSOR = 0x0300, @@ -113,7 +113,6 @@ enum virtio_gpu_ctrl_type { /* CHROMIUM: legacy responses */ VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO_LEGACY = 0x1104, - /* CHROMIUM: success responses */ VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO = 0x11FF, @@ -303,6 +302,7 @@ struct virtio_gpu_cmd_submit { }; #define VIRTIO_GPU_CAPSET_VIRGL 1 +#define VIRTIO_GPU_CAPSET_VIRGL2 2 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ struct virtio_gpu_get_capset_info { @@ -393,29 +393,25 @@ struct virtio_gpu_resp_resource_uuid 
{ __u8 uuid[16]; }; - /* VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB */ struct virtio_gpu_resource_create_blob { struct virtio_gpu_ctrl_hdr hdr; __le32 resource_id; -#define VIRTIO_GPU_RES_BLOB_GUEST_MASK 0x000f -#define VIRTIO_GPU_RES_BLOB_GUEST_NONE 0x0000 -#define VIRTIO_GPU_RES_BLOB_GUEST_SYSTEM 0x0001 - -#define VIRTIO_GPU_RES_BLOB_HOST_MASK 0x00f0 -#define VIRTIO_GPU_RES_BLOB_HOST_NONE 0x0000 -#define VIRTIO_GPU_RES_BLOB_HOST 0x0010 - -#define VIRTIO_GPU_RES_BLOB_USE_MASK 0x0f00 -#define VIRTIO_GPU_RES_BLOB_USE_NONE 0x0000 -#define VIRTIO_GPU_RES_BLOB_USE_MAPPABLE 0x0100 -#define VIRTIO_GPU_RES_BLOB_USE_SHAREABLE 0x0200 -#define VIRTIO_GPU_RES_BLOB_USE_CROSS_DEVICE 0x0400 - __le32 flags; +#define VIRTIO_GPU_BLOB_MEM_GUEST 0x0001 +#define VIRTIO_GPU_BLOB_MEM_HOST3D 0x0002 +#define VIRTIO_GPU_BLOB_MEM_HOST3D_GUEST 0x0003 +#define VIRTIO_GPU_BLOB_MEM_HOSTSYS 0x0004 +#define VIRTIO_GPU_BLOB_MEM_HOSTSYS_GUEST 0x0005 + +#define VIRTIO_GPU_BLOB_FLAG_USE_MAPPABLE 0x0001 +#define VIRTIO_GPU_BLOB_FLAG_USE_SHAREABLE 0x0002 +#define VIRTIO_GPU_BLOB_FLAG_USE_CROSS_DEVICE 0x0004 + /* zero is invalid blob mem */ + __le32 blob_mem; + __le32 blob_flags; + __le64 blob_id; __le64 size; - __le64 memory_id; __le32 nr_entries; - __le32 padding; /* * sizeof(nr_entries * virtio_gpu_mem_entry) bytes follow */ -- GitLab From 8c25f26eadb54ca00a0340e79dd31c516d6ff708 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 28 Apr 2020 11:24:20 -0700 Subject: [PATCH 1085/1278] ANDROID: cfi: remove unnecessary include This change makes it possible to compile CFI for x86, which doesn't provide this header file. 
Bug: 145297900 Change-Id: I60ad190bb0c2296b67eef2194b72f381e7f94e2c Signed-off-by: Sami Tolvanen --- kernel/cfi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/cfi.c b/kernel/cfi.c index 967b0755c00e..b23f6ede42ca 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -12,7 +12,6 @@ #include #include #include -#include #include /* Compiler-defined handler names */ -- GitLab From 194910dc6bf05f75800e172d1dc6f03df9fec94d Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 28 Apr 2020 11:24:38 -0700 Subject: [PATCH 1086/1278] ANDROID: kbuild: export LTO and CFI flags Export the compiler flags for LTO and CFI, so we can filter them out elsewhere when needed. Bug: 145297900 Change-Id: I873d0f9ebef8f510eb5b56ab2e1888324f4bb84d Signed-off-by: Sami Tolvanen --- Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index e6d391a1e946..6d8d5c2e79f9 100644 --- a/Makefile +++ b/Makefile @@ -855,11 +855,11 @@ export DISABLE_LTO_CLANG endif ifdef CONFIG_LTO -lto-flags := $(lto-clang-flags) -KBUILD_CFLAGS += $(lto-flags) +LTO_CFLAGS := $(lto-clang-flags) +KBUILD_CFLAGS += $(LTO_CFLAGS) DISABLE_LTO := $(DISABLE_LTO_CLANG) -export DISABLE_LTO +export LTO_CFLAGS DISABLE_LTO # LDFINAL_vmlinux and LDFLAGS_FINAL_vmlinux can be set to override # the linker and flags for vmlinux_link. 
@@ -885,12 +885,12 @@ endif ifdef CONFIG_CFI # cfi-flags are re-tested in prepare-compiler-check -cfi-flags := $(cfi-clang-flags) -KBUILD_CFLAGS += $(cfi-flags) +CFI_CFLAGS := $(cfi-clang-flags) +KBUILD_CFLAGS += $(CFI_CFLAGS) DISABLE_CFI := $(DISABLE_CFI_CLANG) DISABLE_LTO += $(DISABLE_CFI) -export DISABLE_CFI +export CFI_CFLAGS DISABLE_CFI endif ifdef CONFIG_SHADOW_CALL_STACK -- GitLab From 5d8fc94e6f897323e10912667987f6a91b092f0f Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 29 Apr 2020 17:28:46 -0700 Subject: [PATCH 1087/1278] ANDROID: kbuild: don't select LD_DEAD_CODE_DATA_ELIMINATION with LTO Instead of enabling -gc-sections, which can break the kernel by dropping sections that are actually needed, just merge the sections also when LTO is enabled. Bug: 145297900 Change-Id: I86aecd543d303eea4d30e7ce49cf5fcb3ed24307 Signed-off-by: Sami Tolvanen --- arch/Kconfig | 1 - include/asm-generic/vmlinux.lds.h | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index b27eac589618..defcb5a00417 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -650,7 +650,6 @@ config LTO_CLANG depends on !KASAN select LTO select THIN_ARCHIVES - select LD_DEAD_CODE_DATA_ELIMINATION help This option enables clang's Link Time Optimization (LTO), which allows the compiler to optimize the kernel globally at link time. If you diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index e60ea67cffa4..3972a2a90268 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -65,11 +65,11 @@ * .data. We don't want to pull in .data..other sections, which Linux * has defined. Same for text and bss. 
*/ -#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* #define TEXT_CFI_MAIN .text.cfi .text.[0-9a-zA-Z_]*.cfi -#define DATA_MAIN .data .data.[0-9a-zA-Z_]* -#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..compoundliteral* .data..L* +#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral* .bss..L* #else #define TEXT_MAIN .text #define TEXT_CFI_MAIN .text.cfi -- GitLab From 5199ee3773eef6da478fc0741bd3e70a43d92c25 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 15 Apr 2019 09:49:56 -0700 Subject: [PATCH 1088/1278] UPSTREAM: x86/build/lto: Fix truncated .bss with -fdata-sections With CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y, we compile the kernel with -fdata-sections, which also splits the .bss section. The new section, with a new .bss.* name, which pattern gets missed by the main x86 linker script which only expects the '.bss' name. This results in the discarding of the second part and a too small, truncated .bss section and an unhappy, non-working kernel. Use the common BSS_MAIN macro in the linker script to properly capture and merge all the generated BSS sections. Signed-off-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Cc: Borislav Petkov Cc: Kees Cook Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190415164956.124067-1-samitolvanen@google.com [ Extended the changelog. 
] Signed-off-by: Ingo Molnar (cherry picked from commit 6a03469a1edc94da52b65478f1e00837add869a3) Bug: 145297900 Change-Id: Idb0369583e5a0671364e15b3aaa347c372f31dd4 Signed-off-by: Sami Tolvanen --- arch/x86/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 2384a2ae5ec3..b2f6e4f3e927 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -352,7 +352,7 @@ SECTIONS .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; *(.bss..page_aligned) - *(.bss) + *(BSS_MAIN) . = ALIGN(PAGE_SIZE); __bss_stop = .; } -- GitLab From 756ba7f0f59cfde43c13c9dd4841db9a907bcc27 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 2 Apr 2019 11:59:46 -0700 Subject: [PATCH 1089/1278] ANDROID: x86/cpu/vmware: use the full form of inl in VMWARE_PORT LLVM's assembler doesn't accept the short form inl (%%dx) instruction, but instead insists on the output register to be explicitly specified: :1:7: error: invalid operand for instruction inl (%dx) ^ LLVM ERROR: Error parsing inline asm Bug: 133186739 Bug: 145297900 Change-Id: I0519034f4a66bd72f23d206d4638578836a49ff5 Signed-off-by: Sami Tolvanen --- arch/x86/kernel/cpu/vmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index d805202c63cd..917840ed5fe4 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -45,7 +45,7 @@ #define VMWARE_PORT_CMD_VCPU_RESERVED 31 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ - __asm__("inl (%%dx)" : \ + __asm__("inl (%%dx), %%eax" : \ "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ "0"(VMWARE_HYPERVISOR_MAGIC), \ "1"(VMWARE_PORT_CMD_##cmd), \ -- GitLab From ac2a6090821950b36343cbad363ccb6445de9cfe Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 16 Apr 2019 14:40:00 -0700 Subject: [PATCH 1090/1278] ANDROID: x86/vdso: disable LTO only for VDSO Instead of disabling LTO 
for all the code in the directory, only disable it for the VDSO itself. Bug: 133186739 Bug: 145297900 Change-Id: I02e34b75e022982f9884e238aec89486890dd4bb Signed-off-by: Sami Tolvanen --- arch/x86/entry/vdso/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index ab7f730cf7f2..d64dd8cbee3b 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,7 +3,6 @@ # Building vDSO images for x86. # -KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n UBSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y @@ -72,7 +71,7 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(DISABLE_LTO) ifdef CONFIG_RETPOLINE ifneq ($(RETPOLINE_VDSO_CFLAGS),) @@ -150,6 +149,8 @@ KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(LTO_CFLAGS),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(CFI_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) -- GitLab From 1c7e2378b4a1b6861fbe45ded9ee34c61e64d528 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 10 Apr 2019 14:17:20 -0700 Subject: [PATCH 1091/1278] ANDROID: x86: add support for CONFIG_LTO_CLANG Bug: 133186739 Bug: 145297900 Change-Id: I8e7a8976f9381233f28badc324145b9c4c7eacb0 Signed-off-by: Sami 
Tolvanen --- arch/x86/Kconfig | 1 + arch/x86/Makefile | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fdc42d2bf7d9..eec5beff8a44 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -70,6 +70,7 @@ config X86 select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 + select ARCH_SUPPORTS_LTO_CLANG if X86_64 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 62ec0e709e8a..6dabb49af3f5 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -235,6 +235,11 @@ ifdef CONFIG_X86_64 LDFLAGS += $(call ld-option, -z max-page-size=0x200000) endif +ifdef CONFIG_LTO_CLANG +KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \ + -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) +endif + # Speed up the build KBUILD_CFLAGS += -pipe # Workaround for a gcc prelease that unfortunately was shipped in a suse release -- GitLab From 63d9ea52d9b9ef7dd37c7c6b2a95e630e503a092 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 30 Apr 2019 13:23:12 -0700 Subject: [PATCH 1092/1278] ANDROID: x86: disable STACK_VALIDATION with LTO_CLANG Disable CONFIG_STACK_VALIDATION with LTO, because objtool doesn't understand LLVM IR. 
Bug: 133186739 Bug: 145297900 Change-Id: I3f2bd3f0ce6c80f88bd00394453b543f884188f2 Signed-off-by: Sami Tolvanen --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index eec5beff8a44..a9b831061e84 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -173,7 +173,7 @@ config X86 select HAVE_RCU_TABLE_INVALIDATE if HAVE_RCU_TABLE_FREE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION - select HAVE_STACK_VALIDATION if X86_64 + select HAVE_STACK_VALIDATION if X86_64 && !LTO_CLANG select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER -- GitLab From de9dea05d66b2fd886a6997f0b7f87d14002fd64 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 29 Oct 2019 14:13:37 -0700 Subject: [PATCH 1093/1278] BACKPORT: x86/vmlinux: Actually use _etext for the end of the text segment Various calculations are using the end of the exception table (which does not need to be executable) as the end of the text segment. Instead, in preparation for moving the exception table into RO_DATA, move _etext after the exception table and update the calculations. Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dave Hansen Cc: Heiko Carstens Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: linux-ia64@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-s390@vger.kernel.org Cc: Michael Ellerman Cc: Michal Simek Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Rick Edgecombe Cc: Ross Zwisler Cc: Segher Boessenkool Cc: Thomas Gleixner Cc: Thomas Lendacky Cc: Will Deacon Cc: x86-ml Cc: Yoshinori Sato Link: https://lkml.kernel.org/r/20191029211351.13243-16-keescook@chromium.org (cherry picked from commit b907693883fdcff5b492cf0cd02a0e264623055e) Bug: 145297900 Change-Id: I922603ddd5c9db930059c682526c51d2181b858a Signed-off-by: Sami Tolvanen --- arch/x86/include/asm/sections.h | 1 - arch/x86/kernel/vmlinux.lds.S | 6 ++---- arch/x86/mm/init_64.c | 6 +++--- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 5c019d23d06b..524e979c642c 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -6,7 +6,6 @@ #include extern char __brk_base[], __brk_limit[]; -extern struct exception_table_entry __stop___ex_table[]; #if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index b2f6e4f3e927..756dcb95dbed 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -131,16 +131,14 @@ SECTIONS *(.text.__x86.indirect_thunk) __indirect_thunk_end = .; #endif - - /* End of text section */ - _etext = .; } :text = 0x9090 NOTES :text :note EXCEPTION_TABLE(16) :text = 0x9090 - /* .text should occupy whole number of pages */ + /* End of text section, which should occupy whole number of pages */ + _etext = .; . 
= ALIGN(PAGE_SIZE); X64_ALIGN_RODATA_BEGIN RO_DATA(PAGE_SIZE) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 624edfbff02d..4937d6f7c256 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1184,7 +1184,7 @@ int kernel_set_to_readonly; void set_kernel_text_rw(void) { unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(__stop___ex_table); + unsigned long end = PFN_ALIGN(_etext); if (!kernel_set_to_readonly) return; @@ -1203,7 +1203,7 @@ void set_kernel_text_rw(void) void set_kernel_text_ro(void) { unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(__stop___ex_table); + unsigned long end = PFN_ALIGN(_etext); if (!kernel_set_to_readonly) return; @@ -1222,7 +1222,7 @@ void mark_rodata_ro(void) unsigned long start = PFN_ALIGN(_text); unsigned long rodata_start = PFN_ALIGN(__start_rodata); unsigned long end = (unsigned long) &__end_rodata_hpage_align; - unsigned long text_end = PFN_ALIGN(&__stop___ex_table); + unsigned long text_end = PFN_ALIGN(&_etext); unsigned long rodata_end = PFN_ALIGN(&__end_rodata); unsigned long all_end; -- GitLab From 593d68055d913a8a68f866d25a57419384ca95aa Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 1 Sep 2019 22:35:27 +0200 Subject: [PATCH 1094/1278] UPSTREAM: crypto: x86 - Rename functions to avoid conflict with crypto/sha256.h Rename static / file-local functions so that they do not conflict with the functions declared in crypto/sha256.h. This is a preparation patch for folding crypto/sha256.h into crypto/sha.h. 
Signed-off-by: Hans de Goede Signed-off-by: Herbert Xu (cherry picked from commit eb7d6ba882f1c508f5387416375d4c2f99debc7b) Bug: 145297900 Change-Id: I71c660caea1b28089d57e85ee94aad454e0e8d02 Signed-off-by: Sami Tolvanen --- arch/x86/crypto/sha256_ssse3_glue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 9e79baf03a4b..d291795862f8 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -44,8 +44,8 @@ asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, u64 rounds); typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds); -static int sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len, sha256_transform_fn *sha256_xform) +static int _sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha256_transform_fn *sha256_xform) { struct sha256_state *sctx = shash_desc_ctx(desc); @@ -83,7 +83,7 @@ static int sha256_finup(struct shash_desc *desc, const u8 *data, static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_transform_ssse3); + return _sha256_update(desc, data, len, sha256_transform_ssse3); } static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, @@ -152,7 +152,7 @@ asmlinkage void sha256_transform_avx(u32 *digest, const char *data, static int sha256_avx_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_transform_avx); + return _sha256_update(desc, data, len, sha256_transform_avx); } static int sha256_avx_finup(struct shash_desc *desc, const u8 *data, @@ -236,7 +236,7 @@ asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, static int sha256_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, 
sha256_transform_rorx); + return _sha256_update(desc, data, len, sha256_transform_rorx); } static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data, @@ -318,7 +318,7 @@ asmlinkage void sha256_ni_transform(u32 *digest, const char *data, static int sha256_ni_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_ni_transform); + return _sha256_update(desc, data, len, sha256_ni_transform); } static int sha256_ni_finup(struct shash_desc *desc, const u8 *data, -- GitLab From 218a1cac4902570988d0a72afcd9424665619661 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Jan 2020 19:57:29 -0800 Subject: [PATCH 1095/1278] FROMLIST: crypto, x86/sha: Eliminate casts on asm implementations In order to avoid CFI function prototype mismatches, this removes the casts on assembly implementations of sha1/256/512 accelerators. The safety checks from BUILD_BUG_ON() remain. Additionally, this renames various arguments for clarity, as suggested by Eric Biggers. 
Signed-off-by: Kees Cook (am from https://lore.kernel.org/patchwork/patch/1179963/) Link: https://lore.kernel.org/lkml/202001141955.C4136E9C5@keescook Bug: 145297900 Change-Id: I331d710f11775b66229acedc59861876a2d6708b Signed-off-by: Sami Tolvanen --- arch/x86/crypto/sha1_avx2_x86_64_asm.S | 6 +-- arch/x86/crypto/sha1_ssse3_asm.S | 14 ++++-- arch/x86/crypto/sha1_ssse3_glue.c | 70 +++++++++++--------------- arch/x86/crypto/sha256-avx-asm.S | 4 +- arch/x86/crypto/sha256-avx2-asm.S | 4 +- arch/x86/crypto/sha256-ssse3-asm.S | 6 ++- arch/x86/crypto/sha256_ssse3_glue.c | 34 ++++++------- arch/x86/crypto/sha512-avx-asm.S | 11 ++-- arch/x86/crypto/sha512-avx2-asm.S | 11 ++-- arch/x86/crypto/sha512-ssse3-asm.S | 13 +++-- arch/x86/crypto/sha512_ssse3_glue.c | 31 ++++++------ 11 files changed, 102 insertions(+), 102 deletions(-) diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 9f712a7dfd79..7e578fa5d0a7 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -62,11 +62,11 @@ *Visit http://software.intel.com/en-us/articles/ *and refer to improving-the-performance-of-the-secure-hash-algorithm-1/ * - *Updates 20-byte SHA-1 record in 'hash' for even number of - *'num_blocks' consecutive 64-byte blocks + *Updates 20-byte SHA-1 record at start of 'state', from 'input', for + *even number of 'blocks' consecutive 64-byte blocks. 
* *extern "C" void sha1_transform_avx2( - * int *hash, const char* input, size_t num_blocks ); + * struct sha1_state *state, const u8* input, int blocks ); */ #include diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index 6204bd53528c..5fc0bf7e6a03 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -461,9 +461,13 @@ W_PRECALC_SSSE3 movdqu \a,\b .endm -/* SSSE3 optimized implementation: - * extern "C" void sha1_transform_ssse3(u32 *digest, const char *data, u32 *ws, - * unsigned int rounds); +/* + * SSSE3 optimized implementation: + * + * extern "C" void sha1_transform_ssse3(struct sha1_state *state, + * const u8 *data, int blocks); + * + * Note that struct sha1_state is assumed to begin with u32 state[5]. */ SHA1_VECTOR_ASM sha1_transform_ssse3 @@ -549,8 +553,8 @@ W_PRECALC_AVX /* AVX optimized implementation: - * extern "C" void sha1_transform_avx(u32 *digest, const char *data, u32 *ws, - * unsigned int rounds); + * extern "C" void sha1_transform_avx(struct sha1_state *state, + * const u8 *data, int blocks); */ SHA1_VECTOR_ASM sha1_transform_avx diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index fc61739150e7..9691962756a7 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -31,11 +31,8 @@ #include #include -typedef void (sha1_transform_fn)(u32 *digest, const char *data, - unsigned int rounds); - static int sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len, sha1_transform_fn *sha1_xform) + unsigned int len, sha1_block_fn *sha1_xform) { struct sha1_state *sctx = shash_desc_ctx(desc); @@ -43,48 +40,47 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) return crypto_sha1_update(desc, data, len); - /* make sure casting to sha1_block_fn() is safe */ + /* + * Make sure struct sha1_state begins directly with the SHA1 + * 160-bit 
internal state, as this is what the asm functions expect. + */ BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); kernel_fpu_begin(); - sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_xform); + sha1_base_do_update(desc, data, len, sha1_xform); kernel_fpu_end(); return 0; } static int sha1_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out, sha1_transform_fn *sha1_xform) + unsigned int len, u8 *out, sha1_block_fn *sha1_xform) { if (!irq_fpu_usable()) return crypto_sha1_finup(desc, data, len, out); kernel_fpu_begin(); if (len) - sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_xform); - sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_xform); + sha1_base_do_update(desc, data, len, sha1_xform); + sha1_base_do_finalize(desc, sha1_xform); kernel_fpu_end(); return sha1_base_finish(desc, out); } -asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, - unsigned int rounds); +asmlinkage void sha1_transform_ssse3(struct sha1_state *state, + const u8 *data, int blocks); static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha1_update(desc, data, len, - (sha1_transform_fn *) sha1_transform_ssse3); + return sha1_update(desc, data, len, sha1_transform_ssse3); } static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - return sha1_finup(desc, data, len, out, - (sha1_transform_fn *) sha1_transform_ssse3); + return sha1_finup(desc, data, len, out, sha1_transform_ssse3); } /* Add padding and return the message digest. 
*/ @@ -124,21 +120,19 @@ static void unregister_sha1_ssse3(void) } #ifdef CONFIG_AS_AVX -asmlinkage void sha1_transform_avx(u32 *digest, const char *data, - unsigned int rounds); +asmlinkage void sha1_transform_avx(struct sha1_state *state, + const u8 *data, int blocks); static int sha1_avx_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha1_update(desc, data, len, - (sha1_transform_fn *) sha1_transform_avx); + return sha1_update(desc, data, len, sha1_transform_avx); } static int sha1_avx_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - return sha1_finup(desc, data, len, out, - (sha1_transform_fn *) sha1_transform_avx); + return sha1_finup(desc, data, len, out, sha1_transform_avx); } static int sha1_avx_final(struct shash_desc *desc, u8 *out) @@ -196,8 +190,8 @@ static inline void unregister_sha1_avx(void) { } #if defined(CONFIG_AS_AVX2) && (CONFIG_AS_AVX) #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ -asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, - unsigned int rounds); +asmlinkage void sha1_transform_avx2(struct sha1_state *state, + const u8 *data, int blocks); static bool avx2_usable(void) { @@ -209,28 +203,26 @@ static bool avx2_usable(void) return false; } -static void sha1_apply_transform_avx2(u32 *digest, const char *data, - unsigned int rounds) +static void sha1_apply_transform_avx2(struct sha1_state *state, + const u8 *data, int blocks) { /* Select the optimal transform based on data block size */ - if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE) - sha1_transform_avx2(digest, data, rounds); + if (blocks >= SHA1_AVX2_BLOCK_OPTSIZE) + sha1_transform_avx2(state, data, blocks); else - sha1_transform_avx(digest, data, rounds); + sha1_transform_avx(state, data, blocks); } static int sha1_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha1_update(desc, data, len, - (sha1_transform_fn *) sha1_apply_transform_avx2); + return 
sha1_update(desc, data, len, sha1_apply_transform_avx2); } static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - return sha1_finup(desc, data, len, out, - (sha1_transform_fn *) sha1_apply_transform_avx2); + return sha1_finup(desc, data, len, out, sha1_apply_transform_avx2); } static int sha1_avx2_final(struct shash_desc *desc, u8 *out) @@ -274,21 +266,19 @@ static inline void unregister_sha1_avx2(void) { } #endif #ifdef CONFIG_AS_SHA1_NI -asmlinkage void sha1_ni_transform(u32 *digest, const char *data, - unsigned int rounds); +asmlinkage void sha1_ni_transform(struct sha1_state *digest, const u8 *data, + int rounds); static int sha1_ni_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha1_update(desc, data, len, - (sha1_transform_fn *) sha1_ni_transform); + return sha1_update(desc, data, len, sha1_ni_transform); } static int sha1_ni_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - return sha1_finup(desc, data, len, out, - (sha1_transform_fn *) sha1_ni_transform); + return sha1_finup(desc, data, len, out, sha1_ni_transform); } static int sha1_ni_final(struct shash_desc *desc, u8 *out) diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 001bbcf93c79..b6e037ee6661 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -341,8 +341,8 @@ a = TMP_ .endm ######################################################################## -## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) -## arg 1 : pointer to digest +## void sha256_transform_avx(struct sha256_state *state, const u8 *data, int blocks) +## arg 1 : pointer to state ## arg 2 : pointer to input data ## arg 3 : Num blocks ######################################################################## diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 1420db15dcdd..2e6ebc904a3a 100644 ---
a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -520,8 +520,8 @@ STACK_SIZE = _RSP + _RSP_SIZE .endm ######################################################################## -## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks) -## arg 1 : pointer to digest +## void sha256_transform_rorx(struct sha256_state *state, const u8 *data, int blocks) +## arg 1 : pointer to state ## arg 2 : pointer to input data ## arg 3 : Num blocks ######################################################################## diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index c6c05ed2c16a..ab7d9f05ff78 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -347,8 +347,10 @@ a = TMP_ .endm ######################################################################## -## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks) -## arg 1 : pointer to digest +## void sha256_transform_ssse3(struct sha256_state *state, const u8 *data, +## int blocks); +## arg 1 : pointer to state +## (struct sha256_state is assumed to begin with u32 state[8]) ## arg 2 : pointer to input data ## arg 3 : Num blocks ######################################################################## diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index d291795862f8..2fd8bdaa6604 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -40,12 +40,11 @@ #include #include -asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, - u64 rounds); -typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds); +asmlinkage void sha256_transform_ssse3(struct sha256_state *state, + const u8 *data, int blocks); static int _sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len, sha256_transform_fn *sha256_xform) + unsigned int len, sha256_block_fn *sha256_xform) { 
struct sha256_state *sctx = shash_desc_ctx(desc); @@ -53,28 +52,29 @@ static int _sha256_update(struct shash_desc *desc, const u8 *data, (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) return crypto_sha256_update(desc, data, len); - /* make sure casting to sha256_block_fn() is safe */ + /* + * Make sure struct sha256_state begins directly with the SHA256 + * 256-bit internal state, as this is what the asm functions expect. + */ BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); kernel_fpu_begin(); - sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_xform); + sha256_base_do_update(desc, data, len, sha256_xform); kernel_fpu_end(); return 0; } static int sha256_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out, sha256_transform_fn *sha256_xform) + unsigned int len, u8 *out, sha256_block_fn *sha256_xform) { if (!irq_fpu_usable()) return crypto_sha256_finup(desc, data, len, out); kernel_fpu_begin(); if (len) - sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_xform); - sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_xform); + sha256_base_do_update(desc, data, len, sha256_xform); + sha256_base_do_finalize(desc, sha256_xform); kernel_fpu_end(); return sha256_base_finish(desc, out); @@ -146,8 +146,8 @@ static void unregister_sha256_ssse3(void) } #ifdef CONFIG_AS_AVX -asmlinkage void sha256_transform_avx(u32 *digest, const char *data, - u64 rounds); +asmlinkage void sha256_transform_avx(struct sha256_state *state, + const u8 *data, int blocks); static int sha256_avx_update(struct shash_desc *desc, const u8 *data, unsigned int len) @@ -230,8 +230,8 @@ static inline void unregister_sha256_avx(void) { } #endif #if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) -asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, - u64 rounds); +asmlinkage void sha256_transform_rorx(struct sha256_state *state, + const u8 *data, int blocks); static int sha256_avx2_update(struct shash_desc 
*desc, const u8 *data, unsigned int len) @@ -312,8 +312,8 @@ static inline void unregister_sha256_avx2(void) { } #endif #ifdef CONFIG_AS_SHA256_NI -asmlinkage void sha256_ni_transform(u32 *digest, const char *data, - u64 rounds); /*unsigned int rounds);*/ +asmlinkage void sha256_ni_transform(struct sha256_state *digest, + const u8 *data, int rounds); static int sha256_ni_update(struct shash_desc *desc, const u8 *data, unsigned int len) diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 39235fefe6f7..8f6fe09cba54 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -271,11 +271,12 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE .endm ######################################################################## -# void sha512_transform_avx(void* D, const void* M, u64 L) -# Purpose: Updates the SHA512 digest stored at D with the message stored in M. -# The size of the message pointed to by M must be an integer multiple of SHA512 -# message blocks. -# L is the message length in SHA512 blocks +# void sha512_transform_avx(sha512_state *state, const u8 *data, int blocks) +# Purpose: Updates the SHA512 digest stored at "state" with the message +# stored in "data". +# The size of the message pointed to by "data" must be an integer multiple +# of SHA512 message blocks. 
+# "blocks" is the message length in SHA512 blocks ######################################################################## ENTRY(sha512_transform_avx) cmp $0, msglen diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index b16d56005162..43d4d641804c 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -563,11 +563,12 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE .endm ######################################################################## -# void sha512_transform_rorx(void* D, const void* M, uint64_t L)# -# Purpose: Updates the SHA512 digest stored at D with the message stored in M. -# The size of the message pointed to by M must be an integer multiple of SHA512 -# message blocks. -# L is the message length in SHA512 blocks +# void sha512_transform_rorx(sha512_state *state, const u8 *data, int blocks) +# Purpose: Updates the SHA512 digest stored at "state" with the message +# stored in "data". +# The size of the message pointed to by "data" must be an integer multiple +# of SHA512 message blocks. +# "blocks" is the message length in SHA512 blocks ######################################################################## ENTRY(sha512_transform_rorx) # Allocate Stack Space diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index 66bbd9058a90..46da903f5538 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -269,11 +269,14 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE .endm ######################################################################## -# void sha512_transform_ssse3(void* D, const void* M, u64 L)# -# Purpose: Updates the SHA512 digest stored at D with the message stored in M. -# The size of the message pointed to by M must be an integer multiple of SHA512 -# message blocks. -# L is the message length in SHA512 blocks. 
+## void sha512_transform_ssse3(struct sha512_state *state, const u8 *data, +## int blocks); +# (struct sha512_state is assumed to begin with u64 state[8]) +# Purpose: Updates the SHA512 digest stored at "state" with the message +# stored in "data". +# The size of the message pointed to by "data" must be an integer multiple +# of SHA512 message blocks. +# "blocks" is the message length in SHA512 blocks. ######################################################################## ENTRY(sha512_transform_ssse3) diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 2b0e2a6825f3..1db6c4ed53cd 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -39,13 +39,11 @@ #include -asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data, - u64 rounds); - -typedef void (sha512_transform_fn)(u64 *digest, const char *data, u64 rounds); +asmlinkage void sha512_transform_ssse3(struct sha512_state *state, + const u8 *data, int blocks); static int sha512_update(struct shash_desc *desc, const u8 *data, - unsigned int len, sha512_transform_fn *sha512_xform) + unsigned int len, sha512_block_fn *sha512_xform) { struct sha512_state *sctx = shash_desc_ctx(desc); @@ -53,28 +51,29 @@ static int sha512_update(struct shash_desc *desc, const u8 *data, (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) return crypto_sha512_update(desc, data, len); - /* make sure casting to sha512_block_fn() is safe */ + /* + * Make sure struct sha512_state begins directly with the SHA512 + * 512-bit internal state, as this is what the asm functions expect. 
+ */ BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0); kernel_fpu_begin(); - sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_xform); + sha512_base_do_update(desc, data, len, sha512_xform); kernel_fpu_end(); return 0; } static int sha512_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out, sha512_transform_fn *sha512_xform) + unsigned int len, u8 *out, sha512_block_fn *sha512_xform) { if (!irq_fpu_usable()) return crypto_sha512_finup(desc, data, len, out); kernel_fpu_begin(); if (len) - sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_xform); - sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_xform); + sha512_base_do_update(desc, data, len, sha512_xform); + sha512_base_do_finalize(desc, sha512_xform); kernel_fpu_end(); return sha512_base_finish(desc, out); @@ -146,8 +145,8 @@ static void unregister_sha512_ssse3(void) } #ifdef CONFIG_AS_AVX -asmlinkage void sha512_transform_avx(u64 *digest, const char *data, - u64 rounds); +asmlinkage void sha512_transform_avx(struct sha512_state *state, + const u8 *data, int blocks); static bool avx_usable(void) { if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { @@ -229,8 +228,8 @@ static inline void unregister_sha512_avx(void) { } #endif #if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) -asmlinkage void sha512_transform_rorx(u64 *digest, const char *data, - u64 rounds); +asmlinkage void sha512_transform_rorx(struct sha512_state *state, + const u8 *data, int blocks); static int sha512_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) -- GitLab From 2e9d1012fdd2858697981cda5cb81aa0b7933a66 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Oct 2019 11:42:11 -0700 Subject: [PATCH 1096/1278] ANDROID: BACKPORT: x86/extable: Do not mark exception callback as CFI The exception table entries are constructed out of a relative offset and point to the actual function, not the CFI table entry. 
For now, just mark the caller as not checking CFI. The failure is most visible at boot with CONFIG_DEBUG_RODATA_TEST=y. Bug: 145297900 Change-Id: Ia5e9da358c41b8e8a08e253926ed31775817a4e6 Signed-off-by: Kees Cook Signed-off-by: Sami Tolvanen --- arch/x86/mm/extable.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 9fe656c42aa5..2b0450a3c8ab 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -170,6 +170,7 @@ bool ex_has_fault_handler(unsigned long ip) return handler == ex_handler_fault; } +__nocfi int fixup_exception(struct pt_regs *regs, int trapnr) { const struct exception_table_entry *e; -- GitLab From bb941db62be35f195ede8bfdba766b93eafae27d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 6 Nov 2019 14:47:30 -0800 Subject: [PATCH 1097/1278] ANDROID: BACKPORT: x86, relocs: Ignore __typeid__ relocations The __typeid__* symbols aren't actually relocations, so they can be ignored during relocation generation. Bug: 145297900 Change-Id: I41d0e093b3cdc3665be1722551011da5ec3eb940 Signed-off-by: Kees Cook Signed-off-by: Sami Tolvanen --- arch/x86/tools/relocs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 220e97841e49..4334dfb24ed6 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -46,6 +46,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { "^(xen_irq_disable_direct_reloc$|" "xen_save_fl_direct_reloc$|" "VDSO|" + "__typeid__|" "__crc_)", /* @@ -781,6 +782,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, add_reloc(&relocs32neg, offset); break; + case R_X86_64_8: + if (!shn_abs || !is_reloc(S_ABS, symname)) + die("Non-whitelisted %s relocation: %s\n", + rel_type(r_type), symname); + break; + case R_X86_64_32: case R_X86_64_32S: case R_X86_64_64: -- GitLab From 9355c9b806334dd4a552955d1c9d9499fc0fa611 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 15 Jan 2020 
15:29:21 -0800 Subject: [PATCH 1098/1278] ANDROID: BACKPORT: x86, module: Ignore __typeid__ relocations Also ignore these relocations when loading modules. Bug: 145297900 Change-Id: I3d00f93deb3cf370ce25f8672f90e410e87ab79e Signed-off-by: Sami Tolvanen --- arch/x86/kernel/module.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index f58336af095c..fcd4d7b7d330 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -201,6 +201,10 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, goto overflow; #endif break; + case R_X86_64_8: + if (!strncmp(strtab + sym->st_name, "__typeid__", 10)) + break; + /* fallthrough */ default: pr_err("%s: Unknown rela relocation: %llu\n", me->name, ELF64_R_TYPE(rel[i].r_info)); -- GitLab From 09ae6b7b166d4ed61fbbd35e2450605e6973f8fa Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 29 Apr 2020 17:29:12 -0700 Subject: [PATCH 1099/1278] ANDROID: x86: disable CFI for do_syscall_* x86 doesn't use syscall wrappers in 4.14, which means the do_syscall_* functions end up making an indirect call using a mismatching function pointer. Disable CFI to work around the type mismatch.
Bug: 145297900 Change-Id: I91dd2bd94f9b5174c03cc3d3a1007061f7fe332e Signed-off-by: Sami Tolvanen --- arch/x86/entry/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 389800344f69..ba5698dca969 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -271,7 +271,7 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs) } #ifdef CONFIG_X86_64 -__visible void do_syscall_64(struct pt_regs *regs) +__nocfi __visible void do_syscall_64(struct pt_regs *regs) { struct thread_info *ti = current_thread_info(); unsigned long nr = regs->orig_ax; @@ -305,7 +305,7 @@ __visible void do_syscall_64(struct pt_regs *regs) * extremely hot in workloads that use it, and it's usually called from * do_fast_syscall_32, so forcibly inline it to improve performance. */ -static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) +static __nocfi __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) { struct thread_info *ti = current_thread_info(); unsigned int nr = (unsigned int)regs->orig_ax; -- GitLab From 68d62c3ce509ad66a3e6a98e017938db20eec780 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 30 Apr 2020 09:40:32 -0700 Subject: [PATCH 1100/1278] ANDROID: crypto: aesni: fix function types for aesni_(enc|dec) Use a prototype that matches the function pointer type to avoid CFI failures. 
Bug: 145297900 Change-Id: I64ac0a4c80e499b74e6416ce6d015f1afc4f0373 Signed-off-by: Sami Tolvanen --- arch/x86/crypto/aesni-intel_glue.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index c690ddc78c03..6dc0af7f2388 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -74,10 +74,8 @@ struct aesni_xts_ctx { asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len); -asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in); -asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in); +asmlinkage void aesni_enc(void *ctx, u8 *out, const u8 *in); +asmlinkage void aesni_dec(void *ctx, u8 *out, const u8 *in); asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len); asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out, -- GitLab From f8da3c2933306d777a0011c3389fd439effe9daf Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 9 Jan 2020 13:11:05 -0800 Subject: [PATCH 1101/1278] ANDROID: x86: map CFI jump tables in pti_clone_entry_text Allow CFI enabled entry code to make indirect calls by also mapping CFI jump tables, and add a check to ensure the jump table section is not empty. 
Bug: 145297900 Change-Id: I1204c50a139ba62234f3bb4699c50921a831162b Signed-off-by: Sami Tolvanen --- arch/x86/include/asm/sections.h | 1 + arch/x86/kernel/vmlinux.lds.S | 11 +++++++++++ arch/x86/mm/pti.c | 9 +++++++++ 3 files changed, 21 insertions(+) diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 524e979c642c..4c85f6db1e3d 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -6,6 +6,7 @@ #include extern char __brk_base[], __brk_limit[]; +extern char __cfi_jt_start[], __cfi_jt_end[]; #if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 756dcb95dbed..2e994fff9164 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -131,6 +131,13 @@ SECTIONS *(.text.__x86.indirect_thunk) __indirect_thunk_end = .; #endif + +#ifdef CONFIG_CFI_CLANG + . = ALIGN(PAGE_SIZE); + __cfi_jt_start = .; + *(.text..L.cfi.jumptable .text..L.cfi.jumptable.*) + __cfi_jt_end = .; +#endif } :text = 0x9090 NOTES :text :note @@ -412,3 +419,7 @@ INIT_PER_CPU(irq_stack_union); "kexec control code size is too big"); #endif +#ifdef CONFIG_CFI_CLANG +. = ASSERT((__cfi_jt_end - __cfi_jt_start > 0), + "CFI jump table is empty"); +#endif diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 33c6ee9aebbd..639f56dc626a 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -367,6 +367,15 @@ static void __init pti_clone_entry_text(void) pti_clone_pmds((unsigned long) __entry_text_start, (unsigned long) __irqentry_text_end, _PAGE_RW | _PAGE_GLOBAL); + + /* + * If CFI is enabled, also map jump tables, so the entry code can + * make indirect calls. 
+ */ + if (IS_ENABLED(CONFIG_CFI_CLANG)) + pti_clone_pmds((unsigned long) __cfi_jt_start, + (unsigned long) __cfi_jt_end, + _PAGE_RW | _PAGE_GLOBAL); } /* -- GitLab From f7bd8023a51552d114d7476b754db1079039c05f Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 30 Apr 2020 09:41:11 -0700 Subject: [PATCH 1102/1278] ANDROID: cuttlefish_defconfig: enable LTO and CFI Enable LTO and CFI also in x86_64_cuttlefish_defconfig. Bug: 145297900 Change-Id: Ibf24d4da73789f49fd6a37a540f6b6a9132902dd Signed-off-by: Sami Tolvanen --- arch/x86/configs/x86_64_cuttlefish_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 25689b73e594..a776ef26d4b5 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -40,6 +40,8 @@ CONFIG_PROFILING=y CONFIG_OPROFILE=y CONFIG_JUMP_LABEL=y CONFIG_CC_STACKPROTECTOR_STRONG=y +CONFIG_LTO_CLANG=y +CONFIG_CFI_CLANG=y CONFIG_REFCOUNT_FULL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -- GitLab From b564958e6e2afff53b95b95076cef198447ea0f0 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 29 Apr 2020 08:00:18 -0700 Subject: [PATCH 1103/1278] ANDROID: Incremental fs: Add setattr call As was, chmod would change the cached inode's mode, which would persist until the inode was uncached. Fix to change mode of backing file, but make sure mount files are read only, backing files are always writeable. 
Test: App no longer fails with incfs errors Bug: 154972299 Signed-off-by: Paul Lawrence Change-Id: I40517331f24329484387c6b880f1517f887b29f6 (cherry picked from commit fe4fae35fe307a15cacc5e6693a98bf5140e643b) --- fs/incfs/vfs.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index ec028fc96303..a2932d3a4e8f 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -71,6 +71,7 @@ static struct inode *alloc_inode(struct super_block *sb); static void free_inode(struct inode *inode); static void evict_inode(struct inode *inode); +static int incfs_setattr(struct dentry *dentry, struct iattr *ia); static ssize_t incfs_getxattr(struct dentry *d, const char *name, void *value, size_t size); static ssize_t incfs_setxattr(struct dentry *d, const char *name, @@ -101,7 +102,8 @@ static const struct inode_operations incfs_dir_inode_ops = { .rename = dir_rename_wrap, .unlink = dir_unlink, .link = dir_link, - .rmdir = dir_rmdir + .rmdir = dir_rmdir, + .setattr = incfs_setattr, }; static const struct file_operations incfs_dir_fops = { @@ -161,7 +163,7 @@ static const struct file_operations incfs_log_file_ops = { }; static const struct inode_operations incfs_file_inode_ops = { - .setattr = simple_setattr, + .setattr = incfs_setattr, .getattr = simple_getattr, .listxattr = incfs_listxattr }; @@ -372,6 +374,7 @@ static int inode_set(struct inode *inode, void *opaque) inode->i_mapping->a_ops = &incfs_address_space_ops; inode->i_op = &incfs_file_inode_ops; inode->i_fop = &incfs_file_ops; + inode->i_mode &= ~0222; } else if (S_ISDIR(inode->i_mode)) { inode->i_size = 0; inode->i_blocks = 1; @@ -2032,6 +2035,45 @@ static void evict_inode(struct inode *inode) clear_inode(inode); } +static int incfs_setattr(struct dentry *dentry, struct iattr *ia) +{ + struct dentry_info *di = get_incfs_dentry(dentry); + struct dentry *backing_dentry; + struct inode *backing_inode; + int error; + + if (ia->ia_valid 
& ATTR_SIZE) + return -EINVAL; + + if (!di) + return -EINVAL; + backing_dentry = di->backing_path.dentry; + if (!backing_dentry) + return -EINVAL; + + backing_inode = d_inode(backing_dentry); + + /* incfs files are readonly, but the backing files must be writeable */ + if (S_ISREG(backing_inode->i_mode)) { + if ((ia->ia_valid & ATTR_MODE) && (ia->ia_mode & 0222)) + return -EINVAL; + + ia->ia_mode |= 0222; + } + + inode_lock(d_inode(backing_dentry)); + error = notify_change(backing_dentry, ia, NULL); + inode_unlock(d_inode(backing_dentry)); + + if (error) + return error; + + if (S_ISREG(backing_inode->i_mode)) + ia->ia_mode &= ~0222; + + return simple_setattr(dentry, ia); +} + static ssize_t incfs_getxattr(struct dentry *d, const char *name, void *value, size_t size) { -- GitLab From 3b977e3a4e7a01786344906024621b0fd84dca71 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 30 Apr 2020 09:27:15 -0700 Subject: [PATCH 1104/1278] ANDROID: Fix kernel build regressions from virtio-gpu-next patches Fix the following: drivers/gpu/drm/virtio/virtgpu_ioctl.c:587:8: warning: Variable 'buf' is not assigned a value. [unassignedVariable] drivers/gpu/drm/virtio/virtgpu_ioctl.c:624:9: warning: Local variable 'buf' shadows outer variable [shadowVariable] drivers/gpu/drm/virtio/virtgpu_ioctl.c:678:50: warning: Either the condition 'rc_blob->cmd_size&&vfpriv' is redundant or there is possible null pointer dereference: vfpriv. [nullPointerRedundantCheck] drivers/gpu/drm/virtio/virtgpu_ioctl.c:709:2: warning: 'buf' may be used uninitialized in this function [-Wmaybe-uninitialized] drivers/gpu/drm/virtio/virtgpu_kms.c:191:9: error: implicit declaration of function 'pci_request_region'; did you mean 'pci_request_regions'? 
[-Werror=implicit-function-declaration] Bug: 155396530 Signed-off-by: Lingfeng Yang Change-Id: I10f4172cfa520b4cd80c3c7ec31996f1b6f2c298 --- drivers/gpu/drm/virtio/Kconfig | 2 +- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/virtio/Kconfig b/drivers/gpu/drm/virtio/Kconfig index 0c384d9a2b75..46010fc78f52 100644 --- a/drivers/gpu/drm/virtio/Kconfig +++ b/drivers/gpu/drm/virtio/Kconfig @@ -1,6 +1,6 @@ config DRM_VIRTIO_GPU tristate "Virtio GPU driver" - depends on DRM && VIRTIO && MMU + depends on DRM && VIRTIO && MMU && PCI select DRM_KMS_HELPER select DRM_TTM help diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index b8daaebc5dd6..4235e142ff84 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -584,7 +584,6 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev, static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - void *buf; uint32_t device_blob_mem = 0; int ret, si, nents; uint32_t handle = 0; @@ -620,7 +619,7 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, device_blob_mem = VIRTIO_GPU_BLOB_MEM_HOSTSYS_GUEST; } - if (rc_blob->cmd_size && vfpriv) { + if (rc_blob->cmd_size) { void *buf; void __user *cmd = u64_to_user_ptr(rc_blob->cmd); @@ -672,7 +671,7 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, fence = virtio_gpu_fence_alloc(vgdev); if (!fence) { ret = -ENOMEM; - goto err_free_buf; + goto err_free_obj; } virtio_gpu_cmd_resource_create_blob(vgdev, obj, vfpriv->ctx_id, @@ -705,8 +704,6 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, err_fence_put: dma_fence_put(&fence->f); -err_free_buf: - kfree(buf); err_free_obj: drm_gem_object_release(&obj->gem_base); return ret; -- GitLab From f5aa5b2e5825628b684ec25729e2df19964a9a33 Mon Sep 17 00:00:00 
2001 From: Jaegeuk Kim Date: Thu, 23 Apr 2020 20:37:43 -0700 Subject: [PATCH 1105/1278] FROMGIT: f2fs: fix missing check for f2fs_unlock_op This fixes what Chao pointed. Upstream patch should be fine. Fixes: 8c77bd62d6e4 ("f2fs: fix quota_sync failure due to f2fs_lock_op") Change-Id: Ia697c8cf3897895392bbc0364d7d76f16ec9049e Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 6ca827f9ca99..f5a747d8b044 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1098,7 +1098,8 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); + if (!IS_NOQUOTA(inode)) + f2fs_unlock_op(sbi); spin_lock(&fi->i_size_lock); if (fi->last_disk_size < psize) @@ -1124,7 +1125,8 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, out_put_dnode: f2fs_put_dnode(&dn); out_unlock_op: - f2fs_unlock_op(sbi); + if (!IS_NOQUOTA(inode)) + f2fs_unlock_op(sbi); return -EAGAIN; } -- GitLab From fcb3929d3893b0968276fe973c9baa5c3b3ca2f5 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 6 Nov 2019 12:25:02 +0000 Subject: [PATCH 1106/1278] ext4: fix extent_status fragmentation for plain files commit 4068664e3cd2312610ceac05b74c4cf1853b8325 upstream. Extents are cached in read_extent_tree_block(); as a result, extents are not cached for inodes with depth == 0 when we try to find the extent using ext4_find_extent(). The result of the lookup is cached in ext4_map_blocks() but is only a subset of the extent on disk. As a result, the contents of extents status cache can get very badly fragmented for certain workloads, such as a random 4k read workload. File size of /mnt/test is 33554432 (8192 blocks of 4096 bytes) ext: logical_offset: physical_offset: length: expected: flags: 0: 0.. 8191: 40960.. 
49151: 8192: last,eof $ perf record -e 'ext4:ext4_es_*' /root/bin/fio --name=t --direct=0 --rw=randread --bs=4k --filesize=32M --size=32M --filename=/mnt/test $ perf script | grep ext4_es_insert_extent | head -n 10 fio 131 [000] 13.975421: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [494/1) mapped 41454 status W fio 131 [000] 13.975939: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [6064/1) mapped 47024 status W fio 131 [000] 13.976467: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [6907/1) mapped 47867 status W fio 131 [000] 13.976937: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [3850/1) mapped 44810 status W fio 131 [000] 13.977440: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [3292/1) mapped 44252 status W fio 131 [000] 13.977931: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [6882/1) mapped 47842 status W fio 131 [000] 13.978376: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [3117/1) mapped 44077 status W fio 131 [000] 13.978957: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [2896/1) mapped 43856 status W fio 131 [000] 13.979474: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [7479/1) mapped 48439 status W Fix this by caching the extents for inodes with depth == 0 in ext4_find_extent(). [ Renamed ext4_es_cache_extents() to ext4_cache_extents() since this newly added function is not in extents_cache.c, and to avoid potential visual confusion with ext4_es_cache_extent(). 
-TYT ] Signed-off-by: Dmitry Monakhov Link: https://lore.kernel.org/r/20191106122502.19986-1-dmonakhov@gmail.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index fa6ae9014e8f..34b79825efdc 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -510,6 +510,30 @@ int ext4_ext_check_inode(struct inode *inode) return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0); } +static void ext4_cache_extents(struct inode *inode, + struct ext4_extent_header *eh) +{ + struct ext4_extent *ex = EXT_FIRST_EXTENT(eh); + ext4_lblk_t prev = 0; + int i; + + for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { + unsigned int status = EXTENT_STATUS_WRITTEN; + ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); + int len = ext4_ext_get_actual_len(ex); + + if (prev && (prev != lblk)) + ext4_es_cache_extent(inode, prev, lblk - prev, ~0, + EXTENT_STATUS_HOLE); + + if (ext4_ext_is_unwritten(ex)) + status = EXTENT_STATUS_UNWRITTEN; + ext4_es_cache_extent(inode, lblk, len, + ext4_ext_pblock(ex), status); + prev = lblk + len; + } +} + static struct buffer_head * __read_extent_tree_block(const char *function, unsigned int line, struct inode *inode, ext4_fsblk_t pblk, int depth, @@ -540,26 +564,7 @@ __read_extent_tree_block(const char *function, unsigned int line, */ if (!(flags & EXT4_EX_NOCACHE) && depth == 0) { struct ext4_extent_header *eh = ext_block_hdr(bh); - struct ext4_extent *ex = EXT_FIRST_EXTENT(eh); - ext4_lblk_t prev = 0; - int i; - - for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { - unsigned int status = EXTENT_STATUS_WRITTEN; - ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); - int len = ext4_ext_get_actual_len(ex); - - if (prev && (prev != lblk)) - ext4_es_cache_extent(inode, prev, - lblk - prev, ~0, - EXTENT_STATUS_HOLE); - - if (ext4_ext_is_unwritten(ex)) - 
status = EXTENT_STATUS_UNWRITTEN; - ext4_es_cache_extent(inode, lblk, len, - ext4_ext_pblock(ex), status); - prev = lblk + len; - } + ext4_cache_extents(inode, eh); } return bh; errout: @@ -907,6 +912,8 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, path[0].p_bh = NULL; i = depth; + if (!(flags & EXT4_EX_NOCACHE) && depth == 0) + ext4_cache_extents(inode, eh); /* walk through the tree */ while (i) { ext_debug("depth %d: num %d, max %d\n", -- GitLab From bcf49bd648195332352717a9a537765da6c2c23c Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 8 Jan 2018 15:54:44 +0100 Subject: [PATCH 1107/1278] net: ipv4: emulate READ_ONCE() on ->hdrincl bit-field in raw_sendmsg() commit 20b50d79974ea3192e8c3ab7faf4e536e5f14d8f upstream. Commit 8f659a03a0ba ("net: ipv4: fix for a race condition in raw_sendmsg") fixed the issue of possibly inconsistent ->hdrincl handling due to concurrent updates by reading this bit-field member into a local variable and using the thus stabilized value in subsequent tests. However, aforementioned commit also adds the (correct) comment that /* hdrincl should be READ_ONCE(inet->hdrincl) * but READ_ONCE() doesn't work with bit fields */ because as it stands, the compiler is free to shortcut or even eliminate the local variable at its will. Note that I have not seen anything like this happening in reality and thus, the concern is a theoretical one. However, in order to be on the safe side, emulate a READ_ONCE() on the bit-field by doing it on the local 'hdrincl' variable itself: int hdrincl = inet->hdrincl; hdrincl = READ_ONCE(hdrincl); This breaks the chain in the sense that the compiler is not allowed to replace subsequent reads from hdrincl with reloads from inet->hdrincl. Fixes: 8f659a03a0ba ("net: ipv4: fix for a race condition in raw_sendmsg") Signed-off-by: Nicolai Stange Reviewed-by: Stefano Brivio Signed-off-by: David S. 
Miller Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- net/ipv4/raw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 53a11894f9e4..261a9813b88c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -520,9 +520,11 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out; /* hdrincl should be READ_ONCE(inet->hdrincl) - * but READ_ONCE() doesn't work with bit fields + * but READ_ONCE() doesn't work with bit fields. + * Doing this indirectly yields the same result. */ hdrincl = inet->hdrincl; + hdrincl = READ_ONCE(hdrincl); /* * Check the flags. */ -- GitLab From 48eaa2b5d52c431769ebea561b6464271a832533 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Feb 2018 14:32:48 +0100 Subject: [PATCH 1108/1278] net: ipv4: avoid unused variable warning for sysctl commit 773daa3caf5d3f87fdb1ab43e9c1b367a38fa394 upstream. The newly introduced ip_min_valid_pmtu variable is only used when CONFIG_SYSCTL is set: net/ipv4/route.c:135:12: error: 'ip_min_valid_pmtu' defined but not used [-Werror=unused-variable] This moves it to the other variables like it, to avoid the harmless warning. Fixes: c7272c2f1229 ("net: ipv4: don't allow setting net.ipv4.route.min_pmtu below 68") Signed-off-by: Arnd Bergmann Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 05fe1d007544..3c298ec32200 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -133,8 +133,6 @@ static int ip_rt_min_advmss __read_mostly = 256; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; -static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; - /* * Interface to generic destination cache.
*/ @@ -2869,6 +2867,7 @@ void ip_rt_multicast_event(struct in_device *in_dev) static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; +static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, void __user *buffer, -- GitLab From bfebac06dd5fde6abff56316c0451753daf5e92f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 28 Sep 2018 00:51:20 +0100 Subject: [PATCH 1109/1278] keys: Fix the use of the C++ keyword "private" in uapi/linux/keyctl.h commit 2ecefa0a15fd0ef88b9cd5d15ceb813008136431 upstream. The keyctl_dh_params struct in uapi/linux/keyctl.h contains the symbol "private" which means that the header file will cause compilation failure if #included in to a C++ program. Further, the patch that added the same struct to the keyutils package named the symbol "priv", not "private". The previous attempt to fix this (commit 8a2336e549d3) did so by simply renaming the kernel's copy of the field to dh_private, but this then breaks existing userspace and as such has been reverted (commit 8c0f9f5b309d). [And note, to those who think that wrapping the struct in extern "C" {} will work: it won't; that only changes how symbol names are presented to the assembler and linker.]. Instead, insert an anonymous union around the "private" member and add a second member in there with the name "priv" to match the one in the keyutils package. The "private" member is then wrapped in !__cplusplus cpp-conditionals to hide it from C++. 
Fixes: ddbb41148724 ("KEYS: Add KEYCTL_DH_COMPUTE command") Fixes: 8a2336e549d3 ("uapi/linux/keyctl.h: don't use C++ reserved keyword as a struct member name") Signed-off-by: David Howells cc: Randy Dunlap cc: Lubomir Rintel cc: James Morris cc: Mat Martineau cc: Stephan Mueller cc: Andrew Morton cc: Linus Torvalds cc: stable@vger.kernel.org Signed-off-by: James Morris Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/keyctl.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index 7b8c9e19bad1..0f3cb13db8e9 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -65,7 +65,12 @@ /* keyctl structures */ struct keyctl_dh_params { - __s32 private; + union { +#ifndef __cplusplus + __s32 private; +#endif + __s32 priv; + }; __s32 prime; __s32 base; }; -- GitLab From 521eb0edffb28eb0c52d519b2d54ae05e1e5289d Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 4 Sep 2019 09:56:03 -0700 Subject: [PATCH 1110/1278] drm/msm: Use the correct dma_sync calls harder commit 9f614197c744002f9968e82c649fdf7fe778e1e7 upstream. Looks like the dma_sync calls don't do what we want on armv7 either. 
Fixes: Unable to handle kernel paging request at virtual address 50001000 pgd = (ptrval) [50001000] *pgd=00000000 Internal error: Oops: 805 [#1] SMP ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.3.0-rc6-00271-g9f159ae07f07 #4 Hardware name: Freescale i.MX53 (Device Tree Support) PC is at v7_dma_clean_range+0x20/0x38 LR is at __dma_page_cpu_to_dev+0x28/0x90 pc : [] lr : [] psr: 20000013 sp : d80b5a88 ip : de96c000 fp : d840ce6c r10: 00000000 r9 : 00000001 r8 : d843e010 r7 : 00000000 r6 : 00008000 r5 : ddb6c000 r4 : 00000000 r3 : 0000003f r2 : 00000040 r1 : 50008000 r0 : 50001000 Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 70004019 DAC: 00000051 Process swapper/0 (pid: 1, stack limit = 0x(ptrval)) Signed-off-by: Rob Clark Fixes: 3de433c5b38a ("drm/msm: Use the correct dma_sync calls in msm_gem") Tested-by: Fabio Estevam Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/msm_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 300c4624aa6c..b9403851f37f 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -61,7 +61,7 @@ static void sync_for_device(struct msm_gem_object *msm_obj) { struct device *dev = msm_obj->base.dev->dev; - if (get_dma_ops(dev)) { + if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { dma_sync_sg_for_device(dev, msm_obj->sgt->sgl, msm_obj->sgt->nents, DMA_BIDIRECTIONAL); } else { @@ -74,7 +74,7 @@ static void sync_for_cpu(struct msm_gem_object *msm_obj) { struct device *dev = msm_obj->base.dev->dev; - if (get_dma_ops(dev)) { + if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl, msm_obj->sgt->nents, DMA_BIDIRECTIONAL); } else { -- GitLab From 4ab07ea502ab75214580cd2ae4f6888d21f2dcb8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 11 Oct 2018 01:49:48 +0000 Subject: [PATCH 1111/1278] crypto: 
mxs-dcp - make symbols 'sha1_null_hash' and 'sha256_null_hash' static commit ce4e45842de3eb54b8dd6e081765d741f5b92b56 upstream. Fixes the following sparse warnings: drivers/crypto/mxs-dcp.c:39:15: warning: symbol 'sha1_null_hash' was not declared. Should it be static? drivers/crypto/mxs-dcp.c:43:15: warning: symbol 'sha256_null_hash' was not declared. Should it be static? Fixes: c709eebaf5c5 ("crypto: mxs-dcp - Fix SHA null hashes and output length") Signed-off-by: Wei Yongjun Signed-off-by: Herbert Xu Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/mxs-dcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 5a4b8aee22a8..eb569cf06309 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -37,11 +37,11 @@ * Null hashes to align with hw behavior on imx6sl and ull * these are flipped for consistency with hw output */ -const uint8_t sha1_null_hash[] = +static const uint8_t sha1_null_hash[] = "\x09\x07\xd8\xaf\x90\x18\x60\x95\xef\xbf" "\x55\x32\x0d\x4b\x6b\x5e\xee\xa3\x39\xda"; -const uint8_t sha256_null_hash[] = +static const uint8_t sha256_null_hash[] = "\x55\xb8\x52\x78\x1b\x99\x95\xa4" "\x4c\x93\x9b\x64\xe4\x41\xae\x27" "\x24\xb9\x6f\x99\xc8\xf4\xfb\x9a" -- GitLab From f400768a2a73a9d439b7e2e2c76851d3b71f207c Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 19 Mar 2019 15:39:21 +0000 Subject: [PATCH 1112/1278] vti4: removed duplicate log message. commit 01ce31c57b3f07c91c9d45bbaf126124cce83a5d upstream. Removed info log-message if ipip tunnel registration fails during module-initialization: it adds nothing to the error message that is written on all failures. 
Fixes: dd9ee3444014e ("vti4: Fix a ipip packet processing bug in 'IPCOMP' virtual tunnel") Signed-off-by: Jeremy Sowden Signed-off-by: Steffen Klassert Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_vti.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index b930ab5cf140..c1693d75e196 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -681,10 +681,8 @@ static int __init vti_init(void) msg = "ipip tunnel"; err = xfrm4_tunnel_register(&ipip_handler, AF_INET); - if (err < 0) { - pr_info("%s: cant't register tunnel\n",__func__); + if (err < 0) goto xfrm_tunnel_failed; - } msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); -- GitLab From deb946eeaa7f17f8fe91ed320e94411a73523454 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 12 Mar 2020 11:58:06 +0200 Subject: [PATCH 1113/1278] watchdog: reset last_hw_keepalive time at start [ Upstream commit 982bb70517aef2225bad1d802887b733db492cc0 ] Currently the watchdog core does not initialize the last_hw_keepalive time during watchdog startup. This will cause the watchdog to be pinged immediately if enough time has passed from the system boot-up time, and some types of watchdogs like K3 RTI does not like this. To avoid the issue, setup the last_hw_keepalive time during watchdog startup. 
Signed-off-by: Tero Kristo Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200302200426.6492-3-t-kristo@ti.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/watchdog_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index b30fb637ae94..52e03f1c76e3 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -245,6 +245,7 @@ static int watchdog_start(struct watchdog_device *wdd) if (err == 0) { set_bit(WDOG_ACTIVE, &wdd->status); wd_data->last_keepalive = started_at; + wd_data->last_hw_keepalive = started_at; watchdog_update_worker(wdd); } -- GitLab From b9808cb4b2cc22ae3506509e27cc26c036b736f1 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 22 Mar 2020 11:12:53 -0700 Subject: [PATCH 1114/1278] scsi: lpfc: Fix kasan slab-out-of-bounds error in lpfc_unreg_login [ Upstream commit 38503943c89f0bafd9e3742f63f872301d44cbea ] The following kasan bug was called out: BUG: KASAN: slab-out-of-bounds in lpfc_unreg_login+0x7c/0xc0 [lpfc] Read of size 2 at addr ffff889fc7c50a22 by task lpfc_worker_3/6676 ... Call Trace: dump_stack+0x96/0xe0 ? lpfc_unreg_login+0x7c/0xc0 [lpfc] print_address_description.constprop.6+0x1b/0x220 ? lpfc_unreg_login+0x7c/0xc0 [lpfc] ? lpfc_unreg_login+0x7c/0xc0 [lpfc] __kasan_report.cold.9+0x37/0x7c ? lpfc_unreg_login+0x7c/0xc0 [lpfc] kasan_report+0xe/0x20 lpfc_unreg_login+0x7c/0xc0 [lpfc] lpfc_sli_def_mbox_cmpl+0x334/0x430 [lpfc] ... When processing the completion of a "Reg Rpi" login mailbox command in lpfc_sli_def_mbox_cmpl, a call may be made to lpfc_unreg_login. The vpi is extracted from the completing mailbox context and passed as an input for the next. However, the vpi stored in the mailbox command context is an absolute vpi, which for SLI4 represents both base + offset. 
When used with a non-zero base component, (function id > 0) this results in an out-of-range access beyond the allocated phba->vpi_ids array. Fix by subtracting the function's base value to get an accurate vpi number. Link: https://lore.kernel.org/r/20200322181304.37655-2-jsmart2021@gmail.com Signed-off-by: James Smart Signed-off-by: Dick Kennedy Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_sli.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d8e0ba68879c..480d2d467f7a 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2271,6 +2271,8 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) !pmb->u.mb.mbxStatus) { rpi = pmb->u.mb.un.varWords[0]; vpi = pmb->u.mb.un.varRegLogin.vpi; + if (phba->sli_rev == LPFC_SLI_REV4) + vpi -= phba->sli4_hba.max_cfg_param.vpi_base; lpfc_unreg_login(phba, vpi, rpi, pmb); pmb->vport = vport; pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; -- GitLab From 7435d3c34bd5e5b0a796211b05a947c32ab24127 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Fri, 6 Mar 2020 09:34:20 +0800 Subject: [PATCH 1115/1278] ceph: return ceph_mdsc_do_request() errors from __get_parent() [ Upstream commit c6d50296032f0b97473eb2e274dc7cc5d0173847 ] Return the error returned by ceph_mdsc_do_request(). Otherwise, r_target_inode ends up being NULL and this ends up returning ENOENT regardless of the error.
Signed-off-by: Qiujun Huang Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/export.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 3c59ad180ef0..4cfe1154d4c7 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -151,6 +151,11 @@ static struct dentry *__get_parent(struct super_block *sb, req->r_num_caps = 1; err = ceph_mdsc_do_request(mdsc, NULL, req); + if (err) { + ceph_mdsc_put_request(req); + return ERR_PTR(err); + } + inode = req->r_target_inode; if (inode) ihold(inode); -- GitLab From 968b1dd03e74563d05b78f8b30227a521c982cce Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 10 Mar 2020 19:34:20 +0800 Subject: [PATCH 1116/1278] ceph: don't skip updating wanted caps when cap is stale [ Upstream commit 0aa971b6fd3f92afef6afe24ef78d9bb14471519 ] 1. try_get_cap_refs() fails to get caps and finds that mds_wanted does not include what it wants. It returns -ESTALE. 2. ceph_get_caps() calls ceph_renew_caps(). ceph_renew_caps() finds that inode has cap, so it calls ceph_check_caps(). 3. ceph_check_caps() finds that issued caps (without checking if it's stale) already includes caps wanted by open file, so it skips updating wanted caps. Above events can cause an infinite loop inside ceph_get_caps(). Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/caps.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c3a3ee74e2d8..1b5a50848b5b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1863,8 +1863,12 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, } /* want more caps from mds? 
*/ - if (want & ~(cap->mds_wanted | cap->issued)) - goto ack; + if (want & ~cap->mds_wanted) { + if (want & ~(cap->mds_wanted | cap->issued)) + goto ack; + if (!__cap_is_valid(cap)) + goto ack; + } /* things we might delay */ if ((cap->issued & ~retain) == 0 && -- GitLab From 54cf27c79d50646481b4291ebef620d05da29405 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 16 Mar 2020 11:32:14 +0100 Subject: [PATCH 1117/1278] pwm: rcar: Fix late Runtime PM enablement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1451a3eed24b5fd6a604683f0b6995e0e7e16c79 ] Runtime PM should be enabled before calling pwmchip_add(), as PWM users can appear immediately after the PWM chip has been added. Likewise, Runtime PM should be disabled after the removal of the PWM chip. Fixes: ed6c1476bf7f16d5 ("pwm: Add support for R-Car PWM Timer") Signed-off-by: Geert Uytterhoeven Reviewed-by: Uwe Kleine-König Reviewed-by: Laurent Pinchart Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-rcar.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c index 0fcf94ffad32..c298bec25a90 100644 --- a/drivers/pwm/pwm-rcar.c +++ b/drivers/pwm/pwm-rcar.c @@ -236,24 +236,28 @@ static int rcar_pwm_probe(struct platform_device *pdev) rcar_pwm->chip.base = -1; rcar_pwm->chip.npwm = 1; + pm_runtime_enable(&pdev->dev); + ret = pwmchip_add(&rcar_pwm->chip); if (ret < 0) { dev_err(&pdev->dev, "failed to register PWM chip: %d\n", ret); + pm_runtime_disable(&pdev->dev); return ret; } - pm_runtime_enable(&pdev->dev); - return 0; } static int rcar_pwm_remove(struct platform_device *pdev) { struct rcar_pwm_chip *rcar_pwm = platform_get_drvdata(pdev); + int ret; + + ret = pwmchip_remove(&rcar_pwm->chip); pm_runtime_disable(&pdev->dev); - return pwmchip_remove(&rcar_pwm->chip); + return ret; } static const struct of_device_id rcar_pwm_of_table[] = { -- 
GitLab From 304a02096a3ab59508a5ae0b078f06da215e4869 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Tue, 24 Mar 2020 15:58:50 +0800 Subject: [PATCH 1118/1278] scsi: iscsi: Report unbind session event when the target has been removed [ Upstream commit 13e60d3ba287d96eeaf1deaadba51f71578119a3 ] If the daemon is restarted or crashes while logging out of a session, the unbind session event sent by the kernel is not processed and is lost. When the daemon starts again, the session can't be unbound because the daemon is waiting for the event message. However, the kernel has already logged out and the event will not be resent. When iscsid restart is complete, logout session reports error: Logging out of session [sid: 6, target: iqn.xxxxx, portal: xx.xx.xx.xx,3260] iscsiadm: Could not logout of [sid: 6, target: iscsiadm -m node iqn.xxxxx, portal: xx.xx.xx.xx,3260]. iscsiadm: initiator reported error (9 - internal error) iscsiadm: Could not logout of all requested sessions Make sure the unbind event is emitted. [mkp: commit desc and applied by hand since patch was mangled] Link: https://lore.kernel.org/r/4eab1771-2cb3-8e79-b31c-923652340e99@huawei.com Reviewed-by: Lee Duncan Signed-off-by: Wu Bo Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_transport_iscsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index aecb563a2b4e..958901523469 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2010,7 +2010,7 @@ static void __iscsi_unbind_session(struct work_struct *work) if (session->target_id == ISCSI_MAX_TARGET) { spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); - return; + goto unbind_session_exit; } target_id = session->target_id; @@ -2022,6 +2022,8 @@ static void __iscsi_unbind_session(struct work_struct *work) ida_simple_remove(&iscsi_sess_ida, target_id); scsi_remove_target(&session->dev); + +unbind_session_exit: iscsi_session_event(session, ISCSI_KEVENT_UNBIND_SESSION); ISCSI_DBG_TRANS_SESSION(session, "Completed target removal\n"); } -- GitLab From 7e06e9ec81b953d4bb9f03f8a5eafbbd69a0456d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 2 Apr 2020 20:53:57 +0200 Subject: [PATCH 1119/1278] ASoC: Intel: atom: Take the drv->lock mutex before calling sst_send_slot_map() [ Upstream commit 81630dc042af998b9f58cd8e2c29dab9777ea176 ] sst_send_slot_map() uses sst_fill_and_send_cmd_unlocked() because in some places it is called with the drv->lock mutex already held. So it must always be called with the mutex locked. This commit adds missing locking in the sst_set_be_modules() code-path. 
Fixes: 24c8d14192cc ("ASoC: Intel: mrfld: add DSP core controls") Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200402185359.3424-1-hdegoede@redhat.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/atom/sst-atom-controls.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/intel/atom/sst-atom-controls.c b/sound/soc/intel/atom/sst-atom-controls.c index 6044b3bbb121..999eb3ba7867 100644 --- a/sound/soc/intel/atom/sst-atom-controls.c +++ b/sound/soc/intel/atom/sst-atom-controls.c @@ -974,7 +974,9 @@ static int sst_set_be_modules(struct snd_soc_dapm_widget *w, dev_dbg(c->dev, "Enter: widget=%s\n", w->name); if (SND_SOC_DAPM_EVENT_ON(event)) { + mutex_lock(&drv->lock); ret = sst_send_slot_map(drv); + mutex_unlock(&drv->lock); if (ret) return ret; ret = sst_send_pipe_module_params(w, k); -- GitLab From 1a461aa9842bb737efe2ebc3e89b2a3dc2818c0e Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Apr 2020 14:34:10 -0700 Subject: [PATCH 1120/1278] kernel/gcov/fs.c: gcov_seq_next() should increase position index [ Upstream commit f4d74ef6220c1eda0875da30457bef5c7111ab06 ] If seq_file .next function does not change position index, read after some lseek can generate unexpected output. 
https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Acked-by: Peter Oberparleiter Cc: Al Viro Cc: Davidlohr Bueso Cc: Ingo Molnar Cc: Manfred Spraul Cc: NeilBrown Cc: Steven Rostedt Cc: Waiman Long Link: http://lkml.kernel.org/r/f65c6ee7-bd00-f910-2f8a-37cc67e4ff88@virtuozzo.com Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/gcov/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index 6e40ff6be083..291e0797125b 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -109,9 +109,9 @@ static void *gcov_seq_next(struct seq_file *seq, void *data, loff_t *pos) { struct gcov_iterator *iter = data; + (*pos)++; if (gcov_iter_next(iter)) return NULL; - (*pos)++; return iter; } -- GitLab From 1058a30206ebe17df1174dd08363122e552dd924 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:53 -0700 Subject: [PATCH 1121/1278] selftests: kmod: fix handling test numbers above 9 [ Upstream commit 6d573a07528308eb77ec072c010819c359bebf6e ] get_test_count() and get_test_enabled() were broken for test numbers above 9 due to awk interpreting a field specification like '$0010' as octal rather than decimal. Fix it by stripping the leading zeroes. 
Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: NeilBrown Link: http://lkml.kernel.org/r/20200318230515.171692-5-ebiggers@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- tools/testing/selftests/kmod/kmod.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 7956ea3be667..eed5d5b81226 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -502,18 +502,23 @@ function test_num() fi } -function get_test_count() +function get_test_data() { test_num $1 - TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}') + local field_num=$(echo $1 | sed 's/^0*//') + echo $ALL_TESTS | awk '{print $'$field_num'}' +} + +function get_test_count() +{ + TEST_DATA=$(get_test_data $1) LAST_TWO=${TEST_DATA#*:*} echo ${LAST_TWO%:*} } function get_test_enabled() { - test_num $1 - TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}') + TEST_DATA=$(get_test_data $1) echo ${TEST_DATA#*:*:} } -- GitLab From 73240ee761ff17688cc5de7cebe8dba0057b7d1b Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Apr 2020 14:34:13 -0700 Subject: [PATCH 1122/1278] ipc/util.c: sysvipc_find_ipc() should increase position index [ Upstream commit 89163f93c6f969da5811af5377cc10173583123b ] If seq_file .next function does not change position index, read after some lseek can generate unexpected output. 
https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Acked-by: Waiman Long Cc: Davidlohr Bueso Cc: Manfred Spraul Cc: Al Viro Cc: Ingo Molnar Cc: NeilBrown Cc: Peter Oberparleiter Cc: Steven Rostedt Link: http://lkml.kernel.org/r/b7a20945-e315-8bb0-21e6-3875c14a8494@virtuozzo.com Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- ipc/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipc/util.c b/ipc/util.c index 79b30eee32cd..7989f5e53219 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -750,13 +750,13 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, total++; } + *new_pos = pos + 1; if (total >= ids->in_use) return NULL; for (; pos < IPCMNI; pos++) { ipc = idr_find(&ids->ipcs_idr, pos); if (ipc != NULL) { - *new_pos = pos + 1; rcu_read_lock(); ipc_lock_object(ipc); return ipc; -- GitLab From 5f5000339321cb469bd68958b6e6dd30c4e2d0ee Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Fri, 27 Mar 2020 13:45:02 +0100 Subject: [PATCH 1123/1278] s390/cio: avoid duplicated 'ADD' uevents [ Upstream commit 05ce3e53f375295c2940390b2b429e506e07655c ] The common I/O layer delays the ADD uevent for subchannels and delegates generating this uevent to the individual subchannel drivers. The io_subchannel driver will do so when the associated ccw_device has been registered -- but unconditionally, so more ADD uevents will be generated if a subchannel has been unbound from the io_subchannel driver and later rebound. To fix this, only generate the ADD event if uevents were still suppressed for the device. 
Fixes: fa1a8c23eb7d ("s390: cio: Delay uevents for subchannels") Message-Id: <20200327124503.9794-2-cohuck@redhat.com> Reported-by: Boris Fiuczynski Reviewed-by: Peter Oberparleiter Reviewed-by: Boris Fiuczynski Signed-off-by: Cornelia Huck Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- drivers/s390/cio/device.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e5c32f4b5287..d2203cd17813 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -828,8 +828,10 @@ static void io_subchannel_register(struct ccw_device *cdev) * Now we know this subchannel will stay, we can throw * our delayed uevent. */ - dev_set_uevent_suppress(&sch->dev, 0); - kobject_uevent(&sch->dev.kobj, KOBJ_ADD); + if (dev_get_uevent_suppress(&sch->dev)) { + dev_set_uevent_suppress(&sch->dev, 0); + kobject_uevent(&sch->dev.kobj, KOBJ_ADD); + } /* make it known to the system */ ret = ccw_device_add(cdev); if (ret) { @@ -1037,8 +1039,11 @@ static int io_subchannel_probe(struct subchannel *sch) * Throw the delayed uevent for the subchannel, register * the ccw_device and exit. */ - dev_set_uevent_suppress(&sch->dev, 0); - kobject_uevent(&sch->dev.kobj, KOBJ_ADD); + if (dev_get_uevent_suppress(&sch->dev)) { + /* should always be the case for the console */ + dev_set_uevent_suppress(&sch->dev, 0); + kobject_uevent(&sch->dev.kobj, KOBJ_ADD); + } cdev = sch_get_cdev(sch); rc = ccw_device_add(cdev); if (rc) { -- GitLab From 41fc491dfc3d69e79a5b82ef82843c900351ba76 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 16 Mar 2020 11:32:15 +0100 Subject: [PATCH 1124/1278] pwm: renesas-tpu: Fix late Runtime PM enablement [ Upstream commit d5a3c7a4536e1329a758e14340efd0e65252bd3d ] Runtime PM should be enabled before calling pwmchip_add(), as PWM users can appear immediately after the PWM chip has been added. 
Likewise, Runtime PM should always be disabled after the removal of the PWM chip, even if the latter failed. Fixes: 99b82abb0a35b073 ("pwm: Add Renesas TPU PWM driver") Signed-off-by: Geert Uytterhoeven Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-renesas-tpu.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c index 29267d12fb4c..9c7962f2f0aa 100644 --- a/drivers/pwm/pwm-renesas-tpu.c +++ b/drivers/pwm/pwm-renesas-tpu.c @@ -423,16 +423,17 @@ static int tpu_probe(struct platform_device *pdev) tpu->chip.base = -1; tpu->chip.npwm = TPU_CHANNEL_MAX; + pm_runtime_enable(&pdev->dev); + ret = pwmchip_add(&tpu->chip); if (ret < 0) { dev_err(&pdev->dev, "failed to register PWM chip\n"); + pm_runtime_disable(&pdev->dev); return ret; } dev_info(&pdev->dev, "TPU PWM %d registered\n", tpu->pdev->id); - pm_runtime_enable(&pdev->dev); - return 0; } @@ -442,12 +443,10 @@ static int tpu_remove(struct platform_device *pdev) int ret; ret = pwmchip_remove(&tpu->chip); - if (ret) - return ret; pm_runtime_disable(&pdev->dev); - return 0; + return ret; } #ifdef CONFIG_OF -- GitLab From 8d42db7f280dd1650c831dee519f9c150c7bb66d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 3 Feb 2020 13:35:35 -0800 Subject: [PATCH 1125/1278] pwm: bcm2835: Dynamically allocate base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2c25b07e5ec119cab609e41407a1fb3fa61442f5 ] The newer 2711 and 7211 chips have two PWM controllers and failure to dynamically allocate the PWM base would prevent the second PWM controller instance being probed for succeeding with an -EEXIST error from alloc_pwms(). 
Fixes: e5a06dc5ac1f ("pwm: Add BCM2835 PWM driver") Signed-off-by: Florian Fainelli Acked-by: Uwe Kleine-König Reviewed-by: Nicolas Saenz Julienne Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-bcm2835.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c index db001cba937f..e340ad79a1ec 100644 --- a/drivers/pwm/pwm-bcm2835.c +++ b/drivers/pwm/pwm-bcm2835.c @@ -166,6 +166,7 @@ static int bcm2835_pwm_probe(struct platform_device *pdev) pc->chip.dev = &pdev->dev; pc->chip.ops = &bcm2835_pwm_ops; + pc->chip.base = -1; pc->chip.npwm = 2; pc->chip.of_xlate = of_pwm_xlate_with_flags; pc->chip.of_pwm_n_cells = 3; -- GitLab From d6f70697d02b5cfef26e531ff7a3dd1f3b0dd87d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Apr 2020 16:14:27 +0200 Subject: [PATCH 1126/1278] perf/core: Disable page faults when getting phys address [ Upstream commit d3296fb372bf7497b0e5d0478c4e7a677ec6f6e9 ] We hit following warning when running tests on kernel compiled with CONFIG_DEBUG_ATOMIC_SLEEP=y: WARNING: CPU: 19 PID: 4472 at mm/gup.c:2381 __get_user_pages_fast+0x1a4/0x200 CPU: 19 PID: 4472 Comm: dummy Not tainted 5.6.0-rc6+ #3 RIP: 0010:__get_user_pages_fast+0x1a4/0x200 ... Call Trace: perf_prepare_sample+0xff1/0x1d90 perf_event_output_forward+0xe8/0x210 __perf_event_overflow+0x11a/0x310 __intel_pmu_pebs_event+0x657/0x850 intel_pmu_drain_pebs_nhm+0x7de/0x11d0 handle_pmi_common+0x1b2/0x650 intel_pmu_handle_irq+0x17b/0x370 perf_event_nmi_handler+0x40/0x60 nmi_handle+0x192/0x590 default_do_nmi+0x6d/0x150 do_nmi+0x2f9/0x3c0 nmi+0x8e/0xd7 While __get_user_pages_fast() is IRQ-safe, it calls access_ok(), which warns on: WARN_ON_ONCE(!in_task() && !pagefault_disabled()) Peter suggested disabling page faults around __get_user_pages_fast(), which gets rid of the warning in access_ok() call. 
Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200407141427.3184722-1-jolsa@kernel.org Signed-off-by: Sasha Levin --- kernel/events/core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 845c8a1a9d30..c16ce11049de 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6119,9 +6119,12 @@ static u64 perf_virt_to_phys(u64 virt) * Try IRQ-safe __get_user_pages_fast first. * If failed, leave phys_addr as 0. */ - if ((current->mm != NULL) && - (__get_user_pages_fast(virt, 1, 0, &p) == 1)) - phys_addr = page_to_phys(p) + virt % PAGE_SIZE; + if (current->mm != NULL) { + pagefault_disable(); + if (__get_user_pages_fast(virt, 1, 0, &p) == 1) + phys_addr = page_to_phys(p) + virt % PAGE_SIZE; + pagefault_enable(); + } if (p) put_page(p); -- GitLab From bf2b525b1831cc8e73ed1a2f5e44919bb4a556c0 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 5 Oct 2019 14:03:57 +0200 Subject: [PATCH 1127/1278] PCI/ASPM: Allow re-enabling Clock PM [ Upstream commit 35efea32b26f9aacc99bf07e0d2cdfba2028b099 ] Previously Clock PM could not be re-enabled after being disabled by pci_disable_link_state() because clkpm_capable was reset. Change this by adding a clkpm_disable field similar to aspm_disable. Link: https://lore.kernel.org/r/4e8a66db-7d53-4a66-c26c-f0037ffaa705@gmail.com Signed-off-by: Heiner Kallweit Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/pcie/aspm.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 6f58767b5190..400031622b76 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -80,6 +80,7 @@ struct pcie_link_state { u32 clkpm_capable:1; /* Clock PM capable? 
*/ u32 clkpm_enabled:1; /* Current Clock PM state */ u32 clkpm_default:1; /* Default Clock PM state by BIOS */ + u32 clkpm_disable:1; /* Clock PM disabled */ /* Exit latencies */ struct aspm_latency latency_up; /* Upstream direction exit latency */ @@ -177,8 +178,11 @@ static void pcie_set_clkpm_nocheck(struct pcie_link_state *link, int enable) static void pcie_set_clkpm(struct pcie_link_state *link, int enable) { - /* Don't enable Clock PM if the link is not Clock PM capable */ - if (!link->clkpm_capable) + /* + * Don't enable Clock PM if the link is not Clock PM capable + * or Clock PM is disabled + */ + if (!link->clkpm_capable || link->clkpm_disable) enable = 0; /* Need nothing if the specified equals to current state */ if (link->clkpm_enabled == enable) @@ -208,7 +212,8 @@ static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) } link->clkpm_enabled = enabled; link->clkpm_default = enabled; - link->clkpm_capable = (blacklist) ? 0 : capable; + link->clkpm_capable = capable; + link->clkpm_disable = blacklist ? 1 : 0; } static bool pcie_retrain_link(struct pcie_link_state *link) @@ -1052,10 +1057,9 @@ static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) link->aspm_disable |= ASPM_STATE_L1; pcie_config_aspm_link(link, policy_to_aspm_state(link)); - if (state & PCIE_LINK_STATE_CLKPM) { - link->clkpm_capable = 0; - pcie_set_clkpm(link, 0); - } + if (state & PCIE_LINK_STATE_CLKPM) + link->clkpm_disable = 1; + pcie_set_clkpm(link, policy_to_clkpm_state(link)); mutex_unlock(&aspm_lock); if (sem) up_read(&pci_bus_sem); -- GitLab From 69e4bfba095a3a03015c35f6fbdfaafff5f8d26b Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Sun, 26 Apr 2020 09:06:17 +0200 Subject: [PATCH 1128/1278] mm, slub: restore the original intention of prefetch_freepointer() commit 0882ff9190e3bc51e2d78c3aadd7c690eeaa91d5 upstream. 
In SLUB, prefetch_freepointer() is used when allocating an object from cache's freelist, to make sure the next object in the list is cache-hot, since it's probable it will be allocated soon. Commit 2482ddec670f ("mm: add SLUB free list pointer obfuscation") has unintentionally changed the prefetch in a way where the prefetch is turned to a real fetch, and only the next->next pointer is prefetched. In case there is not a stream of allocations that would benefit from prefetching, the extra real fetch might add a useless cache miss to the allocation. Restore the previous behavior. Link: http://lkml.kernel.org/r/20180809085245.22448-1-vbabka@suse.cz Fixes: 2482ddec670f ("mm: add SLUB free list pointer obfuscation") Signed-off-by: Vlastimil Babka Acked-by: Kees Cook Cc: Daniel Micay Cc: Eric Dumazet Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Matthias Schiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sven Eckelmann Signed-off-by: Sasha Levin --- mm/slub.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 3c1a16f03b2b..481518c3f61a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -269,8 +269,7 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object) static void prefetch_freepointer(const struct kmem_cache *s, void *object) { - if (object) - prefetch(freelist_dereference(s, object + s->offset)); + prefetch(object + s->offset); } static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) -- GitLab From e62f0f18405006eae5b362c6b3182cda1d1b3950 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Mon, 20 Apr 2020 15:26:54 +0530 Subject: [PATCH 1129/1278] cxgb4: fix large delays in PTP synchronization [ Upstream commit bd019427bf3623ee3c7d2845cf921bbf4c14846c ] Fetching PTP sync information from mailbox is slow and can take up to 10 milliseconds. 
Reduce this unnecessary delay by directly reading the information from the corresponding registers. Fixes: 9c33e4208bce ("cxgb4: Add PTP Hardware Clock (PHC) support") Signed-off-by: Manoj Malviya Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/chelsio/cxgb4/cxgb4_ptp.c | 27 +++++-------------- drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 3 +++ 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c index 758f2b836328..ff7e58a8c90f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c @@ -311,32 +311,17 @@ static int cxgb4_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) */ static int cxgb4_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { - struct adapter *adapter = (struct adapter *)container_of(ptp, - struct adapter, ptp_clock_info); - struct fw_ptp_cmd c; + struct adapter *adapter = container_of(ptp, struct adapter, + ptp_clock_info); u64 ns; - int err; - - memset(&c, 0, sizeof(c)); - c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PTP_CMD) | - FW_CMD_REQUEST_F | - FW_CMD_READ_F | - FW_PTP_CMD_PORTID_V(0)); - c.retval_len16 = cpu_to_be32(FW_CMD_LEN16_V(sizeof(c) / 16)); - c.u.ts.sc = FW_PTP_SC_GET_TIME; - err = t4_wr_mbox(adapter, adapter->mbox, &c, sizeof(c), &c); - if (err < 0) { - dev_err(adapter->pdev_dev, - "PTP: %s error %d\n", __func__, -err); - return err; - } + ns = t4_read_reg(adapter, T5_PORT_REG(0, MAC_PORT_PTP_SUM_LO_A)); + ns |= (u64)t4_read_reg(adapter, + T5_PORT_REG(0, MAC_PORT_PTP_SUM_HI_A)) << 32; /* convert to timespec*/ - ns = be64_to_cpu(c.u.ts.tm); *ts = ns_to_timespec64(ns); - - return err; + return 0; } /** diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index dac90837842b..d3df6962cf43 100644 --- 
a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1810,6 +1810,9 @@ #define MAC_PORT_CFG2_A 0x818 +#define MAC_PORT_PTP_SUM_LO_A 0x990 +#define MAC_PORT_PTP_SUM_HI_A 0x994 + #define MPS_CMN_CTL_A 0x9000 #define COUNTPAUSEMCRX_S 5 -- GitLab From e7dff38799ee7f3e867a880b26697b03d3b74744 Mon Sep 17 00:00:00 2001 From: John Haxby Date: Sat, 18 Apr 2020 16:30:49 +0100 Subject: [PATCH 1130/1278] ipv6: fix restrict IPV6_ADDRFORM operation [ Upstream commit 82c9ae440857840c56e05d4fb1427ee032531346 ] Commit b6f6118901d1 ("ipv6: restrict IPV6_ADDRFORM operation") fixed a problem found by syzbot, but an unfortunate logic error meant that it also broke IPV6_ADDRFORM. Rearrange the checks so that the earlier test is just one of the series of checks made before moving the socket from IPv6 to IPv4. Fixes: b6f6118901d1 ("ipv6: restrict IPV6_ADDRFORM operation") Signed-off-by: John Haxby Cc: stable@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ipv6_sockglue.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 8c492471b0da..337b43d4c3eb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -185,15 +185,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = -EBUSY; break; } - } else if (sk->sk_protocol == IPPROTO_TCP) { - if (sk->sk_prot != &tcpv6_prot) { - retv = -EBUSY; - break; - } - break; - } else { + } + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_prot != &tcpv6_prot) { + retv = -EBUSY; break; } + if (sk->sk_protocol != IPPROTO_TCP) + break; if (sk->sk_state != TCP_ESTABLISHED) { retv = -ENOTCONN; break; -- GitLab From 484ed10c38b13027e14cd7a7fc313d7367025609 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 23 Apr 2020 13:40:47 +0000 Subject: [PATCH 1131/1278] macsec: avoid to set wrong mtu [ Upstream commit
7f327080364abccf923fa5a5b24e038eb0ba1407 ] When a macsec interface is created, the mtu is calculated with the lower interface's mtu value. If the mtu of lower interface is lower than the length, which is needed by macsec interface, macsec's mtu value will be overflowed. So, if the lower interface's mtu is too low, macsec interface's mtu should be set to 0. Test commands: ip link add dummy0 mtu 10 type dummy ip link add macsec0 link dummy0 type macsec ip link show macsec0 Before: 11: macsec0@dummy0: mtu 4294967274 After: 11: macsec0@dummy0: mtu 0 Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macsec.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 5959e8817a1b..926e2eb528fd 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3209,11 +3209,11 @@ static int macsec_newlink(struct net *net, struct net_device *dev, struct netlink_ext_ack *extack) { struct macsec_dev *macsec = macsec_priv(dev); + rx_handler_func_t *rx_handler; + u8 icv_len = DEFAULT_ICV_LEN; struct net_device *real_dev; - int err; + int err, mtu; sci_t sci; - u8 icv_len = DEFAULT_ICV_LEN; - rx_handler_func_t *rx_handler; if (!tb[IFLA_LINK]) return -EINVAL; @@ -3229,7 +3229,11 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (data && data[IFLA_MACSEC_ICV_LEN]) icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); - dev->mtu = real_dev->mtu - icv_len - macsec_extra_len(true); + mtu = real_dev->mtu - icv_len - macsec_extra_len(true); + if (mtu < 0) + dev->mtu = 0; + else + dev->mtu = mtu; rx_handler = rtnl_dereference(real_dev->rx_handler); if (rx_handler && rx_handler != macsec_handle_frame) -- GitLab From 15d8e92bd3df4e864c5bee9a3b97c6134982be84 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 20 Apr 2020 13:29:40 +0000 Subject: [PATCH 1132/1278] macvlan: fix null 
dereference in macvlan_device_event() [ Upstream commit 4dee15b4fd0d61ec6bbd179238191e959d34cf7a ] In the macvlan_device_event(), the list_first_entry_or_null() is used. This function could return null pointer if there is no node. But, the macvlan module doesn't check the null pointer. So, null-ptr-deref would occur. bond0 | +----+-----+ | | macvlan0 macvlan1 | | dummy0 dummy1 The problem scenario. If dummy1 is removed, 1. ->dellink() of dummy1 is called. 2. NETDEV_UNREGISTER of dummy1 notification is sent to macvlan module. 3. ->dellink() of macvlan1 is called. 4. NETDEV_UNREGISTER of macvlan1 notification is sent to bond module. 5. __bond_release_one() is called and it internally calls dev_set_mac_address(). 6. dev_set_mac_address() calls the ->ndo_set_mac_address() of macvlan1, which is macvlan_set_mac_address(). 7. macvlan_set_mac_address() calls the dev_set_mac_address() with dummy1. 8. NETDEV_CHANGEADDR of dummy1 is sent to macvlan module. 9. In the macvlan_device_event(), it calls list_first_entry_or_null(). At this point, dummy1 and macvlan1 were removed. So, list_first_entry_or_null() will return NULL. 
Test commands: ip netns add nst ip netns exec nst ip link add bond0 type bond for i in {0..10} do ip netns exec nst ip link add dummy$i type dummy ip netns exec nst ip link add macvlan$i link dummy$i \ type macvlan mode passthru ip netns exec nst ip link set macvlan$i master bond0 done ip netns del nst Splat looks like: [ 40.585687][ T146] general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP DEI [ 40.587249][ T146] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 40.588342][ T146] CPU: 1 PID: 146 Comm: kworker/u8:2 Not tainted 5.7.0-rc1+ #532 [ 40.589299][ T146] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 40.590469][ T146] Workqueue: netns cleanup_net [ 40.591045][ T146] RIP: 0010:macvlan_device_event+0x4e2/0x900 [macvlan] [ 40.591905][ T146] Code: 00 00 00 00 00 fc ff df 80 3c 06 00 0f 85 45 02 00 00 48 89 da 48 b8 00 00 00 00 00 fc ff d2 [ 40.594126][ T146] RSP: 0018:ffff88806116f4a0 EFLAGS: 00010246 [ 40.594783][ T146] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 40.595653][ T146] RDX: 0000000000000000 RSI: ffff88806547ddd8 RDI: ffff8880540f1360 [ 40.596495][ T146] RBP: ffff88804011a808 R08: fffffbfff4fb8421 R09: fffffbfff4fb8421 [ 40.597377][ T146] R10: ffffffffa7dc2107 R11: 0000000000000000 R12: 0000000000000008 [ 40.598186][ T146] R13: ffff88804011a000 R14: ffff8880540f1000 R15: 1ffff1100c22de9a [ 40.599012][ T146] FS: 0000000000000000(0000) GS:ffff888067800000(0000) knlGS:0000000000000000 [ 40.600004][ T146] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 40.600665][ T146] CR2: 00005572d3a807b8 CR3: 000000005fcf4003 CR4: 00000000000606e0 [ 40.601485][ T146] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 40.602461][ T146] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 40.603443][ T146] Call Trace: [ 40.603871][ T146] ? nf_tables_dump_setelem+0xa0/0xa0 [nf_tables] [ 40.604587][ T146] ? 
macvlan_uninit+0x100/0x100 [macvlan] [ 40.605212][ T146] ? __module_text_address+0x13/0x140 [ 40.605842][ T146] notifier_call_chain+0x90/0x160 [ 40.606477][ T146] dev_set_mac_address+0x28e/0x3f0 [ 40.607117][ T146] ? netdev_notify_peers+0xc0/0xc0 [ 40.607762][ T146] ? __module_text_address+0x13/0x140 [ 40.608440][ T146] ? notifier_call_chain+0x90/0x160 [ 40.609097][ T146] ? dev_set_mac_address+0x1f0/0x3f0 [ 40.609758][ T146] dev_set_mac_address+0x1f0/0x3f0 [ 40.610402][ T146] ? __local_bh_enable_ip+0xe9/0x1b0 [ 40.611071][ T146] ? bond_hw_addr_flush+0x77/0x100 [bonding] [ 40.611823][ T146] ? netdev_notify_peers+0xc0/0xc0 [ 40.612461][ T146] ? bond_hw_addr_flush+0x77/0x100 [bonding] [ 40.613213][ T146] ? bond_hw_addr_flush+0x77/0x100 [bonding] [ 40.613963][ T146] ? __local_bh_enable_ip+0xe9/0x1b0 [ 40.614631][ T146] ? bond_time_in_interval.isra.31+0x90/0x90 [bonding] [ 40.615484][ T146] ? __bond_release_one+0x9f0/0x12c0 [bonding] [ 40.616230][ T146] __bond_release_one+0x9f0/0x12c0 [bonding] [ 40.616949][ T146] ? bond_enslave+0x47c0/0x47c0 [bonding] [ 40.617642][ T146] ? lock_downgrade+0x730/0x730 [ 40.618218][ T146] ? check_flags.part.42+0x450/0x450 [ 40.618850][ T146] ? __mutex_unlock_slowpath+0xd0/0x670 [ 40.619519][ T146] ? trace_hardirqs_on+0x30/0x180 [ 40.620117][ T146] ? wait_for_completion+0x250/0x250 [ 40.620754][ T146] bond_netdev_event+0x822/0x970 [bonding] [ 40.621460][ T146] ? __module_text_address+0x13/0x140 [ 40.622097][ T146] notifier_call_chain+0x90/0x160 [ 40.622806][ T146] rollback_registered_many+0x660/0xcf0 [ 40.623522][ T146] ? netif_set_real_num_tx_queues+0x780/0x780 [ 40.624290][ T146] ? notifier_call_chain+0x90/0x160 [ 40.624957][ T146] ? netdev_upper_dev_unlink+0x114/0x180 [ 40.625686][ T146] ? __netdev_adjacent_dev_unlink_neighbour+0x30/0x30 [ 40.626421][ T146] ? mutex_is_locked+0x13/0x50 [ 40.627016][ T146] ? 
unregister_netdevice_queue+0xf2/0x240 [ 40.627663][ T146] unregister_netdevice_many.part.134+0x13/0x1b0 [ 40.628362][ T146] default_device_exit_batch+0x2d9/0x390 [ 40.628987][ T146] ? unregister_netdevice_many+0x40/0x40 [ 40.629615][ T146] ? dev_change_net_namespace+0xcb0/0xcb0 [ 40.630279][ T146] ? prepare_to_wait_exclusive+0x2e0/0x2e0 [ 40.630943][ T146] ? ops_exit_list.isra.9+0x97/0x140 [ 40.631554][ T146] cleanup_net+0x441/0x890 [ ... ] Fixes: e289fd28176b ("macvlan: fix the problem when mac address changes for passthru mode") Reported-by: syzbot+5035b1f9dc7ea4558d5a@syzkaller.appspotmail.com Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 6989e84670e5..3072fc902eca 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1673,7 +1673,7 @@ static int macvlan_device_event(struct notifier_block *unused, struct macvlan_dev, list); - if (macvlan_sync_address(vlan->dev, dev->dev_addr)) + if (vlan && macvlan_sync_address(vlan->dev, dev->dev_addr)) return NOTIFY_BAD; break; -- GitLab From 0388465ab6f945cab1d35eef4a83cc5823a82829 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 23 Apr 2020 15:44:17 -0700 Subject: [PATCH 1133/1278] net: bcmgenet: correct per TX/RX ring statistics [ Upstream commit a6d0b83f25073bdf08b8547aeff961a62c6ab229 ] The change to track net_device_stats per ring to better support SMP missed updating the rx_dropped member. The ndo_get_stats method is also needed to combine the results for ethtool statistics (-S) before filling in the ethtool structure. Fixes: 37a30b435b92 ("net: bcmgenet: Track per TX/RX rings statistics") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 3e3044fe3206..4b3660c63b86 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -973,6 +973,8 @@ static void bcmgenet_get_ethtool_stats(struct net_device *dev, if (netif_running(dev)) bcmgenet_update_mib_counters(priv); + dev->netdev_ops->ndo_get_stats(dev); + for (i = 0; i < BCMGENET_STATS_LEN; i++) { const struct bcmgenet_stats *s; char *p; @@ -3215,6 +3217,7 @@ static struct net_device_stats *bcmgenet_get_stats(struct net_device *dev) dev->stats.rx_packets = rx_packets; dev->stats.rx_errors = rx_errors; dev->stats.rx_missed_errors = rx_errors; + dev->stats.rx_dropped = rx_dropped; return &dev->stats; } -- GitLab From b8a15c1b91bb2e1a7feafa7ce6b0db6a8b762179 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Wed, 15 Apr 2020 16:36:19 +0800 Subject: [PATCH 1134/1278] net: netrom: Fix potential nr_neigh refcnt leak in nr_add_node [ Upstream commit d03f228470a8c0a22b774d1f8d47071e0de4f6dd ] nr_add_node() invokes nr_neigh_get_dev(), which returns a local reference of the nr_neigh object to "nr_neigh" with increased refcnt. When nr_add_node() returns, "nr_neigh" becomes invalid, so the refcount should be decreased to keep refcount balanced. The issue happens in one normal path of nr_add_node(), which forgets to decrease the refcnt increased by nr_neigh_get_dev() and causes a refcnt leak. It should decrease the refcnt before the function returns like other normal paths do. Fix this issue by calling nr_neigh_put() before the nr_add_node() returns. Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/netrom/nr_route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 0c59354e280e..d098bb8d53aa 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -199,6 +199,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, /* refcount initialized at 1 */ spin_unlock_bh(&nr_node_list_lock); + nr_neigh_put(nr_neigh); return 0; } nr_node_lock(nr_node); -- GitLab From 62d3d1d4ff9e6090e3e79f62e0d21493b9b73170 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Thu, 23 Apr 2020 13:13:03 +0800 Subject: [PATCH 1135/1278] net/x25: Fix x25_neigh refcnt leak when receiving frame [ Upstream commit f35d12971b4d814cdb2f659d76b42f0c545270b6 ] x25_lapb_receive_frame() invokes x25_get_neigh(), which returns a reference of the specified x25_neigh object to "nb" with increased refcnt. When x25_lapb_receive_frame() returns, local variable "nb" becomes invalid, so the refcount should be decreased to keep refcount balanced. The reference counting issue happens in one path of x25_lapb_receive_frame(). When pskb_may_pull() returns false, the function forgets to decrease the refcnt increased by x25_get_neigh(), causing a refcnt leak. Fix this issue by calling x25_neigh_put() when pskb_may_pull() returns false. Fixes: cb101ed2c3c7 ("x25: Handle undersized/fragmented skbs") Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/x25/x25_dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 39231237e1c3..30f71620d4e3 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -120,8 +120,10 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, goto drop; } - if (!pskb_may_pull(skb, 1)) + if (!pskb_may_pull(skb, 1)) { + x25_neigh_put(nb); return 0; + } switch (skb->data[0]) { -- GitLab From a5043538bce7a18ae76bf6c81e35241893d0615f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Apr 2020 07:10:23 -0700 Subject: [PATCH 1136/1278] tcp: cache line align MAX_TCP_HEADER [ Upstream commit 9bacd256f1354883d3c1402655153367982bba49 ] TCP stack is dumb in how it cooks its output packets. Depending on MAX_HEADER value, we might choose a bad ending point for the headers. If we align the end of TCP headers to cache line boundary, we make sure to always use the smallest number of cache lines, which always helps. Signed-off-by: Eric Dumazet Cc: Soheil Hassas Yeganeh Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index c96302310314..58e8e57787cf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -55,7 +55,7 @@ extern struct inet_hashinfo tcp_hashinfo; extern struct percpu_counter tcp_orphan_count; void tcp_time_wait(struct sock *sk, int state, int timeo); -#define MAX_TCP_HEADER (128 + MAX_HEADER) +#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) #define MAX_TCP_OPTION_SPACE 40 #define TCP_MIN_SND_MSS 48 #define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE) -- GitLab From 938be9dfb0fc5e899a56a98336b7768fa8c6f7b5 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 20 Apr 2020 15:01:33 +0000 Subject: [PATCH 1137/1278] team: fix hang in team_mode_get() [ Upstream commit 1c30fbc76b8f0c07c92a8ca4cd7c456612e17eb5 ] When team mode is changed or set, the team_mode_get() is called to check whether the mode module is inserted or not. If the mode module is not inserted, it calls the request_module(). In the request_module(), it creates a child process, which is the "modprobe" process and waits for the child process to finish. At this point, the following locks were used. down_read(&cb_lock); by genl_rcv() genl_lock(); by genl_rcv_msg() rtnl_lock(); by team_nl_cmd_options_set() mutex_lock(&team->lock); by team_nl_team_get() Concurrently, the team module could be removed by rmmod or "modprobe -r". The __exit function of team module is team_module_exit(), which calls team_nl_fini() and it tries to acquire following locks. down_write(&cb_lock); genl_lock(); Because of the genl_lock() and cb_lock, this process can't be finished earlier than request_module() routine. The problem scenario. CPU0 CPU1 team_mode_get request_module() modprobe -r team_mode_roundrobin team <--(B) modprobe team <--(A) team_mode_roundrobin By request_module(), the "modprobe team_mode_roundrobin" command will be executed.
At this point, the modprobe process will decide that the team module should be inserted before team_mode_roundrobin, because the team module is being removed. By the module infrastructure, the same module insert/remove operations can't be executed concurrently. So, (A) waits for (B) but (B) also waits for (A) because of locks. As a result, the hang occurs at this point. Test commands: while : do teamd -d & killall teamd & modprobe -rv team_mode_roundrobin & done The approach of this patch is to hold the reference count of the team module if the team module is compiled as a module. If the reference count of the team module is not zero while request_module() is being called, the team module will not be removed at that moment. As a result, the above scenario cannot occur. Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Signed-off-by: Taehee Yoo Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 3dba58fa3433..396a8c6cb999 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -480,6 +480,9 @@ static const struct team_mode *team_mode_get(const char *kind) struct team_mode_item *mitem; const struct team_mode *mode = NULL; + if (!try_module_get(THIS_MODULE)) + return NULL; + spin_lock(&mode_list_lock); mitem = __find_mode(kind); if (!mitem) { @@ -495,6 +498,7 @@ static const struct team_mode *team_mode_get(const char *kind) } spin_unlock(&mode_list_lock); + module_put(THIS_MODULE); return mode; } -- GitLab From a04a6dfda20a1fd0c47ae533c210638e0e064e09 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 20 Apr 2020 20:26:53 -0700 Subject: [PATCH 1138/1278] net: dsa: b53: Fix ARL register definitions [ Upstream commit c2e77a18a7ed65eb48f6e389b6a59a0fd753646a ] The ARL {MAC,VID} tuple and the forward entry were off by 0x10 bytes, which means that when we read/wrote from/to
ARL bin index 0, we were actually accessing the ARLA_RWCTRL register. Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations") Reviewed-by: Andrew Lunn Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/b53/b53_regs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index e5c86d44667a..1b2a337d673d 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -294,7 +294,7 @@ * * BCM5325 and BCM5365 share most definitions below */ -#define B53_ARLTBL_MAC_VID_ENTRY(n) (0x10 * (n)) +#define B53_ARLTBL_MAC_VID_ENTRY(n) ((0x10 * (n)) + 0x10) #define ARLTBL_MAC_MASK 0xffffffffffffULL #define ARLTBL_VID_S 48 #define ARLTBL_VID_MASK_25 0xff @@ -306,7 +306,7 @@ #define ARLTBL_VALID_25 BIT(63) /* ARL Table Data Entry N Registers (32 bit) */ -#define B53_ARLTBL_DATA_ENTRY(n) ((0x10 * (n)) + 0x08) +#define B53_ARLTBL_DATA_ENTRY(n) ((0x10 * (n)) + 0x18) #define ARLTBL_DATA_PORT_ID_MASK 0x1ff #define ARLTBL_TC(tc) ((3 & tc) << 11) #define ARLTBL_AGE BIT(14) -- GitLab From 0367dba6b8b7a42827ebbc67e8798a42bf5e4318 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 20 Apr 2020 17:13:51 -0600 Subject: [PATCH 1139/1278] xfrm: Always set XFRM_TRANSFORMED in xfrm{4,6}_output_finish [ Upstream commit 0c922a4850eba2e668f73a3f1153196e09abb251 ] IPSKB_XFRM_TRANSFORMED and IP6SKB_XFRM_TRANSFORMED are skb flags set by xfrm code to tell other skb handlers that the packet has been passed through the xfrm output functions. Simplify the code and just always set them rather than conditionally based on netfilter enabled thus making the flag available for other users. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/xfrm4_output.c | 2 -- net/ipv6/xfrm6_output.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 94b8702603bc..35dbc8eb9396 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -76,9 +76,7 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); -#ifdef CONFIG_NETFILTER IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; -#endif return xfrm_output(sk, skb); } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 29dae7f2ff14..aff901be5353 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -130,9 +130,7 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); -#ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; -#endif return xfrm_output(sk, skb); } -- GitLab From bd7ecda215cda3e918d4fba2e5a4b9004196fe93 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 20 Apr 2020 17:13:52 -0600 Subject: [PATCH 1140/1278] vrf: Check skb for XFRM_TRANSFORMED flag [ Upstream commit 16b9db1ce34ff00d6c18e82825125cfef0cdfb13 ] To avoid a loop with qdiscs and xfrms, check if the skb has already gone through the qdisc attached to the VRF device and then to the xfrm layer. If so, no need for a second redirect. Fixes: 193125dbd8eb ("net: Introduce VRF device driver") Reported-by: Trev Larock Signed-off-by: David Ahern Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vrf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 03e4fcdfeab7..811fe0bde8a3 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -476,7 +476,8 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) return skb; - if (qdisc_tx_is_default(vrf_dev)) + if (qdisc_tx_is_default(vrf_dev) || + IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) return vrf_ip6_out_direct(vrf_dev, sk, skb); return vrf_ip6_out_redirect(vrf_dev, skb); @@ -692,7 +693,8 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, ipv4_is_lbcast(ip_hdr(skb)->daddr)) return skb; - if (qdisc_tx_is_default(vrf_dev)) + if (qdisc_tx_is_default(vrf_dev) || + IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return vrf_ip_out_direct(vrf_dev, sk, skb); return vrf_ip_out_redirect(vrf_dev, skb); -- GitLab From 8d952266c3de93b6eea602803b3d6ad532c8fc0b Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Mar 2020 21:11:25 -0400 Subject: [PATCH 1141/1278] KEYS: Avoid false positive ENOMEM error on key read [ Upstream commit 4f0882491a148059a52480e753b7f07fc550e188 ] By allocating a kernel buffer with a user-supplied buffer length, it is possible that a false positive ENOMEM error may be returned because the user-supplied length is just too large even if the system do have enough memory to hold the actual key data. Moreover, if the buffer length is larger than the maximum amount of memory that can be returned by kmalloc() (2^(MAX_ORDER-1) number of pages), a warning message will also be printed. To reduce this possibility, we set a threshold (PAGE_SIZE) over which we do check the actual key length first before allocating a buffer of the right size to hold it. The threshold is arbitrary, it is just used to trigger a buffer length check. 
It does not limit the actual key length as long as there is enough memory to satisfy the memory request. To further avoid large buffer allocation failure due to page fragmentation, kvmalloc() is used to allocate the buffer so that vmapped pages can be used when there is not a large enough contiguous set of pages available for allocation. In the extremely unlikely scenario that the key keeps on being changed and made longer (still <= buflen) in between 2 __keyctl_read_key() calls, the __keyctl_read_key() calling loop in keyctl_read_key() may have to be iterated a large number of times, but definitely not infinite. Signed-off-by: Waiman Long Signed-off-by: David Howells Signed-off-by: Sasha Levin --- security/keys/internal.h | 12 +++++++++ security/keys/keyctl.c | 58 +++++++++++++++++++++++++++++----------- 2 files changed, 55 insertions(+), 15 deletions(-) diff --git a/security/keys/internal.h b/security/keys/internal.h index e3a573840186..124273e500cf 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -20,6 +20,8 @@ #include #include #include +#include +#include struct iovec; @@ -305,4 +307,14 @@ static inline void key_check(const struct key *key) #endif +/* + * Helper function to clear and free a kvmalloc'ed memory object. 
+ */ +static inline void __kvzfree(const void *addr, size_t len) +{ + if (addr) { + memset((void *)addr, 0, len); + kvfree(addr); + } +} #endif /* _INTERNAL_H */ diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 4b6a084e323b..c07c2e2b2478 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -330,7 +330,7 @@ long keyctl_update_key(key_serial_t id, payload = NULL; if (plen) { ret = -ENOMEM; - payload = kmalloc(plen, GFP_KERNEL); + payload = kvmalloc(plen, GFP_KERNEL); if (!payload) goto error; @@ -351,7 +351,7 @@ long keyctl_update_key(key_serial_t id, key_ref_put(key_ref); error2: - kzfree(payload); + __kvzfree(payload, plen); error: return ret; } @@ -772,7 +772,8 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) struct key *key; key_ref_t key_ref; long ret; - char *key_data; + char *key_data = NULL; + size_t key_data_len; /* find the key first */ key_ref = lookup_user_key(keyid, 0, 0); @@ -823,24 +824,51 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) * Allocating a temporary buffer to hold the keys before * transferring them to user buffer to avoid potential * deadlock involving page fault and mmap_sem. + * + * key_data_len = (buflen <= PAGE_SIZE) + * ? buflen : actual length of key data + * + * This prevents allocating arbitrary large buffer which can + * be much larger than the actual key length. In the latter case, + * at least 2 passes of this loop is required. */ - key_data = kmalloc(buflen, GFP_KERNEL); + key_data_len = (buflen <= PAGE_SIZE) ? 
buflen : 0; + for (;;) { + if (key_data_len) { + key_data = kvmalloc(key_data_len, GFP_KERNEL); + if (!key_data) { + ret = -ENOMEM; + goto key_put_out; + } + } - if (!key_data) { - ret = -ENOMEM; - goto key_put_out; - } - ret = __keyctl_read_key(key, key_data, buflen); + ret = __keyctl_read_key(key, key_data, key_data_len); + + /* + * Read methods will just return the required length without + * any copying if the provided length isn't large enough. + */ + if (ret <= 0 || ret > buflen) + break; + + /* + * The key may change (unlikely) in between 2 consecutive + * __keyctl_read_key() calls. In this case, we reallocate + * a larger buffer and redo the key read when + * key_data_len < ret <= buflen. + */ + if (ret > key_data_len) { + if (unlikely(key_data)) + __kvzfree(key_data, key_data_len); + key_data_len = ret; + continue; /* Allocate buffer */ + } - /* - * Read methods will just return the required length without - * any copying if the provided length isn't large enough. - */ - if (ret > 0 && ret <= buflen) { if (copy_to_user(buffer, key_data, ret)) ret = -EFAULT; + break; } - kzfree(key_data); + __kvzfree(key_data, key_data_len); key_put_out: key_put(key); -- GitLab From ed412430664dabcb16bd1c82e20e417ef604a729 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 19 Apr 2020 09:19:26 +0200 Subject: [PATCH 1142/1278] ALSA: hda: Remove ASUS ROG Zenith from the blacklist [ Upstream commit a8cf44f085ac12c0b5b8750ebb3b436c7f455419 ] The commit 3c6fd1f07ed0 ("ALSA: hda: Add driver blacklist") added a new blacklist for the devices that are known to have empty codecs, and one of the entries was ASUS ROG Zenith II (PCI SSID 1043:874f). However, it turned out that the very same PCI SSID is used for the previous model that does have the valid HD-audio codecs and the change broke the sound on it. This patch reverts the corresponding entry as a temporary solution. 
Although Zenith II and co will get the empty HD-audio bus again, it'd be merely resource wastes and won't affect the functionality, so it's no end of the world. We'll need to address this later, e.g. by either switching to DMI string matching or using PCI ID & SSID pairs. Fixes: 3c6fd1f07ed0 ("ALSA: hda: Add driver blacklist") Reported-by: Johnathan Smithinovic Cc: Link: https://lore.kernel.org/r/20200419071926.22683-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/hda_intel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index d392c1ec0b28..46670da04707 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2173,7 +2173,6 @@ static const struct hdac_io_ops pci_hda_io_ops = { * should be ignored from the beginning. */ static const struct snd_pci_quirk driver_blacklist[] = { - SND_PCI_QUIRK(0x1043, 0x874f, "ASUS ROG Zenith II / Strix", 0), SND_PCI_QUIRK(0x1462, 0xcb59, "MSI TRX40 Creator", 0), SND_PCI_QUIRK(0x1462, 0xcb60, "MSI TRX40", 0), {} -- GitLab From 3cb6fd58b54815335686ddf61a38640b0ea82ecf Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Mon, 9 Mar 2020 11:02:12 +0100 Subject: [PATCH 1143/1278] iio: adc: stm32-adc: fix sleep in atomic context commit e2042d2936dfc84e9c600fe9b9d0039ca0e54b7d upstream. This commit fixes the following error: "BUG: sleeping function called from invalid context at kernel/irq/chip.c" In DMA mode suppress the trigger irq handler, and make the buffer transfers directly in DMA callback, instead. 
Fixes: 2763ea0585c9 ("iio: adc: stm32: add optional dma support") Signed-off-by: Olivier Moysan Acked-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/stm32-adc.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 258a4712167a..3cfb2d4b2441 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1311,8 +1311,30 @@ static unsigned int stm32_adc_dma_residue(struct stm32_adc *adc) static void stm32_adc_dma_buffer_done(void *data) { struct iio_dev *indio_dev = data; + struct stm32_adc *adc = iio_priv(indio_dev); + int residue = stm32_adc_dma_residue(adc); + + /* + * In DMA mode the trigger services of IIO are not used + * (e.g. no call to iio_trigger_poll). + * Calling irq handler associated to the hardware trigger is not + * relevant as the conversions have already been done. Data + * transfers are performed directly in DMA callback instead. + * This implementation avoids to call trigger irq handler that + * may sleep, in an atomic context (DMA irq handler context). 
+ */ + dev_dbg(&indio_dev->dev, "%s bufi=%d\n", __func__, adc->bufi); - iio_trigger_poll_chained(indio_dev->trig); + while (residue >= indio_dev->scan_bytes) { + u16 *buffer = (u16 *)&adc->rx_buf[adc->bufi]; + + iio_push_to_buffers(indio_dev, buffer); + + residue -= indio_dev->scan_bytes; + adc->bufi += indio_dev->scan_bytes; + if (adc->bufi >= adc->rx_buf_sz) + adc->bufi = 0; + } } static int stm32_adc_dma_start(struct iio_dev *indio_dev) @@ -1648,6 +1670,7 @@ static int stm32_adc_probe(struct platform_device *pdev) { struct iio_dev *indio_dev; struct device *dev = &pdev->dev; + irqreturn_t (*handler)(int irq, void *p) = NULL; struct stm32_adc *adc; int ret; @@ -1730,9 +1753,11 @@ static int stm32_adc_probe(struct platform_device *pdev) if (ret < 0) goto err_clk_disable; + if (!adc->dma_chan) + handler = &stm32_adc_trigger_handler; + ret = iio_triggered_buffer_setup(indio_dev, - &iio_pollfunc_store_time, - &stm32_adc_trigger_handler, + &iio_pollfunc_store_time, handler, &stm32_adc_buffer_setup_ops); if (ret) { dev_err(&pdev->dev, "buffer setup failed\n"); -- GitLab From 34f3873793c9b11d57bf2d64b0a8791cd77c1ac8 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 3 Apr 2020 15:27:13 +0200 Subject: [PATCH 1144/1278] iio: xilinx-xadc: Fix ADC-B powerdown commit e44ec7794d88f918805d700240211a9ec05ed89d upstream. The check for shutting down the second ADC is inverted. This causes it to be powered down when it should be enabled. As a result channels that are supposed to be handled by the second ADC return invalid conversion results. 
Signed-off-by: Lars-Peter Clausen Fixes: bdc8cda1d010 ("iio:adc: Add Xilinx XADC driver") Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/xilinx-xadc-core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index e89711b30ae8..744f8ffe6b8d 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -709,13 +709,14 @@ static int xadc_power_adc_b(struct xadc *xadc, unsigned int seq_mode) { uint16_t val; + /* Powerdown the ADC-B when it is not needed. */ switch (seq_mode) { case XADC_CONF1_SEQ_SIMULTANEOUS: case XADC_CONF1_SEQ_INDEPENDENT: - val = XADC_CONF2_PD_ADC_B; + val = 0; break; default: - val = 0; + val = XADC_CONF2_PD_ADC_B; break; } -- GitLab From b77e5d0b4e8ae7d107717b12aab2967684c00a59 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 3 Apr 2020 15:27:14 +0200 Subject: [PATCH 1145/1278] iio: xilinx-xadc: Fix clearing interrupt when enabling trigger commit f954b098fbac4d183219ce5b42d76d6df2aed50a upstream. When enabling the trigger and unmasking the end-of-sequence (EOS) interrupt the EOS interrupt should be cleared from the status register. Otherwise it is possible that it was still set from a previous capture. If that is the case the interrupt would fire immediately even though no conversion has been done yet and stale data is being read from the device. The old code only clears the interrupt if the interrupt was previously unmasked. Which does not make much sense since the interrupt is always masked at this point and in addition masking the interrupt does not clear the interrupt from the status register. So the clearing needs to be done unconditionally. 
Signed-off-by: Lars-Peter Clausen Fixes: bdc8cda1d010 ("iio:adc: Add Xilinx XADC driver") Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/xilinx-xadc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index 744f8ffe6b8d..aa3560ec0779 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -660,7 +660,7 @@ static int xadc_trigger_set_state(struct iio_trigger *trigger, bool state) spin_lock_irqsave(&xadc->lock, flags); xadc_read_reg(xadc, XADC_AXI_REG_IPIER, &val); - xadc_write_reg(xadc, XADC_AXI_REG_IPISR, val & XADC_AXI_INT_EOS); + xadc_write_reg(xadc, XADC_AXI_REG_IPISR, XADC_AXI_INT_EOS); if (state) val |= XADC_AXI_INT_EOS; else -- GitLab From a2dad369d0ee4c78467163c6a4a22f5b026e991d Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 3 Apr 2020 15:27:15 +0200 Subject: [PATCH 1146/1278] iio: xilinx-xadc: Fix sequencer configuration for aux channels in simultaneous mode commit 8bef455c8b1694547ee59e8b1939205ed9d901a6 upstream. The XADC has two internal ADCs. Depending on the mode it is operating in either one or both of them are used. The device manual calls this continuous (one ADC) and simultaneous (both ADCs) mode. The meaning of the sequencing register for the aux channels changes depending on the mode. In continuous mode each bit corresponds to one of the 16 aux channels. And the single ADC will convert them one by one in order. In simultaneous mode the aux channels are split into two groups the first 8 channels are assigned to the first ADC and the other 8 channels to the second ADC. The upper 8 bits of the sequencing register are unused and the lower 8 bits control both ADCs. This means a bit needs to be set if either the corresponding channel from the first group or the second group (or both) are set. 
Currently the driver does not have the special handling required for simultaneous mode. Add it. Signed-off-by: Lars-Peter Clausen Fixes: bdc8cda1d010 ("iio:adc: Add Xilinx XADC driver") Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/xilinx-xadc-core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index aa3560ec0779..36db28b9099f 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -785,6 +785,16 @@ static int xadc_preenable(struct iio_dev *indio_dev) if (ret) goto err; + /* + * In simultaneous mode the upper and lower aux channels are samples at + * the same time. In this mode the upper 8 bits in the sequencer + * register are don't care and the lower 8 bits control two channels + * each. As such we must set the bit if either the channel in the lower + * group or the upper group is enabled. + */ + if (seq_mode == XADC_CONF1_SEQ_SIMULTANEOUS) + scan_mask = ((scan_mask >> 8) | scan_mask) & 0xff0000; + ret = xadc_write_adc_reg(xadc, XADC_REG_SEQ(1), scan_mask >> 16); if (ret) goto err; -- GitLab From e21c8c03af20932c15d8b1d3bb9cbad9607a6eab Mon Sep 17 00:00:00 2001 From: Piotr Krysiuk Date: Mon, 27 Apr 2020 11:34:12 +0100 Subject: [PATCH 1147/1278] fs/namespace.c: fix mountpoint reference counter race A race condition between threads updating mountpoint reference counter affects longterm releases 4.4.220, 4.9.220, 4.14.177 and 4.19.118. The mountpoint reference counter corruption may occur when: * one thread increments m_count member of struct mountpoint [under namespace_sem, but not holding mount_lock] pivot_root() * another thread simultaneously decrements the same m_count [under mount_lock, but not holding namespace_sem] put_mountpoint() unhash_mnt() umount_mnt() mntput_no_expire() To fix this race condition, grab mount_lock before updating m_count in pivot_root(). 
Reference: CVE-2020-12114 Cc: Al Viro Signed-off-by: Piotr Krysiuk Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index e9c13eedd739..c8acc60c456d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3216,8 +3216,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* make certain new is below the root */ if (!is_path_reachable(new_mnt, new.dentry, &root)) goto out4; - root_mp->m_count++; /* pin it so it won't go away */ lock_mount_hash(); + root_mp->m_count++; /* pin it so it won't go away */ detach_mnt(new_mnt, &parent_path); detach_mnt(root_mnt, &root_parent); if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { -- GitLab From 56d0f59a4ea93d3b550d9c5ef08c337b4acf8133 Mon Sep 17 00:00:00 2001 From: Changming Liu Date: Mon, 20 Apr 2020 23:41:25 -0400 Subject: [PATCH 1148/1278] USB: sisusbvga: Change port variable from signed to unsigned commit 2df7405f79ce1674d73c2786fe1a8727c905d65b upstream. Change a bunch of arguments of wrapper functions which pass signed integer to an unsigned integer which might cause undefined behaviors when sign integer overflow. 
Signed-off-by: Changming Liu Cc: stable Link: https://lore.kernel.org/r/BL0PR06MB45482D71EA822D75A0E60A2EE5D50@BL0PR06MB4548.namprd06.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/sisusbvga/sisusb.c | 20 ++++++++++---------- drivers/usb/misc/sisusbvga/sisusb_init.h | 14 +++++++------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c index 3121fa31aabf..a6f88442a53a 100644 --- a/drivers/usb/misc/sisusbvga/sisusb.c +++ b/drivers/usb/misc/sisusbvga/sisusb.c @@ -1198,18 +1198,18 @@ static int sisusb_read_mem_bulk(struct sisusb_usb_data *sisusb, u32 addr, /* High level: Gfx (indexed) register access */ #ifdef INCL_SISUSB_CON -int sisusb_setreg(struct sisusb_usb_data *sisusb, int port, u8 data) +int sisusb_setreg(struct sisusb_usb_data *sisusb, u32 port, u8 data) { return sisusb_write_memio_byte(sisusb, SISUSB_TYPE_IO, port, data); } -int sisusb_getreg(struct sisusb_usb_data *sisusb, int port, u8 *data) +int sisusb_getreg(struct sisusb_usb_data *sisusb, u32 port, u8 *data) { return sisusb_read_memio_byte(sisusb, SISUSB_TYPE_IO, port, data); } #endif -int sisusb_setidxreg(struct sisusb_usb_data *sisusb, int port, +int sisusb_setidxreg(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 data) { int ret; @@ -1219,7 +1219,7 @@ int sisusb_setidxreg(struct sisusb_usb_data *sisusb, int port, return ret; } -int sisusb_getidxreg(struct sisusb_usb_data *sisusb, int port, +int sisusb_getidxreg(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 *data) { int ret; @@ -1229,7 +1229,7 @@ int sisusb_getidxreg(struct sisusb_usb_data *sisusb, int port, return ret; } -int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, int port, u8 idx, +int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, u32 port, u8 idx, u8 myand, u8 myor) { int ret; @@ -1244,7 +1244,7 @@ int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, int port, u8 idx, } static int 
sisusb_setidxregmask(struct sisusb_usb_data *sisusb, - int port, u8 idx, u8 data, u8 mask) + u32 port, u8 idx, u8 data, u8 mask) { int ret; u8 tmp; @@ -1257,13 +1257,13 @@ static int sisusb_setidxregmask(struct sisusb_usb_data *sisusb, return ret; } -int sisusb_setidxregor(struct sisusb_usb_data *sisusb, int port, +int sisusb_setidxregor(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 myor) { return sisusb_setidxregandor(sisusb, port, index, 0xff, myor); } -int sisusb_setidxregand(struct sisusb_usb_data *sisusb, int port, +int sisusb_setidxregand(struct sisusb_usb_data *sisusb, u32 port, u8 idx, u8 myand) { return sisusb_setidxregandor(sisusb, port, idx, myand, 0x00); @@ -2786,8 +2786,8 @@ static loff_t sisusb_lseek(struct file *file, loff_t offset, int orig) static int sisusb_handle_command(struct sisusb_usb_data *sisusb, struct sisusb_command *y, unsigned long arg) { - int retval, port, length; - u32 address; + int retval, length; + u32 port, address; /* All our commands require the device * to be initialized. 
diff --git a/drivers/usb/misc/sisusbvga/sisusb_init.h b/drivers/usb/misc/sisusbvga/sisusb_init.h index e79a616f0d26..f7182257f7e1 100644 --- a/drivers/usb/misc/sisusbvga/sisusb_init.h +++ b/drivers/usb/misc/sisusbvga/sisusb_init.h @@ -811,17 +811,17 @@ static const struct SiS_VCLKData SiSUSB_VCLKData[] = { int SiSUSBSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo); int SiSUSBSetVESAMode(struct SiS_Private *SiS_Pr, unsigned short VModeNo); -extern int sisusb_setreg(struct sisusb_usb_data *sisusb, int port, u8 data); -extern int sisusb_getreg(struct sisusb_usb_data *sisusb, int port, u8 * data); -extern int sisusb_setidxreg(struct sisusb_usb_data *sisusb, int port, +extern int sisusb_setreg(struct sisusb_usb_data *sisusb, u32 port, u8 data); +extern int sisusb_getreg(struct sisusb_usb_data *sisusb, u32 port, u8 * data); +extern int sisusb_setidxreg(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 data); -extern int sisusb_getidxreg(struct sisusb_usb_data *sisusb, int port, +extern int sisusb_getidxreg(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 * data); -extern int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, int port, +extern int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, u32 port, u8 idx, u8 myand, u8 myor); -extern int sisusb_setidxregor(struct sisusb_usb_data *sisusb, int port, +extern int sisusb_setidxregor(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 myor); -extern int sisusb_setidxregand(struct sisusb_usb_data *sisusb, int port, +extern int sisusb_setidxregand(struct sisusb_usb_data *sisusb, u32 port, u8 idx, u8 myand); void sisusb_delete(struct kref *kref); -- GitLab From 48f9c7b6d70a6c819c5c7c595186f0d86314c613 Mon Sep 17 00:00:00 2001 From: Jonathan Cox Date: Fri, 10 Apr 2020 14:24:27 -0700 Subject: [PATCH 1149/1278] USB: Add USB_QUIRK_DELAY_CTRL_MSG and USB_QUIRK_DELAY_INIT for Corsair K70 RGB RAPIDFIRE commit be34a5854b4606bd7a160ad3cb43415d623596c7 upstream. 
The Corsair K70 RGB RAPIDFIRE needs the USB_QUIRK_DELAY_INIT and USB_QUIRK_DELAY_CTRL_MSG to function or it will randomly not respond on boot, just like other Corsair keyboards Signed-off-by: Jonathan Cox Cc: stable Link: https://lore.kernel.org/r/20200410212427.2886-1-jonathan@jdcox.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 6c4bb47922ac..27d05f0134de 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -272,6 +272,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Corsair K70 LUX */ { USB_DEVICE(0x1b1c, 0x1b36), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Corsair K70 RGB RAPDIFIRE */ + { USB_DEVICE(0x1b1c, 0x1b38), .driver_info = USB_QUIRK_DELAY_INIT | + USB_QUIRK_DELAY_CTRL_MSG }, + /* MIDI keyboard WORLDE MINI */ { USB_DEVICE(0x1c75, 0x0204), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, -- GitLab From 90adc8923a5104d829af94e01176937502743125 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 1 Apr 2020 09:46:19 +0200 Subject: [PATCH 1150/1278] USB: early: Handle AMD's spec-compliant identifiers, too commit 7dbdb53d72a51cea9b921d9dbba54be00752212a upstream. This fixes a bug that causes the USB3 early console to freeze after printing a single line on AMD machines because it can't parse the Transfer TRB properly. The spec at https://www.intel.com/content/dam/www/public/us/en/documents/technical-specifications/extensible-host-controler-interface-usb-xhci.pdf says in section "4.5.1 Device Context Index" that the Context Index, also known as Endpoint ID according to section "1.6 Terms and Abbreviations", is normally computed as `DCI = (Endpoint Number * 2) + Direction`, which matches the current definitions of XDBC_EPID_OUT and XDBC_EPID_IN. 
However, the numbering in a Debug Capability Context data structure is supposed to be different: Section "7.6.3.2 Endpoint Contexts and Transfer Rings" explains that a Debug Capability Context data structure has the endpoints mapped to indices 0 and 1. Change XDBC_EPID_OUT/XDBC_EPID_IN to the spec-compliant values, add XDBC_EPID_OUT_INTEL/XDBC_EPID_IN_INTEL with Intel's incorrect values, and let xdbc_handle_tx_event() handle both. I have verified that with this patch applied, the USB3 early console works on both an Intel and an AMD machine. Fixes: aeb9dd1de98c ("usb/early: Add driver for xhci debug capability") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20200401074619.8024-1-jannh@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/early/xhci-dbc.c | 8 ++++---- drivers/usb/early/xhci-dbc.h | 18 ++++++++++++++++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c index 12fe70beae69..21244c556b81 100644 --- a/drivers/usb/early/xhci-dbc.c +++ b/drivers/usb/early/xhci-dbc.c @@ -738,19 +738,19 @@ static void xdbc_handle_tx_event(struct xdbc_trb *evt_trb) case COMP_USB_TRANSACTION_ERROR: case COMP_STALL_ERROR: default: - if (ep_id == XDBC_EPID_OUT) + if (ep_id == XDBC_EPID_OUT || ep_id == XDBC_EPID_OUT_INTEL) xdbc.flags |= XDBC_FLAGS_OUT_STALL; - if (ep_id == XDBC_EPID_IN) + if (ep_id == XDBC_EPID_IN || ep_id == XDBC_EPID_IN_INTEL) xdbc.flags |= XDBC_FLAGS_IN_STALL; xdbc_trace("endpoint %d stalled\n", ep_id); break; } - if (ep_id == XDBC_EPID_IN) { + if (ep_id == XDBC_EPID_IN || ep_id == XDBC_EPID_IN_INTEL) { xdbc.flags &= ~XDBC_FLAGS_IN_PROCESS; xdbc_bulk_transfer(NULL, XDBC_MAX_PACKET, true); - } else if (ep_id == XDBC_EPID_OUT) { + } else if (ep_id == XDBC_EPID_OUT || ep_id == XDBC_EPID_OUT_INTEL) { xdbc.flags &= ~XDBC_FLAGS_OUT_PROCESS; } else { xdbc_trace("invalid endpoint id %d\n", ep_id); diff --git a/drivers/usb/early/xhci-dbc.h 
b/drivers/usb/early/xhci-dbc.h index a516cab0bf4a..6c9200d913da 100644 --- a/drivers/usb/early/xhci-dbc.h +++ b/drivers/usb/early/xhci-dbc.h @@ -123,8 +123,22 @@ struct xdbc_ring { u32 cycle_state; }; -#define XDBC_EPID_OUT 2 -#define XDBC_EPID_IN 3 +/* + * These are the "Endpoint ID" (also known as "Context Index") values for the + * OUT Transfer Ring and the IN Transfer Ring of a Debug Capability Context data + * structure. + * According to the "eXtensible Host Controller Interface for Universal Serial + * Bus (xHCI)" specification, section "7.6.3.2 Endpoint Contexts and Transfer + * Rings", these should be 0 and 1, and those are the values AMD machines give + * you; but Intel machines seem to use the formula from section "4.5.1 Device + * Context Index", which is supposed to be used for the Device Context only. + * Luckily the values from Intel don't overlap with those from AMD, so we can + * just test for both. + */ +#define XDBC_EPID_OUT 0 +#define XDBC_EPID_IN 1 +#define XDBC_EPID_OUT_INTEL 2 +#define XDBC_EPID_IN_INTEL 3 struct xdbc_state { u16 vendor; -- GitLab From b49aa64f1d2526de71ae0354d37026e5861ccb4b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 28 Mar 2020 16:18:11 -0400 Subject: [PATCH 1151/1278] USB: core: Fix free-while-in-use bug in the USB S-Glibrary commit 056ad39ee9253873522f6469c3364964a322912b upstream. 
FuzzUSB (a variant of syzkaller) found a free-while-still-in-use bug in the USB scatter-gather library: BUG: KASAN: use-after-free in atomic_read include/asm-generic/atomic-instrumented.h:26 [inline] BUG: KASAN: use-after-free in usb_hcd_unlink_urb+0x5f/0x170 drivers/usb/core/hcd.c:1607 Read of size 4 at addr ffff888065379610 by task kworker/u4:1/27 CPU: 1 PID: 27 Comm: kworker/u4:1 Not tainted 5.5.11 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Workqueue: scsi_tmf_2 scmd_eh_abort_handler Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xce/0x128 lib/dump_stack.c:118 print_address_description.constprop.4+0x21/0x3c0 mm/kasan/report.c:374 __kasan_report+0x153/0x1cb mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:639 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x152/0x1b0 mm/kasan/generic.c:192 __kasan_check_read+0x11/0x20 mm/kasan/common.c:95 atomic_read include/asm-generic/atomic-instrumented.h:26 [inline] usb_hcd_unlink_urb+0x5f/0x170 drivers/usb/core/hcd.c:1607 usb_unlink_urb+0x72/0xb0 drivers/usb/core/urb.c:657 usb_sg_cancel+0x14e/0x290 drivers/usb/core/message.c:602 usb_stor_stop_transport+0x5e/0xa0 drivers/usb/storage/transport.c:937 This bug occurs when cancellation of the S-G transfer races with transfer completion. When that happens, usb_sg_cancel() may continue to access the transfer's URBs after usb_sg_wait() has freed them. The bug is caused by the fact that usb_sg_cancel() does not take any sort of reference to the transfer, and so there is nothing to prevent the URBs from being deallocated while the routine is trying to use them. The fix is to take such a reference by incrementing the transfer's io->count field while the cancellation is in progress and decrementing it afterward. The transfer's URBs are not deallocated until io->complete is triggered, which happens when io->count reaches zero. 
Signed-off-by: Alan Stern Reported-and-tested-by: Kyungtae Kim CC: Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.2003281615140.14837-100000@netrider.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index e70578e11156..00e80cfe614c 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -586,12 +586,13 @@ void usb_sg_cancel(struct usb_sg_request *io) int i, retval; spin_lock_irqsave(&io->lock, flags); - if (io->status) { + if (io->status || io->count == 0) { spin_unlock_irqrestore(&io->lock, flags); return; } /* shut everything down */ io->status = -ECONNRESET; + io->count++; /* Keep the request alive until we're done */ spin_unlock_irqrestore(&io->lock, flags); for (i = io->entries - 1; i >= 0; --i) { @@ -605,6 +606,12 @@ void usb_sg_cancel(struct usb_sg_request *io) dev_warn(&io->dev->dev, "%s, unlink --> %d\n", __func__, retval); } + + spin_lock_irqsave(&io->lock, flags); + io->count--; + if (!io->count) + complete(&io->complete); + spin_unlock_irqrestore(&io->lock, flags); } EXPORT_SYMBOL_GPL(usb_sg_cancel); -- GitLab From 17f673a9861cb15b98fcae1467647d7817e46109 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 22 Apr 2020 16:09:51 -0400 Subject: [PATCH 1152/1278] USB: hub: Fix handling of connect changes during sleep commit 9f952e26295d977dbfc6fedeaf8c4f112c818d37 upstream. Commit 8099f58f1ecd ("USB: hub: Don't record a connect-change event during reset-resume") wasn't very well conceived. The problem it tried to fix was that if a connect-change event occurred while the system was asleep (such as a device disconnecting itself from the bus when it is suspended and then reconnecting when it resumes) requiring a reset-resume during the system wakeup transition, the hub port's change_bit entry would remain set afterward. 
This would cause the hub driver to believe another connect-change event had occurred after the reset-resume, which was wrong and would lead the driver to send unnecessary requests to the device (which could interfere with a firmware update). The commit tried to fix this by not setting the change_bit during the wakeup. But this was the wrong thing to do; it means that when a device is unplugged while the system is asleep, the hub driver doesn't realize anything has happened: The change_bit flag which would tell it to handle the disconnect event is clear. The commit needs to be reverted and the problem fixed in a different way. Fortunately an alternative solution was noted in the commit's Changelog: We can continue to set the change_bit entry in hub_activate() but then clear it when a reset-resume occurs. That way the hub driver will see the change_bit when a device is disconnected but won't see it when the device is still present. That's what this patch does. Reported-and-tested-by: Peter Chen Signed-off-by: Alan Stern Fixes: 8099f58f1ecd ("USB: hub: Don't record a connect-change event during reset-resume") Tested-by: Paul Zimmerman CC: Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.2004221602480.11262-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 4391192bdd19..d6d09486231d 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1195,6 +1195,11 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) #ifdef CONFIG_PM udev->reset_resume = 1; #endif + /* Don't set the change_bits when the device + * was powered off.
+ */ + if (test_bit(port1, hub->power_bits)) + set_bit(port1, hub->change_bits); } else { /* The power session is gone; tell hub_wq */ @@ -3008,6 +3013,15 @@ static int check_port_resume_type(struct usb_device *udev, if (portchange & USB_PORT_STAT_C_ENABLE) usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_ENABLE); + + /* + * Whatever made this reset-resume necessary may have + * turned on the port1 bit in hub->change_bits. But after + * a successful reset-resume we want the bit to be clear; + * if it was on it would indicate that something happened + * following the reset-resume. + */ + clear_bit(port1, hub->change_bits); } return status; -- GitLab From 334c43f8d69fa5aa6487157b927ed9146e40786e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 1 Aug 2018 14:25:39 -0700 Subject: [PATCH 1153/1278] overflow.h: Add arithmetic shift helper commit 0c66847793d1982d1083dc6f7adad60fa265ce9c upstream. Add shift_overflow() helper to assist driver authors in ensuring that shift operations don't cause overflows or other odd conditions. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky [kees: tweaked comments and commit log, dropped unneeded assignment] Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/linux/overflow.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/include/linux/overflow.h b/include/linux/overflow.h index c8890ec358a7..d309788f4cd2 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -202,4 +202,35 @@ #endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ +/** check_shl_overflow() - Calculate a left-shifted value and check overflow + * + * @a: Value to be shifted + * @s: How many bits left to shift + * @d: Pointer to where to store the result + * + * Computes *@d = (@a << @s) + * + * Returns true if '*d' cannot hold the result or when 'a << s' doesn't + * make sense. Example conditions: + * - 'a << s' causes bits to be lost when stored in *d. 
+ * - 's' is garbage (e.g. negative) or so large that the result of + * 'a << s' is guaranteed to be 0. + * - 'a' is negative. + * - 'a << s' sets the sign bit, if any, in '*d'. + * + * '*d' will hold the results of the attempted shift, but is not + * considered "safe for use" if false is returned. + */ +#define check_shl_overflow(a, s, d) ({ \ + typeof(a) _a = a; \ + typeof(s) _s = s; \ + typeof(d) _d = d; \ + u64 _a_full = _a; \ + unsigned int _to_shift = \ + _s >= 0 && _s < 8 * sizeof(*d) ? _s : 0; \ + *_d = (_a_full << _to_shift); \ + (_to_shift != _s || *_d < 0 || _a < 0 || \ + (*_d >> _to_shift) != _a); \ +}) + #endif /* __LINUX_OVERFLOW_H */ -- GitLab From e5de393402985a04fcd6980d3701759e31e2cfff Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 20 Apr 2020 18:14:11 -0700 Subject: [PATCH 1154/1278] vmalloc: fix remap_vmalloc_range() bounds checks commit bdebd6a2831b6fab69eb85cee74a8ba77f1a1cc2 upstream. remap_vmalloc_range() has had various issues with the bounds checks it promises to perform ("This function checks that addr is a valid vmalloc'ed area, and that it is big enough to cover the vma") over time, e.g.: - not detecting pgoff< Signed-off-by: Andrew Morton Cc: stable@vger.kernel.org Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Cc: Andrii Nakryiko Cc: John Fastabend Cc: KP Singh Link: http://lkml.kernel.org/r/20200415222312.236431-1-jannh@google.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/vmcore.c | 2 +- include/linux/vmalloc.h | 2 +- mm/vmalloc.c | 16 +++++++++++++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index ce400f97370d..aaa7486b6f0d 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -459,7 +459,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size); kaddr = elfnotes_buf + start - elfcorebuf_sz; if 
(remap_vmalloc_range_partial(vma, vma->vm_start + len, - kaddr, tsz)) + kaddr, 0, tsz)) goto fail; size -= tsz; start += tsz; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 29ef33498cb6..98d65e11e837 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -102,7 +102,7 @@ extern void vunmap(const void *addr); extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, void *kaddr, - unsigned long size); + unsigned long pgoff, unsigned long size); extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 88091fd704f4..63bf3f207e16 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -2246,6 +2247,7 @@ long vwrite(char *buf, char *addr, unsigned long count) * @vma: vma to cover * @uaddr: target user address to start at * @kaddr: virtual address of vmalloc kernel memory + * @pgoff: offset from @kaddr to start at * @size: size of map area * * Returns: 0 for success, -Exxx on failure @@ -2258,9 +2260,15 @@ long vwrite(char *buf, char *addr, unsigned long count) * Similar to remap_pfn_range() (see mm/memory.c) */ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, - void *kaddr, unsigned long size) + void *kaddr, unsigned long pgoff, + unsigned long size) { struct vm_struct *area; + unsigned long off; + unsigned long end_index; + + if (check_shl_overflow(pgoff, PAGE_SHIFT, &off)) + return -EINVAL; size = PAGE_ALIGN(size); @@ -2274,8 +2282,10 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, if (!(area->flags & VM_USERMAP)) return -EINVAL; - if (kaddr + size > area->addr + get_vm_area_size(area)) + if (check_add_overflow(size, off, &end_index) || + end_index > get_vm_area_size(area)) return -EINVAL; + kaddr += off; do { struct page *page = vmalloc_to_page(kaddr); @@ -2314,7 +2324,7 @@ int 
remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff) { return remap_vmalloc_range_partial(vma, vma->vm_start, - addr + (pgoff << PAGE_SHIFT), + addr, pgoff, vma->vm_end - vma->vm_start); } EXPORT_SYMBOL(remap_vmalloc_range); -- GitLab From b73684f9f53922184d876293229bdcba546f77bf Mon Sep 17 00:00:00 2001 From: Longpeng Date: Mon, 20 Apr 2020 18:13:51 -0700 Subject: [PATCH 1155/1278] mm/hugetlb: fix a addressing exception caused by huge_pte_offset commit 3c1d7e6ccb644d517a12f73a7ff200870926f865 upstream. Our machine encountered a panic(addressing exception) after run for a long time and the calltrace is: RIP: hugetlb_fault+0x307/0xbe0 RSP: 0018:ffff9567fc27f808 EFLAGS: 00010286 RAX: e800c03ff1258d48 RBX: ffffd3bb003b69c0 RCX: e800c03ff1258d48 RDX: 17ff3fc00eda72b7 RSI: 00003ffffffff000 RDI: e800c03ff1258d48 RBP: ffff9567fc27f8c8 R08: e800c03ff1258d48 R09: 0000000000000080 R10: ffffaba0704c22a8 R11: 0000000000000001 R12: ffff95c87b4b60d8 R13: 00005fff00000000 R14: 0000000000000000 R15: ffff9567face8074 FS: 00007fe2d9ffb700(0000) GS:ffff956900e40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffd3bb003b69c0 CR3: 000000be67374000 CR4: 00000000003627e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: follow_hugetlb_page+0x175/0x540 __get_user_pages+0x2a0/0x7e0 __get_user_pages_unlocked+0x15d/0x210 __gfn_to_pfn_memslot+0x3c5/0x460 [kvm] try_async_pf+0x6e/0x2a0 [kvm] tdp_page_fault+0x151/0x2d0 [kvm] ... kvm_arch_vcpu_ioctl_run+0x330/0x490 [kvm] kvm_vcpu_ioctl+0x309/0x6d0 [kvm] do_vfs_ioctl+0x3f0/0x540 SyS_ioctl+0xa1/0xc0 system_call_fastpath+0x22/0x27 For 1G hugepages, huge_pte_offset() wants to return NULL or pudp, but it may return a wrong 'pmdp' if there is a race. Please look at the following code snippet: ... 
pud = pud_offset(p4d, addr); if (sz != PUD_SIZE && pud_none(*pud)) return NULL; /* hugepage or swap? */ if (pud_huge(*pud) || !pud_present(*pud)) return (pte_t *)pud; pmd = pmd_offset(pud, addr); if (sz != PMD_SIZE && pmd_none(*pmd)) return NULL; /* hugepage or swap? */ if (pmd_huge(*pmd) || !pmd_present(*pmd)) return (pte_t *)pmd; ... The following sequence would trigger this bug: - CPU0: sz = PUD_SIZE and *pud = 0 , continue - CPU0: "pud_huge(*pud)" is false - CPU1: calling hugetlb_no_page and set *pud to xxxx8e7(PRESENT) - CPU0: "!pud_present(*pud)" is false, continue - CPU0: pmd = pmd_offset(pud, addr) and maybe return a wrong pmdp However, we want CPU0 to return NULL or pudp in this case. We must make sure there is exactly one dereference of pud and pmd. Signed-off-by: Longpeng Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Reviewed-by: Jason Gunthorpe Cc: Matthew Wilcox Cc: Sean Christopherson Cc: Link: http://lkml.kernel.org/r/20200413010342.771-1-longpeng2@huawei.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 310656b4ede6..d6464045d3b9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4745,8 +4745,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, { pgd_t *pgd; p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; + pud_t *pud, pud_entry; + pmd_t *pmd, pmd_entry; pgd = pgd_offset(mm, addr); if (!pgd_present(*pgd)) @@ -4756,17 +4756,19 @@ pte_t *huge_pte_offset(struct mm_struct *mm, return NULL; pud = pud_offset(p4d, addr); - if (sz != PUD_SIZE && pud_none(*pud)) + pud_entry = READ_ONCE(*pud); + if (sz != PUD_SIZE && pud_none(pud_entry)) return NULL; /* hugepage or swap? 
*/ - if (pud_huge(*pud) || !pud_present(*pud)) + if (pud_huge(pud_entry) || !pud_present(pud_entry)) return (pte_t *)pud; pmd = pmd_offset(pud, addr); - if (sz != PMD_SIZE && pmd_none(*pmd)) + pmd_entry = READ_ONCE(*pmd); + if (sz != PMD_SIZE && pmd_none(pmd_entry)) return NULL; /* hugepage or swap? */ - if (pmd_huge(*pmd) || !pmd_present(*pmd)) + if (pmd_huge(pmd_entry) || !pmd_present(pmd_entry)) return (pte_t *)pmd; return NULL; -- GitLab From a8f73ebf7293fff47b9bc4c86c103ff8009fc03f Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Mon, 20 Apr 2020 18:14:04 -0700 Subject: [PATCH 1156/1278] mm/ksm: fix NULL pointer dereference when KSM zero page is enabled commit 56df70a63ed5d989c1d36deee94cae14342be6e9 upstream. find_mergeable_vma() can return NULL. In this case, it leads to a crash when we access vm_mm(its offset is 0x40) later in write_protect_page. And this case did happen on our server. The following call trace is captured in kernel 4.19 with the following patch applied and KSM zero page enabled on our server. commit e86c59b1b12d ("mm/ksm: improve deduplication of zero pages with colouring") So add a vma check to fix it. 
BUG: unable to handle kernel NULL pointer dereference at 0000000000000040 Oops: 0000 [#1] SMP NOPTI CPU: 9 PID: 510 Comm: ksmd Kdump: loaded Tainted: G OE 4.19.36.bsk.9-amd64 #4.19.36.bsk.9 RIP: try_to_merge_one_page+0xc7/0x760 Code: 24 58 65 48 33 34 25 28 00 00 00 89 e8 0f 85 a3 06 00 00 48 83 c4 60 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 8b 46 08 a8 01 75 b8 <49> 8b 44 24 40 4c 8d 7c 24 20 b9 07 00 00 00 4c 89 e6 4c 89 ff 48 RSP: 0018:ffffadbdd9fffdb0 EFLAGS: 00010246 RAX: ffffda83ffd4be08 RBX: ffffda83ffd4be40 RCX: 0000002c6e800000 RDX: 0000000000000000 RSI: ffffda83ffd4be40 RDI: 0000000000000000 RBP: ffffa11939f02ec0 R08: 0000000094e1a447 R09: 00000000abe76577 R10: 0000000000000962 R11: 0000000000004e6a R12: 0000000000000000 R13: ffffda83b1e06380 R14: ffffa18f31f072c0 R15: ffffda83ffd4be40 FS: 0000000000000000(0000) GS:ffffa0da43b80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000040 CR3: 0000002c77c0a003 CR4: 00000000007626e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ksm_scan_thread+0x115e/0x1960 kthread+0xf5/0x130 ret_from_fork+0x1f/0x30 [songmuchun@bytedance.com: if the vma is out of date, just exit] Link: http://lkml.kernel.org/r/20200416025034.29780-1-songmuchun@bytedance.com [akpm@linux-foundation.org: add the conventional braces, replace /** with /*] Fixes: e86c59b1b12d ("mm/ksm: improve deduplication of zero pages with colouring") Co-developed-by: Xiongchun Duan Signed-off-by: Muchun Song Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Kirill Tkhai Cc: Hugh Dickins Cc: Yang Shi Cc: Claudio Imbrenda Cc: Markus Elfring Cc: Link: http://lkml.kernel.org/r/20200416025034.29780-1-songmuchun@bytedance.com Link: http://lkml.kernel.org/r/20200414132905.83819-1-songmuchun@bytedance.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/ksm.c | 12 
++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index 764486ffcd16..65d4bf52f543 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2074,8 +2074,16 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) down_read(&mm->mmap_sem); vma = find_mergeable_vma(mm, rmap_item->address); - err = try_to_merge_one_page(vma, page, - ZERO_PAGE(rmap_item->address)); + if (vma) { + err = try_to_merge_one_page(vma, page, + ZERO_PAGE(rmap_item->address)); + } else { + /* + * If the vma is out of date, we do not need to + * continue. + */ + err = 0; + } up_read(&mm->mmap_sem); /* * In case of failure, the page was not really empty, so we -- GitLab From e21c20cf8b532259511cbeaef0db2a690d91f92d Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 20 Apr 2020 18:14:23 -0700 Subject: [PATCH 1157/1278] tools/vm: fix cross-compile build commit cf01699ee220c38099eb3e43ce3d10690c8b7060 upstream. Commit 7ed1c1901fe5 ("tools: fix cross-compile var clobbering") moved the setup of the CC variable to tools/scripts/Makefile.include to make the behavior consistent across all the tools Makefiles. As the vm tools missed the include we end up with the wrong CC in a cross-compiling environment.
Fixes: 7ed1c1901fe5 (tools: fix cross-compile var clobbering) Signed-off-by: Lucas Stach Signed-off-by: Andrew Morton Cc: Martin Kelly Cc: Link: http://lkml.kernel.org/r/20200416104748.25243-1-l.stach@pengutronix.de Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- tools/vm/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/vm/Makefile b/tools/vm/Makefile index 20f6cf04377f..9860622cbb15 100644 --- a/tools/vm/Makefile +++ b/tools/vm/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm tools # +include ../scripts/Makefile.include + TARGETS=page-types slabinfo page_owner_sort LIB_DIR = ../lib/api -- GitLab From f025e57b4b50114e6041879e6626c0be3f11f60c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Apr 2020 09:55:29 +0200 Subject: [PATCH 1158/1278] ALSA: usx2y: Fix potential NULL dereference commit 7686e3485253635c529cdd5f416fc640abaf076f upstream. The error handling code in usX2Y_rate_set() may hit a potential NULL dereference when an error occurs before allocating all us->urb[]. Add a proper NULL check for fixing the corner case. 
Reported-by: Lin Yi Cc: Link: https://lore.kernel.org/r/20200420075529.27203-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/usx2y/usbusx2yaudio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c index f93b355756e6..2dfc0abf2e37 100644 --- a/sound/usb/usx2y/usbusx2yaudio.c +++ b/sound/usb/usx2y/usbusx2yaudio.c @@ -689,6 +689,8 @@ static int usX2Y_rate_set(struct usX2Ydev *usX2Y, int rate) us->submitted = 2*NOOF_SETRATE_URBS; for (i = 0; i < NOOF_SETRATE_URBS; ++i) { struct urb *urb = us->urb[i]; + if (!urb) + continue; if (urb->status) { if (!err) err = -ENODEV; -- GitLab From a73fb6720ac54928aee888ecea340be1fbcf2052 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 23 Apr 2020 14:18:31 +0800 Subject: [PATCH 1159/1278] ALSA: hda/realtek - Add new codec supported for ALC245 commit 7fbdcd8301a84c09cebfa64f1317a6dafeec9188 upstream. Enable new codec supported for ALC245. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/8c0804738b2c42439f59c39c8437817f@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3fded87817c6..288ea05dfa3c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -334,6 +334,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0233: case 0x10ec0235: case 0x10ec0236: + case 0x10ec0245: case 0x10ec0255: case 0x10ec0256: case 0x10ec0257: @@ -7264,6 +7265,7 @@ static int patch_alc269(struct hda_codec *codec) spec->gen.mixer_nid = 0; break; case 0x10ec0215: + case 0x10ec0245: case 0x10ec0285: case 0x10ec0289: spec->codec_variant = ALC269_TYPE_ALC215; @@ -8344,6 +8346,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0234, "ALC234", patch_alc269), 
HDA_CODEC_ENTRY(0x10ec0235, "ALC233", patch_alc269), HDA_CODEC_ENTRY(0x10ec0236, "ALC236", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0245, "ALC245", patch_alc269), HDA_CODEC_ENTRY(0x10ec0255, "ALC255", patch_alc269), HDA_CODEC_ENTRY(0x10ec0256, "ALC256", patch_alc269), HDA_CODEC_ENTRY(0x10ec0257, "ALC257", patch_alc269), -- GitLab From 83c4d03d001eb25b563e8cb4919ad6c9a0c4b0dd Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Thu, 23 Apr 2020 12:54:19 +0800 Subject: [PATCH 1160/1278] ALSA: usb-audio: Fix usb audio refcnt leak when getting spdif commit 59e1947ca09ebd1cae147c08c7c41f3141233c84 upstream. snd_microii_spdif_default_get() invokes snd_usb_lock_shutdown(), which increases the refcount of the snd_usb_audio object "chip". When snd_microii_spdif_default_get() returns, local variable "chip" becomes invalid, so the refcount should be decreased to keep refcount balanced. The reference counting issue happens in several exception handling paths of snd_microii_spdif_default_get(). When those error scenarios occur such as usb_ifnum_to_if() returns NULL, the function forgets to decrease the refcnt increased by snd_usb_lock_shutdown(), causing a refcnt leak. Fix this issue by jumping to "end" label when those error scenarios occur. 
Fixes: 447d6275f0c2 ("ALSA: usb-audio: Add sanity checks for endpoint accesses") Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Cc: Link: https://lore.kernel.org/r/1587617711-13200-1-git-send-email-xiyuyang19@fudan.edu.cn Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_quirks.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index b54f7dab8372..b9ea4a42aee4 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -1520,11 +1520,15 @@ static int snd_microii_spdif_default_get(struct snd_kcontrol *kcontrol, /* use known values for that card: interface#1 altsetting#1 */ iface = usb_ifnum_to_if(chip->dev, 1); - if (!iface || iface->num_altsetting < 2) - return -EINVAL; + if (!iface || iface->num_altsetting < 2) { + err = -EINVAL; + goto end; + } alts = &iface->altsetting[1]; - if (get_iface_desc(alts)->bNumEndpoints < 1) - return -EINVAL; + if (get_iface_desc(alts)->bNumEndpoints < 1) { + err = -EINVAL; + goto end; + } ep = get_endpoint(alts, 0)->bEndpointAddress; err = snd_usb_ctl_msg(chip->dev, -- GitLab From b47d6d6f68a37e9e6f1fc1a1cd5d5e02e2171394 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Sat, 18 Apr 2020 20:58:15 +0300 Subject: [PATCH 1161/1278] ALSA: usb-audio: Filter out unsupported sample rates on Focusrite devices commit 1c826792586f526a5a5cd21d55aad388f5bb0b23 upstream. Many Focusrite devices supports a limited set of sample rates per altsetting. These includes audio interfaces with ADAT ports: - Scarlett 18i6, 18i8 1st gen, 18i20 1st gen; - Scarlett 18i8 2nd gen, 18i20 2nd gen; - Scarlett 18i8 3rd gen, 18i20 3rd gen; - Clarett 2Pre USB, 4Pre USB, 8Pre USB. Maximum rate is exposed in the last 4 bytes of Format Type descriptor which has a non-standard bLength = 10. 
Tested-by: Alexey Skobkin Signed-off-by: Alexander Tsoy Cc: Link: https://lore.kernel.org/r/20200418175815.12211-1-alexander@tsoy.me Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/format.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/sound/usb/format.c b/sound/usb/format.c index 2c44386e5569..eeb56d6fe8aa 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -220,6 +220,52 @@ static int parse_audio_format_rates_v1(struct snd_usb_audio *chip, struct audiof return 0; } +/* + * Many Focusrite devices supports a limited set of sampling rates per + * altsetting. Maximum rate is exposed in the last 4 bytes of Format Type + * descriptor which has a non-standard bLength = 10. + */ +static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, + struct audioformat *fp, + unsigned int rate) +{ + struct usb_interface *iface; + struct usb_host_interface *alts; + unsigned char *fmt; + unsigned int max_rate; + + iface = usb_ifnum_to_if(chip->dev, fp->iface); + if (!iface) + return true; + + alts = &iface->altsetting[fp->altset_idx]; + fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen, + NULL, UAC_FORMAT_TYPE); + if (!fmt) + return true; + + if (fmt[0] == 10) { /* bLength */ + max_rate = combine_quad(&fmt[6]); + + /* Validate max rate */ + if (max_rate != 48000 && + max_rate != 96000 && + max_rate != 192000 && + max_rate != 384000) { + + usb_audio_info(chip, + "%u:%d : unexpected max rate: %u\n", + fp->iface, fp->altsetting, max_rate); + + return true; + } + + return rate <= max_rate; + } + + return true; +} + /* * Helper function to walk the array of sample rate triplets reported by * the device. 
The problem is that we need to parse whole array first to @@ -256,6 +302,11 @@ static int parse_uac2_sample_rate_range(struct snd_usb_audio *chip, } for (rate = min; rate <= max; rate += res) { + /* Filter out invalid rates on Focusrite devices */ + if (USB_ID_VENDOR(chip->usb_id) == 0x1235 && + !focusrite_valid_sample_rate(chip, fp, rate)) + goto skip_rate; + if (fp->rate_table) fp->rate_table[nr_rates] = rate; if (!fp->rate_min || rate < fp->rate_min) @@ -270,6 +321,7 @@ static int parse_uac2_sample_rate_range(struct snd_usb_audio *chip, break; } +skip_rate: /* avoid endless loop */ if (res == 0) break; -- GitLab From 2fbd15e159f021126a2941d77f22a94b14ccf0e2 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Sun, 12 Apr 2020 20:04:12 +0300 Subject: [PATCH 1162/1278] tpm/tpm_tis: Free IRQ if probing fails commit b160c94be5d2816b62c8ac338605668304242959 upstream. Call disable_interrupts() if we have to revert to polling in order not to unnecessarily reserve the IRQ for the life-cycle of the driver. 
Cc: stable@vger.kernel.org # 4.5.x Reported-by: Hans de Goede Fixes: e3837e74a06d ("tpm_tis: Refactor the interrupt setup") Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index a7d9c0c53fcd..9b1116501f20 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -331,6 +331,9 @@ static void disable_interrupts(struct tpm_chip *chip) u32 intmask; int rc; + if (priv->irq == 0) + return; + rc = tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &intmask); if (rc < 0) intmask = 0; @@ -874,9 +877,12 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq, if (irq) { tpm_tis_probe_irq_single(chip, intmask, IRQF_SHARED, irq); - if (!(chip->flags & TPM_CHIP_FLAG_IRQ)) + if (!(chip->flags & TPM_CHIP_FLAG_IRQ)) { dev_err(&chip->dev, FW_BUG "TPM interrupt not working, polling instead\n"); + + disable_interrupts(chip); + } } else { tpm_tis_probe_irq(chip, intmask); } -- GitLab From 7f06d3a6c49499e2eebd47fffd584bfce5b3a38b Mon Sep 17 00:00:00 2001 From: George Wilson Date: Thu, 19 Mar 2020 23:27:58 -0400 Subject: [PATCH 1163/1278] tpm: ibmvtpm: retry on H_CLOSED in tpm_ibmvtpm_send() commit eba5cf3dcb844c82f54d4a857e124824e252206d upstream. tpm_ibmvtpm_send() can fail during PowerVM Live Partition Mobility resume with an H_CLOSED return from ibmvtpm_send_crq(). The PAPR says, 'The "partner partition suspended" transport event disables the associated CRQ such that any H_SEND_CRQ hcall() to the associated CRQ returns H_Closed until the CRQ has been explicitly enabled using the H_ENABLE_CRQ hcall.' This patch adds a check in tpm_ibmvtpm_send() for an H_CLOSED return from ibmvtpm_send_crq() and in that case calls tpm_ibmvtpm_resume() and retries the ibmvtpm_send_crq() once. 
Cc: stable@vger.kernel.org # 3.7.x Fixes: 132f76294744 ("drivers/char/tpm: Add new device driver to support IBM vTPM") Reported-by: Linh Pham Reviewed-by: Stefan Berger Signed-off-by: George Wilson Tested-by: Linh Pham Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_ibmvtpm.c | 136 ++++++++++++++++++--------------- 1 file changed, 73 insertions(+), 63 deletions(-) diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 77e47dc5aacc..569e93e1f06c 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012 IBM Corporation + * Copyright (C) 2012-2020 IBM Corporation * * Author: Ashley Lai * @@ -140,6 +140,64 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) return len; } +/** + * ibmvtpm_crq_send_init - Send a CRQ initialize message + * @ibmvtpm: vtpm device struct + * + * Return: + * 0 on success. + * Non-zero on failure. + */ +static int ibmvtpm_crq_send_init(struct ibmvtpm_dev *ibmvtpm) +{ + int rc; + + rc = ibmvtpm_send_crq_word(ibmvtpm->vdev, INIT_CRQ_CMD); + if (rc != H_SUCCESS) + dev_err(ibmvtpm->dev, + "%s failed rc=%d\n", __func__, rc); + + return rc; +} + +/** + * tpm_ibmvtpm_resume - Resume from suspend + * + * @dev: device struct + * + * Return: Always 0. 
+ */ +static int tpm_ibmvtpm_resume(struct device *dev) +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev); + int rc = 0; + + do { + if (rc) + msleep(100); + rc = plpar_hcall_norets(H_ENABLE_CRQ, + ibmvtpm->vdev->unit_address); + } while (rc == H_IN_PROGRESS || rc == H_BUSY || H_IS_LONG_BUSY(rc)); + + if (rc) { + dev_err(dev, "Error enabling ibmvtpm rc=%d\n", rc); + return rc; + } + + rc = vio_enable_interrupts(ibmvtpm->vdev); + if (rc) { + dev_err(dev, "Error vio_enable_interrupts rc=%d\n", rc); + return rc; + } + + rc = ibmvtpm_crq_send_init(ibmvtpm); + if (rc) + dev_err(dev, "Error send_init rc=%d\n", rc); + + return rc; +} + /** * tpm_ibmvtpm_send() - Send a TPM command * @chip: tpm chip struct @@ -153,6 +211,7 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) { struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev); + bool retry = true; int rc, sig; if (!ibmvtpm->rtce_buf) { @@ -186,18 +245,27 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) */ ibmvtpm->tpm_processing_cmd = true; +again: rc = ibmvtpm_send_crq(ibmvtpm->vdev, IBMVTPM_VALID_CMD, VTPM_TPM_COMMAND, count, ibmvtpm->rtce_dma_handle); if (rc != H_SUCCESS) { + /* + * H_CLOSED can be returned after LPM resume. Call + * tpm_ibmvtpm_resume() to re-enable the CRQ then retry + * ibmvtpm_send_crq() once before failing. 
+ */ + if (rc == H_CLOSED && retry) { + tpm_ibmvtpm_resume(ibmvtpm->dev); + retry = false; + goto again; + } dev_err(ibmvtpm->dev, "tpm_ibmvtpm_send failed rc=%d\n", rc); - rc = 0; ibmvtpm->tpm_processing_cmd = false; - } else - rc = 0; + } spin_unlock(&ibmvtpm->rtce_lock); - return rc; + return 0; } static void tpm_ibmvtpm_cancel(struct tpm_chip *chip) @@ -275,26 +343,6 @@ static int ibmvtpm_crq_send_init_complete(struct ibmvtpm_dev *ibmvtpm) return rc; } -/** - * ibmvtpm_crq_send_init - Send a CRQ initialize message - * @ibmvtpm: vtpm device struct - * - * Return: - * 0 on success. - * Non-zero on failure. - */ -static int ibmvtpm_crq_send_init(struct ibmvtpm_dev *ibmvtpm) -{ - int rc; - - rc = ibmvtpm_send_crq_word(ibmvtpm->vdev, INIT_CRQ_CMD); - if (rc != H_SUCCESS) - dev_err(ibmvtpm->dev, - "ibmvtpm_crq_send_init failed rc=%d\n", rc); - - return rc; -} - /** * tpm_ibmvtpm_remove - ibm vtpm remove entry point * @vdev: vio device struct @@ -407,44 +455,6 @@ static int ibmvtpm_reset_crq(struct ibmvtpm_dev *ibmvtpm) ibmvtpm->crq_dma_handle, CRQ_RES_BUF_SIZE); } -/** - * tpm_ibmvtpm_resume - Resume from suspend - * - * @dev: device struct - * - * Return: Always 0. 
- */ -static int tpm_ibmvtpm_resume(struct device *dev) -{ - struct tpm_chip *chip = dev_get_drvdata(dev); - struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev); - int rc = 0; - - do { - if (rc) - msleep(100); - rc = plpar_hcall_norets(H_ENABLE_CRQ, - ibmvtpm->vdev->unit_address); - } while (rc == H_IN_PROGRESS || rc == H_BUSY || H_IS_LONG_BUSY(rc)); - - if (rc) { - dev_err(dev, "Error enabling ibmvtpm rc=%d\n", rc); - return rc; - } - - rc = vio_enable_interrupts(ibmvtpm->vdev); - if (rc) { - dev_err(dev, "Error vio_enable_interrupts rc=%d\n", rc); - return rc; - } - - rc = ibmvtpm_crq_send_init(ibmvtpm); - if (rc) - dev_err(dev, "Error send_init rc=%d\n", rc); - - return rc; -} - static bool tpm_ibmvtpm_req_canceled(struct tpm_chip *chip, u8 status) { return (status == 0); -- GitLab From ba46d6dcee90f57964d446c281ef53dea027f42d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Apr 2020 23:40:58 -0700 Subject: [PATCH 1164/1278] KVM: Check validity of resolved slot when searching memslots commit b6467ab142b708dd076f6186ca274f14af379c72 upstream. Check that the resolved slot (somewhat confusingly named 'start') is a valid/allocated slot before doing the final comparison to see if the specified gfn resides in the associated slot. The resolved slot can be invalid if the binary search loop terminated because the search index was incremented beyond the number of used slots. This bug has existed since the binary search algorithm was introduced, but went unnoticed because KVM statically allocated memory for the max number of slots, i.e. the access would only be truly out-of-bounds if all possible slots were allocated and the specified gfn was less than the base of the lowest memslot. Commit 36947254e5f98 ("KVM: Dynamically size memslot array based on number of used slots") eliminated the "all possible slots allocated" condition and made the bug embarrassingly easy to hit.
Fixes: 9c1a5d38780e6 ("kvm: optimize GFN to memslot lookup with large slots amount") Reported-by: syzbot+d889b59b2bb87d4047a2@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20200408064059.8957-2-sean.j.christopherson@intel.com> Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 30376715a607..2e06ca46f07c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -945,7 +945,7 @@ search_memslots(struct kvm_memslots *slots, gfn_t gfn) start = slot + 1; } - if (gfn >= memslots[start].base_gfn && + if (start < slots->used_slots && gfn >= memslots[start].base_gfn && gfn < memslots[start].base_gfn + memslots[start].npages) { atomic_set(&slots->lru_slot, start); return &memslots[start]; -- GitLab From 1988180a2eaa51c3e9c26e16d7a94424308e0658 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 14 Apr 2020 09:14:14 +0200 Subject: [PATCH 1165/1278] KVM: VMX: Enable machine check support for 32bit targets commit fb56baae5ea509e63c2a068d66a4d8ea91969fca upstream. There is no reason to limit the use of do_machine_check to 64bit targets. MCE handling works for both target families.
Cc: Paolo Bonzini Cc: Sean Christopherson Cc: stable@vger.kernel.org Fixes: a0861c02a981 ("KVM: Add VT-x machine check support") Signed-off-by: Uros Bizjak Message-Id: <20200414071414.45636-1-ubizjak@gmail.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f8e3f3c48283..c139dedec12b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6250,7 +6250,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, */ static void kvm_machine_check(void) { -#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) +#if defined(CONFIG_X86_MCE) struct pt_regs regs = { .cs = 3, /* Fake ring 3 no matter what the guest ran on */ .flags = X86_EFLAGS_IF, -- GitLab From 6f7818d90cc6b26d22d4375292ae74e767ee67f6 Mon Sep 17 00:00:00 2001 From: Andrew Melnychenko Date: Tue, 14 Apr 2020 22:15:03 +0300 Subject: [PATCH 1166/1278] tty: hvc: fix buffer overflow during hvc_alloc(). commit 9a9fc42b86c06120744555fea43fdcabe297c656 upstream. If there are a lot (more than 16) of virtio-console devices or the virtio_console module is reloaded - buffers 'vtermnos' and 'cons_ops' are overflowed. In older kernels it overruns spinlock which leads to kernel freezing: https://bugzilla.redhat.com/show_bug.cgi?id=1786239 To reproduce the issue, you can try simple script that loads/unloads module. Something like this: while [ 1 ] do modprobe virtio_console sleep 2 modprobe -r virtio_console sleep 2 done Description of problem: Guest get 'Call Trace' when loading module "virtio_console" and unloading it frequently - clearly reproduced on kernel-4.18.0: [ 81.498208] ------------[ cut here ]------------ [ 81.499263] pvqspinlock: lock 0xffffffff92080020 has corrupted value 0xc0774ca0!
[ 81.501000] WARNING: CPU: 0 PID: 785 at kernel/locking/qspinlock_paravirt.h:500 __pv_queued_spin_unlock_slowpath+0xc0/0xd0 [ 81.503173] Modules linked in: virtio_console fuse xt_CHECKSUM ipt_MASQUERADE xt_conntrack ipt_REJECT nft_counter nf_nat_tftp nft_objref nf_conntrack_tftp tun bridge stp llc nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nf_tables_set nft_chain_nat_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 nft_chain_route_ipv6 nft_chain_nat_ipv4 nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack nft_chain_route_ipv4 ip6_tables nft_compat ip_set nf_tables nfnetlink sunrpc bochs_drm drm_vram_helper ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm i2c_piix4 pcspkr crct10dif_pclmul crc32_pclmul joydev ghash_clmulni_intel ip_tables xfs libcrc32c sd_mod sg ata_generic ata_piix virtio_net libata crc32c_intel net_failover failover serio_raw virtio_scsi dm_mirror dm_region_hash dm_log dm_mod [last unloaded: virtio_console] [ 81.517019] CPU: 0 PID: 785 Comm: kworker/0:2 Kdump: loaded Not tainted 4.18.0-167.el8.x86_64 #1 [ 81.518639] Hardware name: Red Hat KVM, BIOS 1.12.0-5.scrmod+el8.2.0+5159+d8aa4d83 04/01/2014 [ 81.520205] Workqueue: events control_work_handler [virtio_console] [ 81.521354] RIP: 0010:__pv_queued_spin_unlock_slowpath+0xc0/0xd0 [ 81.522450] Code: 07 00 48 63 7a 10 e8 bf 64 f5 ff 66 90 c3 8b 05 e6 cf d6 01 85 c0 74 01 c3 8b 17 48 89 fe 48 c7 c7 38 4b 29 91 e8 3a 6c fa ff <0f> 0b c3 0f 0b 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 48 [ 81.525830] RSP: 0018:ffffb51a01ffbd70 EFLAGS: 00010282 [ 81.526798] RAX: 0000000000000000 RBX: 0000000000000010 RCX: 0000000000000000 [ 81.528110] RDX: ffff9e66f1826480 RSI: ffff9e66f1816a08 RDI: ffff9e66f1816a08 [ 81.529437] RBP: ffffffff9153ff10 R08: 000000000000026c R09: 0000000000000053 [ 81.530732] R10: 0000000000000000 R11: ffffb51a01ffbc18 R12: ffff9e66cd682200 [ 81.532133] R13: ffffffff9153ff10 R14: 
ffff9e6685569500 R15: ffff9e66cd682000 [ 81.533442] FS: 0000000000000000(0000) GS:ffff9e66f1800000(0000) knlGS:0000000000000000 [ 81.534914] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 81.535971] CR2: 00005624c55b14d0 CR3: 00000003a023c000 CR4: 00000000003406f0 [ 81.537283] Call Trace: [ 81.537763] __raw_callee_save___pv_queued_spin_unlock_slowpath+0x11/0x20 [ 81.539011] .slowpath+0x9/0xe [ 81.539585] hvc_alloc+0x25e/0x300 [ 81.540237] init_port_console+0x28/0x100 [virtio_console] [ 81.541251] handle_control_message.constprop.27+0x1c4/0x310 [virtio_console] [ 81.542546] control_work_handler+0x70/0x10c [virtio_console] [ 81.543601] process_one_work+0x1a7/0x3b0 [ 81.544356] worker_thread+0x30/0x390 [ 81.545025] ? create_worker+0x1a0/0x1a0 [ 81.545749] kthread+0x112/0x130 [ 81.546358] ? kthread_flush_work_fn+0x10/0x10 [ 81.547183] ret_from_fork+0x22/0x40 [ 81.547842] ---[ end trace aa97649bd16c8655 ]--- [ 83.546539] general protection fault: 0000 [#1] SMP NOPTI [ 83.547422] CPU: 5 PID: 3225 Comm: modprobe Kdump: loaded Tainted: G W --------- - - 4.18.0-167.el8.x86_64 #1 [ 83.549191] Hardware name: Red Hat KVM, BIOS 1.12.0-5.scrmod+el8.2.0+5159+d8aa4d83 04/01/2014 [ 83.550544] RIP: 0010:__pv_queued_spin_lock_slowpath+0x19a/0x2a0 [ 83.551504] Code: c4 c1 ea 12 41 be 01 00 00 00 4c 8d 6d 14 41 83 e4 03 8d 42 ff 49 c1 e4 05 48 98 49 81 c4 40 a5 02 00 4c 03 24 c5 60 48 34 91 <49> 89 2c 24 b8 00 80 00 00 eb 15 84 c0 75 0a 41 0f b6 54 24 14 84 [ 83.554449] RSP: 0018:ffffb51a0323fdb0 EFLAGS: 00010202 [ 83.555290] RAX: 000000000000301c RBX: ffffffff92080020 RCX: 0000000000000001 [ 83.556426] RDX: 000000000000301d RSI: 0000000000000000 RDI: 0000000000000000 [ 83.557556] RBP: ffff9e66f196a540 R08: 000000000000028a R09: ffff9e66d2757788 [ 83.558688] R10: 0000000000000000 R11: 0000000000000000 R12: 646e61725f770b07 [ 83.559821] R13: ffff9e66f196a554 R14: 0000000000000001 R15: 0000000000180000 [ 83.560958] FS: 00007fd5032e8740(0000) GS:ffff9e66f1940000(0000) 
knlGS:0000000000000000 [ 83.562233] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 83.563149] CR2: 00007fd5022b0da0 CR3: 000000038c334000 CR4: 00000000003406e0 Signed-off-by: Andrew Melnychenko Cc: stable Link: https://lore.kernel.org/r/20200414191503.3471783-1-andrew@daynix.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/hvc/hvc_console.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index a8d399188242..fc0ef13f2616 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -288,10 +288,6 @@ int hvc_instantiate(uint32_t vtermno, int index, const struct hv_ops *ops) vtermnos[index] = vtermno; cons_ops[index] = ops; - /* reserve all indices up to and including this index */ - if (last_hvc < index) - last_hvc = index; - /* check if we need to re-register the kernel console */ hvc_check_console(index); @@ -895,13 +891,22 @@ struct hvc_struct *hvc_alloc(uint32_t vtermno, int data, cons_ops[i] == hp->ops) break; - /* no matching slot, just use a counter */ - if (i >= MAX_NR_HVC_CONSOLES) - i = ++last_hvc; + if (i >= MAX_NR_HVC_CONSOLES) { + + /* find 'empty' slot for console */ + for (i = 0; i < MAX_NR_HVC_CONSOLES && vtermnos[i] != -1; i++) { + } + + /* no matching slot, just use a counter */ + if (i == MAX_NR_HVC_CONSOLES) + i = ++last_hvc + MAX_NR_HVC_CONSOLES; + } hp->index = i; - cons_ops[i] = ops; - vtermnos[i] = vtermno; + if (i < MAX_NR_HVC_CONSOLES) { + cons_ops[i] = ops; + vtermnos[i] = vtermno; + } list_add_tail(&(hp->next), &hvc_structs); spin_unlock(&hvc_structs_lock); -- GitLab From 37fe882533e93ff0d6e3606cdbb2a3d17d67cf8a Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 17 Apr 2020 12:59:59 +0200 Subject: [PATCH 1167/1278] tty: rocket, avoid OOB access commit 7127d24372bf23675a36edc64d092dc7fd92ebe8 upstream. init_r_port can access pc104 array out of bounds. pc104 is a 2D array defined to have 4 members. 
Each member has 8 submembers. * we can have more than 4 (PCI) boards, i.e. [board] can be OOB * line is not modulo-ed by anything, so the first line on the second board can be 4, on the 3rd 12 or alike (depending on previously registered boards). It's zero only on the first line of the first board. So even [line] can be OOB, quite soon (with the 2nd registered board already). This code is broken for ages, so just avoid the OOB accesses and don't try to fix it as we would need to find out the correct line number. Use the default: RS232, if we are out. Generally, if anyone needs to set the interface types, a module parameter is past the last thing that should be used for this purpose. The parameters' description says it's for ISA cards anyway. Signed-off-by: Jiri Slaby Cc: stable Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/r/20200417105959.15201-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/rocket.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c index 32943afacffd..1081810b3e3f 100644 --- a/drivers/tty/rocket.c +++ b/drivers/tty/rocket.c @@ -645,18 +645,21 @@ init_r_port(int board, int aiop, int chan, struct pci_dev *pci_dev) tty_port_init(&info->port); info->port.ops = &rocket_port_ops; info->flags &= ~ROCKET_MODE_MASK; - switch (pc104[board][line]) { - case 422: - info->flags |= ROCKET_MODE_RS422; - break; - case 485: - info->flags |= ROCKET_MODE_RS485; - break; - case 232: - default: + if (board < ARRAY_SIZE(pc104) && line < ARRAY_SIZE(pc104_1)) + switch (pc104[board][line]) { + case 422: + info->flags |= ROCKET_MODE_RS422; + break; + case 485: + info->flags |= ROCKET_MODE_RS485; + break; + case 232: + default: + info->flags |= ROCKET_MODE_RS232; + break; + } + else info->flags |= ROCKET_MODE_RS232; - break; - } info->intmask = RXF_TRIG | TXFIFO_MT | SRC_INT | DELTA_CD | DELTA_CTS | DELTA_DSR; 
if (sInitChan(ctlp, &info->channel, aiop, chan) == 0) { -- GitLab From 33fda70c0f8382d72533f0d1c3f26e8bc76b480f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 22 Apr 2020 16:14:57 -0400 Subject: [PATCH 1168/1278] usb-storage: Add unusual_devs entry for JMicron JMS566 commit 94f9c8c3c404ee1f7aaff81ad4f24aec4e34a78b upstream. Cyril Roelandt reports that his JMicron JMS566 USB-SATA bridge fails to handle WRITE commands with the FUA bit set, even though it claims to support FUA. (Oddly enough, a later version of the same bridge, version 2.03 as opposed to 1.14, doesn't claim to support FUA. Also oddly, the bridge _does_ support FUA when using the UAS transport instead of the Bulk-Only transport -- but this device was blacklisted for uas in commit bc3bdb12bbb3 ("usb-storage: Disable UAS on JMicron SATA enclosure") for apparently unrelated reasons.) This patch adds a usb-storage unusual_devs entry with the BROKEN_FUA flag. This allows the bridge to work properly with usb-storage. Reported-and-tested-by: Cyril Roelandt Signed-off-by: Alan Stern CC: Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.2004221613110.11262-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index df8ee83c3f1a..5c3f2eaf59e8 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2342,6 +2342,13 @@ UNUSUAL_DEV( 0x3340, 0xffff, 0x0000, 0x0000, USB_SC_DEVICE,USB_PR_DEVICE,NULL, US_FL_MAX_SECTORS_64 ), +/* Reported by Cyril Roelandt */ +UNUSUAL_DEV( 0x357d, 0x7788, 0x0114, 0x0114, + "JMicron", + "USB to ATA/ATAPI Bridge", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_BROKEN_FUA ), + /* Reported by Andrey Rahmatullin */ UNUSUAL_DEV( 0x4102, 0x1020, 0x0100, 0x0100, "iRiver", -- GitLab From 18441f7f5babc53e1bbbd2fdb57483c927cb5367 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 20 
Apr 2020 16:24:34 -0400 Subject: [PATCH 1169/1278] audit: check the length of userspace generated audit records commit 763dafc520add02a1f4639b500c509acc0ea8e5b upstream. Commit 756125289285 ("audit: always check the netlink payload length in audit_receive_msg()") fixed a number of missing message length checks, but forgot to check the length of userspace generated audit records. The good news is that you need CAP_AUDIT_WRITE to submit userspace audit records, which is generally only given to trusted processes, so the impact should be limited. Cc: stable@vger.kernel.org Fixes: 756125289285 ("audit: always check the netlink payload length in audit_receive_msg()") Reported-by: syzbot+49e69b4d71a420ceda3e@syzkaller.appspotmail.com Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- kernel/audit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/audit.c b/kernel/audit.c index b21a8910f765..aa6d5e39526b 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1292,6 +1292,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: if (!audit_enabled && msg_type != AUDIT_USER_AVC) return 0; + /* exit early if there isn't at least one character to print */ + if (data_len < 2) + return -EINVAL; err = audit_filter(msg_type, AUDIT_FILTER_USER); if (err == 1) { /* match or error */ -- GitLab From fe77aec8265396b50c9a33512759c15a4ec5f8cb Mon Sep 17 00:00:00 2001 From: Gyeongtaek Lee Date: Sat, 18 Apr 2020 13:13:20 +0900 Subject: [PATCH 1170/1278] ASoC: dapm: fixup dapm kcontrol widget commit ebf1474745b4373fdde0fcf32d9d1f369b50b212 upstream. snd_soc_dapm_kcontrol widget which is created by autodisable control should contain correct on_val, mask and shift because it is set when the widget is powered and changed value is applied on registers by following code in dapm_seq_run_coalesced(). 
mask |= w->mask << w->shift; if (w->power) value |= w->on_val << w->shift; else value |= w->off_val << w->shift; Shift on the mask in dapm_kcontrol_data_alloc() is removed to prevent double shift. And, on_val in dapm_kcontrol_set_value() is modified to get correct value in the dapm_seq_run_coalesced(). Signed-off-by: Gyeongtaek Lee Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/000001d61537$b212f620$1638e260$@samsung.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-dapm.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 7861cf7a4488..c42ee8ef544d 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -413,7 +413,7 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget, memset(&template, 0, sizeof(template)); template.reg = e->reg; - template.mask = e->mask << e->shift_l; + template.mask = e->mask; template.shift = e->shift_l; template.off_val = snd_soc_enum_item_to_val(e, 0); template.on_val = template.off_val; @@ -539,8 +539,22 @@ static bool dapm_kcontrol_set_value(const struct snd_kcontrol *kcontrol, if (data->value == value) return false; - if (data->widget) - data->widget->on_val = value; + if (data->widget) { + switch (dapm_kcontrol_get_wlist(kcontrol)->widgets[0]->id) { + case snd_soc_dapm_switch: + case snd_soc_dapm_mixer: + case snd_soc_dapm_mixer_named_ctl: + data->widget->on_val = value & data->widget->mask; + break; + case snd_soc_dapm_demux: + case snd_soc_dapm_mux: + data->widget->on_val = value >> data->widget->shift; + break; + default: + data->widget->on_val = value; + break; + } + } data->value = value; -- GitLab From 545f096414d09b7c021425f1dd155a1cea6c38ff Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Apr 2020 10:08:09 +0300 Subject: [PATCH 1171/1278] iwlwifi: pcie: actually release queue memory in TVQM commit b98b33d5560a2d940f3b80f6768a6177bf3dfbc0 upstream. 
The iwl_trans_pcie_dyn_txq_free() function only releases the frames that may be left on the queue by calling iwl_pcie_gen2_txq_unmap(), but doesn't actually free the DMA ring or byte-count tables for the queue. This leads to pretty large memory leaks (at least before my queue size improvements), in particular in monitor/sniffer mode on channel hopping since this happens on every channel change. This was also now more evident after the move to a DMA pool for the byte count tables, showing messages such as BUG iwlwifi:bc (...): Objects remaining in iwlwifi:bc on __kmem_cache_shutdown() This fixes https://bugzilla.kernel.org/show_bug.cgi?id=206811. Signed-off-by: Johannes Berg Fixes: 6b35ff91572f ("iwlwifi: pcie: introduce a000 TX queues management") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20200417100405.f5f4c4193ec1.Id5feebc9b4318041913a9c89fc1378bb5454292c@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index bbb39d6ec2ee..f37018d72b44 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -1124,6 +1124,9 @@ void iwl_trans_pcie_dyn_txq_free(struct iwl_trans *trans, int queue) iwl_pcie_gen2_txq_unmap(trans, queue); + iwl_pcie_gen2_txq_free_memory(trans, trans_pcie->txq[queue]); + trans_pcie->txq[queue] = NULL; + IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", queue); } -- GitLab From c78004318edacc8a4fc8ea7ad975012b0e941e67 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 23 Mar 2020 09:19:33 +0100 Subject: [PATCH 1172/1278] ARM: imx: provide v7_cpu_resume() only on ARM_CPU_SUSPEND=y commit f1baca8896ae18e12c45552a4c4ae2086aa7e02c upstream. 
512a928affd5 ("ARM: imx: build v7_cpu_resume() unconditionally") introduced an unintended linker error for i.MX6 configurations that have ARM_CPU_SUSPEND=n which can happen if neither CONFIG_PM, CONFIG_CPU_IDLE, nor ARM_PSCI_FW are selected. Fix this by having v7_cpu_resume() compiled only when cpu_resume() it calls is available as well. The C declaration for the function remains unguarded to avoid future code inadvertently using a stub and introducing a regression to the bug the original commit fixed. Cc: Fixes: 512a928affd5 ("ARM: imx: build v7_cpu_resume() unconditionally") Reported-by: Clemens Gruber Signed-off-by: Ahmad Fatoum Tested-by: Roland Hieber Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-imx/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 8cf1a98785a5..1d0923b4a82b 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -87,8 +87,10 @@ AFLAGS_suspend-imx6.o :=-Wa,-march=armv7-a obj-$(CONFIG_SOC_IMX6) += suspend-imx6.o obj-$(CONFIG_SOC_IMX53) += suspend-imx53.o endif +ifeq ($(CONFIG_ARM_CPU_SUSPEND),y) AFLAGS_resume-imx6.o :=-Wa,-march=armv7-a obj-$(CONFIG_SOC_IMX6) += resume-imx6.o +endif obj-$(CONFIG_SOC_IMX6) += pm-imx6.o obj-$(CONFIG_SOC_IMX1) += mach-imx1.o -- GitLab From 65b043bb6a73700154525166ca4fdacd0286931e Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Fri, 17 Apr 2020 10:19:08 +1200 Subject: [PATCH 1173/1278] powerpc/setup_64: Set cache-line-size based on cache-block-size commit 94c0b013c98583614e1ad911e8795ca36da34a85 upstream. If {i,d}-cache-block-size is set and {i,d}-cache-line-size is not, use the block-size value for both. Per the devicetree spec cache-line-size is only needed if it differs from the block size. Originally the code would fallback from block size to line size. An error message was printed if both properties were missing. 
Later the code was refactored to use clearer names and logic but it inadvertently made line size a required property, meaning on systems without a line size property we fall back to the default from the cputable. On powernv (OPAL) platforms, since the introduction of device tree CPU features (5a61ef74f269 ("powerpc/64s: Support new device tree binding for discovering CPU features")), that has led to the wrong value being used, as the fallback value is incorrect for Power8/Power9 CPUs. The incorrect values flow through to the VDSO and also to the sysconf values, SC_LEVEL1_ICACHE_LINESIZE etc. Fixes: bd067f83b084 ("powerpc/64: Fix naming of cache block vs. cache line") Cc: stable@vger.kernel.org # v4.11+ Signed-off-by: Chris Packham Reported-by: Qian Cai [mpe: Add even more detail to change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200416221908.7886-1-chris.packham@alliedtelesis.co.nz Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/setup_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 41b3b2787f23..a1e336901cc8 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -466,6 +466,8 @@ static bool __init parse_cache_info(struct device_node *np, lsizep = of_get_property(np, propnames[3], NULL); if (bsizep == NULL) bsizep = lsizep; + if (lsizep == NULL) + lsizep = bsizep; if (lsizep != NULL) lsize = be32_to_cpu(*lsizep); if (bsizep != NULL) -- GitLab From 526e6fcb32164650e7b0aca9ee1c55da3f56d551 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 6 Apr 2020 15:20:15 +0100 Subject: [PATCH 1174/1278] staging: comedi: dt2815: fix writing hi byte of analog output commit ed87d33ddbcd9a1c3b5ae87995da34e6f51a862c upstream. The DT2815 analog output command is 16 bits wide, consisting of the 12-bit sample value in bits 15 to 4, the channel number in bits 3 to 1, and a voltage or current selector in bit 0. 
Both bytes of the 16-bit command need to be written in turn to a single 8-bit data register. However, the driver currently only writes the low 8-bits. It is broken and appears to have always been broken. Electronic copies of the DT2815 User's Manual seem impossible to find online, but looking at the source code, a best guess for the sequence the driver intended to use to write the analog output command is as follows: 1. Wait for the status register to read 0x00. 2. Write the low byte of the command to the data register. 3. Wait for the status register to read 0x80. 4. Write the high byte of the command to the data register. Step 4 is missing from the driver. Add step 4 to (hopefully) fix the driver. Also add a "FIXME" comment about setting bit 0 of the low byte of the command. Supposedly, it is used to choose between voltage output and current output, but the current driver always sets it to 1. Signed-off-by: Ian Abbott Cc: stable Link: https://lore.kernel.org/r/20200406142015.126982-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/dt2815.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/staging/comedi/drivers/dt2815.c b/drivers/staging/comedi/drivers/dt2815.c index ce5571971194..9b773c2e140b 100644 --- a/drivers/staging/comedi/drivers/dt2815.c +++ b/drivers/staging/comedi/drivers/dt2815.c @@ -101,6 +101,7 @@ static int dt2815_ao_insn(struct comedi_device *dev, struct comedi_subdevice *s, int ret; for (i = 0; i < insn->n; i++) { + /* FIXME: lo bit 0 chooses voltage output or current output */ lo = ((data[i] & 0x0f) << 4) | (chan << 1) | 0x01; hi = (data[i] & 0xff0) >> 4; @@ -114,6 +115,8 @@ static int dt2815_ao_insn(struct comedi_device *dev, struct comedi_subdevice *s, if (ret) return ret; + outb(hi, dev->iobase + DT2815_DATA); + devpriv->ao_readback[chan] = data[i]; } return i; -- GitLab From 97b294d0e86b3544a3c410b77c640beca692724a Mon Sep 17 00:00:00 2001 From: Xiyu Yang 
Date: Mon, 20 Apr 2020 13:44:16 +0800 Subject: [PATCH 1175/1278] staging: comedi: Fix comedi_device refcnt leak in comedi_open commit 332e0e17ad49e084b7db670ef43b5eb59abd9e34 upstream. comedi_open() invokes comedi_dev_get_from_minor(), which returns a reference of the COMEDI device to "dev" with increased refcount. When comedi_open() returns, "dev" becomes invalid, so the refcount should be decreased to keep refcount balanced. The reference counting issue happens in one exception handling path of comedi_open(). When "cfp" allocation is failed, the refcnt increased by comedi_dev_get_from_minor() is not decreased, causing a refcnt leak. Fix this issue by calling comedi_dev_put() on this error path when "cfp" allocation is failed. Fixes: 20f083c07565 ("staging: comedi: prepare support for per-file read and write subdevices") Signed-off-by: Xiyu Yang Cc: stable Signed-off-by: Xin Tan Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/1587361459-83622-1-git-send-email-xiyuyang19@fudan.edu.cn Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedi_fops.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index e19e395b0e44..9f1ec427c168 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -2603,8 +2603,10 @@ static int comedi_open(struct inode *inode, struct file *file) } cfp = kzalloc(sizeof(*cfp), GFP_KERNEL); - if (!cfp) + if (!cfp) { + comedi_dev_put(dev); return -ENOMEM; + } cfp->dev = dev; -- GitLab From 82f66b9dfa4c42b73a612466fac53d6d11250e9c Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sat, 28 Mar 2020 17:32:42 -0400 Subject: [PATCH 1176/1278] vt: don't hardcode the mem allocation upper bound commit 2717769e204e83e65b8819c5e2ef3e5b6639b270 upstream. 
The code in vc_do_resize() bounds the memory allocation size to avoid exceeding MAX_ORDER down the kzalloc() call chain and generating a runtime warning triggerable from user space. However, not only is it unwise to use a literal value here, but MAX_ORDER may also be configurable based on CONFIG_FORCE_MAX_ZONEORDER. Let's use KMALLOC_MAX_SIZE instead. Note that prior commit bb1107f7c605 ("mm, slab: make sure that KMALLOC_MAX_SIZE will fit into MAX_ORDER") the KMALLOC_MAX_SIZE value could not be relied upon. Signed-off-by: Nicolas Pitre Cc: # v4.10+ Link: https://lore.kernel.org/r/nycvar.YSQ.7.76.2003281702410.2671@knanqh.ubzr Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 46defa3be9a4..8a4e7879a7a6 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -880,7 +880,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, if (new_cols == vc->vc_cols && new_rows == vc->vc_rows) return 0; - if (new_screen_size > (4 << 20)) + if (new_screen_size > KMALLOC_MAX_SIZE) return -EINVAL; newscreen = kzalloc(new_screen_size, GFP_USER); if (!newscreen) -- GitLab From a35715fad2071792e3bb99112b355a3dc4b6f429 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Sat, 18 Apr 2020 17:24:50 +0100 Subject: [PATCH 1177/1278] staging: vt6656: Don't set RCR_MULTICAST or RCR_BROADCAST by default. commit 0f8240bfc070033a4823b19883efd3d38c7735cc upstream. mac80211/users control whether multicast is on or off don't enable it by default. Fixes an issue when multicast/broadcast is always on allowing other beacons through in power save. 
Fixes: db8f37fa3355 ("staging: vt6656: mac80211 conversion: main_usb add functions...") Cc: stable Signed-off-by: Malcolm Priestley Link: https://lore.kernel.org/r/2c24c33d-68c4-f343-bd62-105422418eac@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/main_usb.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index e8ccd800c94f..23b73d54a2ef 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -779,15 +779,11 @@ static void vnt_configure(struct ieee80211_hw *hw, { struct vnt_private *priv = hw->priv; u8 rx_mode = 0; - int rc; *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; - rc = vnt_control_in(priv, MESSAGE_TYPE_READ, MAC_REG_RCR, - MESSAGE_REQUEST_MACREG, sizeof(u8), &rx_mode); - - if (!rc) - rx_mode = RCR_MULTICAST | RCR_BROADCAST; + vnt_control_in(priv, MESSAGE_TYPE_READ, MAC_REG_RCR, + MESSAGE_REQUEST_MACREG, sizeof(u8), &rx_mode); dev_dbg(&priv->usb->dev, "rx mode in = %x\n", rx_mode); -- GitLab From 34e5e58843fb11ff655d9e9dccd7a923fd5bfa11 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Sat, 18 Apr 2020 18:37:18 +0100 Subject: [PATCH 1178/1278] staging: vt6656: Fix calling conditions of vnt_set_bss_mode commit 664ba5180234593b4b8517530e8198bf2f7359e2 upstream. vnt_set_bss_mode needs to be called on all changes to BSS_CHANGED_BASIC_RATES, BSS_CHANGED_ERP_PREAMBLE and BSS_CHANGED_ERP_SLOT Remove all other calls and vnt_update_ifs which is called in vnt_set_bss_mode. Fixes an issue that preamble mode is not being updated correctly. 
Fixes: c12603576e06 ("staging: vt6656: Only call vnt_set_bss_mode on basic rates change.") Cc: stable Signed-off-by: Malcolm Priestley Link: https://lore.kernel.org/r/44110801-6234-50d8-c583-9388f04b486c@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/main_usb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index 23b73d54a2ef..9a70bc1b5bcd 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -594,8 +594,6 @@ static int vnt_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) priv->op_mode = vif->type; - vnt_set_bss_mode(priv); - /* LED blink on TX */ vnt_mac_set_led(priv, LEDSTS_STS, LEDSTS_INTER); @@ -682,7 +680,6 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, priv->basic_rates = conf->basic_rates; vnt_update_top_rates(priv); - vnt_set_bss_mode(priv); dev_dbg(&priv->usb->dev, "basic rates %x\n", conf->basic_rates); } @@ -711,11 +708,14 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, priv->short_slot_time = false; vnt_set_short_slot_time(priv); - vnt_update_ifs(priv); vnt_set_vga_gain_offset(priv, priv->bb_vga[0]); vnt_update_pre_ed_threshold(priv, false); } + if (changed & (BSS_CHANGED_BASIC_RATES | BSS_CHANGED_ERP_PREAMBLE | + BSS_CHANGED_ERP_SLOT)) + vnt_set_bss_mode(priv); + if (changed & BSS_CHANGED_TXPOWER) vnt_rf_setpower(priv, priv->current_rate, conf->chandef.chan->hw_value); -- GitLab From 44d8ca07ad2af23fb3e95357164037c19a4de489 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Sat, 18 Apr 2020 17:43:24 +0100 Subject: [PATCH 1179/1278] staging: vt6656: Fix drivers TBTT timing counter. commit 09057742af98a39ebffa27fac4f889dc873132de upstream. The drivers TBTT counter is not synchronized with mac80211 timestamp. Reorder the functions and use vnt_update_next_tbtt to do the final synchronize. 
Fixes: c15158797df6 ("staging: vt6656: implement TSF counter") Cc: stable Signed-off-by: Malcolm Priestley Link: https://lore.kernel.org/r/375d0b25-e8bc-c8f7-9b10-6cc705d486ee@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/main_usb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index 9a70bc1b5bcd..d19d67c8213c 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -739,12 +739,15 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, vnt_mac_reg_bits_on(priv, MAC_REG_TFTCTL, TFTCTL_TSFCNTREN); - vnt_adjust_tsf(priv, conf->beacon_rate->hw_value, - conf->sync_tsf, priv->current_tsf); - vnt_mac_set_beacon_interval(priv, conf->beacon_int); vnt_reset_next_tbtt(priv, conf->beacon_int); + + vnt_adjust_tsf(priv, conf->beacon_rate->hw_value, + conf->sync_tsf, priv->current_tsf); + + vnt_update_next_tbtt(priv, + conf->sync_tsf, conf->beacon_int); } else { vnt_clear_current_tsf(priv); -- GitLab From 6d28695365b95ffb35fbae1c31aa0a69a50e0dc3 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Sat, 18 Apr 2020 22:01:49 +0100 Subject: [PATCH 1180/1278] staging: vt6656: Fix pairwise key entry save. commit 0b59f10b1d8fe8d50944f21f5d403df9303095a8 upstream. The problem is that the group key saved as VNT_KEY_DEFAULTKEY was overwritten by the VNT_KEY_GROUP_ADDRESS index. mac80211 could not clear the mac_addr in the default key. The VNT_KEY_DEFAULTKEY is not necessary so remove it and set as VNT_KEY_GROUP_ADDRESS. mac80211 can clear any key using vnt_mac_disable_keyentry.
Fixes: f9ef05ce13e4 ("staging: vt6656: Fix pairwise key for non station modes") Cc: stable Signed-off-by: Malcolm Priestley Link: https://lore.kernel.org/r/da2f7e7f-1658-1320-6eee-0f55770ca391@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/key.c | 14 +++----------- drivers/staging/vt6656/main_usb.c | 6 +++++- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/staging/vt6656/key.c b/drivers/staging/vt6656/key.c index cc18cb141bff..5ecc1a97cb44 100644 --- a/drivers/staging/vt6656/key.c +++ b/drivers/staging/vt6656/key.c @@ -91,9 +91,6 @@ static int vnt_set_keymode(struct ieee80211_hw *hw, u8 *mac_addr, case VNT_KEY_PAIRWISE: key_mode |= mode; key_inx = 4; - /* Don't save entry for pairwise key for station mode */ - if (priv->op_mode == NL80211_IFTYPE_STATION) - clear_bit(entry, &priv->key_entry_inuse); break; default: return -EINVAL; @@ -117,7 +114,6 @@ static int vnt_set_keymode(struct ieee80211_hw *hw, u8 *mac_addr, int vnt_set_keys(struct ieee80211_hw *hw, struct ieee80211_sta *sta, struct ieee80211_vif *vif, struct ieee80211_key_conf *key) { - struct ieee80211_bss_conf *conf = &vif->bss_conf; struct vnt_private *priv = hw->priv; u8 *mac_addr = NULL; u8 key_dec_mode = 0; @@ -159,16 +155,12 @@ int vnt_set_keys(struct ieee80211_hw *hw, struct ieee80211_sta *sta, key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; } - if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) { + if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) vnt_set_keymode(hw, mac_addr, key, VNT_KEY_PAIRWISE, key_dec_mode, true); - } else { - vnt_set_keymode(hw, mac_addr, key, VNT_KEY_DEFAULTKEY, + else + vnt_set_keymode(hw, mac_addr, key, VNT_KEY_GROUP_ADDRESS, key_dec_mode, true); - vnt_set_keymode(hw, (u8 *)conf->bssid, key, - VNT_KEY_GROUP_ADDRESS, key_dec_mode, true); - } - return 0; } diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index d19d67c8213c..9adab851580c 100644 --- a/drivers/staging/vt6656/main_usb.c +++ 
b/drivers/staging/vt6656/main_usb.c @@ -827,8 +827,12 @@ static int vnt_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, return -EOPNOTSUPP; break; case DISABLE_KEY: - if (test_bit(key->hw_key_idx, &priv->key_entry_inuse)) + if (test_bit(key->hw_key_idx, &priv->key_entry_inuse)) { clear_bit(key->hw_key_idx, &priv->key_entry_inuse); + + vnt_mac_disable_keyentry(priv, key->hw_key_idx); + } + default: break; } -- GitLab From 31ee8dfac96aa68a04b05fd7c573dd509a27ed0d Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Tue, 14 Apr 2020 11:39:23 +0100 Subject: [PATCH 1181/1278] staging: vt6656: Power save stop wake_up_count wrap around. commit ea81c3486442f4643fc9825a2bb1b430b829bccd upstream. conf.listen_interval can sometimes be zero, causing wake_up_count to wrap around up to many beacons too late, causing CTRL-EVENT-BEACON-LOSS as in: wpa_supplicant[795]: message repeated 45 times: [..CTRL-EVENT-BEACON-LOSS ] Fixes: 43c93d9bf5e2 ("staging: vt6656: implement power saving code.") Cc: stable Signed-off-by: Malcolm Priestley Link: https://lore.kernel.org/r/fce47bb5-7ca6-7671-5094-5c6107302f2b@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/int.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/vt6656/int.c b/drivers/staging/vt6656/int.c index c521729c4192..d5d89e836309 100644 --- a/drivers/staging/vt6656/int.c +++ b/drivers/staging/vt6656/int.c @@ -153,7 +153,8 @@ void vnt_int_process_data(struct vnt_private *priv) priv->wake_up_count = priv->hw->conf.listen_interval; - --priv->wake_up_count; + if (priv->wake_up_count) + --priv->wake_up_count; /* Turn on wake up to listen next beacon */ if (priv->wake_up_count == 1) -- GitLab From d87746ffb6e47fab8ef19b3d608edbc4924d2076 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 15 Apr 2020 17:13:57 +0200 Subject: [PATCH 1182/1278] cdc-acm: close race between suspend() and acm_softint commit 0afccd7601514c4b83d8cc58c740089cc447051d upstream.
Suspend increments a counter, then kills the URBs, then kills the scheduled work. The scheduled work, however, may reschedule the URBs. Fix this by having the work check the counter. Signed-off-by: Oliver Neukum Cc: stable Tested-by: Jonas Karlsson Link: https://lore.kernel.org/r/20200415151358.32664-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 5e171e45c685..7444de498ba1 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -575,14 +575,14 @@ static void acm_softint(struct work_struct *work) struct acm *acm = container_of(work, struct acm, work); if (test_bit(EVENT_RX_STALL, &acm->flags)) { - if (!(usb_autopm_get_interface(acm->data))) { + smp_mb(); /* against acm_suspend() */ + if (!acm->susp_count) { for (i = 0; i < acm->rx_buflimit; i++) usb_kill_urb(acm->read_urbs[i]); usb_clear_halt(acm->dev, acm->in); acm_submit_read_urbs(acm, GFP_KERNEL); - usb_autopm_put_interface(acm->data); + clear_bit(EVENT_RX_STALL, &acm->flags); } - clear_bit(EVENT_RX_STALL, &acm->flags); } if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags)) -- GitLab From 3edd90f1c5efe9cb31fe6646a767773e36ba8aa9 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 15 Apr 2020 17:13:58 +0200 Subject: [PATCH 1183/1278] cdc-acm: introduce a cool down commit a4e7279cd1d19f48f0af2a10ed020febaa9ac092 upstream. Immediate submission in case of a babbling device can lead to a busy loop. Introducing a delayed work. 
Signed-off-by: Oliver Neukum Cc: stable Tested-by: Jonas Karlsson Link: https://lore.kernel.org/r/20200415151358.32664-2-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 30 ++++++++++++++++++++++++++++-- drivers/usb/class/cdc-acm.h | 5 ++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 7444de498ba1..38709bee4c20 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -424,9 +424,12 @@ static void acm_ctrl_irq(struct urb *urb) exit: retval = usb_submit_urb(urb, GFP_ATOMIC); - if (retval && retval != -EPERM) + if (retval && retval != -EPERM && retval != -ENODEV) dev_err(&acm->control->dev, "%s - usb_submit_urb failed: %d\n", __func__, retval); + else + dev_vdbg(&acm->control->dev, + "control resubmission terminated %d\n", retval); } static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags) @@ -442,6 +445,8 @@ static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags) dev_err(&acm->data->dev, "urb %d failed submission with %d\n", index, res); + } else { + dev_vdbg(&acm->data->dev, "intended failure %d\n", res); } set_bit(index, &acm->read_urbs_free); return res; @@ -484,6 +489,7 @@ static void acm_read_bulk_callback(struct urb *urb) int status = urb->status; bool stopped = false; bool stalled = false; + bool cooldown = false; dev_vdbg(&acm->data->dev, "got urb %d, len %d, status %d\n", rb->index, urb->actual_length, status); @@ -510,6 +516,14 @@ static void acm_read_bulk_callback(struct urb *urb) __func__, status); stopped = true; break; + case -EOVERFLOW: + case -EPROTO: + dev_dbg(&acm->data->dev, + "%s - cooling babbling device\n", __func__); + usb_mark_last_busy(acm->dev); + set_bit(rb->index, &acm->urbs_in_error_delay); + cooldown = true; + break; default: dev_dbg(&acm->data->dev, "%s - nonzero urb status received: %d\n", @@ -531,9 +545,11 @@ static void acm_read_bulk_callback(struct urb 
*urb) */ smp_mb__after_atomic(); - if (stopped || stalled) { + if (stopped || stalled || cooldown) { if (stalled) schedule_work(&acm->work); + else if (cooldown) + schedule_delayed_work(&acm->dwork, HZ / 2); return; } @@ -585,6 +601,12 @@ static void acm_softint(struct work_struct *work) } } + if (test_and_clear_bit(ACM_ERROR_DELAY, &acm->flags)) { + for (i = 0; i < ACM_NR; i++) + if (test_and_clear_bit(i, &acm->urbs_in_error_delay)) + acm_submit_read_urb(acm, i, GFP_NOIO); + } + if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags)) tty_port_tty_wakeup(&acm->port); } @@ -1374,6 +1396,7 @@ static int acm_probe(struct usb_interface *intf, acm->readsize = readsize; acm->rx_buflimit = num_rx_buf; INIT_WORK(&acm->work, acm_softint); + INIT_DELAYED_WORK(&acm->dwork, acm_softint); init_waitqueue_head(&acm->wioctl); spin_lock_init(&acm->write_lock); spin_lock_init(&acm->read_lock); @@ -1587,6 +1610,7 @@ static void acm_disconnect(struct usb_interface *intf) acm_kill_urbs(acm); cancel_work_sync(&acm->work); + cancel_delayed_work_sync(&acm->dwork); tty_unregister_device(acm_tty_driver, acm->minor); @@ -1629,6 +1653,8 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message) acm_kill_urbs(acm); cancel_work_sync(&acm->work); + cancel_delayed_work_sync(&acm->dwork); + acm->urbs_in_error_delay = 0; return 0; } diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 515aad0847ee..30380d28a504 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -108,8 +108,11 @@ struct acm { unsigned long flags; # define EVENT_TTY_WAKEUP 0 # define EVENT_RX_STALL 1 +# define ACM_ERROR_DELAY 3 + unsigned long urbs_in_error_delay; /* these need to be restarted after a delay */ struct usb_cdc_line_coding line; /* bits, stop, parity */ - struct work_struct work; /* work queue entry for line discipline waking up */ + struct work_struct work; /* work queue entry for various purposes*/ + struct delayed_work dwork; /* for cool downs needed 
in error recovery */ unsigned int ctrlin; /* input control lines (DCD, DSR, RI, break, overruns) */ unsigned int ctrlout; /* output control lines (DTR, RTS) */ struct async_icount iocount; /* counters for control line changes */ -- GitLab From 629d46a6b162fe3226bee434ac893680cfd82ef5 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 15 Apr 2020 16:17:49 +0200 Subject: [PATCH 1184/1278] UAS: no use logging any details in case of ENODEV commit 5963dec98dc52d52476390485f07a29c30c6a582 upstream. Once a device is gone, the internal state does not matter anymore. There is no need to spam the logs. Signed-off-by: Oliver Neukum Cc: stable Fixes: 326349f824619 ("uas: add dead request list") Link: https://lore.kernel.org/r/20200415141750.811-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 9d97543449e6..10ff28b9b89c 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -191,6 +191,9 @@ static void uas_log_cmd_state(struct scsi_cmnd *cmnd, const char *prefix, struct uas_cmd_info *ci = (void *)&cmnd->SCp; struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; + if (status == -ENODEV) /* too late */ + return; + scmd_printk(KERN_INFO, cmnd, "%s %d uas-tag %d inflight:%s%s%s%s%s%s%s%s%s%s%s%s ", prefix, status, cmdinfo->uas_tag, -- GitLab From 75fe195f72379ed62b2599bfb4e5348b533c8b4a Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 15 Apr 2020 16:17:50 +0200 Subject: [PATCH 1185/1278] UAS: fix deadlock in error handling and PM flushing work commit f6cc6093a729ede1ff5658b493237c42b82ba107 upstream. A SCSI error handler and block runtime PM must not allocate memory with GFP_KERNEL. Furthermore they must not wait for tasks allocating memory with GFP_KERNEL. That means that they cannot share a workqueue with arbitrary tasks. Fix this for UAS using a private workqueue. 
Signed-off-by: Oliver Neukum Fixes: f9dc024a2da1f ("uas: pre_reset and suspend: Fix a few races") Cc: stable Link: https://lore.kernel.org/r/20200415141750.811-2-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 43 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 10ff28b9b89c..20dd8df864c4 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -82,6 +82,19 @@ static void uas_free_streams(struct uas_dev_info *devinfo); static void uas_log_cmd_state(struct scsi_cmnd *cmnd, const char *prefix, int status); +/* + * This driver needs its own workqueue, as we need to control memory allocation. + * + * In the course of error handling and power management uas_wait_for_pending_cmnds() + * needs to flush pending work items. In these contexts we cannot allocate memory + * by doing block IO as we would deadlock. For the same reason we cannot wait + * for anything allocating memory not heeding these constraints. + * + * So we have to control all work items that can be on the workqueue we flush. + * Hence we cannot share a queue and need our own. 
+ */ +static struct workqueue_struct *workqueue; + static void uas_do_work(struct work_struct *work) { struct uas_dev_info *devinfo = @@ -110,7 +123,7 @@ static void uas_do_work(struct work_struct *work) if (!err) cmdinfo->state &= ~IS_IN_WORK_LIST; else - schedule_work(&devinfo->work); + queue_work(workqueue, &devinfo->work); } out: spin_unlock_irqrestore(&devinfo->lock, flags); @@ -135,7 +148,7 @@ static void uas_add_work(struct uas_cmd_info *cmdinfo) lockdep_assert_held(&devinfo->lock); cmdinfo->state |= IS_IN_WORK_LIST; - schedule_work(&devinfo->work); + queue_work(workqueue, &devinfo->work); } static void uas_zap_pending(struct uas_dev_info *devinfo, int result) @@ -1236,7 +1249,31 @@ static struct usb_driver uas_driver = { .id_table = uas_usb_ids, }; -module_usb_driver(uas_driver); +static int __init uas_init(void) +{ + int rv; + + workqueue = alloc_workqueue("uas", WQ_MEM_RECLAIM, 0); + if (!workqueue) + return -ENOMEM; + + rv = usb_register(&uas_driver); + if (rv) { + destroy_workqueue(workqueue); + return -ENOMEM; + } + + return 0; +} + +static void __exit uas_exit(void) +{ + usb_deregister(&uas_driver); + destroy_workqueue(workqueue); +} + +module_init(uas_init); +module_exit(uas_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR( -- GitLab From b4fc711ffae5ba43985e7f17060afa2455a8a0d3 Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Thu, 2 Apr 2020 10:15:21 +0530 Subject: [PATCH 1186/1278] usb: f_fs: Clear OS Extended descriptor counts to zero in ffs_data_reset() commit 1c2e54fbf1da5e5445a0ab132c862b02ccd8d230 upstream. For userspace functions using OS Descriptors, if a function also supplies Extended Property descriptors currently the counts and lengths stored in the ms_os_descs_ext_prop_{count,name_len,data_len} variables are not getting reset to 0 during an unbind or when the epfiles are closed. 
If the same function is re-bound and the descriptors are re-written, this results in those count/length variables monotonically increasing, causing the VLA allocation in _ffs_func_bind() to grow larger and larger at each bind/unbind cycle and eventually fail to allocate. Fix this by clearing the ms_os_descs_ext_prop count & lengths to 0 in ffs_data_reset(). Fixes: f0175ab51993 ("usb: gadget: f_fs: OS descriptors support") Cc: stable@vger.kernel.org Signed-off-by: Udipto Goswami Signed-off-by: Sriharsha Allenki Reviewed-by: Manu Gautam Link: https://lore.kernel.org/r/20200402044521.9312-1-sallenki@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 819fd77a2da4..2ff7c21bbda5 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1727,6 +1727,10 @@ static void ffs_data_reset(struct ffs_data *ffs) ffs->state = FFS_READ_DESCRIPTORS; ffs->setup_state = FFS_NO_SETUP; ffs->flags = 0; + + ffs->ms_os_descs_ext_prop_count = 0; + ffs->ms_os_descs_ext_prop_name_len = 0; + ffs->ms_os_descs_ext_prop_data_len = 0; } -- GitLab From c7dd89cafddb1434a9e4475049d1ba74e443b1d6 Mon Sep 17 00:00:00 2001 From: Kazuhiro Fujita Date: Fri, 27 Mar 2020 18:17:28 +0000 Subject: [PATCH 1187/1278] serial: sh-sci: Make sure status register SCxSR is read in correct sequence commit 3dc4db3662366306e54ddcbda4804acb1258e4ba upstream. For SCIF and HSCIF interfaces the SCxSR register holds the status of data that is to be read next from SCxRDR register, whereas for SCIFA and SCIFB interfaces SCxSR register holds status of data that was previously read from SCxRDR register. This patch makes sure the status register is read depending on the port types so that errors are caught accordingly.
Cc: Signed-off-by: Kazuhiro Fujita Signed-off-by: Hao Bui Signed-off-by: KAZUMI HARADA Signed-off-by: Lad Prabhakar Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/1585333048-31828-1-git-send-email-kazuhiro.fujita.jg@renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 333de7d3fe86..06cf474072d6 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -841,9 +841,16 @@ static void sci_receive_chars(struct uart_port *port) tty_insert_flip_char(tport, c, TTY_NORMAL); } else { for (i = 0; i < count; i++) { - char c = serial_port_in(port, SCxRDR); - - status = serial_port_in(port, SCxSR); + char c; + + if (port->type == PORT_SCIF || + port->type == PORT_HSCIF) { + status = serial_port_in(port, SCxSR); + c = serial_port_in(port, SCxRDR); + } else { + c = serial_port_in(port, SCxRDR); + status = serial_port_in(port, SCxSR); + } if (uart_handle_sysrq_char(port, c)) { count--; i--; continue; -- GitLab From a63c07ef001fab45ef713fc7c9046bda0720b6c1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 25 Oct 2017 16:59:43 -0700 Subject: [PATCH 1188/1278] xfs: validate sb_logsunit is a multiple of the fs blocksize commit 9c92ee208b1faa0ef2cc899b85fd0607b6fac7fe upstream. Make sure the log stripe unit is sane before proceeding with mounting. AFAICT this means that logsunit has to be 0, 1, or a multiple of the fs block size. Found this by setting the LSB of logsunit in xfs/350 and watching the system crash as soon as we try to write to the log. Signed-off-by: Darrick J. 
Wong Reviewed-by: Brian Foster Signed-off-by: Suraj Jitindar Singh Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4e768e606998..360e32220f93 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -608,6 +608,7 @@ xfs_log_mount( xfs_daddr_t blk_offset, int num_bblks) { + bool fatal = xfs_sb_version_hascrc(&mp->m_sb); int error = 0; int min_logfsbs; @@ -659,9 +660,20 @@ xfs_log_mount( XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks), XFS_MAX_LOG_BYTES); error = -EINVAL; + } else if (mp->m_sb.sb_logsunit > 1 && + mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) { + xfs_warn(mp, + "log stripe unit %u bytes must be a multiple of block size", + mp->m_sb.sb_logsunit); + error = -EINVAL; + fatal = true; } if (error) { - if (xfs_sb_version_hascrc(&mp->m_sb)) { + /* + * Log check errors are always fatal on v5; or whenever bad + * metadata leads to a crash. + */ + if (fatal) { xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!"); ASSERT(0); goto out_free_log; -- GitLab From f970d41d54c5923b975b8886784ee4dc35c0e423 Mon Sep 17 00:00:00 2001 From: kaixuxia Date: Tue, 3 Sep 2019 21:06:50 -0700 Subject: [PATCH 1189/1278] xfs: Fix deadlock between AGI and AGF with RENAME_WHITEOUT commit bc56ad8c74b8588685c2875de0df8ab6974828ef upstream. When performing rename operation with RENAME_WHITEOUT flag, we will hold AGF lock to allocate or free extents in manipulating the dirents firstly, and then doing the xfs_iunlink_remove() call last to hold AGI lock to modify the tmpfile info, so we the lock order AGI->AGF. The big problem here is that we have an ordering constraint on AGF and AGI locking - inode allocation locks the AGI, then can allocate a new extent for new inodes, locking the AGF after the AGI. Hence the ordering that is imposed by other parts of the code is AGI before AGF. So we get an ABBA deadlock between the AGI and AGF here. Process A: Call trace: ? 
__schedule+0x2bd/0x620 schedule+0x33/0x90 schedule_timeout+0x17d/0x290 __down_common+0xef/0x125 ? xfs_buf_find+0x215/0x6c0 [xfs] down+0x3b/0x50 xfs_buf_lock+0x34/0xf0 [xfs] xfs_buf_find+0x215/0x6c0 [xfs] xfs_buf_get_map+0x37/0x230 [xfs] xfs_buf_read_map+0x29/0x190 [xfs] xfs_trans_read_buf_map+0x13d/0x520 [xfs] xfs_read_agf+0xa6/0x180 [xfs] ? schedule_timeout+0x17d/0x290 xfs_alloc_read_agf+0x52/0x1f0 [xfs] xfs_alloc_fix_freelist+0x432/0x590 [xfs] ? down+0x3b/0x50 ? xfs_buf_lock+0x34/0xf0 [xfs] ? xfs_buf_find+0x215/0x6c0 [xfs] xfs_alloc_vextent+0x301/0x6c0 [xfs] xfs_ialloc_ag_alloc+0x182/0x700 [xfs] ? _xfs_trans_bjoin+0x72/0xf0 [xfs] xfs_dialloc+0x116/0x290 [xfs] xfs_ialloc+0x6d/0x5e0 [xfs] ? xfs_log_reserve+0x165/0x280 [xfs] xfs_dir_ialloc+0x8c/0x240 [xfs] xfs_create+0x35a/0x610 [xfs] xfs_generic_create+0x1f1/0x2f0 [xfs] ... Process B: Call trace: ? __schedule+0x2bd/0x620 ? xfs_bmapi_allocate+0x245/0x380 [xfs] schedule+0x33/0x90 schedule_timeout+0x17d/0x290 ? xfs_buf_find+0x1fd/0x6c0 [xfs] __down_common+0xef/0x125 ? xfs_buf_get_map+0x37/0x230 [xfs] ? xfs_buf_find+0x215/0x6c0 [xfs] down+0x3b/0x50 xfs_buf_lock+0x34/0xf0 [xfs] xfs_buf_find+0x215/0x6c0 [xfs] xfs_buf_get_map+0x37/0x230 [xfs] xfs_buf_read_map+0x29/0x190 [xfs] xfs_trans_read_buf_map+0x13d/0x520 [xfs] xfs_read_agi+0xa8/0x160 [xfs] xfs_iunlink_remove+0x6f/0x2a0 [xfs] ? current_time+0x46/0x80 ? xfs_trans_ichgtime+0x39/0xb0 [xfs] xfs_rename+0x57a/0xae0 [xfs] xfs_vn_rename+0xe4/0x150 [xfs] ... In this patch we move the xfs_iunlink_remove() call to before acquiring the AGF lock to preserve correct AGI/AGF locking order. [Minor massage required to backport to apply due to removal of out_bmap_cancel: error path label upstream as a result of code rework. Only change was to the last code block removed by the patch. Functionally equivalent to upstream.] Signed-off-by: kaixuxia Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong Signed-off-by: Suraj Jitindar Singh Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode.c | 85 +++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 43 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index cb4833d06467..7cfbe2b0f886 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3035,7 +3035,8 @@ xfs_rename( &dfops, &first_block, spaceres); /* - * Set up the target. + * Check for expected errors before we dirty the transaction + * so we can return an error without a transaction abort. */ if (target_ip == NULL) { /* @@ -3047,6 +3048,46 @@ xfs_rename( if (error) goto out_trans_cancel; } + } else { + /* + * If target exists and it's a directory, check that whether + * it can be destroyed. + */ + if (S_ISDIR(VFS_I(target_ip)->i_mode) && + (!xfs_dir_isempty(target_ip) || + (VFS_I(target_ip)->i_nlink > 2))) { + error = -EEXIST; + goto out_trans_cancel; + } + } + + /* + * Directory entry creation below may acquire the AGF. Remove + * the whiteout from the unlinked list first to preserve correct + * AGI/AGF locking order. This dirties the transaction so failures + * after this point will abort and log recovery will clean up the + * mess. + * + * For whiteouts, we need to bump the link count on the whiteout + * inode. After this point, we have a real link, clear the tmpfile + * state flag from the inode so it doesn't accidentally get misused + * in future. + */ + if (wip) { + ASSERT(VFS_I(wip)->i_nlink == 0); + error = xfs_iunlink_remove(tp, wip); + if (error) + goto out_trans_cancel; + + xfs_bumplink(tp, wip); + xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); + VFS_I(wip)->i_state &= ~I_LINKABLE; + } + + /* + * Set up the target. 
+ */ + if (target_ip == NULL) { /* * If target does not exist and the rename crosses * directories, adjust the target directory link count @@ -3067,22 +3108,6 @@ xfs_rename( goto out_bmap_cancel; } } else { /* target_ip != NULL */ - /* - * If target exists and it's a directory, check that both - * target and source are directories and that target can be - * destroyed, or that neither is a directory. - */ - if (S_ISDIR(VFS_I(target_ip)->i_mode)) { - /* - * Make sure target dir is empty. - */ - if (!(xfs_dir_isempty(target_ip)) || - (VFS_I(target_ip)->i_nlink > 2)) { - error = -EEXIST; - goto out_trans_cancel; - } - } - /* * Link the source inode under the target name. * If the source inode is a directory and we are moving @@ -3175,32 +3200,6 @@ xfs_rename( if (error) goto out_bmap_cancel; - /* - * For whiteouts, we need to bump the link count on the whiteout inode. - * This means that failures all the way up to this point leave the inode - * on the unlinked list and so cleanup is a simple matter of dropping - * the remaining reference to it. If we fail here after bumping the link - * count, we're shutting down the filesystem so we'll never see the - * intermediate state on disk. - */ - if (wip) { - ASSERT(VFS_I(wip)->i_nlink == 0); - error = xfs_bumplink(tp, wip); - if (error) - goto out_bmap_cancel; - error = xfs_iunlink_remove(tp, wip); - if (error) - goto out_bmap_cancel; - xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); - - /* - * Now we have a real link, clear the "I'm a tmpfile" state - * flag from the inode so it doesn't accidentally get misused in - * future. 
- */ - VFS_I(wip)->i_state &= ~I_LINKABLE; - } - xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); if (new_parent) -- GitLab From a5aec8f253540c6c3538ab72c2b14809c1a29e85 Mon Sep 17 00:00:00 2001 From: Clement Leger Date: Fri, 4 Oct 2019 09:37:36 +0200 Subject: [PATCH 1190/1278] remoteproc: Fix wrong rvring index computation commit 00a0eec59ddbb1ce966b19097d8a8d2f777e726a upstream. Index of rvring is computed using pointer arithmetic. However, since rvring->rvdev->vring is the base of the vring array, computation of rvring idx should be reversed. It previously lead to writing at negative indices in the resource table. Signed-off-by: Clement Leger Link: https://lore.kernel.org/r/20191004073736.8327-1-cleger@kalray.eu Signed-off-by: Bjorn Andersson Cc: Doug Anderson Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/remoteproc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index cc733b89560a..8f4fa1a52f05 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -288,7 +288,7 @@ void rproc_free_vring(struct rproc_vring *rvring) { int size = PAGE_ALIGN(vring_size(rvring->len, rvring->align)); struct rproc *rproc = rvring->rvdev->rproc; - int idx = rvring->rvdev->vring - rvring; + int idx = rvring - rvring->rvdev->vring; struct fw_rsc_vdev *rsc; dma_free_coherent(rproc->dev.parent, size, rvring->va, rvring->dma); -- GitLab From 42fa3dc2692e5f0b215b9284efac4dfc070f5dab Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Sun, 3 Mar 2019 15:04:18 +0800 Subject: [PATCH 1191/1278] mtd: cfi: fix deadloop in cfi_cmdset_0002.c do_write_buffer commit d9b8a67b3b95a5c5aae6422b8113adc1c2485f2b upstream. In function do_write_buffer(), in the for loop, there is a case chip_ready() returns 1 while chip_good() returns 0, so it never break the loop. 
To fix this, chip_good() is enough and it should time out if it stays bad for a while. Fixes: dfeae1073583 ("mtd: cfi_cmdset_0002: Change write buffer to check correct value") Signed-off-by: Yi Huaijie Signed-off-by: Liu Jian Reviewed-by: Tokunori Ikegami Signed-off-by: Richard Weinberger Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/chips/cfi_cmdset_0002.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index e773dc6fdd3c..1f0d83086cb0 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -1883,7 +1883,11 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip, continue; } - if (time_after(jiffies, timeo) && !chip_ready(map, adr)) + /* + * We check "time_after" and "!chip_good" before checking "chip_good" to avoid + * the failure due to scheduling. + */ + if (time_after(jiffies, timeo) && !chip_good(map, adr, datum)) break; if (chip_good(map, adr, datum)) { -- GitLab From ffd115f2dca955ce0782e801d488ecfaccde421f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 20 Feb 2020 20:04:03 -0800 Subject: [PATCH 1192/1278] include/uapi/linux/swab.h: fix userspace breakage, use __BITS_PER_LONG for swap commit 467d12f5c7842896d2de3ced74e4147ee29e97c8 upstream.
QEMU has a funny new build error message when I use the upstream kernel headers: CC block/file-posix.o In file included from /home/cborntra/REPOS/qemu/include/qemu/timer.h:4, from /home/cborntra/REPOS/qemu/include/qemu/timed-average.h:29, from /home/cborntra/REPOS/qemu/include/block/accounting.h:28, from /home/cborntra/REPOS/qemu/include/block/block_int.h:27, from /home/cborntra/REPOS/qemu/block/file-posix.c:30: /usr/include/linux/swab.h: In function `__swab': /home/cborntra/REPOS/qemu/include/qemu/bitops.h:20:34: error: "sizeof" is not defined, evaluates to 0 [-Werror=undef] 20 | #define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE) | ^~~~~~ /home/cborntra/REPOS/qemu/include/qemu/bitops.h:20:41: error: missing binary operator before token "(" 20 | #define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE) | ^ cc1: all warnings being treated as errors make: *** [/home/cborntra/REPOS/qemu/rules.mak:69: block/file-posix.o] Error 1 rm tests/qemu-iotests/socket_scm_helper.o This was triggered by commit d5767057c9a ("uapi: rename ext2_swab() to swab() and share globally in swab.h"). That patch is doing #include <asm/bitsperlong.h> but it uses BITS_PER_LONG. The kernel file asm/bitsperlong.h provide only __BITS_PER_LONG.
Let us use the __ variant in swab.h Link: http://lkml.kernel.org/r/20200213142147.17604-1-borntraeger@de.ibm.com Fixes: d5767057c9a ("uapi: rename ext2_swab() to swab() and share globally in swab.h") Signed-off-by: Christian Borntraeger Cc: Yury Norov Cc: Allison Randal Cc: Joe Perches Cc: Thomas Gleixner Cc: William Breathitt Gray Cc: Torsten Hilbrich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/swab.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index fa7f97da5b76..7272f85d6d6a 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -135,9 +135,9 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) static __always_inline unsigned long __swab(const unsigned long y) { -#if BITS_PER_LONG == 64 +#if __BITS_PER_LONG == 64 return __swab64(y); -#else /* BITS_PER_LONG == 32 */ +#else /* __BITS_PER_LONG == 32 */ return __swab32(y); #endif } -- GitLab From 19e6af05a22a670eea3931cc0e83ca58cb3db298 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 12 Apr 2019 21:59:25 +0000 Subject: [PATCH 1193/1278] binder: take read mode of mmap_sem in binder_alloc_free_page() commit 60d4885710836595192c42d3e04b27551d30ec91 upstream. Restore the behavior of locking mmap_sem for reading in binder_alloc_free_page(), as was first done in commit 3013bf62b67a ("binder: reduce mmap_sem write-side lock"). That change was inadvertently reverted by commit 5cec2d2e5839 ("binder: fix race between munmap() and direct reclaim"). In addition, change the name of the label for the error path to accurately reflect that we're taking the lock for reading. Backporting note: This fix is only needed when *both* of the commits mentioned above are applied. That's an unlikely situation since they both landed during the development of v5.1 but only one of them is targeted for stable.
Fixes: 5cec2d2e5839 ("binder: fix race between munmap() and direct reclaim") Signed-off-by: Tyler Hicks Acked-by: Todd Kjos Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 9d5cb3b7a7a2..41474eec2181 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -951,8 +951,8 @@ enum lru_status binder_alloc_free_page(struct list_head *item, mm = alloc->vma_vm_mm; if (!mmget_not_zero(mm)) goto err_mmget; - if (!down_write_trylock(&mm->mmap_sem)) - goto err_down_write_mmap_sem_failed; + if (!down_read_trylock(&mm->mmap_sem)) + goto err_down_read_mmap_sem_failed; vma = binder_alloc_get_vma(alloc); list_lru_isolate(lru, item); @@ -967,7 +967,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, trace_binder_unmap_user_end(alloc, index); } - up_write(&mm->mmap_sem); + up_read(&mm->mmap_sem); mmput(mm); trace_binder_unmap_kernel_start(alloc, index); @@ -982,7 +982,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, mutex_unlock(&alloc->mutex); return LRU_REMOVED_RETRY; -err_down_write_mmap_sem_failed: +err_down_read_mmap_sem_failed: mmput_async(mm); err_mmget: err_page_already_freed: -- GitLab From 1103775062540414c187835af57c7fcb45e6da63 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 31 Jan 2020 16:25:50 -0800 Subject: [PATCH 1194/1278] usb: dwc3: gadget: Do link recovery for SS and SSP commit d0550cd20e52558ecf6847a0f96ebd5d944c17e4 upstream. The controller always supports link recovery for device in SS and SSP. Remove the speed limit check. Also, when the device is in RESUME or RESET state, it means the controller received the resume/reset request. The driver must send the link recovery to acknowledge the request. They are valid states for the driver to send link recovery. 
Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver") Fixes: ee5cd41c9117 ("usb: dwc3: Update speed checks for SuperSpeedPlus") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 76a0020b0f2e..4149d751719e 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1641,7 +1641,6 @@ static int __dwc3_gadget_wakeup(struct dwc3 *dwc) u32 reg; u8 link_state; - u8 speed; /* * According to the Databook Remote wakeup request should @@ -1651,16 +1650,13 @@ static int __dwc3_gadget_wakeup(struct dwc3 *dwc) */ reg = dwc3_readl(dwc->regs, DWC3_DSTS); - speed = reg & DWC3_DSTS_CONNECTSPD; - if ((speed == DWC3_DSTS_SUPERSPEED) || - (speed == DWC3_DSTS_SUPERSPEED_PLUS)) - return 0; - link_state = DWC3_DSTS_USBLNKST(reg); switch (link_state) { + case DWC3_LINK_STATE_RESET: case DWC3_LINK_STATE_RX_DET: /* in HS, means Early Suspend */ case DWC3_LINK_STATE_U3: /* in HS, means SUSPEND */ + case DWC3_LINK_STATE_RESUME: break; default: return -EINVAL; -- GitLab From 2934e27a5bd1780e6d8534f37bed43fd6492222d Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 28 Mar 2020 18:12:46 -0700 Subject: [PATCH 1195/1278] usb: gadget: udc: bdc: Remove unnecessary NULL checks in bdc_req_complete commit 09b04abb70f096333bef6bc95fa600b662e7ee13 upstream. 
When building with Clang + -Wtautological-pointer-compare: drivers/usb/gadget/udc/bdc/bdc_ep.c:543:28: warning: comparison of address of 'req->queue' equal to a null pointer is always false [-Wtautological-pointer-compare] if (req == NULL || &req->queue == NULL || &req->usb_req == NULL) ~~~~~^~~~~ ~~~~ drivers/usb/gadget/udc/bdc/bdc_ep.c:543:51: warning: comparison of address of 'req->usb_req' equal to a null pointer is always false [-Wtautological-pointer-compare] if (req == NULL || &req->queue == NULL || &req->usb_req == NULL) ~~~~~^~~~~~~ ~~~~ 2 warnings generated. As it notes, these statements will always evaluate to false so remove them. Fixes: efed421a94e6 ("usb: gadget: Add UDC driver for Broadcom USB3.0 device controller IP BDC") Link: https://github.com/ClangBuiltLinux/linux/issues/749 Signed-off-by: Nathan Chancellor Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/bdc/bdc_ep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/bdc/bdc_ep.c b/drivers/usb/gadget/udc/bdc/bdc_ep.c index bfd8f7ade935..be9f40bc9c12 100644 --- a/drivers/usb/gadget/udc/bdc/bdc_ep.c +++ b/drivers/usb/gadget/udc/bdc/bdc_ep.c @@ -546,7 +546,7 @@ static void bdc_req_complete(struct bdc_ep *ep, struct bdc_req *req, { struct bdc *bdc = ep->bdc; - if (req == NULL || &req->queue == NULL || &req->usb_req == NULL) + if (req == NULL) return; dev_dbg(bdc->dev, "%s ep:%s status:%d\n", __func__, ep->name, status); -- GitLab From 851315332f7d419486ea706ee81791dd1addb870 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 10 Mar 2020 22:16:54 +0800 Subject: [PATCH 1196/1278] iio:ad7797: Use correct attribute_group commit 28535877ac5b2b84f0d394fd67a5ec71c0c48b10 upstream. It should use ad7797_attribute_group in ad7797_info, according to commit ("iio:ad7793: Add support for the ad7796 and ad7797"). Scale is fixed for the ad7796 and not programmable, hence should not have the scale_available attribute. 
Fixes: fd1a8b912841 ("iio:ad7793: Add support for the ad7796 and ad7797") Signed-off-by: YueHaibing Reviewed-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ad7793.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c index 07246a6037e3..f64781d03d5d 100644 --- a/drivers/iio/adc/ad7793.c +++ b/drivers/iio/adc/ad7793.c @@ -543,7 +543,7 @@ static const struct iio_info ad7797_info = { .read_raw = &ad7793_read_raw, .write_raw = &ad7793_write_raw, .write_raw_get_fmt = &ad7793_write_raw_get_fmt, - .attrs = &ad7793_attribute_group, + .attrs = &ad7797_attribute_group, .validate_trigger = ad_sd_validate_trigger, .driver_module = THIS_MODULE, }; -- GitLab From ea0f3c4ef7a5499ce47aa3361a252c58efa75323 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 27 Mar 2020 07:50:40 +0300 Subject: [PATCH 1197/1278] nfsd: memory corruption in nfsd4_lock() commit e1e8399eee72e9d5246d4d1bcacd793debe34dd3 upstream. New struct nfsd4_blocked_lock allocated in find_or_allocate_block() does not initialize nbl_list and nbl_lru. If conflock allocation fails, rollback can call list_del_init(), access uninitialized fields and corrupt memory. v2: just initialize nbl_list and nbl_lru right after nbl allocation.
Fixes: 76d348fadff5 ("nfsd: have nfsd4_lock use blocking locks for v4.1+ lock") Signed-off-by: Vasily Averin Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fca8b2e7fbeb..d5d1c70bb927 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -246,6 +246,8 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, if (!nbl) { nbl= kmalloc(sizeof(*nbl), GFP_KERNEL); if (nbl) { + INIT_LIST_HEAD(&nbl->nbl_list); + INIT_LIST_HEAD(&nbl->nbl_lru); fh_copy_shallow(&nbl->nbl_fh, fh); locks_init_lock(&nbl->nbl_lock); nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client, -- GitLab From 8e38d5f1c5e3d7e9c6b840e4de815e2c7717c868 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 27 Mar 2020 23:28:26 +0100 Subject: [PATCH 1198/1278] i2c: altera: use proper variable to hold errno commit edb2c9dd3948738ef030c32b948543e84f4d3f81 upstream. device_property_read_u32() returns errno or 0, so we should use the integer variable 'ret' and not the u32 'val' to hold the retval. 
Fixes: 0560ad576268 ("i2c: altera: Add Altera I2C Controller driver") Signed-off-by: Wolfram Sang Reviewed-by: Thor Thayer Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-altera.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-altera.c b/drivers/i2c/busses/i2c-altera.c index a1cdcfc74acf..8915ee30a5b4 100644 --- a/drivers/i2c/busses/i2c-altera.c +++ b/drivers/i2c/busses/i2c-altera.c @@ -395,7 +395,6 @@ static int altr_i2c_probe(struct platform_device *pdev) struct altr_i2c_dev *idev = NULL; struct resource *res; int irq, ret; - u32 val; idev = devm_kzalloc(&pdev->dev, sizeof(*idev), GFP_KERNEL); if (!idev) @@ -422,17 +421,17 @@ static int altr_i2c_probe(struct platform_device *pdev) init_completion(&idev->msg_complete); spin_lock_init(&idev->lock); - val = device_property_read_u32(idev->dev, "fifo-size", + ret = device_property_read_u32(idev->dev, "fifo-size", &idev->fifo_size); - if (val) { + if (ret) { dev_err(&pdev->dev, "FIFO size set to default of %d\n", ALTR_I2C_DFLT_FIFO_SZ); idev->fifo_size = ALTR_I2C_DFLT_FIFO_SZ; } - val = device_property_read_u32(idev->dev, "clock-frequency", + ret = device_property_read_u32(idev->dev, "clock-frequency", &idev->bus_clk_rate); - if (val) { + if (ret) { dev_err(&pdev->dev, "Default to 100kHz\n"); idev->bus_clk_rate = 100000; /* default clock rate */ } -- GitLab From a49df6a1ccefdd8b0f815ae4e9b627888b2a57b1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 14 Apr 2020 12:27:08 -0300 Subject: [PATCH 1199/1278] net/cxgb4: Check the return from t4_query_params properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c799fca8baf18d1bbbbad6c3b736eefbde8bdb90 upstream. Positive return values are also failures that don't set val, although this probably can't happen. 
Fixes gcc 10 warning: drivers/net/ethernet/chelsio/cxgb4/t4_hw.c: In function ‘t4_phy_fw_ver’: drivers/net/ethernet/chelsio/cxgb4/t4_hw.c:3747:14: warning: ‘val’ may be used uninitialized in this function [-Wmaybe-uninitialized] 3747 | *phy_fw_ver = val; Fixes: 01b6961410b7 ("cxgb4: Add PHY firmware support for T420-BT cards") Signed-off-by: Jason Gunthorpe Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 39bcf27902e4..0f126ce4645f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3609,7 +3609,7 @@ int t4_phy_fw_ver(struct adapter *adap, int *phy_fw_ver) FW_PARAMS_PARAM_Z_V(FW_PARAMS_PARAM_DEV_PHYFW_VERSION)); ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val); - if (ret < 0) + if (ret) return ret; *phy_fw_ver = val; return 0; -- GitLab From d5388ae4224766b4c282d79dae6bfb2c6548234a Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 15 Apr 2020 16:42:33 +0200 Subject: [PATCH 1200/1278] ARM: dts: bcm283x: Disable dsi0 node commit 90444b958461a5f8fc299ece0fe17eab15cba1e1 upstream. Since its inception the module was meant to be disabled by default, but the original commit failed to add the relevant property.
Fixes: 4aba4cf82054 ("ARM: dts: bcm2835: Add the DSI module nodes and clocks") Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Eric Anholt Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm283x.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index fdb018e1278f..9d1e1061d8af 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -454,6 +454,7 @@ "dsi0_ddr2", "dsi0_ddr"; + status = "disabled"; }; thermal: thermal@7e212000 { -- GitLab From f24768983d207054df476aa5d940e756002168ed Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 17 Apr 2020 11:28:42 -0700 Subject: [PATCH 1201/1278] perf/core: fix parent pid/tid in task exit events commit f3bed55e850926614b9898fe982f66d2541a36a5 upstream. Current logic yields the child task as the parent. Before: $ perf record bash -c "perf list > /dev/null" $ perf script -D |grep 'FORK\|EXIT' 4387036190981094 0x5a70 [0x30]: PERF_RECORD_FORK(10472:10472):(10470:10470) 4387036606207580 0xf050 [0x30]: PERF_RECORD_EXIT(10472:10472):(10472:10472) 4387036607103839 0x17150 [0x30]: PERF_RECORD_EXIT(10470:10470):(10470:10470) ^ Note the repeated values here -------------------/ After: 383281514043 0x9d8 [0x30]: PERF_RECORD_FORK(2268:2268):(2266:2266) 383442003996 0x2180 [0x30]: PERF_RECORD_EXIT(2268:2268):(2266:2266) 383451297778 0xb70 [0x30]: PERF_RECORD_EXIT(2266:2266):(2265:2265) Fixes: 94d5d1b2d891 ("perf_counter: Report the cloning task as parent on perf_counter_fork()") Reported-by: KP Singh Signed-off-by: Ian Rogers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200417182842.12522-1-irogers@google.com Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index c16ce11049de..5636c9c48545 100644 --- a/kernel/events/core.c +++ 
b/kernel/events/core.c @@ -6610,10 +6610,17 @@ static void perf_event_task_output(struct perf_event *event, goto out; task_event->event_id.pid = perf_event_pid(event, task); - task_event->event_id.ppid = perf_event_pid(event, current); - task_event->event_id.tid = perf_event_tid(event, task); - task_event->event_id.ptid = perf_event_tid(event, current); + + if (task_event->event_id.header.type == PERF_RECORD_EXIT) { + task_event->event_id.ppid = perf_event_pid(event, + task->real_parent); + task_event->event_id.ptid = perf_event_pid(event, + task->real_parent); + } else { /* PERF_RECORD_FORK */ + task_event->event_id.ppid = perf_event_pid(event, current); + task_event->event_id.ptid = perf_event_tid(event, current); + } task_event->event_id.time = perf_event_clock(event); -- GitLab From bc0a359b484ef6e0f1c925367689f270580e8c07 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Mon, 20 Apr 2020 18:14:17 -0700 Subject: [PATCH 1202/1278] mm: shmem: disable interrupt when acquiring info->lock in userfaultfd_copy path commit 94b7cc01da5a3cc4f3da5e0ff492ef008bb555d6 upstream. Syzbot reported the below lockdep splat: WARNING: possible irq lock inversion dependency detected 5.6.0-rc7-syzkaller #0 Not tainted -------------------------------------------------------- syz-executor.0/10317 just changed the state of lock: ffff888021d16568 (&(&info->lock)->rlock){+.+.}, at: spin_lock include/linux/spinlock.h:338 [inline] ffff888021d16568 (&(&info->lock)->rlock){+.+.}, at: shmem_mfill_atomic_pte+0x1012/0x21c0 mm/shmem.c:2407 but this lock was taken by another, SOFTIRQ-safe lock in the past: (&(&xa->xa_lock)->rlock#5){..-.} and interrupts could create inverse lock ordering between them. 
other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(&info->lock)->rlock); local_irq_disable(); lock(&(&xa->xa_lock)->rlock#5); lock(&(&info->lock)->rlock); lock(&(&xa->xa_lock)->rlock#5); *** DEADLOCK *** The full report is quite lengthy, please see: https://lore.kernel.org/linux-mm/alpine.LSU.2.11.2004152007370.13597@eggly.anvils/T/#m813b412c5f78e25ca8c6c7734886ed4de43f241d It is because CPU 0 held info->lock with IRQ enabled in userfaultfd_copy path, then CPU 1 is splitting a THP which held xa_lock and info->lock in IRQ disabled context at the same time. If softirq comes in to acquire xa_lock, the deadlock would be triggered. The fix is to acquire/release info->lock with *_irq version instead of plain spin_{lock,unlock} to make it softirq safe. Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support") Reported-by: syzbot+e27980339d305f2dbfd9@syzkaller.appspotmail.com Signed-off-by: Yang Shi Signed-off-by: Andrew Morton Tested-by: syzbot+e27980339d305f2dbfd9@syzkaller.appspotmail.com Acked-by: Hugh Dickins Cc: Andrea Arcangeli Link: http://lkml.kernel.org/r/1587061357-122619-1-git-send-email-yang.shi@linux.alibaba.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 0b6db162083c..f9a1e0ba259f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2330,11 +2330,11 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, lru_cache_add_anon(page); - spin_lock(&info->lock); + spin_lock_irq(&info->lock); info->alloced++; inode->i_blocks += BLOCKS_PER_PAGE; shmem_recalc_inode(inode); - spin_unlock(&info->lock); + spin_unlock_irq(&info->lock); inc_mm_counter(dst_mm, mm_counter_file(page)); page_add_file_rmap(page, false); -- GitLab From 37e30a7a8cb1c910c893f7490a74d09f61bfc8d6 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Sat, 18 Apr 2020 16:26:53 
-0700 Subject: [PATCH 1203/1278] bpf, x86: Fix encoding for lower 8-bit registers in BPF_STX BPF_B [ Upstream commit aee194b14dd2b2bde6252b3acf57d36dccfc743a ] This patch fixes an encoding bug in emit_stx for BPF_B when the source register is BPF_REG_FP. The current implementation for BPF_STX BPF_B in emit_stx saves one REX byte when the operands can be encoded using Mod-R/M alone. The lower 8 bits of registers %rax, %rbx, %rcx, and %rdx can be accessed without using a REX prefix via %al, %bl, %cl, and %dl, respectively. Other registers, (e.g., %rsi, %rdi, %rbp, %rsp) require a REX prefix to use their 8-bit equivalents (%sil, %dil, %bpl, %spl). The current code checks if the source for BPF_STX BPF_B is BPF_REG_1 or BPF_REG_2 (which map to %rdi and %rsi), in which case it emits the required REX prefix. However, it misses the case when the source is BPF_REG_FP (mapped to %rbp). The result is that BPF_STX BPF_B with BPF_REG_FP as the source operand will read from register %ch instead of the correct %bpl. This patch fixes the problem by fixing and refactoring the check on which registers need the extra REX byte. Since no BPF registers map to %rsp, there is no need to handle %spl. Fixes: 622582786c9e0 ("net: filter: x86: internal BPF JIT") Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200418232655.23870-1-luke.r.nels@gmail.com Signed-off-by: Sasha Levin --- arch/x86/net/bpf_jit_comp.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index cdb386fa7101..0415c0cd4a19 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -153,6 +153,19 @@ static bool is_ereg(u32 reg) BIT(BPF_REG_AX)); } +/* + * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64 + * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte + * of encoding. 
al,cl,dl,bl have simpler encoding. + */ +static bool is_ereg_8l(u32 reg) +{ + return is_ereg(reg) || + (1 << reg) & (BIT(BPF_REG_1) | + BIT(BPF_REG_2) | + BIT(BPF_REG_FP)); +} + /* add modifiers if 'reg' maps to x64 registers r8..r15 */ static u8 add_1mod(u8 byte, u32 reg) { @@ -770,9 +783,8 @@ st: if (is_imm8(insn->off)) /* STX: *(u8*)(dst_reg + off) = src_reg */ case BPF_STX | BPF_MEM | BPF_B: /* emit 'mov byte ptr [rax + off], al' */ - if (is_ereg(dst_reg) || is_ereg(src_reg) || - /* have to add extra byte for x86 SIL, DIL regs */ - src_reg == BPF_REG_1 || src_reg == BPF_REG_2) + if (is_ereg(dst_reg) || is_ereg_8l(src_reg)) + /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); else EMIT1(0x88); -- GitLab From 99f7af9f5b6e442cf650608854722885e282b9f3 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Tue, 7 Apr 2020 19:27:39 +0200 Subject: [PATCH 1204/1278] x86: hyperv: report value of misc_features [ Upstream commit 97d9f1c43bedd400301d6f1eff54d46e8c636e47 ] A few kernel features depend on ms_hyperv.misc_features, but unlike its siblings ->features and ->hints, the value was never reported during boot. 
Signed-off-by: Olaf Hering Link: https://lore.kernel.org/r/20200407172739.31371-1-olaf@aepfle.de Signed-off-by: Wei Liu Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/mshyperv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c0201b11e9e2..a6b323a3a630 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -178,8 +178,8 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); - pr_info("Hyper-V: features 0x%x, hints 0x%x\n", - ms_hyperv.features, ms_hyperv.hints); + pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n", + ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features); ms_hyperv.max_vp_index = cpuid_eax(HVCPUID_IMPLEMENTATION_LIMITS); ms_hyperv.max_lp_index = cpuid_ebx(HVCPUID_IMPLEMENTATION_LIMITS); -- GitLab From 27b8978b06883024a46fc38d8569100f48986984 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 12 Apr 2020 13:11:11 -0700 Subject: [PATCH 1205/1278] xfs: fix partially uninitialized structure in xfs_reflink_remap_extent [ Upstream commit c142932c29e533ee892f87b44d8abc5719edceec ] In the reflink extent remap function, it turns out that uirec (the block mapping corresponding only to the part of the passed-in mapping that got unmapped) was not fully initialized. Specifically, br_state was not being copied from the passed-in struct to the uirec. This could lead to unpredictable results such as the reflinked mapping being marked unwritten in the destination file. Signed-off-by: Darrick J. 
Wong Reviewed-by: Brian Foster Signed-off-by: Sasha Levin --- fs/xfs/xfs_reflink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 37e603bf1591..db7f9fdd20a3 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1125,6 +1125,7 @@ xfs_reflink_remap_extent( uirec.br_startblock = irec->br_startblock + rlen; uirec.br_startoff = irec->br_startoff + rlen; uirec.br_blockcount = unmap_len - rlen; + uirec.br_state = irec->br_state; unmap_len = rlen; /* If this isn't a real mapping, we're done. */ -- GitLab From 9b04bd4db63612609acc9b262aa9668464c3319b Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Wed, 8 Apr 2020 15:26:10 +0200 Subject: [PATCH 1206/1278] scsi: target: fix PR IN / READ FULL STATUS for FC [ Upstream commit 8fed04eb79a74cbf471dfaa755900a51b37273ab ] Creation of the response to READ FULL STATUS fails for FC based reservations. Reason is the too high loop limit (< 24) in fc_get_pr_transport_id(). The string representation of FC WWPN is 23 chars long only ("11:22:33:44:55:66:77:88"). So when i is 23, the loop body is executed a last time for the ending '\0' of the string and thus hex2bin() reports an error. Link: https://lore.kernel.org/r/20200408132610.14623-3-bstroesser@ts.fujitsu.com Signed-off-by: Bodo Stroesser Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/target_core_fabric_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 95aa47ac4dcd..f8621fe67376 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -76,7 +76,7 @@ static int fc_get_pr_transport_id( * encoded TransportID. 
*/ ptr = &se_nacl->initiatorname[0]; - for (i = 0; i < 24; ) { + for (i = 0; i < 23; ) { if (!strncmp(&ptr[i], ":", 1)) { i++; continue; -- GitLab From 1b429bdee8242be5a22685c758bc658222e88a5c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 1 Apr 2020 13:23:25 -0500 Subject: [PATCH 1207/1278] objtool: Fix CONFIG_UBSAN_TRAP unreachable warnings [ Upstream commit bd841d6154f5f41f8a32d3c1b0bc229e326e640a ] CONFIG_UBSAN_TRAP causes GCC to emit a UD2 whenever it encounters an unreachable code path. This includes __builtin_unreachable(). Because the BUG() macro uses __builtin_unreachable() after it emits its own UD2, this results in a double UD2. In this case objtool rightfully detects that the second UD2 is unreachable: init/main.o: warning: objtool: repair_env_string()+0x1c8: unreachable instruction We weren't able to figure out a way to get rid of the double UD2s, so just silence the warning. Reported-by: Randy Dunlap Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Reviewed-by: Kees Cook Reviewed-by: Miroslav Benes Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/6653ad73c6b59c049211bd7c11ed3809c20ee9f5.1585761021.git.jpoimboe@redhat.com Signed-off-by: Sasha Levin --- tools/objtool/check.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ccd5319d1284..04fc04b4ab67 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2062,14 +2062,27 @@ static bool ignore_unreachable_insn(struct instruction *insn) !strcmp(insn->sec->name, ".altinstr_aux")) return true; + if (!insn->func) + return false; + + /* + * CONFIG_UBSAN_TRAP inserts a UD2 when it sees + * __builtin_unreachable(). The BUG() macro has an unreachable() after + * the UD2, which causes GCC's undefined trap logic to emit another UD2 + * (or occasionally a JMP to UD2). 
+ */ + if (list_prev_entry(insn, list)->dead_end && + (insn->type == INSN_BUG || + (insn->type == INSN_JUMP_UNCONDITIONAL && + insn->jump_dest && insn->jump_dest->type == INSN_BUG))) + return true; + /* * Check if this (or a subsequent) instruction is related to * CONFIG_UBSAN or CONFIG_KASAN. * * End the search at 5 instructions to avoid going into the weeds. */ - if (!insn->func) - return false; for (i = 0; i < 5; i++) { if (is_kasan_insn(insn) || is_ubsan_insn(insn)) -- GitLab From 0424695f8fc0edf363c32feed6a73fe6432f167d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 1 Apr 2020 13:23:26 -0500 Subject: [PATCH 1208/1278] objtool: Support Clang non-section symbols in ORC dump [ Upstream commit 8782e7cab51b6bf01a5a86471dd82228af1ac185 ] Historically, the relocation symbols for ORC entries have only been section symbols: .text+0: sp:sp+8 bp:(und) type:call end:0 However, the Clang assembler is aggressive about stripping section symbols. In that case we will need to use function symbols: freezing_slow_path+0: sp:sp+8 bp:(und) type:call end:0 In preparation for the generation of such entries in "objtool orc generate", add support for reading them in "objtool orc dump". 
Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/b811b5eb1a42602c3b523576dc5efab9ad1c174d.1585761021.git.jpoimboe@redhat.com Signed-off-by: Sasha Levin --- tools/objtool/orc_dump.c | 44 ++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index c3343820916a..7cbbbdd932f1 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -78,7 +78,7 @@ int orc_dump(const char *_objname) char *name; size_t nr_sections; Elf64_Addr orc_ip_addr = 0; - size_t shstrtab_idx; + size_t shstrtab_idx, strtab_idx = 0; Elf *elf; Elf_Scn *scn; GElf_Shdr sh; @@ -139,6 +139,8 @@ int orc_dump(const char *_objname) if (!strcmp(name, ".symtab")) { symtab = data; + } else if (!strcmp(name, ".strtab")) { + strtab_idx = i; } else if (!strcmp(name, ".orc_unwind")) { orc = data->d_buf; orc_size = sh.sh_size; @@ -150,7 +152,7 @@ int orc_dump(const char *_objname) } } - if (!symtab || !orc || !orc_ip) + if (!symtab || !strtab_idx || !orc || !orc_ip) return 0; if (orc_size % sizeof(*orc) != 0) { @@ -171,21 +173,29 @@ int orc_dump(const char *_objname) return -1; } - scn = elf_getscn(elf, sym.st_shndx); - if (!scn) { - WARN_ELF("elf_getscn"); - return -1; - } - - if (!gelf_getshdr(scn, &sh)) { - WARN_ELF("gelf_getshdr"); - return -1; - } - - name = elf_strptr(elf, shstrtab_idx, sh.sh_name); - if (!name || !*name) { - WARN_ELF("elf_strptr"); - return -1; + if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) { + scn = elf_getscn(elf, sym.st_shndx); + if (!scn) { + WARN_ELF("elf_getscn"); + return -1; + } + + if (!gelf_getshdr(scn, &sh)) { + WARN_ELF("gelf_getshdr"); + return -1; + } + + name = elf_strptr(elf, shstrtab_idx, sh.sh_name); + if (!name) { + WARN_ELF("elf_strptr"); + return -1; + } + } else { + name = elf_strptr(elf, strtab_idx, sym.st_name); + if (!name) { + 
WARN_ELF("elf_strptr"); + return -1; + } } printf("%s+%llx:", name, (unsigned long long)rela.r_addend); -- GitLab From fd23dba918103f537793f8813237f53b9bdcaecb Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 26 Mar 2020 09:03:58 +0100 Subject: [PATCH 1209/1278] xen/xenbus: ensure xenbus_map_ring_valloc() returns proper grant status [ Upstream commit 6b51fd3f65a22e3d1471b18a1d56247e246edd46 ] xenbus_map_ring_valloc() maps a ring page and returns the status of the used grant (0 meaning success). There are Xen hypervisors which might return the value 1 for the status of a failed grant mapping due to a bug. Some callers of xenbus_map_ring_valloc() test for errors by testing the returned status to be less than zero, resulting in no error detected and crashing later due to a not available ring page. Set the return value of xenbus_map_ring_valloc() to GNTST_general_error in case the grant status reported by Xen is greater than zero. This is part of XSA-316. Signed-off-by: Juergen Gross Reviewed-by: Wei Liu Link: https://lore.kernel.org/r/20200326080358.1018-1-jgross@suse.com Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/xenbus/xenbus_client.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index a1c17000129b..e94a61eaeceb 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -450,7 +450,14 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs, unsigned int nr_grefs, void **vaddr) { - return ring_ops->map(dev, gnt_refs, nr_grefs, vaddr); + int err; + + err = ring_ops->map(dev, gnt_refs, nr_grefs, vaddr); + /* Some hypervisors are buggy and can return 1. 
*/ + if (err > 0) + err = GNTST_general_error; + + return err; } EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); -- GitLab From d1659d5af518bfe9f26a3ee29e0234e0565e7a3b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 14 Apr 2020 09:32:55 -0700 Subject: [PATCH 1210/1278] arm64: Delete the space separator in __emit_inst [ Upstream commit c9a4ef66450145a356a626c833d3d7b1668b3ded ] In assembly, many instances of __emit_inst(x) expand to a directive. In a few places __emit_inst(x) is used as an assembler macro argument. For example, in arch/arm64/kvm/hyp/entry.S ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) expands to the following by the C preprocessor: alternative_insn nop, .inst (0xd500401f | ((0) << 16 | (4) << 5) | ((!!1) << 8)), 4, 1 Both comma and space are separators, with an exception that content inside a pair of parentheses/quotes is not split, so the clang integrated assembler splits the arguments to: nop, .inst, (0xd500401f | ((0) << 16 | (4) << 5) | ((!!1) << 8)), 4, 1 GNU as preprocesses the input with do_scrub_chars(). Its arm64 backend (along with many other non-x86 backends) sees: alternative_insn nop,.inst(0xd500401f|((0)<<16|(4)<<5)|((!!1)<<8)),4,1 # .inst(...) is parsed as one argument while its x86 backend sees: alternative_insn nop,.inst (0xd500401f|((0)<<16|(4)<<5)|((!!1)<<8)),4,1 # The extra space before '(' makes the whole .inst (...) parsed as two arguments The non-x86 backend's behavior is considered unintentional (https://sourceware.org/bugzilla/show_bug.cgi?id=25750). So drop the space separator inside `.inst (...)` to make the clang integrated assembler work. 
Suggested-by: Ilie Halip Signed-off-by: Fangrui Song Reviewed-by: Mark Rutland Link: https://github.com/ClangBuiltLinux/linux/issues/939 Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/include/asm/sysreg.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 50a89bcf9072..2564dd429ab6 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -60,7 +60,9 @@ #ifndef CONFIG_BROKEN_GAS_INST #ifdef __ASSEMBLY__ -#define __emit_inst(x) .inst (x) +// The space separator is omitted so that __emit_inst(x) can be parsed as +// either an assembler directive or an assembler macro argument. +#define __emit_inst(x) .inst(x) #else #define __emit_inst(x) ".inst " __stringify((x)) "\n\t" #endif -- GitLab From 1e68f07c8dd0bb4c874a77da3fc5799c5143456a Mon Sep 17 00:00:00 2001 From: yangerkun Date: Wed, 26 Feb 2020 12:10:02 +0800 Subject: [PATCH 1211/1278] ext4: use matching invalidatepage in ext4_writepage [ Upstream commit c2a559bc0e7ed5a715ad6b947025b33cb7c05ea7 ] Run generic/388 with journal data mode sometimes may trigger the warning in ext4_invalidatepage. Actually, we should use the matching invalidatepage in ext4_writepage. 
Signed-off-by: yangerkun Signed-off-by: Theodore Ts'o Reviewed-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200226041002.13914-1-yangerkun@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5b0d5ca2c2b2..0cbb241488ec 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2123,7 +2123,7 @@ static int ext4_writepage(struct page *page, bool keep_towrite = false; if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { - ext4_invalidatepage(page, 0, PAGE_SIZE); + inode->i_mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); return -EIO; } -- GitLab From 48eecff398154fc624f6477012245be620d291bc Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 13 Apr 2020 22:30:52 -0400 Subject: [PATCH 1212/1278] ext4: increase wait time needed before reuse of deleted inode numbers [ Upstream commit a17a9d935dc4a50acefaf319d58030f1da7f115a ] Current wait times have proven to be too short to protect against inode reuses that lead to metadata inconsistencies. Now that we will retry the inode allocation if we can't find any recently deleted inodes, it's a lot safer to increase the recently deleted time from 5 seconds to a minute. Link: https://lore.kernel.org/r/20200414023925.273867-1-tytso@mit.edu Google-Bug-Id: 36602237 Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/ialloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 2a480c0ef1bc..96efe53855a0 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -673,7 +673,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, * block has been written back to disk. (Yes, these values are * somewhat arbitrary...) 
*/ -#define RECENTCY_MIN 5 +#define RECENTCY_MIN 60 #define RECENTCY_DIRTY 300 static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) -- GitLab From 019abc55c93d2442c0e0411f9677fbeeedd527dd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 13 Apr 2020 23:33:05 -0400 Subject: [PATCH 1213/1278] ext4: convert BUG_ON's to WARN_ON's in mballoc.c [ Upstream commit 907ea529fc4c3296701d2bfc8b831dd2a8121a34 ] If the in-core buddy bitmap gets corrupted (or out of sync with the block bitmap), issue a WARN_ON and try to recover. In most cases this involves skipping trying to allocate out of a particular block group. We can end up declaring the file system corrupted, which is fair, since the file system probably should be checked before we proceed any further. Link: https://lore.kernel.org/r/20200414035649.293164-1-tytso@mit.edu Google-Bug-Id: 34811296 Google-Bug-Id: 34639169 Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 745a89d30a57..d7cedfaa1cc0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1952,7 +1952,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, int free; free = e4b->bd_info->bb_free; - BUG_ON(free <= 0); + if (WARN_ON(free <= 0)) + return; i = e4b->bd_info->bb_first_free; @@ -1973,7 +1974,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, } mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex); - BUG_ON(ex.fe_len <= 0); + if (WARN_ON(ex.fe_len <= 0)) + break; if (free < ex.fe_len) { ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " -- GitLab From 331999a770bb5e75b5dab906a9f6aad680bb0959 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 17 Apr 2020 11:28:53 +0200 Subject: [PATCH 1214/1278] hwmon: (jc42) Fix name to have no illegal characters [ Upstream commit c843b382e61b5f28a3d917712c69a344f632387c ] The jc42 
driver passes I2C client's name as hwmon device name. In case of device tree probed devices this ends up being part of the compatible string, "jc-42.4-temp". This name contains hyphens and the hwmon core doesn't like this: jc42 2-0018: hwmon: 'jc-42.4-temp' is not a valid name attribute, please fix This changes the name to "jc42" which doesn't have any illegal characters. Signed-off-by: Sascha Hauer Link: https://lore.kernel.org/r/20200417092853.31206-1-s.hauer@pengutronix.de Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/jc42.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c index e5234f953a6d..b6e5aaa54963 100644 --- a/drivers/hwmon/jc42.c +++ b/drivers/hwmon/jc42.c @@ -527,7 +527,7 @@ static int jc42_probe(struct i2c_client *client, const struct i2c_device_id *id) } data->config = config; - hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name, + hwmon_dev = devm_hwmon_device_register_with_info(dev, "jc42", data, &jc42_chip_info, NULL); return PTR_ERR_OR_ZERO(hwmon_dev); -- GitLab From add40f8f1a84d7c8feb76e8ab7f197a5b0627bd8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 19 Dec 2018 12:29:13 -0500 Subject: [PATCH 1215/1278] ext4: avoid declaring fs inconsistent due to invalid file handles commit 8a363970d1dc38c4ec4ad575c862f776f468d057 upstream. If we receive a file handle, either from NFS or open_by_handle_at(2), and it points at an inode which has not been initialized, and the file system has metadata checksums enabled, we shouldn't try to get the inode, discover the checksum is invalid, and then declare the file system as being inconsistent. This can be reproduced by creating a test file system via "mke2fs -t ext4 -O metadata_csum /tmp/foo.img 8M", mounting it, cd'ing into that directory, and then running the following program. 
#define _GNU_SOURCE #include struct handle { struct file_handle fh; unsigned char fid[MAX_HANDLE_SZ]; }; int main(int argc, char **argv) { struct handle h = {{8, 1 }, { 12, }}; open_by_handle_at(AT_FDCWD, &h.fh, O_RDONLY); return 0; } Google-Bug-Id: 120690101 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Ashwin H Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 15 +++++++++++++-- fs/ext4/ialloc.c | 2 +- fs/ext4/inode.c | 49 +++++++++++++++++++++++++++++++++--------------- fs/ext4/ioctl.c | 2 +- fs/ext4/namei.c | 4 ++-- fs/ext4/resize.c | 5 +++-- fs/ext4/super.c | 19 +++++-------------- fs/ext4/xattr.c | 5 +++-- 8 files changed, 62 insertions(+), 39 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4aa0f8f7d9a0..6c5b4301ee37 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2509,8 +2509,19 @@ int do_journal_get_write_access(handle_t *handle, #define FALL_BACK_TO_NONDELALLOC 1 #define CONVERT_INLINE_DATA 2 -extern struct inode *ext4_iget(struct super_block *, unsigned long); -extern struct inode *ext4_iget_normal(struct super_block *, unsigned long); +typedef enum { + EXT4_IGET_NORMAL = 0, + EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */ + EXT4_IGET_HANDLE = 0x0002 /* Inode # is from a handle */ +} ext4_iget_flags; + +extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, + ext4_iget_flags flags, const char *function, + unsigned int line); + +#define ext4_iget(sb, ino, flags) \ + __ext4_iget((sb), (ino), (flags), __func__, __LINE__) + extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct dentry *, struct iattr *); extern int ext4_getattr(const struct path *, struct kstat *, u32, unsigned int); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 96efe53855a0..85c2a7ea5ea2 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1239,7 +1239,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) if (!ext4_test_bit(bit, 
bitmap_bh->b_data)) goto bad_orphan; - inode = ext4_iget(sb, ino); + inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); if (IS_ERR(inode)) { err = PTR_ERR(inode); ext4_error(sb, "couldn't read orphan inode %lu (err %d)", diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0cbb241488ec..af1404d337dc 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4695,7 +4695,9 @@ int ext4_get_projid(struct inode *inode, kprojid_t *projid) return 0; } -struct inode *ext4_iget(struct super_block *sb, unsigned long ino) +struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, + ext4_iget_flags flags, const char *function, + unsigned int line) { struct ext4_iloc iloc; struct ext4_inode *raw_inode; @@ -4709,6 +4711,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) gid_t i_gid; projid_t i_projid; + if (((flags & EXT4_IGET_NORMAL) && + (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) || + (ino < EXT4_ROOT_INO) || + (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) { + if (flags & EXT4_IGET_HANDLE) + return ERR_PTR(-ESTALE); + __ext4_error(sb, function, line, + "inode #%lu: comm %s: iget: illegal inode #", + ino, current->comm); + return ERR_PTR(-EFSCORRUPTED); + } + inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); @@ -4724,18 +4738,26 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) raw_inode = ext4_raw_inode(&iloc); if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) { - EXT4_ERROR_INODE(inode, "root inode unallocated"); + ext4_error_inode(inode, function, line, 0, + "iget: root inode unallocated"); ret = -EFSCORRUPTED; goto bad_inode; } + if ((flags & EXT4_IGET_HANDLE) && + (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) { + ret = -ESTALE; + goto bad_inode; + } + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > EXT4_INODE_SIZE(inode->i_sb) || 
(ei->i_extra_isize & 3)) { - EXT4_ERROR_INODE(inode, - "bad extra_isize %u (inode size %u)", + ext4_error_inode(inode, function, line, 0, + "iget: bad extra_isize %u " + "(inode size %u)", ei->i_extra_isize, EXT4_INODE_SIZE(inode->i_sb)); ret = -EFSCORRUPTED; @@ -4757,7 +4779,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) } if (!ext4_inode_csum_verify(inode, raw_inode, ei)) { - EXT4_ERROR_INODE(inode, "checksum invalid"); + ext4_error_inode(inode, function, line, 0, + "iget: checksum invalid"); ret = -EFSBADCRC; goto bad_inode; } @@ -4813,7 +4836,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; inode->i_size = ext4_isize(sb, raw_inode); if ((size = i_size_read(inode)) < 0) { - EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size); + ext4_error_inode(inode, function, line, 0, + "iget: bad i_size value: %lld", size); ret = -EFSCORRUPTED; goto bad_inode; } @@ -4899,7 +4923,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = 0; if (ei->i_file_acl && !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { - EXT4_ERROR_INODE(inode, "bad extended attribute block %llu", + ext4_error_inode(inode, function, line, 0, + "iget: bad extended attribute block %llu", ei->i_file_acl); ret = -EFSCORRUPTED; goto bad_inode; @@ -4954,7 +4979,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) make_bad_inode(inode); } else { ret = -EFSCORRUPTED; - EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode); + ext4_error_inode(inode, function, line, 0, + "iget: bogus i_mode (%o)", inode->i_mode); goto bad_inode; } brelse(iloc.bh); @@ -4969,13 +4995,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) return ERR_PTR(ret); } -struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino) -{ - if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) - return ERR_PTR(-EFSCORRUPTED); - return ext4_iget(sb, ino); -} 
- static int ext4_inode_blocks_set(handle_t *handle, struct ext4_inode *raw_inode, struct ext4_inode_info *ei) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 82e118e9e50b..9dbb5542167a 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -111,7 +111,7 @@ static long swap_inode_boot_loader(struct super_block *sb, if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) return -EPERM; - inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO); + inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL); if (IS_ERR(inode_bl)) return PTR_ERR(inode_bl); ei_bl = EXT4_I(inode_bl); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 3f7b3836166c..161099f39ab9 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1597,7 +1597,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi dentry); return ERR_PTR(-EFSCORRUPTED); } - inode = ext4_iget_normal(dir->i_sb, ino); + inode = ext4_iget(dir->i_sb, ino, EXT4_IGET_NORMAL); if (inode == ERR_PTR(-ESTALE)) { EXT4_ERROR_INODE(dir, "deleted inode referenced: %u", @@ -1639,7 +1639,7 @@ struct dentry *ext4_get_parent(struct dentry *child) return ERR_PTR(-EFSCORRUPTED); } - return d_obtain_alias(ext4_iget_normal(child->d_sb, ino)); + return d_obtain_alias(ext4_iget(child->d_sb, ino, EXT4_IGET_NORMAL)); } /* diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index d42f7471fd5b..19af346a6651 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1652,7 +1652,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) "No reserved GDT blocks, can't resize"); return -EPERM; } - inode = ext4_iget(sb, EXT4_RESIZE_INO); + inode = ext4_iget(sb, EXT4_RESIZE_INO, EXT4_IGET_SPECIAL); if (IS_ERR(inode)) { ext4_warning(sb, "Error opening resize inode"); return PTR_ERR(inode); @@ -1980,7 +1980,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) } if (!resize_inode) - resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); + resize_inode = ext4_iget(sb, EXT4_RESIZE_INO, 
+ EXT4_IGET_SPECIAL); if (IS_ERR(resize_inode)) { ext4_warning(sb, "Error opening resize inode"); return PTR_ERR(resize_inode); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0ced133a36ec..086fcbe2c576 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1126,20 +1126,11 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, { struct inode *inode; - if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) - return ERR_PTR(-ESTALE); - if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) - return ERR_PTR(-ESTALE); - - /* iget isn't really right if the inode is currently unallocated!! - * - * ext4_read_inode will return a bad_inode if the inode had been - * deleted, so we should be safe. - * + /* * Currently we don't know the generation for parent directory, so * a generation of 0 means "accept any" */ - inode = ext4_iget_normal(sb, ino); + inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE); if (IS_ERR(inode)) return ERR_CAST(inode); if (generation && inode->i_generation != generation) { @@ -4370,7 +4361,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * so we can safely mount the rest of the filesystem now. */ - root = ext4_iget(sb, EXT4_ROOT_INO); + root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL); if (IS_ERR(root)) { ext4_msg(sb, KERN_ERR, "get root inode failed"); ret = PTR_ERR(root); @@ -4620,7 +4611,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, * happen if we iget() an unused inode, as the subsequent iput() * will try to delete it. 
*/ - journal_inode = ext4_iget(sb, journal_inum); + journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL); if (IS_ERR(journal_inode)) { ext4_msg(sb, KERN_ERR, "no journal found"); return NULL; @@ -5693,7 +5684,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, if (!qf_inums[type]) return -EPERM; - qf_inode = ext4_iget(sb, qf_inums[type]); + qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL); if (IS_ERR(qf_inode)) { ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]); return PTR_ERR(qf_inode); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index f2fde3ac8698..b0873b89dc87 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -383,7 +383,7 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, struct inode *inode; int err; - inode = ext4_iget(parent->i_sb, ea_ino); + inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL); if (IS_ERR(inode)) { err = PTR_ERR(inode); ext4_error(parent->i_sb, @@ -1486,7 +1486,8 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value, } while (ce) { - ea_inode = ext4_iget(inode->i_sb, ce->e_value); + ea_inode = ext4_iget(inode->i_sb, ce->e_value, + EXT4_IGET_NORMAL); if (!IS_ERR(ea_inode) && !is_bad_inode(ea_inode) && (EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) && -- GitLab From 795762468125a6412c089651e74f780bee154118 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 9 Apr 2019 23:37:08 -0400 Subject: [PATCH 1216/1278] ext4: protect journal inode's blocks using block_validity commit 345c0dbf3a30872d9b204db96b5857cd00808cae upstream. Add the blocks which belong to the journal inode to block_validity's system zone so attempts to deallocate or overwrite the journal due a corrupted file system where the journal blocks are also claimed by another inode. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202879 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Ashwin H Signed-off-by: Greg Kroah-Hartman --- fs/ext4/block_validity.c | 48 ++++++++++++++++++++++++++++++++++++++++ fs/ext4/inode.c | 4 ++++ 2 files changed, 52 insertions(+) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index bee888e0e2db..2370dfd3b239 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -137,6 +137,48 @@ static void debug_print_tree(struct ext4_sb_info *sbi) printk(KERN_CONT "\n"); } +static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino) +{ + struct inode *inode; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_map_blocks map; + u32 i = 0, err = 0, num, n; + + if ((ino < EXT4_ROOT_INO) || + (ino > le32_to_cpu(sbi->s_es->s_inodes_count))) + return -EINVAL; + inode = ext4_iget(sb, ino, EXT4_IGET_SPECIAL); + if (IS_ERR(inode)) + return PTR_ERR(inode); + num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; + while (i < num) { + map.m_lblk = i; + map.m_len = num - i; + n = ext4_map_blocks(NULL, inode, &map, 0); + if (n < 0) { + err = n; + break; + } + if (n == 0) { + i++; + } else { + if (!ext4_data_block_valid(sbi, map.m_pblk, n)) { + ext4_error(sb, "blocks %llu-%llu from inode %u " + "overlap system zone", map.m_pblk, + map.m_pblk + map.m_len - 1, ino); + err = -EFSCORRUPTED; + break; + } + err = add_system_zone(sbi, map.m_pblk, n); + if (err < 0) + break; + i += n; + } + } + iput(inode); + return err; +} + int ext4_setup_system_zone(struct super_block *sb) { ext4_group_t ngroups = ext4_get_groups_count(sb); @@ -171,6 +213,12 @@ int ext4_setup_system_zone(struct super_block *sb) if (ret) return ret; } + if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) { + ret = ext4_protect_reserved_inode(sb, + le32_to_cpu(sbi->s_es->s_journal_inum)); + if (ret) + return ret; + } if (test_opt(sb, DEBUG)) debug_print_tree(EXT4_SB(sb)); diff --git 
a/fs/ext4/inode.c b/fs/ext4/inode.c index af1404d337dc..1bc0037c50aa 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -407,6 +407,10 @@ static int __check_block_validity(struct inode *inode, const char *func, unsigned int line, struct ext4_map_blocks *map) { + if (ext4_has_feature_journal(inode->i_sb) && + (inode->i_ino == + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) + return 0; if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, map->m_len)) { ext4_error_inode(inode, func, line, map->m_pblk, -- GitLab From fc3293a80acc469fbabc91bfbf2e65dc84377dc7 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 22 May 2019 10:27:01 -0400 Subject: [PATCH 1217/1278] ext4: don't perform block validity checks on the journal inode commit 0a944e8a6c66ca04c7afbaa17e22bf208a8b37f0 upstream. Since the journal inode is already checked when we added it to the block validity's system zone, if we check it again, we'll just trigger a failure. This was causing failures like this: [ 53.897001] EXT4-fs error (device sda): ext4_find_extent:909: inode #8: comm jbd2/sda-8: pblk 121667583 bad header/extent: invalid extent entries - magic f30a, entries 8, max 340(340), depth 0(0) [ 53.931430] jbd2_journal_bmap: journal block not found at offset 49 on sda-8 [ 53.938480] Aborting journal on device sda-8. ... but only if the system was under enough memory pressure that logical->physical mapping for the journal inode gets pushed out of the extent cache. (This is why it wasn't noticed earlier.) 
Fixes: 345c0dbf3a30 ("ext4: protect journal inode's blocks using block_validity") Reported-by: Dan Rue Signed-off-by: Theodore Ts'o Tested-by: Naresh Kamboju Signed-off-by: Ashwin H Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 34b79825efdc..4f9eb4b61549 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -554,10 +554,14 @@ __read_extent_tree_block(const char *function, unsigned int line, } if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) return bh; - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); - if (err) - goto errout; + if (!ext4_has_feature_journal(inode->i_sb) || + (inode->i_ino != + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) { + err = __ext4_ext_check(function, line, inode, + ext_block_hdr(bh), depth, pblk); + if (err) + goto errout; + } set_buffer_verified(bh); /* * If this is a leaf block, cache all of its entries -- GitLab From ea8f7a3dfd66f9ad1f65b47159ff38d8c278e5d7 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 15 May 2019 00:51:19 -0400 Subject: [PATCH 1218/1278] ext4: fix block validity checks for journal inodes using indirect blocks commit 170417c8c7bb2cbbdd949bf5c443c0c8f24a203b upstream. Commit 345c0dbf3a30 ("ext4: protect journal inode's blocks using block_validity") failed to add an exception for the journal inode in ext4_check_blockref(), which is the function used by ext4_get_branch() for indirect blocks. This caused attempts to read from the ext3-style journals to fail with: [ 848.968550] EXT4-fs error (device sdb7): ext4_get_branch:171: inode #8: block 30343695: comm jbd2/sdb7-8: invalid block Fix this by adding the missing exception check. 
Fixes: 345c0dbf3a30 ("ext4: protect journal inode's blocks using block_validity") Reported-by: Arthur Marsh Signed-off-by: Theodore Ts'o Signed-off-by: Ashwin H Signed-off-by: Greg Kroah-Hartman --- fs/ext4/block_validity.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 2370dfd3b239..7ba892087d91 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -275,6 +275,11 @@ int ext4_check_blockref(const char *function, unsigned int line, __le32 *bref = p; unsigned int blk; + if (ext4_has_feature_journal(inode->i_sb) && + (inode->i_ino == + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) + return 0; + while (bref < p+max) { blk = le32_to_cpu(*bref++); if (blk && -- GitLab From 22dec5f46b404668d3bebe599ee4cff42501f7e7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 10 May 2019 22:06:38 -0400 Subject: [PATCH 1219/1278] ext4: unsigned int compared against zero commit fbbbbd2f28aec991f3fbc248df211550fbdfd58c upstream. There are two cases where the u32 variables n and err are checked for negative error values; these checks are always false because the variables are unsigned. Fix this by making the variables ints. 
Addresses-Coverity: ("Unsigned compared against 0") Fixes: 345c0dbf3a30 ("ext4: protect journal inode's blocks using block_validity") Signed-off-by: Colin Ian King Signed-off-by: Theodore Ts'o Signed-off-by: Ashwin H Signed-off-by: Greg Kroah-Hartman --- fs/ext4/block_validity.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 7ba892087d91..13eb028607ca 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -142,7 +142,8 @@ static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino) struct inode *inode; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_map_blocks map; - u32 i = 0, err = 0, num, n; + u32 i = 0, num; + int err = 0, n; if ((ino < EXT4_ROOT_INO) || (ino > le32_to_cpu(sbi->s_es->s_inodes_count))) -- GitLab From 5b5a948a44bf78187636ce934e3f2e7bdb30ecf1 Mon Sep 17 00:00:00 2001 From: Yuval Basson Date: Sun, 29 Mar 2020 20:32:49 +0300 Subject: [PATCH 1220/1278] qed: Fix use after free in qed_chain_free commit 8063f761cd7c17fc1d0018728936e0c33a25388a upstream. The qed_chain data structure was modified in commit 1a4a69751f4d ("qed: Chain support for external PBL") to support receiving an external pbl (due to iWARP FW requirements). The pages pointed to by the pbl are allocated in qed_chain_alloc and their virtual addresses are stored in a virtual address array to enable accessing and freeing the data. The physical addresses however weren't stored and were accessed directly from the external-pbl during free. The destroy-qp flow frees the external pbl before the chain is freed; when the chain is freed, it tries to access the already freed external pbl, leading to a use-after-free. Therefore we need to store the physical addresses in addition to the virtual addresses in a new data structure. Fixes: 1a4a69751f4d ("qed: Chain support for external PBL") Signed-off-by: Michal Kalderon Signed-off-by: Yuval Bason Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 38 ++++++++++------------- include/linux/qed/qed_chain.h | 24 ++++++++------ 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index c4e8bf0773fe..6024b832b4d9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -3151,26 +3151,20 @@ static void qed_chain_free_single(struct qed_dev *cdev, static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain) { - void **pp_virt_addr_tbl = p_chain->pbl.pp_virt_addr_tbl; + struct addr_tbl_entry *pp_addr_tbl = p_chain->pbl.pp_addr_tbl; u32 page_cnt = p_chain->page_cnt, i, pbl_size; - u8 *p_pbl_virt = p_chain->pbl_sp.p_virt_table; - if (!pp_virt_addr_tbl) + if (!pp_addr_tbl) return; - if (!p_pbl_virt) - goto out; - for (i = 0; i < page_cnt; i++) { - if (!pp_virt_addr_tbl[i]) + if (!pp_addr_tbl[i].virt_addr || !pp_addr_tbl[i].dma_map) break; dma_free_coherent(&cdev->pdev->dev, QED_CHAIN_PAGE_SIZE, - pp_virt_addr_tbl[i], - *(dma_addr_t *)p_pbl_virt); - - p_pbl_virt += QED_CHAIN_PBL_ENTRY_SIZE; + pp_addr_tbl[i].virt_addr, + pp_addr_tbl[i].dma_map); } pbl_size = page_cnt * QED_CHAIN_PBL_ENTRY_SIZE; @@ -3180,9 +3174,9 @@ static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain) pbl_size, p_chain->pbl_sp.p_virt_table, p_chain->pbl_sp.p_phys_table); -out: - vfree(p_chain->pbl.pp_virt_addr_tbl); - p_chain->pbl.pp_virt_addr_tbl = NULL; + + vfree(p_chain->pbl.pp_addr_tbl); + p_chain->pbl.pp_addr_tbl = NULL; } void qed_chain_free(struct qed_dev *cdev, struct qed_chain *p_chain) @@ -3283,19 +3277,19 @@ qed_chain_alloc_pbl(struct qed_dev *cdev, { u32 page_cnt = p_chain->page_cnt, size, i; dma_addr_t p_phys = 0, p_pbl_phys = 0; - void **pp_virt_addr_tbl = NULL; + struct addr_tbl_entry *pp_addr_tbl; u8 *p_pbl_virt = NULL; void *p_virt = NULL; - size = page_cnt * 
sizeof(*pp_virt_addr_tbl); - pp_virt_addr_tbl = vzalloc(size); - if (!pp_virt_addr_tbl) + size = page_cnt * sizeof(*pp_addr_tbl); + pp_addr_tbl = vzalloc(size); + if (!pp_addr_tbl) return -ENOMEM; /* The allocation of the PBL table is done with its full size, since it * is expected to be successive. * qed_chain_init_pbl_mem() is called even in a case of an allocation - * failure, since pp_virt_addr_tbl was previously allocated, and it + * failure, since tbl was previously allocated, and it * should be saved to allow its freeing during the error flow. */ size = page_cnt * QED_CHAIN_PBL_ENTRY_SIZE; @@ -3309,8 +3303,7 @@ qed_chain_alloc_pbl(struct qed_dev *cdev, p_chain->b_external_pbl = true; } - qed_chain_init_pbl_mem(p_chain, p_pbl_virt, p_pbl_phys, - pp_virt_addr_tbl); + qed_chain_init_pbl_mem(p_chain, p_pbl_virt, p_pbl_phys, pp_addr_tbl); if (!p_pbl_virt) return -ENOMEM; @@ -3329,7 +3322,8 @@ qed_chain_alloc_pbl(struct qed_dev *cdev, /* Fill the PBL table with the physical address of the page */ *(dma_addr_t *)p_pbl_virt = p_phys; /* Keep the virtual address of the page */ - p_chain->pbl.pp_virt_addr_tbl[i] = p_virt; + p_chain->pbl.pp_addr_tbl[i].virt_addr = p_virt; + p_chain->pbl.pp_addr_tbl[i].dma_map = p_phys; p_pbl_virt += QED_CHAIN_PBL_ENTRY_SIZE; } diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 2dd0a9ed5b36..733fad7dfbed 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -97,6 +97,11 @@ struct qed_chain_u32 { u32 cons_idx; }; +struct addr_tbl_entry { + void *virt_addr; + dma_addr_t dma_map; +}; + struct qed_chain { /* fastpath portion of the chain - required for commands such * as produce / consume. @@ -107,10 +112,11 @@ struct qed_chain { /* Fastpath portions of the PBL [if exists] */ struct { - /* Table for keeping the virtual addresses of the chain pages, - * respectively to the physical addresses in the pbl table. 
+ /* Table for keeping the virtual and physical addresses of the + * chain pages, respectively to the physical addresses + * in the pbl table. */ - void **pp_virt_addr_tbl; + struct addr_tbl_entry *pp_addr_tbl; union { struct qed_chain_pbl_u16 u16; @@ -287,7 +293,7 @@ qed_chain_advance_page(struct qed_chain *p_chain, *(u32 *)page_to_inc = 0; page_index = *(u32 *)page_to_inc; } - *p_next_elem = p_chain->pbl.pp_virt_addr_tbl[page_index]; + *p_next_elem = p_chain->pbl.pp_addr_tbl[page_index].virt_addr; } } @@ -537,7 +543,7 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain, p_chain->pbl_sp.p_phys_table = 0; p_chain->pbl_sp.p_virt_table = NULL; - p_chain->pbl.pp_virt_addr_tbl = NULL; + p_chain->pbl.pp_addr_tbl = NULL; } /** @@ -575,11 +581,11 @@ static inline void qed_chain_init_mem(struct qed_chain *p_chain, static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain, void *p_virt_pbl, dma_addr_t p_phys_pbl, - void **pp_virt_addr_tbl) + struct addr_tbl_entry *pp_addr_tbl) { p_chain->pbl_sp.p_phys_table = p_phys_pbl; p_chain->pbl_sp.p_virt_table = p_virt_pbl; - p_chain->pbl.pp_virt_addr_tbl = pp_virt_addr_tbl; + p_chain->pbl.pp_addr_tbl = pp_addr_tbl; } /** @@ -644,7 +650,7 @@ static inline void *qed_chain_get_last_elem(struct qed_chain *p_chain) break; case QED_CHAIN_MODE_PBL: last_page_idx = p_chain->page_cnt - 1; - p_virt_addr = p_chain->pbl.pp_virt_addr_tbl[last_page_idx]; + p_virt_addr = p_chain->pbl.pp_addr_tbl[last_page_idx].virt_addr; break; } /* p_virt_addr points at this stage to the last page of the chain */ @@ -716,7 +722,7 @@ static inline void qed_chain_pbl_zero_mem(struct qed_chain *p_chain) page_cnt = qed_chain_get_page_cnt(p_chain); for (i = 0; i < page_cnt; i++) - memset(p_chain->pbl.pp_virt_addr_tbl[i], 0, + memset(p_chain->pbl.pp_addr_tbl[i].virt_addr, 0, QED_CHAIN_PAGE_SIZE); } -- GitLab From c65024b37616749838ee74a8c9a57fbd875e10bd Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Mon, 16 Mar 2020 15:00:38 +0530 
Subject: [PATCH 1221/1278] ext4: check for non-zero journal inum in ext4_calculate_overhead commit f1eec3b0d0a849996ebee733b053efa71803dad5 upstream. While calculating overhead for internal journal, also check that j_inum shouldn't be 0. Otherwise we get below error with xfstests generic/050 with external journal (XXX_LOGDEV config) enabled. It could be simply reproduced with loop device with an external journal and marking blockdev as RO before mounting. [ 3337.146838] EXT4-fs error (device pmem1p2): ext4_get_journal_inode:4634: comm mount: inode #0: comm mount: iget: illegal inode # ------------[ cut here ]------------ generic_make_request: Trying to write to read-only block-device pmem1p2 (partno 2) WARNING: CPU: 107 PID: 115347 at block/blk-core.c:788 generic_make_request_checks+0x6b4/0x7d0 CPU: 107 PID: 115347 Comm: mount Tainted: G L --------- -t - 4.18.0-167.el8.ppc64le #1 NIP: c0000000006f6d44 LR: c0000000006f6d40 CTR: 0000000030041dd4 <...> NIP [c0000000006f6d44] generic_make_request_checks+0x6b4/0x7d0 LR [c0000000006f6d40] generic_make_request_checks+0x6b0/0x7d0 <...> Call Trace: generic_make_request_checks+0x6b0/0x7d0 (unreliable) generic_make_request+0x3c/0x420 submit_bio+0xd8/0x200 submit_bh_wbc+0x1e8/0x250 __sync_dirty_buffer+0xd0/0x210 ext4_commit_super+0x310/0x420 [ext4] __ext4_error+0xa4/0x1e0 [ext4] __ext4_iget+0x388/0xe10 [ext4] ext4_get_journal_inode+0x40/0x150 [ext4] ext4_calculate_overhead+0x5a8/0x610 [ext4] ext4_fill_super+0x3188/0x3260 [ext4] mount_bdev+0x778/0x8f0 ext4_mount+0x28/0x50 [ext4] mount_fs+0x74/0x230 vfs_kern_mount.part.6+0x6c/0x250 do_mount+0x2fc/0x1280 sys_mount+0x158/0x180 system_call+0x5c/0x70 EXT4-fs (pmem1p2): no journal found EXT4-fs (pmem1p2): can't get journal size EXT4-fs (pmem1p2): mounted filesystem without journal. 
Opts: dax,norecovery Fixes: 3c816ded78bb ("ext4: use journal inode to determine journal overhead") Reported-by: Harish Sriram Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200316093038.25485-1-riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 086fcbe2c576..ffc985d78137 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3456,7 +3456,8 @@ int ext4_calculate_overhead(struct super_block *sb) */ if (sbi->s_journal && !sbi->journal_bdev) overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); - else if (ext4_has_feature_journal(sb) && !sbi->s_journal) { + else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) { + /* j_inum for internal journal is non-zero */ j_inode = ext4_get_journal_inode(sb, j_inum); if (j_inode) { j_blocks = j_inode->i_size >> sb->s_blocksize_bits; -- GitLab From 97699864e36616399c7b98b17ea64548b7922523 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Apr 2020 10:26:22 -0400 Subject: [PATCH 1222/1278] propagate_one(): mnt_set_mountpoint() needs mount_lock commit b0d3869ce9eeacbb1bbd541909beeef4126426d5 upstream. ... to protect the modification of mp->m_count done by it. Most of the places that modify that thing also have namespace_lock held, but not all of them can do so, so we really need mount_lock here. Kudos to Piotr Krysiuk , who'd spotted a related bug in pivot_root(2) (fixed unnoticed in 5.3); search for other similar turds has caught out this one. 
Cc: stable@kernel.org Signed-off-by: Al Viro Signed-off-by: Piotr Krysiuk Signed-off-by: Greg Kroah-Hartman --- fs/pnode.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/pnode.c b/fs/pnode.c index 53d411a371ce..7910ae91f17e 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -266,14 +266,13 @@ static int propagate_one(struct mount *m) if (IS_ERR(child)) return PTR_ERR(child); child->mnt.mnt_flags &= ~MNT_LOCKED; + read_seqlock_excl(&mount_lock); mnt_set_mountpoint(m, mp, child); + if (m->mnt_master != dest_master) + SET_MNT_MARK(m->mnt_master); + read_sequnlock_excl(&mount_lock); last_dest = m; last_source = child; - if (m->mnt_master != dest_master) { - read_seqlock_excl(&mount_lock); - SET_MNT_MARK(m->mnt_master); - read_sequnlock_excl(&mount_lock); - } hlist_add_head(&child->mnt_hash, list); return count_mounts(m->mnt_ns, child); } -- GitLab From 773e2b1cd56a17bab4cdd4fe7db12f2140951668 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 2 May 2020 17:24:47 +0200 Subject: [PATCH 1223/1278] Linux 4.14.178 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d81fb98737f7..73e93e596e50 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 177 +SUBLEVEL = 178 EXTRAVERSION = NAME = Petit Gorille -- GitLab From e282124750ba405df0499a64cacdc0cf45472572 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Mon, 4 May 2020 10:55:19 -0700 Subject: [PATCH 1224/1278] ANDROID: Incremental fs: Fix issues with very large files Test: incfs_test passes Bug: 155590527 Signed-off-by: Paul Lawrence Change-Id: Iaecfcd40e8c089d11b34c7aff2090fbfe0c36219 (cherry picked from commit 3e4fa206ce8ae4d3141af7514cb5d3d813cd4290) --- fs/incfs/vfs.c | 7 ++- .../selftests/filesystems/incfs/incfs_test.c | 60 +++++++++++++++++++ 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 
a2932d3a4e8f..d2c82047dd7c 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -896,7 +896,8 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, .mnt = mi->mi_backing_dir_path.mnt, .dentry = dentry }; - new_file = dentry_open(&path, O_RDWR | O_NOATIME, mi->mi_owner); + new_file = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE, + mi->mi_owner); if (IS_ERR(new_file)) { error = PTR_ERR(new_file); @@ -1904,8 +1905,8 @@ static int file_open(struct inode *inode, struct file *file) int err = 0; get_incfs_backing_path(file->f_path.dentry, &backing_path); - backing_file = dentry_open(&backing_path, O_RDWR | O_NOATIME, - mi->mi_owner); + backing_file = dentry_open( + &backing_path, O_RDWR | O_NOATIME | O_LARGEFILE, mi->mi_owner); path_put(&backing_path); if (IS_ERR(backing_file)) { diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 0fc4c04c514a..6809399eac97 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -2609,6 +2609,65 @@ static int get_hash_blocks_test(char *mount_dir) return TEST_FAILURE; } +static int large_file(char *mount_dir) +{ + char *backing_dir; + int cmd_fd = -1; + int i; + int result = TEST_FAILURE; + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; + int block_count = 3LL * 1024 * 1024 * 1024 / INCFS_DATA_FILE_BLOCK_SIZE; + struct incfs_fill_block *block_buf = + calloc(block_count, sizeof(struct incfs_fill_block)); + struct incfs_fill_blocks fill_blocks = { + .count = block_count, + .fill_blocks = ptr_to_u64(block_buf), + }; + incfs_uuid_t id; + int fd; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + if (emit_file(cmd_fd, NULL, "very_large_file", &id, + 
(uint64_t)block_count * INCFS_DATA_FILE_BLOCK_SIZE, + NULL) < 0) + goto failure; + + for (i = 0; i < block_count; i++) { + block_buf[i].compression = COMPRESSION_NONE; + block_buf[i].block_index = i; + block_buf[i].data_len = INCFS_DATA_FILE_BLOCK_SIZE; + block_buf[i].data = ptr_to_u64(data); + } + + fd = open_file_by_id(mount_dir, id, true); + if (fd < 0) + goto failure; + + if (ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks) != block_count) + goto failure; + + if (emit_file(cmd_fd, NULL, "very_very_large_file", &id, 1LL << 40, + NULL) < 0) + goto failure; + + result = TEST_SUCCESS; + +failure: + close(fd); + close(cmd_fd); + return result; +} + static char *setup_mount_dir() { struct stat st; @@ -2678,6 +2737,7 @@ int main(int argc, char *argv[]) MAKE_TEST(read_log_test), MAKE_TEST(get_blocks_test), MAKE_TEST(get_hash_blocks_test), + MAKE_TEST(large_file), }; #undef MAKE_TEST -- GitLab From b4586e02a3457b877a0fd2bc7734112d91d451a6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 31 Dec 2018 22:34:31 -0500 Subject: [PATCH 1225/1278] ext4: fix special inode number checks in __ext4_iget() commit 191ce17876c9367819c4b0a25b503c0f6d9054d8 upstream. The check for special (reserved) inode number checks in __ext4_iget() was broken by commit 8a363970d1dc: ("ext4: avoid declaring fs inconsistent due to invalid file handles"). This was caused by a botched reversal of the sense of the flag now known as EXT4_IGET_SPECIAL (when it was previously named EXT4_IGET_NORMAL). Fix the logic appropriately. 
Fixes: 8a363970d1dc ("ext4: avoid declaring fs inconsistent...") Signed-off-by: Theodore Ts'o Reported-by: Dan Carpenter Cc: stable@kernel.org Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1bc0037c50aa..5f03a4fabeaa 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4715,7 +4715,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, gid_t i_gid; projid_t i_projid; - if (((flags & EXT4_IGET_NORMAL) && + if ((!(flags & EXT4_IGET_SPECIAL) && (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) || (ino < EXT4_ROOT_INO) || (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) { -- GitLab From 4ad0ae87d0e5e004a95e150c7df7c4ba5342e454 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Apr 2020 18:17:43 +0300 Subject: [PATCH 1226/1278] drm/edid: Fix off-by-one in DispID DTD pixel clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6292b8efe32e6be408af364132f09572aed14382 upstream. The DispID DTD pixel clock is documented as: "00 00 00 h → FF FF FF h | Pixel clock ÷ 10,000 0.01 → 167,772.16 Mega Pixels per Sec" Which seems to imply that we need to add one to the raw value. Reality seems to agree as there are tiled displays in the wild which currently show a 10kHz difference in the pixel clock between the tiles (one tile gets its mode from the base EDID, the other from the DispID block).
Cc: stable@vger.kernel.org References: https://gitlab.freedesktop.org/drm/intel/-/issues/27 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200423151743.18767-1-ville.syrjala@linux.intel.com Reviewed-by: Manasi Navare Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_edid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index dfdc7d3147fb..51276dd0d864 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4502,7 +4502,7 @@ static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device *d struct drm_display_mode *mode; unsigned pixel_clock = (timings->pixel_clock[0] | (timings->pixel_clock[1] << 8) | - (timings->pixel_clock[2] << 16)); + (timings->pixel_clock[2] << 16)) + 1; unsigned hactive = (timings->hactive[0] | timings->hactive[1] << 8) + 1; unsigned hblank = (timings->hblank[0] | timings->hblank[1] << 8) + 1; unsigned hsync = (timings->hsync[0] | (timings->hsync[1] & 0x7f) << 8) + 1; -- GitLab From f4027b733eedca270354c9c0a6704b4eadffa289 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 27 Apr 2020 08:32:46 +0300 Subject: [PATCH 1227/1278] drm/qxl: qxl_release leak in qxl_draw_dirty_fb() commit 85e9b88af1e6164f19ec71381efd5e2bcfc17620 upstream. 
ret should be changed to release allocated struct qxl_release Cc: stable@vger.kernel.org Fixes: 8002db6336dd ("qxl: convert qxl driver to proper use for reservations") Signed-off-by: Vasily Averin Link: http://patchwork.freedesktop.org/patch/msgid/22cfd55f-07c8-95d0-a2f7-191b7153c3d4@virtuozzo.com Signed-off-by: Gerd Hoffmann Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_draw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_draw.c b/drivers/gpu/drm/qxl/qxl_draw.c index 4d8681e84e68..67f3bdeaa3bf 100644 --- a/drivers/gpu/drm/qxl/qxl_draw.c +++ b/drivers/gpu/drm/qxl/qxl_draw.c @@ -348,9 +348,10 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev, goto out_release_backoff; rects = drawable_set_clipping(qdev, num_clips, clips_bo); - if (!rects) + if (!rects) { + ret = -EINVAL; goto out_release_backoff; - + } drawable = (struct qxl_drawable *)qxl_release_map(qdev, release); drawable->clip.type = SPICE_CLIP_TYPE_RECTS; -- GitLab From 1c78ac39f10c375145f872c9a8a7f6e412d21040 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 27 Apr 2020 08:32:51 +0300 Subject: [PATCH 1228/1278] drm/qxl: qxl_release leak in qxl_hw_surface_alloc() commit a65aa9c3676ffccb21361d52fcfedd5b5ff387d7 upstream. 
Cc: stable@vger.kernel.org Fixes: 8002db6336dd ("qxl: convert qxl driver to proper use for reservations") Signed-off-by: Vasily Averin Link: http://patchwork.freedesktop.org/patch/msgid/2e5a13ae-9ab2-5401-aa4d-03d5f5593423@virtuozzo.com Signed-off-by: Gerd Hoffmann Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_cmd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 3eb920851141..548bda3c4454 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -504,9 +504,10 @@ int qxl_hw_surface_alloc(struct qxl_device *qdev, return ret; ret = qxl_release_reserve_list(release, true); - if (ret) + if (ret) { + qxl_release_free(qdev, release); return ret; - + } cmd = (struct qxl_surface_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_SURFACE_CMD_CREATE; cmd->flags = QXL_SURF_FLAG_KEEP_DATA; -- GitLab From 500a886a3ac7f4d33638ea5375ef70cc89a5805b Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 29 Apr 2020 12:01:24 +0300 Subject: [PATCH 1229/1278] drm/qxl: qxl_release use after free commit 933db73351d359f74b14f4af095808260aff11f9 upstream. qxl_release should not be accessed after qxl_push_*_ring_release() calls: userspace driver can process submitted command quickly, move qxl_release into release_ring, generate interrupt and trigger garbage collector. It can lead to crashes in qxl driver or trigger memory corruption in some kmalloc-192 slab object. Gerd Hoffmann proposes to swap the qxl_release_fence_buffer_objects() + qxl_push_{cursor,command}_ring_release() calls to close that race window. cc: stable@vger.kernel.org Fixes: f64122c1f6ad ("drm: add new QXL driver.
(v1.4)") Signed-off-by: Vasily Averin Link: http://patchwork.freedesktop.org/patch/msgid/fa17b338-66ae-f299-68fe-8d32419d9071@virtuozzo.com Signed-off-by: Gerd Hoffmann [backported to v4.14-stable] Signed-off-by: Vasily Averin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_cmd.c | 5 ++--- drivers/gpu/drm/qxl/qxl_display.c | 6 +++--- drivers/gpu/drm/qxl/qxl_draw.c | 8 ++++---- drivers/gpu/drm/qxl/qxl_ioctl.c | 5 +---- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 548bda3c4454..8334afa70b94 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -533,8 +533,8 @@ int qxl_hw_surface_alloc(struct qxl_device *qdev, /* no need to add a release to the fence for this surface bo, since it is only released when we ask to destroy the surface and it would never signal otherwise */ - qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false); qxl_release_fence_buffer_objects(release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false); surf->hw_surf_alloc = true; spin_lock(&qdev->surf_id_idr_lock); @@ -576,9 +576,8 @@ int qxl_hw_surface_dealloc(struct qxl_device *qdev, cmd->surface_id = id; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false); - qxl_release_fence_buffer_objects(release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false); return 0; } diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 573bab222123..b209a25e307d 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -533,8 +533,8 @@ static int qxl_primary_apply_cursor(struct drm_plane *plane) cmd->u.set.visible = 1; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); qxl_release_fence_buffer_objects(release); + 
qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); return ret; @@ -701,8 +701,8 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, cmd->u.position.y = plane->state->crtc_y + fb->hot_y; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); qxl_release_fence_buffer_objects(release); + qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); if (old_cursor_bo) qxl_bo_unref(&old_cursor_bo); @@ -747,8 +747,8 @@ static void qxl_cursor_atomic_disable(struct drm_plane *plane, cmd->type = QXL_CURSOR_HIDE; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); qxl_release_fence_buffer_objects(release); + qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); } static int qxl_plane_prepare_fb(struct drm_plane *plane, diff --git a/drivers/gpu/drm/qxl/qxl_draw.c b/drivers/gpu/drm/qxl/qxl_draw.c index 67f3bdeaa3bf..d009f2bc28e9 100644 --- a/drivers/gpu/drm/qxl/qxl_draw.c +++ b/drivers/gpu/drm/qxl/qxl_draw.c @@ -241,8 +241,8 @@ void qxl_draw_opaque_fb(const struct qxl_fb_image *qxl_fb_image, qxl_bo_physical_address(qdev, dimage->bo, 0); qxl_release_unmap(qdev, release, &drawable->release_info); - qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); qxl_release_fence_buffer_objects(release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); out_free_palette: if (palette_bo) @@ -382,8 +382,8 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev, } qxl_bo_kunmap(clips_bo); - qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); qxl_release_fence_buffer_objects(release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); out_release_backoff: if (ret) @@ -433,8 +433,8 @@ void qxl_draw_copyarea(struct qxl_device *qdev, drawable->u.copy_bits.src_pos.y = sy; qxl_release_unmap(qdev, release, &drawable->release_info); - 
qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); qxl_release_fence_buffer_objects(release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); out_free_release: if (ret) @@ -477,8 +477,8 @@ void qxl_draw_fill(struct qxl_draw_fill *qxl_draw_fill_rec) qxl_release_unmap(qdev, release, &drawable->release_info); - qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); qxl_release_fence_buffer_objects(release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); out_free_release: if (ret) diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index 31effed4a3c8..cede17585525 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -257,11 +257,8 @@ static int qxl_process_single_command(struct qxl_device *qdev, apply_surf_reloc(qdev, &reloc_info[i]); } + qxl_release_fence_buffer_objects(release); ret = qxl_push_command_ring_release(qdev, release, cmd->type, true); - if (ret) - qxl_release_backoff_reserve_list(release); - else - qxl_release_fence_buffer_objects(release); out_free_bos: out_free_release: -- GitLab From 4f7ade415291d187d61f34fd85e925bbd96d1f47 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Tue, 21 Apr 2020 10:54:11 +0800 Subject: [PATCH 1230/1278] btrfs: fix block group leak when removing fails commit f6033c5e333238f299c3ae03fac8cc1365b23b77 upstream. btrfs_remove_block_group() invokes btrfs_lookup_block_group(), which returns a local reference of the block group that contains the given bytenr to "block_group" with increased refcount. When btrfs_remove_block_group() returns, "block_group" becomes invalid, so the refcount should be decreased to keep refcount balanced. The reference counting issue happens in several exception handling paths of btrfs_remove_block_group(). 
When those error scenarios occur such as btrfs_alloc_path() returns NULL, the function forgets to decrease its refcnt increased by btrfs_lookup_block_group() and will cause a refcnt leak. Fix this issue by jumping to "out_put_group" label and calling btrfs_put_block_group() when those error scenarios occur. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fd15f396b3a0..51e26f90f0bb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10554,7 +10554,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; - goto out; + goto out_put_group; } /* @@ -10591,7 +10591,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) { btrfs_add_delayed_iput(inode); - goto out; + goto out_put_group; } clear_nlink(inode); /* One for the block groups ref */ @@ -10614,13 +10614,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); if (ret < 0) - goto out; + goto out_put_group; if (ret > 0) btrfs_release_path(path); if (ret == 0) { ret = btrfs_del_item(trans, tree_root, path); if (ret) - goto out; + goto out_put_group; btrfs_release_path(path); } @@ -10778,9 +10778,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = remove_block_group_free_space(trans, fs_info, block_group); if (ret) - goto out; + goto out_put_group; - btrfs_put_block_group(block_group); + /* Once for the block groups rbtree */ btrfs_put_block_group(block_group); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); @@ -10790,6 +10790,10 @@ int btrfs_remove_block_group(struct 
btrfs_trans_handle *trans, goto out; ret = btrfs_del_item(trans, root, path); + +out_put_group: + /* Once for the lookup reference */ + btrfs_put_block_group(block_group); out: btrfs_free_path(path); return ret; -- GitLab From 3aaee8e2fe53c59b8c85ed17118b7e795ac6bd99 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 23 Apr 2020 16:30:53 +0100 Subject: [PATCH 1231/1278] btrfs: fix partial loss of prealloc extent past i_size after fsync commit f135cea30de5f74d5bfb5116682073841fb4af8f upstream. When we have an inode with a prealloc extent that starts at an offset lower than the i_size and there is another prealloc extent that starts at an offset beyond i_size, we can end up losing part of the first prealloc extent (the part that starts at i_size) and have an implicit hole if we fsync the file and then have a power failure. Consider the following example with comments explaining how and why it happens. $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt # Create our test file with 2 consecutive prealloc extents, each with a # size of 128Kb, and covering the range from 0 to 256Kb, with a file # size of 0. $ xfs_io -f -c "falloc -k 0 128K" /mnt/foo $ xfs_io -c "falloc -k 128K 128K" /mnt/foo # Fsync the file to record both extents in the log tree. $ xfs_io -c "fsync" /mnt/foo # Now do a redundant extent allocation for the range from 0 to 64Kb. # This will merely increase the file size from 0 to 64Kb. Instead we # could also do a truncate to set the file size to 64Kb. $ xfs_io -c "falloc 0 64K" /mnt/foo # Fsync the file, so we update the inode item in the log tree with the # new file size (64Kb). This also ends up setting the number of bytes # for the first prealloc extent to 64Kb. This is done by the truncation # at btrfs_log_prealloc_extents(). # This means that if a power failure happens after this, a write into # the file range 64Kb to 128Kb will not use the prealloc extent and # will result in allocation of a new extent.
$ xfs_io -c "fsync" /mnt/foo # Now set the file size to 256K with a truncate and then fsync the file. # Since no changes happened to the extents, the fsync only updates the # i_size in the inode item at the log tree. This results in an implicit # hole for the file range from 64Kb to 128Kb, something which fsck will # complain when not using the NO_HOLES feature if we replay the log # after a power failure. $ xfs_io -c "truncate 256K" -c "fsync" /mnt/foo So instead of always truncating the log to the inode's current i_size at btrfs_log_prealloc_extents(), check first if there's a prealloc extent that starts at an offset lower than the i_size and with a length that crosses the i_size - if there is one, just make sure we truncate to a size that corresponds to the end offset of that prealloc extent, so that we don't lose the part of that extent that starts at i_size if a power failure happens. A test case for fstests follows soon. Fixes: 31d11b83b96f ("Btrfs: fix duplicate extents after fsync of file with prealloc extents") CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 0b62c8080af0..bcfb7a772c8e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4155,6 +4155,9 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, const u64 ino = btrfs_ino(inode); struct btrfs_path *dst_path = NULL; bool dropped_extents = false; + u64 truncate_offset = i_size; + struct extent_buffer *leaf; + int slot; int ins_nr = 0; int start_slot; int ret; @@ -4169,9 +4172,43 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, if (ret < 0) goto out; + /* + * We must check if there is a prealloc extent that starts before the + * i_size and crosses the i_size boundary. 
This is to ensure later we + * truncate down to the end of that extent and not to the i_size, as + * otherwise we end up losing part of the prealloc extent after a log + * replay and with an implicit hole if there is another prealloc extent + * that starts at an offset beyond i_size. + */ + ret = btrfs_previous_item(root, path, ino, BTRFS_EXTENT_DATA_KEY); + if (ret < 0) + goto out; + + if (ret == 0) { + struct btrfs_file_extent_item *ei; + + leaf = path->nodes[0]; + slot = path->slots[0]; + ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, ei) == + BTRFS_FILE_EXTENT_PREALLOC) { + u64 extent_end; + + btrfs_item_key_to_cpu(leaf, &key, slot); + extent_end = key.offset + + btrfs_file_extent_num_bytes(leaf, ei); + + if (extent_end > i_size) + truncate_offset = extent_end; + } + } else { + ret = 0; + } + while (true) { - struct extent_buffer *leaf = path->nodes[0]; - int slot = path->slots[0]; + leaf = path->nodes[0]; + slot = path->slots[0]; if (slot >= btrfs_header_nritems(leaf)) { if (ins_nr > 0) { @@ -4209,7 +4246,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, ret = btrfs_truncate_inode_items(trans, root->log_root, &inode->vfs_inode, - i_size, + truncate_offset, BTRFS_EXTENT_DATA_KEY); } while (ret == -EAGAIN); if (ret) -- GitLab From a923cdf0883a4f4548ec7c940e3473d858f6e9a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Mon, 20 Apr 2020 10:04:44 +0200 Subject: [PATCH 1232/1278] mmc: sdhci-xenon: fix annoying 1.8V regulator warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bb32e1987bc55ce1db400faf47d85891da3c9b9f upstream. For some reason the Host Control2 register of the Xenon SDHCI controller sometimes reports the bit representing 1.8V signaling as 0 when read after it was written as 1. Subsequent read reports 1. 
This causes the sdhci_start_signal_voltage_switch function to report 1.8V regulator output did not become stable When CONFIG_PM is enabled, the host is suspended and resumed many times, and in each resume the switch to 1.8V is called, and so the kernel log reports this message annoyingly often. Do an empty read of the Host Control2 register in Xenon's .voltage_switch method to circumvent this. This patch fixes this particular problem on Turris MOX. Signed-off-by: Marek Behún Fixes: 8d876bf472db ("mmc: sdhci-xenon: wait 5ms after set 1.8V...") Cc: stable@vger.kernel.org # v4.16+ Link: https://lore.kernel.org/r/20200420080444.25242-1-marek.behun@nic.cz Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-xenon.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index a0b5089b3274..fafb02644efd 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -238,6 +238,16 @@ static void xenon_voltage_switch(struct sdhci_host *host) { /* Wait for 5ms after set 1.8V signal enable bit */ usleep_range(5000, 5500); + + /* + * For some reason the controller's Host Control2 register reports + * the bit representing 1.8V signaling as 0 when read after it was + * written as 1. Subsequent read reports 1. + * + * Since this may cause some issues, do an empty read of the Host + * Control2 register here to circumvent this. + */ + sdhci_readw(host, SDHCI_HOST_CONTROL2); } static const struct sdhci_ops sdhci_xenon_ops = { -- GitLab From 08e05f4f650703106f91933e50e40f8244dcb6a7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 22 Apr 2020 14:16:29 +0300 Subject: [PATCH 1233/1278] mmc: sdhci-pci: Fix eMMC driver strength for BYT-based controllers commit 1a8eb6b373c2af6533c13d1ea11f504e5010ed9a upstream.
BIOS writers have begun the practice of setting 40 ohm eMMC driver strength even though the eMMC may not support it, on the assumption that the kernel will validate the value against the eMMC (Extended CSD DRIVER_STRENGTH [offset 197]) and revert to the default 50 ohm value if 40 ohm is invalid. This is done to avoid changing the value for different boards. Putting aside the merits of this approach, it is clear the eMMC's mask of supported driver strengths is more reliable than the value provided by BIOS. Add validation accordingly. Signed-off-by: Adrian Hunter Fixes: 51ced59cc02e ("mmc: sdhci-pci: Use ACPI DSM to get driver strength for some Intel devices") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200422111629.4899-1-adrian.hunter@intel.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 0e386f5cc836..4bc89551229b 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -490,6 +490,9 @@ static int intel_select_drive_strength(struct mmc_card *card, struct sdhci_pci_slot *slot = sdhci_priv(host); struct intel_host *intel_host = sdhci_pci_priv(slot); + if (!(mmc_driver_type_mask(intel_host->drv_strength) & card_drv)) + return 0; + return intel_host->drv_strength; } -- GitLab From a088316c38b98d1f3e45c6d85e773b130768a909 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 27 Apr 2020 11:00:39 +0800 Subject: [PATCH 1234/1278] ALSA: hda/realtek - Two front mics on a Lenovo ThinkCenter commit ef0b3203c758b6b8abdb5dca651880347eae6b8c upstream. This new Lenovo ThinkCenter has two front mics which can't be handled by PA so far, so apply the fixup ALC283_FIXUP_HEADSET_MIC to change the location for one of the mics. 
Cc: Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20200427030039.10121-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 288ea05dfa3c..b2aec97414fb 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6590,6 +6590,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), + SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE), -- GitLab From 7d8378ca26173c0f318adb41fecf39fa66f1684f Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Sun, 26 Apr 2020 21:17:22 +0800 Subject: [PATCH 1235/1278] ALSA: hda/hdmi: fix without unlocked before return commit a2f647240998aa49632fb09b01388fdf2b87acfc upstream. Fix the following coccicheck warning: sound/pci/hda/patch_hdmi.c:1852:2-8: preceding lock on line 1846 After add sanity check to pass klockwork check, The spdif_mutex should be unlock before return true in check_non_pcm_per_cvt(). 
Fixes: 960a581e22d9 ("ALSA: hda: fix some klockwork scan warnings") Signed-off-by: Wu Bo Cc: Link: https://lore.kernel.org/r/1587907042-694161-1-git-send-email-wubo40@huawei.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 12913368c231..435c0efb9bf2 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1849,8 +1849,10 @@ static bool check_non_pcm_per_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) /* Add sanity check to pass klockwork check. * This should never happen. */ - if (WARN_ON(spdif == NULL)) + if (WARN_ON(spdif == NULL)) { + mutex_unlock(&codec->spdif_mutex); return true; + } non_pcm = !!(spdif->status & IEC958_AES0_NONAUDIO); mutex_unlock(&codec->spdif_mutex); return non_pcm; -- GitLab From 55052736ae668518abf01f7c59f5d58af2d07857 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 24 Apr 2020 21:33:50 +0200 Subject: [PATCH 1236/1278] ALSA: pcm: oss: Place the plugin buffer overflow checks correctly commit 4285de0725b1bf73608abbcd35ad7fd3ddc0b61e upstream. The checks of the plugin buffer overflow in the previous fix by commit f2ecf903ef06 ("ALSA: pcm: oss: Avoid plugin buffer overflow") are put in the wrong places mistakenly, which leads to the expected (repeated) sound when the rate plugin is involved. Fix in the right places. Also, at those right places, the zero check is needed for the termination node, so added there as well, and let's get it done, finally. 
Fixes: f2ecf903ef06 ("ALSA: pcm: oss: Avoid plugin buffer overflow") Cc: Link: https://lore.kernel.org/r/20200424193350.19678-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_plugin.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 3ecc070738e8..c1315ce98b54 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -211,21 +211,23 @@ static snd_pcm_sframes_t plug_client_size(struct snd_pcm_substream *plug, if (stream == SNDRV_PCM_STREAM_PLAYBACK) { plugin = snd_pcm_plug_last(plug); while (plugin && drv_frames > 0) { - if (check_size && drv_frames > plugin->buf_frames) - drv_frames = plugin->buf_frames; plugin_prev = plugin->prev; if (plugin->src_frames) drv_frames = plugin->src_frames(plugin, drv_frames); + if (check_size && plugin->buf_frames && + drv_frames > plugin->buf_frames) + drv_frames = plugin->buf_frames; plugin = plugin_prev; } } else if (stream == SNDRV_PCM_STREAM_CAPTURE) { plugin = snd_pcm_plug_first(plug); while (plugin && drv_frames > 0) { plugin_next = plugin->next; + if (check_size && plugin->buf_frames && + drv_frames > plugin->buf_frames) + drv_frames = plugin->buf_frames; if (plugin->dst_frames) drv_frames = plugin->dst_frames(plugin, drv_frames); - if (check_size && drv_frames > plugin->buf_frames) - drv_frames = plugin->buf_frames; plugin = plugin_next; } } else @@ -251,26 +253,28 @@ static snd_pcm_sframes_t plug_slave_size(struct snd_pcm_substream *plug, plugin = snd_pcm_plug_first(plug); while (plugin && frames > 0) { plugin_next = plugin->next; + if (check_size && plugin->buf_frames && + frames > plugin->buf_frames) + frames = plugin->buf_frames; if (plugin->dst_frames) { frames = plugin->dst_frames(plugin, frames); if (frames < 0) return frames; } - if (check_size && frames > plugin->buf_frames) - frames = plugin->buf_frames; plugin = plugin_next; } } else if 
(stream == SNDRV_PCM_STREAM_CAPTURE) { plugin = snd_pcm_plug_last(plug); while (plugin) { - if (check_size && frames > plugin->buf_frames) - frames = plugin->buf_frames; plugin_prev = plugin->prev; if (plugin->src_frames) { frames = plugin->src_frames(plugin, frames); if (frames < 0) return frames; } + if (check_size && plugin->buf_frames && + frames > plugin->buf_frames) + frames = plugin->buf_frames; plugin = plugin_prev; } } else -- GitLab From 79a3b3bb1cd1df58e2e7fb53f0e82b0ffcba28ab Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 21 Apr 2020 15:55:16 +0800 Subject: [PATCH 1237/1278] PM: ACPI: Output correct message on target power state commit a9b760b0266f563b4784f695bbd0e717610dc10a upstream. Transitioned power state logged at the end of setting ACPI power. However, D3cold won't be in the message because state can only be D3hot at most. Use target_state to correctly report when power state is D3cold. Cc: All applicable Signed-off-by: Kai-Heng Feng Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/device_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index fc300ce3ae8e..afb1bc104a6f 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -227,13 +227,13 @@ int acpi_device_set_power(struct acpi_device *device, int state) end: if (result) { dev_warn(&device->dev, "Failed to change power state to %s\n", - acpi_power_state_string(state)); + acpi_power_state_string(target_state)); } else { device->power.state = target_state; ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] transitioned to %s\n", device->pnp.bus_id, - acpi_power_state_string(state))); + acpi_power_state_string(target_state))); } return result; -- GitLab From 3ad04be60c18651061096d42c5056bb28a5a4ce4 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 23 Apr 2020 20:40:16 -0700 Subject: [PATCH 1238/1278] PM: hibernate: Freeze kernel threads in software_resume() commit
2351f8d295ed63393190e39c2f7c1fee1a80578f upstream. Currently the kernel threads are not frozen in software_resume(), so between dpm_suspend_start(PMSG_QUIESCE) and resume_target_kernel(), system_freezable_power_efficient_wq can still try to submit SCSI commands and this can cause a panic since the low level SCSI driver (e.g. hv_storvsc) has quiesced the SCSI adapter and can not accept any SCSI commands: https://lkml.org/lkml/2020/4/10/47 At first I posted a fix (https://lkml.org/lkml/2020/4/21/1318) trying to resolve the issue from hv_storvsc, but with the help of Bart Van Assche, I realized it's better to fix software_resume(), since this looks like a generic issue, not only pertaining to SCSI. Cc: All applicable Signed-off-by: Dexuan Cui Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/hibernate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 2d6d14ad7b4f..2e65aacfa116 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -892,6 +892,13 @@ static int software_resume(void) error = freeze_processes(); if (error) goto Close_Finish; + + error = freeze_kernel_threads(); + if (error) { + thaw_processes(); + goto Close_Finish; + } + error = load_image_and_restore(); thaw_processes(); Finish: -- GitLab From 74381a0cf22edda38dbb8cb492ce4d097868b195 Mon Sep 17 00:00:00 2001 From: Sunwook Eom Date: Fri, 10 Apr 2020 12:54:19 +0900 Subject: [PATCH 1239/1278] dm verity fec: fix hash block number in verity_fec_decode commit ad4e80a639fc61d5ecebb03caa5cdbfb91fcebfc upstream. The error correction data is computed as if data and hash blocks were concatenated. But hash block number starts from v->hash_start. So, we have to calculate hash block number based on that. 
Fixes: a739ff3f543af ("dm verity: add support for forward error correction") Cc: stable@vger.kernel.org Signed-off-by: Sunwook Eom Reviewed-by: Sami Tolvanen Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-fec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 285148100cde..0fe6cc3007f5 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -436,7 +436,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, fio->level++; if (type == DM_VERITY_BLOCK_TYPE_METADATA) - block += v->data_blocks; + block = block - v->hash_start + v->data_blocks; /* * For RS(M, N), the continuous FEC data is divided into blocks of N -- GitLab From 2b6131e8316df2235dc0f63c03008376e027cee8 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Mon, 13 Apr 2020 16:20:28 +0300 Subject: [PATCH 1240/1278] RDMA/mlx5: Set GRH fields in query QP on RoCE commit 2d7e3ff7b6f2c614eb21d0dc348957a47eaffb57 upstream. GRH fields such as sgid_index, hop limit, etc. are set in the QP context when QP is created/modified. Currently, when query QP is performed, we fill the GRH fields only if the GRH bit is set in the QP context, but this bit is not set for RoCE. Adjust the check so we will set all relevant data for the RoCE too. Since this data is returned to userspace, the below is an ABI regression.
Fixes: d8966fcd4c25 ("IB/core: Use rdma_ah_attr accessor functions") Link: https://lore.kernel.org/r/20200413132028.930109-1-leon@kernel.org Signed-off-by: Aharon Landau Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0cb60072c82f..d835ef2ce23c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4362,7 +4362,9 @@ static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev, rdma_ah_set_path_bits(ah_attr, path->grh_mlid & 0x7f); rdma_ah_set_static_rate(ah_attr, path->static_rate ? path->static_rate - 5 : 0); - if (path->grh_mlid & (1 << 7)) { + + if (path->grh_mlid & (1 << 7) || + ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { u32 tc_fl = be32_to_cpu(path->tclass_flowlabel); rdma_ah_set_grh(ah_attr, NULL, -- GitLab From 28acd21db599365892cf5910eaeab24e6782032e Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Mon, 13 Apr 2020 16:22:35 +0300 Subject: [PATCH 1241/1278] RDMA/mlx4: Initialize ib_spec on the stack commit c08cfb2d8d78bfe81b37cc6ba84f0875bddd0d5c upstream. Initialize ib_spec on the stack before using it, otherwise we will have garbage values that will break creating default rules with invalid parsing error. 
Fixes: a37a1a428431 ("IB/mlx4: Add mechanism to support flow steering over IB links") Link: https://lore.kernel.org/r/20200413132235.930642-1-leon@kernel.org Signed-off-by: Alaa Hleihel Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 7e73a1a6cb67..3f8511104c5b 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1614,8 +1614,9 @@ static int __mlx4_ib_create_default_rules( int i; for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) { + union ib_flow_spec ib_spec = {}; int ret; - union ib_flow_spec ib_spec; + switch (pdefault_rules->rules_create_list[i]) { case 0: /* no rule */ -- GitLab From b4324db6f543504e28643fece393455b6a6f8112 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Wed, 8 Apr 2020 03:12:34 -0400 Subject: [PATCH 1242/1278] vfio: avoid possible overflow in vfio_iommu_type1_pin_pages commit 0ea971f8dcd6dee78a9a30ea70227cf305f11ff7 upstream. add parentheses to avoid possible vaddr overflow. 
Fixes: a54eb55045ae ("vfio iommu type1: Add support for mediated devices") Signed-off-by: Yan Zhao Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_type1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index f77a9b3370b5..690ae081eedc 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -591,7 +591,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, continue; } - remote_vaddr = dma->vaddr + iova - dma->iova; + remote_vaddr = dma->vaddr + (iova - dma->iova); ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i], do_accounting); if (ret) -- GitLab From 440e152362d19bb5ffe7ef5677de1107ee9c0989 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 16 Apr 2020 15:50:57 -0700 Subject: [PATCH 1243/1278] vfio/type1: Fix VA->PA translation for PFNMAP VMAs in vaddr_get_pfn() commit 5cbf3264bc715e9eb384e2b68601f8c02bb9a61d upstream. Use follow_pfn() to get the PFN of a PFNMAP VMA instead of assuming that vma->vm_pgoff holds the base PFN of the VMA. This fixes a bug where attempting to do VFIO_IOMMU_MAP_DMA on an arbitrary PFNMAP'd region of memory calculates garbage for the PFN. Hilariously, this only got detected because the first "PFN" calculated by vaddr_get_pfn() is PFN 0 (vma->vm_pgoff==0), and iommu_iova_to_phys() uses PA==0 as an error, which triggers a WARN in vfio_unmap_unpin() because the translation "failed". PFN 0 is now unconditionally reserved on x86 in order to mitigate L1TF, which causes is_invalid_reserved_pfn() to return true and in turns results in vaddr_get_pfn() returning success for PFN 0. Eventually the bogus calculation runs into PFNs that aren't reserved and leads to failure in vfio_pin_map_dma(). The subsequent call to vfio_remove_dma() attempts to unmap PFN 0 and WARNs. 
WARNING: CPU: 8 PID: 5130 at drivers/vfio/vfio_iommu_type1.c:750 vfio_unmap_unpin+0x2e1/0x310 [vfio_iommu_type1] Modules linked in: vfio_pci vfio_virqfd vfio_iommu_type1 vfio ... CPU: 8 PID: 5130 Comm: sgx Tainted: G W 5.6.0-rc5-705d787c7fee-vfio+ #3 Hardware name: Intel Corporation Mehlow UP Server Platform/Moss Beach Server, BIOS CNLSE2R1.D00.X119.B49.1803010910 03/01/2018 RIP: 0010:vfio_unmap_unpin+0x2e1/0x310 [vfio_iommu_type1] Code: <0f> 0b 49 81 c5 00 10 00 00 e9 c5 fe ff ff bb 00 10 00 00 e9 3d fe RSP: 0018:ffffbeb5039ebda8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff9a55cbf8d480 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff9a52b771c200 RBP: 0000000000000000 R08: 0000000000000040 R09: 00000000fffffff2 R10: 0000000000000001 R11: ffff9a51fa896000 R12: 0000000184010000 R13: 0000000184000000 R14: 0000000000010000 R15: ffff9a55cb66ea08 FS: 00007f15d3830b40(0000) GS:ffff9a55d5600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000561cf39429e0 CR3: 000000084f75f005 CR4: 00000000003626e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: vfio_remove_dma+0x17/0x70 [vfio_iommu_type1] vfio_iommu_type1_ioctl+0x9e3/0xa7b [vfio_iommu_type1] ksys_ioctl+0x92/0xb0 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x4c/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15d04c75d7 Code: <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48 Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation") Signed-off-by: Sean Christopherson Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_type1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 690ae081eedc..35a3750a6ddd 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -378,8 +378,8 @@ 
static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, vma = find_vma_intersection(mm, vaddr, vaddr + 1); if (vma && vma->vm_flags & VM_PFNMAP) { - *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - if (is_invalid_reserved_pfn(*pfn)) + if (!follow_pfn(vma, vaddr, pfn) && + is_invalid_reserved_pfn(*pfn)) ret = 0; } -- GitLab From ecf93ef5fa40d26e99b48e9f18e18ad6029708bc Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Sat, 18 Apr 2020 21:47:03 +0800 Subject: [PATCH 1244/1278] iommu/qcom: Fix local_base status check commit b52649aee6243ea661905bdc5fbe28cc5f6dec76 upstream. The function qcom_iommu_device_probe() does not perform sufficient error checking after executing devm_ioremap_resource(), which can result in crashes if a critical error path is encountered. Fixes: 0ae349a0f33f ("iommu/qcom: Add qcom_iommu") Signed-off-by: Tang Bin Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20200418134703.1760-1-tangbin@cmss.chinamobile.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/qcom_iommu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index b08002851e06..920a5df319bc 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -775,8 +775,11 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) qcom_iommu->dev = dev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res) + if (res) { qcom_iommu->local_base = devm_ioremap_resource(dev, res); + if (IS_ERR(qcom_iommu->local_base)) + return PTR_ERR(qcom_iommu->local_base); + } qcom_iommu->iface_clk = devm_clk_get(dev, "iface"); if (IS_ERR(qcom_iommu->iface_clk)) { -- GitLab From 9ecf5561f13fbf1d919ea6c797cffc85ed42f6b9 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Sun, 19 Apr 2020 18:31:09 +0200 Subject: [PATCH 1245/1278] scsi: target/iblock: fix WRITE SAME zeroing commit 1d2ff149b263c9325875726a7804a0c75ef7112e upstream. 
SBC4 specifies that WRITE SAME requests with the UNMAP bit set to zero "shall perform the specified write operation to each LBA specified by the command". Commit 2237498f0b5c ("target/iblock: Convert WRITE_SAME to blkdev_issue_zeroout") modified the iblock backend to call blkdev_issue_zeroout() when handling WRITE SAME requests with UNMAP=0 and a zero data segment. The iblock blkdev_issue_zeroout() call incorrectly provides a flags parameter of 0 (bool false), instead of BLKDEV_ZERO_NOUNMAP. The bool false parameter reflects the blkdev_issue_zeroout() API prior to commit ee472d835c26 ("block: add a flags argument to (__)blkdev_issue_zeroout") which was merged shortly before 2237498f0b5c. Link: https://lore.kernel.org/r/20200419163109.11689-1-ddiss@suse.de Fixes: 2237498f0b5c ("target/iblock: Convert WRITE_SAME to blkdev_issue_zeroout") Reviewed-by: Bart Van Assche Signed-off-by: David Disseldorp Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_iblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 60429011292a..2a9e023f5429 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -447,7 +447,7 @@ iblock_execute_zero_out(struct block_device *bdev, struct se_cmd *cmd) target_to_linux_sector(dev, cmd->t_task_lba), target_to_linux_sector(dev, sbc_get_write_same_sectors(cmd)), - GFP_KERNEL, false); + GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); if (ret) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; -- GitLab From 41c978d8d33ba1c9d46161316f17eec0c81a99da Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 22 Apr 2020 08:30:02 -0500 Subject: [PATCH 1246/1278] iommu/amd: Fix legacy interrupt remapping for x2APIC-enabled system commit b74aa02d7a30ee5e262072a7d6e8deff10b37924 upstream. 
Currently, system fails to boot because the legacy interrupt remapping mode does not enable 128-bit IRTE (GA), which is required for x2APIC support. Fix by using AMD_IOMMU_GUEST_IR_LEGACY_GA mode when booting with kernel option amd_iommu_intr=legacy instead. The initialization logic will check GASup and automatically fallback to using AMD_IOMMU_GUEST_IR_LEGACY if GA mode is not supported. Fixes: 3928aa3f5775 ("iommu/amd: Detect and enable guest vAPIC support") Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/1587562202-14183-1-git-send-email-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 4d2920988d60..6c228144b3da 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2809,7 +2809,7 @@ static int __init parse_amd_iommu_intr(char *str) { for (; *str; ++str) { if (strncmp(str, "legacy", 6) == 0) { - amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; break; } if (strncmp(str, "vapic", 5) == 0) { -- GitLab From dc4a3585daefe9b188bfc467d27d10477f45d670 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 29 Apr 2020 21:02:03 +0200 Subject: [PATCH 1247/1278] ALSA: opti9xx: shut up gcc-10 range warning commit 5ce00760a84848d008554c693ceb6286f4d9c509 upstream. 
gcc-10 points out a few instances of suspicious integer arithmetic leading to value truncation: sound/isa/opti9xx/opti92x-ad1848.c: In function 'snd_opti9xx_configure': sound/isa/opti9xx/opti92x-ad1848.c:322:43: error: overflow in conversion from 'int' to 'unsigned char' changes value from '(int)snd_opti9xx_read(chip, 3) & -256 | 240' to '240' [-Werror=overflow] 322 | (snd_opti9xx_read(chip, reg) & ~(mask)) | ((value) & (mask))) | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~ sound/isa/opti9xx/opti92x-ad1848.c:351:3: note: in expansion of macro 'snd_opti9xx_write_mask' 351 | snd_opti9xx_write_mask(chip, OPTi9XX_MC_REG(3), 0xf0, 0xff); | ^~~~~~~~~~~~~~~~~~~~~~ sound/isa/opti9xx/miro.c: In function 'snd_miro_configure': sound/isa/opti9xx/miro.c:873:40: error: overflow in conversion from 'int' to 'unsigned char' changes value from '(int)snd_miro_read(chip, 3) & -256 | 240' to '240' [-Werror=overflow] 873 | (snd_miro_read(chip, reg) & ~(mask)) | ((value) & (mask))) | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~ sound/isa/opti9xx/miro.c:1010:3: note: in expansion of macro 'snd_miro_write_mask' 1010 | snd_miro_write_mask(chip, OPTi9XX_MC_REG(3), 0xf0, 0xff); | ^~~~~~~~~~~~~~~~~~~ These are all harmless here as only the low 8 bit are passed down anyway. Change the macros to inline functions to make the code more readable and also avoid the warning. Strictly speaking those functions also need locking to make the read/write pair atomic, but it seems unlikely that anyone would still run into that issue. 
Fixes: 1841f613fd2e ("[ALSA] Add snd-miro driver") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20200429190216.85919-1-arnd@arndb.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/isa/opti9xx/miro.c | 9 ++++++--- sound/isa/opti9xx/opti92x-ad1848.c | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c index 8894c7c18ad6..d92c3c6b6051 100644 --- a/sound/isa/opti9xx/miro.c +++ b/sound/isa/opti9xx/miro.c @@ -875,10 +875,13 @@ static void snd_miro_write(struct snd_miro *chip, unsigned char reg, spin_unlock_irqrestore(&chip->lock, flags); } +static inline void snd_miro_write_mask(struct snd_miro *chip, + unsigned char reg, unsigned char value, unsigned char mask) +{ + unsigned char oldval = snd_miro_read(chip, reg); -#define snd_miro_write_mask(chip, reg, value, mask) \ - snd_miro_write(chip, reg, \ - (snd_miro_read(chip, reg) & ~(mask)) | ((value) & (mask))) + snd_miro_write(chip, reg, (oldval & ~mask) | (value & mask)); +} /* * Proc Interface diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c index 505cd81e19fa..4ef3caaf4354 100644 --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -327,10 +327,13 @@ static void snd_opti9xx_write(struct snd_opti9xx *chip, unsigned char reg, } -#define snd_opti9xx_write_mask(chip, reg, value, mask) \ - snd_opti9xx_write(chip, reg, \ - (snd_opti9xx_read(chip, reg) & ~(mask)) | ((value) & (mask))) +static inline void snd_opti9xx_write_mask(struct snd_opti9xx *chip, + unsigned char reg, unsigned char value, unsigned char mask) +{ + unsigned char oldval = snd_opti9xx_read(chip, reg); + snd_opti9xx_write(chip, reg, (oldval & ~mask) | (value & mask)); +} static int snd_opti9xx_configure(struct snd_opti9xx *chip, long port, -- GitLab From 4d6df332cf7c19118f3aafee8c7aa55a8feeac8e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 20 Apr 2020 15:51:47 
+0200 Subject: [PATCH 1248/1278] nfs: Fix potential posix_acl refcnt leak in nfs3_set_acl commit 7648f939cb919b9d15c21fff8cd9eba908d595dc upstream. nfs3_set_acl keeps track of the acl it allocated locally to determine if an acl needs to be released at the end. This results in a memory leak when the function allocates an acl as well as a default acl. Fix by releasing acls that differ from the acl originally passed into nfs3_set_acl. Fixes: b7fa0554cf1b ("[PATCH] NFS: Add support for NFSv3 ACLs") Reported-by: Xiyu Yang Signed-off-by: Andreas Gruenbacher Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs3acl.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 7173a4ee862c..5e9f9c70fe70 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -253,37 +253,45 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type) { - struct posix_acl *alloc = NULL, *dfacl = NULL; + struct posix_acl *orig = acl, *dfacl = NULL, *alloc; int status; if (S_ISDIR(inode->i_mode)) { switch(type) { case ACL_TYPE_ACCESS: - alloc = dfacl = get_acl(inode, ACL_TYPE_DEFAULT); + alloc = get_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(alloc)) goto fail; + dfacl = alloc; break; case ACL_TYPE_DEFAULT: - dfacl = acl; - alloc = acl = get_acl(inode, ACL_TYPE_ACCESS); + alloc = get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(alloc)) goto fail; + dfacl = acl; + acl = alloc; break; } } if (acl == NULL) { - alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + alloc = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); if (IS_ERR(alloc)) goto fail; + acl = alloc; } status = __nfs3_proc_setacls(inode, acl, dfacl); - posix_acl_release(alloc); +out: + if (acl != orig) + posix_acl_release(acl); + if (dfacl != orig) + posix_acl_release(dfacl); return status; fail: - return PTR_ERR(alloc); + status = PTR_ERR(alloc); + goto 
out; } const struct xattr_handler *nfs3_xattr_handlers[] = { -- GitLab From bd9339bf0b62f70a2f3f29c47262e667f75b1139 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Apr 2020 19:11:42 +0300 Subject: [PATCH 1249/1278] dmaengine: dmatest: Fix iteration non-stop logic commit b9f960201249f20deea586b4ec814669b4c6b1c0 upstream. Under some circumstances, i.e. when test is still running and about to time out and user runs, for example, grep -H . /sys/module/dmatest/parameters/* the iterations parameter is not respected and test is going on and on until user gives echo 0 > /sys/module/dmatest/parameters/run This is not what is expected. The history of this bug is interesting. I thought that the commit 2d88ce76eb98 ("dmatest: add a 'wait' parameter") is a culprit, but looking closer to the code I think it simply revealed the broken logic from the day one, i.e. in the commit 0a2ff57d6fba ("dmaengine: dmatest: add a maximum number of test iterations") which adds iterations parameter. So, to the point, the conditional of checking the thread to be stopped being first part of conjunction logic prevents to check iterations. Thus, we have to always check both conditions to be able to stop after given iterations. Since it wasn't visible before second commit appeared, I add a respective Fixes tag.
Fixes: 2d88ce76eb98 ("dmatest: add a 'wait' parameter") Cc: Dan Williams Cc: Nicolas Ferre Signed-off-by: Andy Shevchenko Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20200424161147.16895-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dmatest.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index e39336127741..d19a602beebd 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -552,8 +552,8 @@ static int dmatest_func(void *data) flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT; ktime = ktime_get(); - while (!kthread_should_stop() - && !(params->iterations && total_tests >= params->iterations)) { + while (!(kthread_should_stop() || + (params->iterations && total_tests >= params->iterations))) { struct dma_async_tx_descriptor *tx = NULL; struct dmaengine_unmap_data *um; dma_addr_t srcs[src_cnt]; -- GitLab From 90d4469b0aefac7695e48a641551f74a38f55613 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 28 Apr 2020 09:59:02 -0400 Subject: [PATCH 1250/1278] selinux: properly handle multiple messages in selinux_netlink_send() commit fb73974172ffaaf57a7c42f35424d9aece1a5af6 upstream. Fix the SELinux netlink_send hook to properly handle multiple netlink messages in a single sk_buff; each message is parsed and subject to SELinux access control. Prior to this patch, SELinux only inspected the first message in the sk_buff. 
Cc: stable@vger.kernel.org Reported-by: Dmitry Vyukov Reviewed-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/hooks.c | 68 ++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5f7bfc65c446..5def19ec1179 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5121,39 +5121,59 @@ static int selinux_tun_dev_open(void *security) static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) { - int err = 0; - u32 perm; + int rc = 0; + unsigned int msg_len; + unsigned int data_len = skb->len; + unsigned char *data = skb->data; struct nlmsghdr *nlh; struct sk_security_struct *sksec = sk->sk_security; + u16 sclass = sksec->sclass; + u32 perm; - if (skb->len < NLMSG_HDRLEN) { - err = -EINVAL; - goto out; - } - nlh = nlmsg_hdr(skb); + while (data_len >= nlmsg_total_size(0)) { + nlh = (struct nlmsghdr *)data; - err = selinux_nlmsg_lookup(sksec->sclass, nlh->nlmsg_type, &perm); - if (err) { - if (err == -EINVAL) { + /* NOTE: the nlmsg_len field isn't reliably set by some netlink + * users which means we can't reject skb's with bogus + * length fields; our solution is to follow what + * netlink_rcv_skb() does and simply skip processing at + * messages with length fields that are clearly junk + */ + if (nlh->nlmsg_len < NLMSG_HDRLEN || nlh->nlmsg_len > data_len) + return 0; + + rc = selinux_nlmsg_lookup(sclass, nlh->nlmsg_type, &perm); + if (rc == 0) { + rc = sock_has_perm(sk, perm); + if (rc) + return rc; + } else if (rc == -EINVAL) { + /* -EINVAL is a missing msg/perm mapping */ pr_warn_ratelimited("SELinux: unrecognized netlink" - " message: protocol=%hu nlmsg_type=%hu sclass=%s" - " pig=%d comm=%s\n", - sk->sk_protocol, nlh->nlmsg_type, - secclass_map[sksec->sclass - 1].name, - task_pid_nr(current), current->comm); - if (!selinux_enforcing || security_get_allow_unknown()) - err = 0; + " 
message: protocol=%hu nlmsg_type=%hu sclass=%s" + " pid=%d comm=%s\n", + sk->sk_protocol, nlh->nlmsg_type, + secclass_map[sclass - 1].name, + task_pid_nr(current), current->comm); + if (selinux_enforcing && !security_get_allow_unknown()) + return rc; + rc = 0; + } else if (rc == -ENOENT) { + /* -ENOENT is a missing socket/class mapping, ignore */ + rc = 0; + } else { + return rc; } - /* Ignore */ - if (err == -ENOENT) - err = 0; - goto out; + /* move to the next message after applying netlink padding */ + msg_len = NLMSG_ALIGN(nlh->nlmsg_len); + if (msg_len >= data_len) + return 0; + data_len -= msg_len; + data += msg_len; } - err = sock_has_perm(sk, perm); -out: - return err; + return rc; } #ifdef CONFIG_NETFILTER -- GitLab From d71f695ce745df9544a85d8a762f16d72e72df00 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 5 May 2020 19:15:53 +0200 Subject: [PATCH 1251/1278] Linux 4.14.179 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 73e93e596e50..d2baacc1b0f6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 178 +SUBLEVEL = 179 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 01558dbf2e28c0999823be348910d1f14ff5917f Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Thu, 7 May 2020 10:30:32 +0800 Subject: [PATCH 1252/1278] ANDROID: arm64: fix a mismerge in proc.S Fix a mismerge in: "FROMLIST: arm64: mm: avoid x18 in idmap_kpti_install_ng_mappings" We should write x17 to sctlr_el1, not x18. We observed boot failures because of this. 
Before: mrs x17, sctlr_el1 bic x17, x17, #SCTLR_ELx_M msr sctlr_el1, x18 After mrs x17, sctlr_el1 bic x17, x17, #SCTLR_ELx_M msr sctlr_el1, x17 Signed-off-by: Miles Chen Change-Id: Ib4356ec814beb374b7b57117e029241321f5fc22 --- arch/arm64/mm/proc.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 118597813da7..885ac66a9143 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -272,7 +272,7 @@ ENTRY(idmap_kpti_install_ng_mappings) /* We need to walk swapper, so turn off the MMU. */ mrs x17, sctlr_el1 bic x17, x17, #SCTLR_ELx_M - msr sctlr_el1, x18 + msr sctlr_el1, x17 isb /* Everybody is enjoying the idmap, so we can rewrite swapper. */ -- GitLab From ad70bc0e012831920f2ddf37f7bd1f07970de9a7 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 1 May 2020 12:38:40 +0800 Subject: [PATCH 1253/1278] vhost: vsock: kick send_pkt worker once device is started commit 0b841030625cde5f784dd62aec72d6a766faae70 upstream. Ning Bo reported an abnormal 2-second gap when booting Kata container [1]. The unconditional timeout was caused by VSOCK_DEFAULT_CONNECT_TIMEOUT of connecting from the client side. The vhost vsock client tries to connect an initializing virtio vsock server. The abnormal flow looks like: host-userspace vhost vsock guest vsock ============== =========== ============ connect() --------> vhost_transport_send_pkt_work() initializing | vq->private_data==NULL | will not be queued V schedule_timeout(2s) vhost_vsock_start() <--------- device ready set vq->private_data wait for 2s and failed connect() again vq->private_data!=NULL recv connecting pkt Details: 1. Host userspace sends a connect pkt, at that time, guest vsock is under initializing, hence the vhost_vsock_start has not been called. So vq->private_data==NULL, and the pkt is not been queued to send to guest 2. Then it sleeps for 2s 3. After guest vsock finishes initializing, vq->private_data is set 4. 
When host userspace wakes up after 2s, send connecting pkt again, everything is fine. As suggested by Stefano Garzarella, this fixes it by additional kicking the send_pkt worker in vhost_vsock_start once the virtio device is started. This makes the pending pkt sent again. After this patch, kata-runtime (with vsock enabled) boot time is reduced from 3s to 1s on a ThunderX2 arm64 server. [1] https://github.com/kata-containers/runtime/issues/1917 Reported-by: Ning Bo Suggested-by: Stefano Garzarella Signed-off-by: Jia He Link: https://lore.kernel.org/r/20200501043840.186557-1-justin.he@arm.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vsock.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 6391dc5b0ebe..834e88e20550 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -499,6 +499,11 @@ static int vhost_vsock_start(struct vhost_vsock *vsock) mutex_unlock(&vq->mutex); } + /* Some packets may have been queued before the device was started, + * let's kick the send worker to send them. + */ + vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); + mutex_unlock(&vsock->dev.mutex); return 0; -- GitLab From 929599defc7aef8cee2c0ba836f1b281629e37e2 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 26 Jun 2019 12:37:46 +1000 Subject: [PATCH 1254/1278] powerpc/pci/of: Parse unassigned resources commit dead1c845dbe97e0061dae2017eaf3bd8f8f06ee upstream. The pseries platform uses the PCI_PROBE_DEVTREE method of PCI probing which reads "assigned-addresses" of every PCI device and initializes the device resources. However if the property is missing or zero sized, then there is no fallback of any kind and the PCI resources remain undiscovered, i.e. pdev->resource[] array remains empty. 
This adds a fallback which parses the "reg" property in pretty much same way except it marks resources as "unset" which later make Linux assign those resources proper addresses. This has an effect when: 1. a hypervisor failed to assign any resource for a device; 2. /chosen/linux,pci-probe-only=0 is in the DT so the system may try assigning a resource. Neither is likely to happen under PowerVM. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/pci_of_scan.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 6ca1b3a1e196..54e949d5452d 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -82,10 +82,16 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) const __be32 *addrs; u32 i; int proplen; + bool mark_unset = false; addrs = of_get_property(node, "assigned-addresses", &proplen); - if (!addrs) - return; + if (!addrs || !proplen) { + addrs = of_get_property(node, "reg", &proplen); + if (!addrs || !proplen) + return; + mark_unset = true; + } + pr_debug(" parse addresses (%d bytes) @ %p\n", proplen, addrs); for (; proplen >= 20; proplen -= 20, addrs += 5) { flags = pci_parse_of_flags(of_read_number(addrs, 1), 0); @@ -110,6 +116,8 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) continue; } res->flags = flags; + if (mark_unset) + res->flags |= IORESOURCE_UNSET; res->name = pci_name(dev); region.start = base; region.end = base + size - 1; -- GitLab From f111c1d80638aaa6de3b62b4a77e64c0b4f20725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Fri, 27 Mar 2020 16:47:28 -0400 Subject: [PATCH 1255/1278] ASoC: topology: Check return value of pcm_new_ver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream 
commit b3677fc3d68dd942c92de52f0bd9dd8b472a40e6 ] Function pcm_new_ver can fail, so we should check it's return value and handle possible error. Signed-off-by: Amadeusz Sławiński Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200327204729.397-6-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-topology.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index a215b9ad148c..50aa45525be5 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1954,7 +1954,9 @@ static int soc_tplg_pcm_elems_load(struct soc_tplg *tplg, _pcm = pcm; } else { abi_match = false; - pcm_new_ver(tplg, pcm, &_pcm); + ret = pcm_new_ver(tplg, pcm, &_pcm); + if (ret < 0) + return ret; } /* create the FE DAIs and DAI links */ -- GitLab From a88e01df10fec0c3c4170a9be3af4ec147287f29 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 13 Apr 2020 15:21:45 -0500 Subject: [PATCH 1256/1278] selftests/ipc: Fix test failure seen after initial test run [ Upstream commit b87080eab4c1377706c113fc9c0157f19ea8fed1 ] After successfully running the IPC msgque test once, subsequent runs result in a test failure: $ sudo ./run_kselftest.sh TAP version 13 1..1 # selftests: ipc: msgque # Failed to get stats for IPC queue with id 0 # Failed to dump queue: -22 # Bail out! # # Pass 0 Fail 0 Xfail 0 Xpass 0 Skip 0 Error 0 not ok 1 selftests: ipc: msgque # exit=1 The dump_queue() function loops through the possible message queue index values using calls to msgctl(kern_id, MSG_STAT, ...) where kern_id represents the index value. The first time the test is ran, the initial index value of 0 is valid and the test is able to complete. 
The index value of 0 is not valid in subsequent test runs and the loop attempts to try index values of 1, 2, 3, and so on until a valid index value is found that corresponds to the message queue created earlier in the test. The msgctl() syscall returns -1 and sets errno to EINVAL when invalid index values are used. The test failure is caused by incorrectly comparing errno to -EINVAL when cycling through possible index values. Fix invalid test failures on subsequent runs of the msgque test by correctly comparing errno values to a non-negated EINVAL. Fixes: 3a665531a3b7 ("selftests: IPC message queue copy feature test") Signed-off-by: Tyler Hicks Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/ipc/msgque.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index c5587844fbb8..ad723a5d0f83 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -137,7 +137,7 @@ int dump_queue(struct msgque_data *msgque) for (kern_id = 0; kern_id < 256; kern_id++) { ret = msgctl(kern_id, MSG_STAT, &ds); if (ret < 0) { - if (errno == -EINVAL) + if (errno == EINVAL) continue; printf("Failed to get stats for IPC queue with id %d\n", kern_id); -- GitLab From a500f557e4db9bd1415a061695fe72fb52d51bea Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 14 Apr 2020 20:11:40 +0200 Subject: [PATCH 1257/1278] ASoC: sgtl5000: Fix VAG power-on handling [ Upstream commit aa7812737f2877e192d57626cbe8825cc7cf6de9 ] As mentioned slightly out of patch context in the code, there is no reset routine for the chip. On boards where the chip is supplied by a fixed regulator, it might not even be resetted during (e.g. watchdog) reboot and can be in any state. If the device is probed with VAG enabled, the driver's probe routine will generate a loud pop sound when ANA_POWER is being programmed. 
Avoid this by properly disabling just the VAG bit and waiting the required power down time. Signed-off-by: Sebastian Reichel Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20200414181140.145825-1-sebastian.reichel@collabora.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/sgtl5000.c | 34 ++++++++++++++++++++++++++++++++++ sound/soc/codecs/sgtl5000.h | 1 + 2 files changed, 35 insertions(+) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index ca8a70ab22a8..d64cb28e8dc5 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1563,6 +1563,40 @@ static int sgtl5000_i2c_probe(struct i2c_client *client, dev_err(&client->dev, "Error %d initializing CHIP_CLK_CTRL\n", ret); + /* Mute everything to avoid pop from the following power-up */ + ret = regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_CTRL, + SGTL5000_CHIP_ANA_CTRL_DEFAULT); + if (ret) { + dev_err(&client->dev, + "Error %d muting outputs via CHIP_ANA_CTRL\n", ret); + goto disable_clk; + } + + /* + * If VAG is powered-on (e.g. from previous boot), it would be disabled + * by the write to ANA_POWER in later steps of the probe code. This + * may create a loud pop even with all outputs muted. The proper way + * to circumvent this is disabling the bit first and waiting the proper + * cool-down time. 
+ */ + ret = regmap_read(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, &value); + if (ret) { + dev_err(&client->dev, "Failed to read ANA_POWER: %d\n", ret); + goto disable_clk; + } + if (value & SGTL5000_VAG_POWERUP) { + ret = regmap_update_bits(sgtl5000->regmap, + SGTL5000_CHIP_ANA_POWER, + SGTL5000_VAG_POWERUP, + 0); + if (ret) { + dev_err(&client->dev, "Error %d disabling VAG\n", ret); + goto disable_clk; + } + + msleep(SGTL5000_VAG_POWERDOWN_DELAY); + } + /* Follow section 2.2.1.1 of AN3663 */ ana_pwr = SGTL5000_ANA_POWER_DEFAULT; if (sgtl5000->num_supplies <= VDDD) { diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h index 22f3442af982..9ea41749d037 100644 --- a/sound/soc/codecs/sgtl5000.h +++ b/sound/soc/codecs/sgtl5000.h @@ -236,6 +236,7 @@ /* * SGTL5000_CHIP_ANA_CTRL */ +#define SGTL5000_CHIP_ANA_CTRL_DEFAULT 0x0133 #define SGTL5000_LINE_OUT_MUTE 0x0100 #define SGTL5000_HP_SEL_MASK 0x0040 #define SGTL5000_HP_SEL_SHIFT 6 -- GitLab From 2e8f2acaf32d5e6f31a734d3e19f9cf8c154fc7e Mon Sep 17 00:00:00 2001 From: Matthias Blankertz Date: Wed, 15 Apr 2020 16:10:17 +0200 Subject: [PATCH 1258/1278] ASoC: rsnd: Fix HDMI channel mapping for multi-SSI mode [ Upstream commit b94e164759b82d0c1c80d4b1c8f12c9bee83f11d ] The HDMI?_SEL register maps up to four stereo SSI data lanes onto the sdata[0..3] inputs of the HDMI output block. The upper half of the register contains four blocks of 4 bits, with the most significant controlling the sdata3 line and the least significant the sdata0 line. The shift calculation has an off-by-one error, causing the parent SSI to be mapped to sdata3, the first multi-SSI child to sdata0 and so forth. As the parent SSI transmits the stereo L/R channels, and the HDMI core expects it on the sdata0 line, this causes no audio to be output when playing stereo audio on a multichannel capable HDMI out, and multichannel audio has permutated channels. 
Fix the shift calculation to map the parent SSI to sdata0, the first child to sdata1 etc. Signed-off-by: Matthias Blankertz Acked-by: Kuninori Morimoto Link: https://lore.kernel.org/r/20200415141017.384017-3-matthias.blankertz@cetitec.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sh/rcar/ssiu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 4d948757d300..5e5ed5475473 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -172,7 +172,7 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod, i; for_each_rsnd_mod_array(i, pos, io, rsnd_ssi_array) { - shift = (i * 4) + 16; + shift = (i * 4) + 20; val = (val & ~(0xF << shift)) | rsnd_mod_id(pos) << shift; } -- GitLab From 95b62395de419da09875db4aa10b4928e9c30024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Wed, 15 Apr 2020 12:28:49 -0400 Subject: [PATCH 1259/1278] ASoC: codecs: hdac_hdmi: Fix incorrect use of list_for_each_entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 326b509238171d37402dbe308e154cc234ed1960 ] If we don't find any pcm, pcm will point at an address at an offset from the list head and not a meaningful structure. Fix this by returning correct pcm if found and NULL if not. Found with coccinelle.
Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20200415162849.308-1-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/hdac_hdmi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index 1c3626347e12..aeeec1144558 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -142,14 +142,14 @@ static struct hdac_hdmi_pcm * hdac_hdmi_get_pcm_from_cvt(struct hdac_hdmi_priv *hdmi, struct hdac_hdmi_cvt *cvt) { - struct hdac_hdmi_pcm *pcm = NULL; + struct hdac_hdmi_pcm *pcm; list_for_each_entry(pcm, &hdmi->pcm_list, head) { if (pcm->cvt == cvt) - break; + return pcm; } - return pcm; + return NULL; } static void hdac_hdmi_jack_report(struct hdac_hdmi_pcm *pcm, -- GitLab From 67fd6fa9f1dd770f410d555f9a1d5ef2beee45f2 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Wed, 15 Apr 2020 16:41:20 +0800 Subject: [PATCH 1260/1278] wimax/i2400m: Fix potential urb refcnt leak [ Upstream commit 7717cbec172c3554d470023b4020d5781961187e ] i2400mu_bus_bm_wait_for_ack() invokes usb_get_urb(), which increases the refcount of the "notif_urb". When i2400mu_bus_bm_wait_for_ack() returns, local variable "notif_urb" becomes invalid, so the refcount should be decreased to keep refcount balanced. The issue happens in all paths of i2400mu_bus_bm_wait_for_ack(), which forget to decrease the refcnt increased by usb_get_urb(), causing a refcnt leak. Fix this issue by calling usb_put_urb() before the i2400mu_bus_bm_wait_for_ack() returns. Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/wimax/i2400m/usb-fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wimax/i2400m/usb-fw.c b/drivers/net/wimax/i2400m/usb-fw.c index 502c346aa790..7d396c81ec3e 100644 --- a/drivers/net/wimax/i2400m/usb-fw.c +++ b/drivers/net/wimax/i2400m/usb-fw.c @@ -354,6 +354,7 @@ ssize_t i2400mu_bus_bm_wait_for_ack(struct i2400m *i2400m, usb_autopm_put_interface(i2400mu->usb_iface); d_fnend(8, dev, "(i2400m %p ack %p size %zu) = %ld\n", i2400m, ack, ack_size, (long) result); + usb_put_urb(&notif_urb); return result; error_exceeded: -- GitLab From 2eb4ce777854e50606d379d8185baefe4c6c4c92 Mon Sep 17 00:00:00 2001 From: Julien Beraud Date: Wed, 15 Apr 2020 14:24:31 +0200 Subject: [PATCH 1261/1278] net: stmmac: fix enabling socfpga's ptp_ref_clock [ Upstream commit 15ce30609d1e88d42fb1cd948f453e6d5f188249 ] There are 2 registers to write to enable a ptp ref clock coming from the fpga. One that enables the usage of the clock from the fpga for emac0 and emac1 as a ptp ref clock, and the other to allow signals from the fpga to reach emac0 and emac1. Currently, if the dwmac-socfpga has phymode set to PHY_INTERFACE_MODE_MII, PHY_INTERFACE_MODE_GMII, or PHY_INTERFACE_MODE_SGMII, both registers will be written and the ptp ref clock will be set as coming from the fpga. Separate the 2 register writes to only enable signals from the fpga to reach emac0 or emac1 when ptp ref clock is not coming from the fpga. Signed-off-by: Julien Beraud Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 5b3b06a0a3bf..33407df6bea6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -274,16 +274,19 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac) phymode == PHY_INTERFACE_MODE_MII || phymode == PHY_INTERFACE_MODE_GMII || phymode == PHY_INTERFACE_MODE_SGMII) { - ctrl |= SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2); regmap_read(sys_mgr_base_addr, SYSMGR_FPGAGRP_MODULE_REG, &module); module |= (SYSMGR_FPGAGRP_MODULE_EMAC << (reg_shift / 2)); regmap_write(sys_mgr_base_addr, SYSMGR_FPGAGRP_MODULE_REG, module); - } else { - ctrl &= ~(SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2)); } + if (dwmac->f2h_ptp_ref_clk) + ctrl |= SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2); + else + ctrl &= ~(SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << + (reg_shift / 2)); + regmap_write(sys_mgr_base_addr, reg_offset, ctrl); /* Deassert reset for the phy configuration to be sampled by -- GitLab From 88d82debb1f094ba34382e2f5a9b0c8a1facb671 Mon Sep 17 00:00:00 2001 From: Julien Beraud Date: Wed, 15 Apr 2020 14:24:32 +0200 Subject: [PATCH 1262/1278] net: stmmac: Fix sub-second increment [ Upstream commit 91a2559c1dc5b0f7e1256d42b1508935e8eabfbf ] In fine adjustement mode, which is the current default, the sub-second increment register is the number of nanoseconds that will be added to the clock when the accumulator overflows. At each clock cycle, the value of the addend register is added to the accumulator. Currently, we use 20ns = 1e09ns / 50MHz as this value whatever the frequency of the ptp clock actually is. 
The adjustment is then done on the addend register, only incrementing every X clock cycles X being the ratio between 50MHz and ptp_clock_rate (addend = 2^32 * 50MHz/ptp_clock_rate). This causes the following issues : - In case the frequency of the ptp clock is inferior or equal to 50MHz, the addend value calculation will overflow and the default addend value will be set to 0, causing the clock to not work at all. (For instance, for ptp_clock_rate = 50MHz, addend = 2^32). - The resolution of the timestamping clock is limited to 20ns while it is not needed, thus limiting the accuracy of the timestamping to 20ns. Fix this by setting sub-second increment to 2e09ns / ptp_clock_rate. It will allow to reach the minimum possible frequency for ptp_clk_ref, which is 5MHz for GMII 1000Mps Full-Duplex by setting the sub-second-increment to a higher value. For instance, for 25MHz, it gives ssinc = 80ns and default_addend = 2^31. It will also allow to use a lower value for sub-second-increment, thus improving the timestamping accuracy with frequencies higher than 100MHz, for instance, for 200MHz, ssinc = 10ns and default_addend = 2^31. v1->v2: - Remove modifications to the calculation of default addend, which broke compatibility with clock frequencies for which 2000000000 / ptp_clk_freq is not an integer. - Modify description according to discussions. Signed-off-by: Julien Beraud Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- .../net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 41d528fbebb4..ccf7381c8bae 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -36,12 +36,16 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr, unsigned long data; u32 reg_value; - /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second - * formula = (1/ptp_clock) * 1000000000 - * where ptp_clock is 50MHz if fine method is used to update system + /* For GMAC3.x, 4.x versions, in "fine adjustement mode" set sub-second + * increment to twice the number of nanoseconds of a clock cycle. + * The calculation of the default_addend value by the caller will set it + * to mid-range = 2^31 when the remainder of this division is zero, + * which will make the accumulator overflow once every 2 ptp_clock + * cycles, adding twice the number of nanoseconds of a clock cycle : + * 2000000000ULL / ptp_clock. */ if (value & PTP_TCR_TSCFUPDT) - data = (1000000000ULL / 50000000); + data = (2000000000ULL / ptp_clock); else data = (1000000000ULL / ptp_clock); -- GitLab From 4c49cf3581b920745afc0650613a676dbb81ef60 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 21 Apr 2020 12:37:39 +1000 Subject: [PATCH 1263/1278] cifs: protect updating server->dstaddr with a spinlock [ Upstream commit fada37f6f62995cc449b36ebba1220594bfe55fe ] We use a spinlock while we are reading and accessing the destination address for a server. We need to also use this spinlock to protect when we are modifying this address from reconn_set_ipaddr(). 
Signed-off-by: Ronnie Sahlberg Reviewed-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/connect.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 697edc92dff2..58e7288e5151 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -348,8 +348,10 @@ static int reconn_set_ipaddr(struct TCP_Server_Info *server) return rc; } + spin_lock(&cifs_tcp_ses_lock); rc = cifs_convert_address((struct sockaddr *)&server->dstaddr, ipaddr, strlen(ipaddr)); + spin_unlock(&cifs_tcp_ses_lock); kfree(ipaddr); return !rc ? -1 : 0; -- GitLab From 3ca221d120ab13df15d39c8b854692924b8bfff6 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Mon, 6 Apr 2020 14:47:48 +0200 Subject: [PATCH 1264/1278] s390/ftrace: fix potential crashes when switching tracers [ Upstream commit 8ebf6da9db1b2a20bb86cc1bee2552e894d03308 ] Switching tracers include instruction patching. To prevent that a instruction is patched while it's read the instruction patching is done in stop_machine 'context'. This also means that any function called during stop_machine must not be traced. Thus add 'notrace' to all functions called within stop_machine. 
Fixes: 1ec2772e0c3c ("s390/diag: add a statistic for diagnose calls") Fixes: 38f2c691a4b3 ("s390: improve wait logic of stop_machine") Fixes: 4ecf0a43e729 ("processor: get rid of cpu_relax_yield") Signed-off-by: Philipp Rudo Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/diag.c | 2 +- arch/s390/kernel/smp.c | 4 ++-- arch/s390/kernel/trace.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 35c842aa8705..4c7cf8787a84 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -128,7 +128,7 @@ void diag_stat_inc(enum diag_stat_enum nr) } EXPORT_SYMBOL(diag_stat_inc); -void diag_stat_inc_norecursion(enum diag_stat_enum nr) +void notrace diag_stat_inc_norecursion(enum diag_stat_enum nr) { this_cpu_inc(diag_stat.counter[nr]); trace_s390_diagnose_norecursion(diag_map[nr].code); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b649a6538350..808f4fbe869e 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -406,7 +406,7 @@ int smp_find_processor_id(u16 address) return -1; } -bool arch_vcpu_is_preempted(int cpu) +bool notrace arch_vcpu_is_preempted(int cpu) { if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) return false; @@ -416,7 +416,7 @@ bool arch_vcpu_is_preempted(int cpu) } EXPORT_SYMBOL(arch_vcpu_is_preempted); -void smp_yield_cpu(int cpu) +void notrace smp_yield_cpu(int cpu) { if (MACHINE_HAS_DIAG9C) { diag_stat_inc_norecursion(DIAG_STAT_X09C); diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c index 490b52e85014..11a669f3cc93 100644 --- a/arch/s390/kernel/trace.c +++ b/arch/s390/kernel/trace.c @@ -14,7 +14,7 @@ EXPORT_TRACEPOINT_SYMBOL(s390_diagnose); static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth); -void trace_s390_diagnose_norecursion(int diag_nr) +void notrace trace_s390_diagnose_norecursion(int diag_nr) { unsigned long flags; unsigned int *depth; -- GitLab From 
e6b43bda86caffea0453c6d3abb0fe55a7860b31 Mon Sep 17 00:00:00 2001 From: "Jeremie Francois (on alpha)" Date: Fri, 10 Apr 2020 18:57:40 +0200 Subject: [PATCH 1265/1278] scripts/config: allow colons in option strings for sed [ Upstream commit e461bc9f9ab105637b86065d24b0b83f182d477c ] Sed broke on some strings as it used colon as a separator. I made it more robust by using \001, which is legit POSIX AFAIK. E.g. ./config --set-str CONFIG_USBNET_DEVADDR "de:ad:be:ef:00:01" failed with: sed: -e expression #1, char 55: unknown option to `s' Signed-off-by: Jeremie Francois (on alpha) Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/config | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/config b/scripts/config index e0e39826dae9..eee5b7f3a092 100755 --- a/scripts/config +++ b/scripts/config @@ -7,6 +7,9 @@ myname=${0##*/} # If no prefix forced, use the default CONFIG_ CONFIG_="${CONFIG_-CONFIG_}" +# We use an uncommon delimiter for sed substitutions +SED_DELIM=$(echo -en "\001") + usage() { cat >&2 <"$tmpfile" + sed -e "s$SED_DELIM$before$SED_DELIM$after$SED_DELIM" "$infile" >"$tmpfile" # replace original file with the edited one mv "$tmpfile" "$infile" } -- GitLab From 20d782f80db489769e8f689e2dd8b949b796634d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 20 Apr 2020 20:26:54 -0700 Subject: [PATCH 1266/1278] net: dsa: b53: Rework ARL bin logic [ Upstream commit 6344dbde6a27d10d16246d734b968f84887841e2 ] When asking the ARL to read a MAC address, we will get a number of bins returned in a single read. 
Out of those bins, there can essentially be 3 states: - all bins are full, we have no space left, and we can either replace an existing address or return that full condition - the MAC address was found, then we need to return its bin index and modify that one, and only that one - the MAC address was not found and we have a least one bin free, we use that bin index location then The code would unfortunately fail on all counts. Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 30 ++++++++++++++++++++++++++---- drivers/net/dsa/b53/b53_regs.h | 3 +++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 434e6dced6b7..274d36915110 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1094,6 +1094,7 @@ static int b53_arl_read(struct b53_device *dev, u64 mac, u16 vid, struct b53_arl_entry *ent, u8 *idx, bool is_valid) { + DECLARE_BITMAP(free_bins, B53_ARLTBL_MAX_BIN_ENTRIES); unsigned int i; int ret; @@ -1101,6 +1102,8 @@ static int b53_arl_read(struct b53_device *dev, u64 mac, if (ret) return ret; + bitmap_zero(free_bins, dev->num_arl_entries); + /* Read the bins */ for (i = 0; i < dev->num_arl_entries; i++) { u64 mac_vid; @@ -1112,13 +1115,21 @@ static int b53_arl_read(struct b53_device *dev, u64 mac, B53_ARLTBL_DATA_ENTRY(i), &fwd_entry); b53_arl_to_entry(ent, mac_vid, fwd_entry); - if (!(fwd_entry & ARLTBL_VALID)) + if (!(fwd_entry & ARLTBL_VALID)) { + set_bit(i, free_bins); continue; + } if ((mac_vid & ARLTBL_MAC_MASK) != mac) continue; *idx = i; + return 0; } + if (bitmap_weight(free_bins, dev->num_arl_entries) == 0) + return -ENOSPC; + + *idx = find_first_bit(free_bins, dev->num_arl_entries); + return -ENOENT; } @@ -1148,10 +1159,21 @@ static int b53_arl_op(struct b53_device *dev, int op, int 
port, if (op) return ret; - /* We could not find a matching MAC, so reset to a new entry */ - if (ret) { + switch (ret) { + case -ENOSPC: + dev_dbg(dev->dev, "{%pM,%.4d} no space left in ARL\n", + addr, vid); + return is_valid ? ret : 0; + case -ENOENT: + /* We could not find a matching MAC, so reset to a new entry */ + dev_dbg(dev->dev, "{%pM,%.4d} not found, using idx: %d\n", + addr, vid, idx); fwd_entry = 0; - idx = 1; + break; + default: + dev_dbg(dev->dev, "{%pM,%.4d} found, using idx: %d\n", + addr, vid, idx); + break; } memset(&ent, 0, sizeof(ent)); diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 1b2a337d673d..247aef92b759 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -313,6 +313,9 @@ #define ARLTBL_STATIC BIT(15) #define ARLTBL_VALID BIT(16) +/* Maximum number of bin entries in the ARL for all switches */ +#define B53_ARLTBL_MAX_BIN_ENTRIES 4 + /* ARL Search Control Register (8 bit) */ #define B53_ARL_SRCH_CTL 0x50 #define B53_ARL_SRCH_CTL_25 0x20 -- GitLab From f7cfbf422d3963207c16a3ed548db8b0ee4cd042 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 13 Apr 2020 12:50:42 -0700 Subject: [PATCH 1267/1278] lib/mpi: Fix building for powerpc with clang [ Upstream commit 5990cdee689c6885b27c6d969a3d58b09002b0bc ] 0day reports over and over on a powerpc randconfig with clang: lib/mpi/generic_mpih-mul1.c:37:13: error: invalid use of a cast in a inline asm context requiring an l-value: remove the cast or build with -fheinous-gnu-extensions Remove the superfluous casts, which have been done previously for x86 and arm32 in commit dea632cadd12 ("lib/mpi: fix build with clang") and commit 7b7c1df2883d ("lib/mpi/longlong.h: fix building with 32-bit x86").
Reported-by: kbuild test robot Signed-off-by: Nathan Chancellor Acked-by: Herbert Xu Signed-off-by: Michael Ellerman Link: https://github.com/ClangBuiltLinux/linux/issues/991 Link: https://lore.kernel.org/r/20200413195041.24064-1-natechancellor@gmail.com Signed-off-by: Sasha Levin --- lib/mpi/longlong.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index 08c60d10747f..e01b705556aa 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -756,22 +756,22 @@ do { \ do { \ if (__builtin_constant_p(bh) && (bh) == 0) \ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "%r" ((USItype)(ah)), \ "%r" ((USItype)(al)), \ "rI" ((USItype)(bl))); \ else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "%r" ((USItype)(ah)), \ "%r" ((USItype)(al)), \ "rI" ((USItype)(bl))); \ else \ __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "%r" ((USItype)(ah)), \ "r" ((USItype)(bh)), \ "%r" ((USItype)(al)), \ @@ -781,36 +781,36 @@ do { \ do { \ if (__builtin_constant_p(ah) && (ah) == 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "r" ((USItype)(bh)), \ "rI" ((USItype)(al)), \ "r" ((USItype)(bl))); \ else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "r" ((USItype)(bh)), \ "rI" ((USItype)(al)), \ "r" ((USItype)(bl))); \ else if (__builtin_constant_p(bh) && (bh) == 0) \ __asm__ 
("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "r" ((USItype)(ah)), \ "rI" ((USItype)(al)), \ "r" ((USItype)(bl))); \ else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "r" ((USItype)(ah)), \ "rI" ((USItype)(al)), \ "r" ((USItype)(bl))); \ else \ __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "r" ((USItype)(ah)), \ "r" ((USItype)(bh)), \ "rI" ((USItype)(al)), \ @@ -821,7 +821,7 @@ do { \ do { \ USItype __m0 = (m0), __m1 = (m1); \ __asm__ ("mulhwu %0,%1,%2" \ - : "=r" ((USItype) ph) \ + : "=r" (ph) \ : "%r" (__m0), \ "r" (__m1)); \ (pl) = __m0 * __m1; \ -- GitLab From 27bdb2bdf2d5e3db8e4aec4b32ce6b79359efcc3 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 23 Apr 2020 16:02:11 -0700 Subject: [PATCH 1268/1278] net: bcmgenet: suppress warnings on failed Rx SKB allocations [ Upstream commit ecaeceb8a8a145d93c7e136f170238229165348f ] The driver is designed to drop Rx packets and reclaim the buffers when an allocation fails, and the network interface needs to safely handle this packet loss. Therefore, an allocation failure of Rx SKBs is relatively benign. However, the output of the warning message occurs with a high scheduling priority that can cause excessive jitter/latency for other high priority processing. This commit suppresses the warning messages to prevent scheduling problems while retaining the failure count in the statistics of the network interface. Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 4b3660c63b86..38391230ca86 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1674,7 +1674,8 @@ static struct sk_buff *bcmgenet_rx_refill(struct bcmgenet_priv *priv, dma_addr_t mapping; /* Allocate a new Rx skb */ - skb = netdev_alloc_skb(priv->dev, priv->rx_buf_len + SKB_ALIGNMENT); + skb = __netdev_alloc_skb(priv->dev, priv->rx_buf_len + SKB_ALIGNMENT, + GFP_ATOMIC | __GFP_NOWARN); if (!skb) { priv->mib.alloc_rx_buff_failed++; netif_err(priv, rx_err, priv->dev, -- GitLab From b39a1578fbcaa0c0acc930e31c5eef46814b2f16 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 23 Apr 2020 16:13:30 -0700 Subject: [PATCH 1269/1278] net: systemport: suppress warnings on failed Rx SKB allocations [ Upstream commit 3554e54a46125030c534820c297ed7f6c3907e24 ] The driver is designed to drop Rx packets and reclaim the buffers when an allocation fails, and the network interface needs to safely handle this packet loss. Therefore, an allocation failure of Rx SKBs is relatively benign. However, the output of the warning message occurs with a high scheduling priority that can cause excessive jitter/latency for other high priority processing. This commit suppresses the warning messages to prevent scheduling problems while retaining the failure count in the statistics of the network interface. Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bcmsysport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index f48f7d104af2..123ee5c11bc0 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -645,7 +645,8 @@ static struct sk_buff *bcm_sysport_rx_refill(struct bcm_sysport_priv *priv, dma_addr_t mapping; /* Allocate a new SKB for a new packet */ - skb = netdev_alloc_skb(priv->netdev, RX_BUF_LENGTH); + skb = __netdev_alloc_skb(priv->netdev, RX_BUF_LENGTH, + GFP_ATOMIC | __GFP_NOWARN); if (!skb) { priv->mib.alloc_rx_buff_failed++; netif_err(priv, rx_err, ndev, "SKB alloc failed\n"); -- GitLab From 3aa030ab4a2db5deac9ae574e25b972b39d6c0d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jere=20Lepp=C3=A4nen?= Date: Tue, 21 Apr 2020 22:03:42 +0300 Subject: [PATCH 1270/1278] sctp: Fix SHUTDOWN CTSN Ack in the peer restart case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 12dfd78e3a74825e6f0bc8df7ef9f938fbc6bfe3 upstream. When starting shutdown in sctp_sf_do_dupcook_a(), get the value for SHUTDOWN Cumulative TSN Ack from the new association, which is reconstructed from the cookie, instead of the old association, which the peer doesn't have anymore. Otherwise the SHUTDOWN is either ignored or replied to with an ABORT by the peer because CTSN Ack doesn't match the peer's Initial TSN. Fixes: bdf6fa52f01b ("sctp: handle association restarts when the socket is closed.") Signed-off-by: Jere Leppänen Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_make_chunk.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index f67df16bd340..e698edd56bd5 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -858,7 +858,11 @@ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, struct sctp_chunk *retval; __u32 ctsn; - ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); + if (chunk && chunk->asoc) + ctsn = sctp_tsnmap_get_ctsn(&chunk->asoc->peer.tsn_map); + else + ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); + shut.cum_tsn_ack = htonl(ctsn); retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0, -- GitLab From dc13f456ff64d205a62e339c3a60b3d1951bcbe1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 21 Sep 2017 16:22:49 -0400 Subject: [PATCH 1271/1278] tracing: Reverse the order of trace_types_lock and event_mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 12ecef0cb12102d8c034770173d2d1363cb97d52 upstream. In order to make future changes where we need to call tracing_set_clock() from within an event command, the order of trace_types_lock and event_mutex must be reversed, as the event command will hold event_mutex and the trace_types_lock is taken from within tracing_set_clock(). 
Link: http://lkml.kernel.org/r/20170921162249.0dde3dca@gandalf.local.home Requested-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) Cc: Andress Kuo (郭孟修) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 5 +++++ kernel/trace/trace_events.c | 31 +++++++++++++++---------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 207d7c35214f..62c390a1cea8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7720,6 +7720,7 @@ static int instance_mkdir(const char *name) struct trace_array *tr; int ret; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = -EEXIST; @@ -7775,6 +7776,7 @@ static int instance_mkdir(const char *name) list_add(&tr->list, &ftrace_trace_arrays); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return 0; @@ -7786,6 +7788,7 @@ static int instance_mkdir(const char *name) out_unlock: mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; @@ -7798,6 +7801,7 @@ static int instance_rmdir(const char *name) int ret; int i; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = -ENODEV; @@ -7843,6 +7847,7 @@ static int instance_rmdir(const char *name) out_unlock: mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 2b0a01b2be2d..421166a39253 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1403,8 +1403,8 @@ static int subsystem_open(struct inode *inode, struct file *filp) return -ENODEV; /* Make sure the system still exists */ - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { list_for_each_entry(dir, &tr->systems, list) { if (dir == inode->i_private) { @@ -1418,8 +1418,8 @@ static int subsystem_open(struct inode *inode, struct file *filp) } } exit_loop: - mutex_unlock(&event_mutex); 
mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); if (!system) return -ENODEV; @@ -2305,15 +2305,15 @@ static void __add_event_to_tracers(struct trace_event_call *call); int trace_add_event_call(struct trace_event_call *call) { int ret; - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); ret = __register_event(call, NULL); if (ret >= 0) __add_event_to_tracers(call); - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; } @@ -2367,13 +2367,13 @@ int trace_remove_event_call(struct trace_event_call *call) { int ret; - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); down_write(&trace_event_sem); ret = probe_remove_event_call(call); up_write(&trace_event_sem); - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; } @@ -2435,8 +2435,8 @@ static int trace_module_notify(struct notifier_block *self, { struct module *mod = data; - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); switch (val) { case MODULE_STATE_COMING: trace_module_add_events(mod); @@ -2445,8 +2445,8 @@ static int trace_module_notify(struct notifier_block *self, trace_module_remove_events(mod); break; } - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return 0; } @@ -2961,24 +2961,24 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) * creates the event hierachry in the @parent/events directory. * * Returns 0 on success. + * + * Must be called with event_mutex held. 
*/ int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) { int ret; - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); ret = create_event_toplevel_files(parent, tr); if (ret) - goto out_unlock; + goto out; down_write(&trace_event_sem); __trace_add_event_dirs(tr); up_write(&trace_event_sem); - out_unlock: - mutex_unlock(&event_mutex); - + out: return ret; } @@ -3007,9 +3007,10 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr) return ret; } +/* Must be called with event_mutex held */ int event_trace_del_tracer(struct trace_array *tr) { - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); /* Disable any event triggers and associated soft-disabled events */ clear_event_triggers(tr); @@ -3030,8 +3031,6 @@ int event_trace_del_tracer(struct trace_array *tr) tr->event_dir = NULL; - mutex_unlock(&event_mutex); - return 0; } -- GitLab From 092d8c20f03eb33920de5205d5d545fb4a6b7aeb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 24 Apr 2020 08:12:22 +0200 Subject: [PATCH 1272/1278] ALSA: hda: Match both PCI ID and SSID for driver blacklist commit 977dfef40c8996b69afe23a9094d184049efb7bb upstream. The commit 3c6fd1f07ed0 ("ALSA: hda: Add driver blacklist") added a new blacklist for the devices that are known to have empty codecs, and one of the entries was ASUS ROG Zenith II (PCI SSID 1043:874f). However, it turned out that the very same PCI SSID is used for the previous model that does have the valid HD-audio codecs and the change broke the sound on it. Since the empty codec problem appears on a certain AMD platform (PCI ID 1022:1487), this patch changes the blacklist matching to both PCI ID and SSID using pci_match_id(). Also, the entry that was removed by the previous fix for ASUS ROG Zenith II is re-added.
Link: https://lore.kernel.org/r/20200424061222.19792-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 46670da04707..7779f5460715 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2172,9 +2172,10 @@ static const struct hdac_io_ops pci_hda_io_ops = { * some HD-audio PCI entries are exposed without any codecs, and such devices * should be ignored from the beginning. */ -static const struct snd_pci_quirk driver_blacklist[] = { - SND_PCI_QUIRK(0x1462, 0xcb59, "MSI TRX40 Creator", 0), - SND_PCI_QUIRK(0x1462, 0xcb60, "MSI TRX40", 0), +static const struct pci_device_id driver_blacklist[] = { + { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1043, 0x874f) }, /* ASUS ROG Zenith II / Strix */ + { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb59) }, /* MSI TRX40 Creator */ + { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb60) }, /* MSI TRX40 */ {} }; @@ -2197,7 +2198,7 @@ static int azx_probe(struct pci_dev *pci, bool schedule_probe; int err; - if (snd_pci_quirk_lookup(pci, driver_blacklist)) { + if (pci_match_id(driver_blacklist, pci)) { dev_info(&pci->dev, "Skipping the blacklisted device\n"); return -ENODEV; } -- GitLab From 1bfd4bed884d075f90793f297bc4ac09df3bd844 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 13 Jan 2020 21:59:40 -0800 Subject: [PATCH 1273/1278] mac80211: add ieee80211_is_any_nullfunc() commit 30b2f0be23fb40e58d0ad2caf8702c2a44cda2e1 upstream. commit 08a5bdde3812 ("mac80211: consider QoS Null frames for STA_NULLFUNC_ACKED") Fixed a bug where we failed to take into account a nullfunc frame can be either non-QoS or QoS. 
It turns out there is at least one more bug in ieee80211_sta_tx_notify(), introduced in commit 7b6ddeaf27ec ("mac80211: use QoS NDP for AP probing"), where we forgot to check for the QoS variant and so assumed the QoS nullfunc frame never went out Fix this by adding a helper ieee80211_is_any_nullfunc() which consolidates the check for non-QoS and QoS nullfunc frames. Replace existing compound conditionals and add a couple more missing checks for QoS variant. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200114055940.18502-3-thomas@adapt-ip.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- include/linux/ieee80211.h | 9 +++++++++ net/mac80211/mlme.c | 2 +- net/mac80211/rx.c | 8 +++----- net/mac80211/status.c | 5 ++--- net/mac80211/tx.c | 2 +- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 2e179778576c..c316ff030b1d 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -621,6 +621,15 @@ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); } +/** + * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee80211_is_any_nullfunc(__le16 fc) +{ + return (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)); +} + /** * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU * @fc: frame control field in little-endian byteorder diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 36bd59ff49c4..ab26b8b95471 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2273,7 +2273,7 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, if (!ieee80211_is_data(hdr->frame_control)) return; - if (ieee80211_is_nullfunc(hdr->frame_control) && + if (ieee80211_is_any_nullfunc(hdr->frame_control) && sdata->u.mgd.probe_send_count > 0) { if 
(ack) ieee80211_sta_reset_conn_monitor(sdata); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7c92b1471c34..56d7a3dfa543 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1255,8 +1255,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (ieee80211_is_ctl(hdr->frame_control) || - ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control) || + ieee80211_is_any_nullfunc(hdr->frame_control) || is_multicast_ether_addr(hdr->addr1)) return RX_CONTINUE; @@ -1643,8 +1642,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * Drop (qos-)data::nullfunc frames silently, since they * are used only to control station power saving mode. */ - if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) { + if (ieee80211_is_any_nullfunc(hdr->frame_control)) { I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); /* @@ -2134,7 +2132,7 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) /* Drop unencrypted frames if key is set. 
*/ if (unlikely(!ieee80211_has_protected(fc) && - !ieee80211_is_nullfunc(fc) && + !ieee80211_is_any_nullfunc(fc) && ieee80211_is_data(fc) && rx->key)) return -EACCES; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index fbe7354aeac7..fcfa6714e492 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -478,8 +478,7 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, rcu_read_lock(); sdata = ieee80211_sdata_from_skb(local, skb); if (sdata) { - if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) + if (ieee80211_is_any_nullfunc(hdr->frame_control)) cfg80211_probe_status(sdata->dev, hdr->addr1, cookie, acked, GFP_ATOMIC); @@ -856,7 +855,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, I802_DEBUG_INC(local->dot11FailedCount); } - if ((ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) && + if (ieee80211_is_any_nullfunc(fc) && ieee80211_has_pm(fc) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) && !(info->flags & IEEE80211_TX_CTL_INJECTED) && diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 513d071ccac7..1b1f2d6cb3f4 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -296,7 +296,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (unlikely(test_bit(SCAN_SW_SCANNING, &tx->local->scanning)) && test_bit(SDATA_STATE_OFFCHANNEL, &tx->sdata->state) && !ieee80211_is_probe_req(hdr->frame_control) && - !ieee80211_is_nullfunc(hdr->frame_control)) + !ieee80211_is_any_nullfunc(hdr->frame_control)) /* * When software scanning only nullfunc frames (to notify * the sleep state to the AP) and probe requests (for the -- GitLab From dbc839a90fb50f9ab52bbb64ab31991a0eb01d9f Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 20 Apr 2020 09:04:24 +0200 Subject: [PATCH 1274/1278] cgroup, netclassid: remove double cond_resched commit 526f3d96b8f83b1b13d73bd0b5c79cc2c487ec8e upstream. 
Commit 018d26fcd12a ("cgroup, netclassid: periodically release file_lock on classid") added a second cond_resched to write_classid indirectly by update_classid_task. Remove the one in write_classid. Signed-off-by: Jiri Slaby Cc: Dmitry Yakunin Cc: Konstantin Khlebnikov Cc: David S. Miller Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/netclassid_cgroup.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 67feeb207dad..668330ace961 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -131,10 +131,8 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, cs->classid = (u32)value; css_task_iter_start(css, 0, &it); - while ((p = css_task_iter_next(&it))) { + while ((p = css_task_iter_next(&it))) update_classid_task(p, cs->classid); - cond_resched(); - } css_task_iter_end(&it); return 0; -- GitLab From ab9dfda232481dcfaf549ce774004d116fc80c13 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 10 May 2020 10:29:03 +0200 Subject: [PATCH 1275/1278] Linux 4.14.180 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d2baacc1b0f6..525565f44b17 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 179 +SUBLEVEL = 180 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 975fd0628f2dc323f4ac0108526f8eb5f2deaeb4 Mon Sep 17 00:00:00 2001 From: Srinivasarao P Date: Mon, 27 Jul 2020 11:36:52 +0530 Subject: [PATCH 1276/1278] Reverting incremental fs changes e282124 ANDROID: Incremental fs: Fix issues with very large files b564958 ANDROID: Incremental fs: Add setattr call fdd560e ANDROID: Incremental fs: Use simple compression in log buffer 4f81903 ANDROID: Incremental fs: Fix create_file performance 3c76e8d ANDROID: Incremental fs: Fix compound page usercopy crash 3aee2b9 ANDROID: 
Incremental fs: Clean up incfs_test build process e158e20 ANDROID: Incremental fs: make remount log buffer change atomic 467d1f6 ANDROID: Incremental fs: Optimize get_filled_block 91ef6b6 ANDROID: Incremental fs: Fix mislabeled __user ptrs 1b7e2d0 ANDROID: Incremental fs: Use 64-bit int for file_size when writing hash blocks df76f38 Revert "ANDROID: Incremental fs: Fix initialization, use of bitfields" d73d0b4 ANDROID: Incremental fs: Fix remount ecd6f86 ANDROID: Incremental fs: Protect get_fill_block, and add a field 1a00062 ANDROID: Incremental fs: Fix crash polling 0 size read_log df5824e ANDROID: Incremental fs: get_filled_blocks: better index_out a4cad4e ANDROID: Incremental fs: Fix four resource bugs 76f5f1c ANDROID: Incremental fs: Add INCFS_IOC_GET_FILLED_BLOCKS 2d41ac8 ANDROID: Incremental fs: Fix two typos cb94ec7 ANDROID: Incremental fs: Add INCFS_IOC_PERMIT_FILL 758073b ANDROID: Incremental fs: Remove signature checks from kernel 8118f34 ANDROID: Incremental fs: Pad hash blocks dd3909c ANDROID: Incremental fs: Make fill block an ioctl 89e0905 ANDROID: Incremental fs: Remove all access_ok checks ee1d24d ANDROID: Incremental fs: Support xattrs Change-Id: Ib455db0ab788d08e968dcc665e2c9bd98c701b91 Signed-off-by: Srinivasarao P --- fs/incfs/data_mgmt.c | 690 ++++------- fs/incfs/data_mgmt.h | 130 +- fs/incfs/format.c | 126 +- fs/incfs/format.h | 67 +- fs/incfs/integrity.c | 183 ++- fs/incfs/integrity.h | 20 +- fs/incfs/vfs.c | 605 ++++----- include/uapi/linux/incrementalfs.h | 191 +-- .../selftests/filesystems/incfs/Makefile | 17 +- .../selftests/filesystems/incfs/config | 1 + .../selftests/filesystems/incfs/incfs_test.c | 1081 ++++++----------- .../selftests/filesystems/incfs/utils.c | 260 ++-- .../selftests/filesystems/incfs/utils.h | 36 +- 13 files changed, 1365 insertions(+), 2042 deletions(-) create mode 100644 tools/testing/selftests/filesystems/incfs/config diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index d9c43d5cca19..4698f14bbdf7 
100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -27,19 +27,28 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, return ERR_PTR(-ENOMEM); mi->mi_sb = sb; + mi->mi_options = *options; mi->mi_backing_dir_path = *backing_dir_path; mi->mi_owner = get_current_cred(); path_get(&mi->mi_backing_dir_path); mutex_init(&mi->mi_dir_struct_mutex); mutex_init(&mi->mi_pending_reads_mutex); init_waitqueue_head(&mi->mi_pending_reads_notif_wq); - init_waitqueue_head(&mi->mi_log.ml_notif_wq); - spin_lock_init(&mi->mi_log.rl_lock); INIT_LIST_HEAD(&mi->mi_reads_list_head); - error = incfs_realloc_mount_info(mi, options); - if (error) - goto err; + if (options->read_log_pages != 0) { + size_t buf_size = PAGE_SIZE * options->read_log_pages; + + spin_lock_init(&mi->mi_log.rl_writer_lock); + init_waitqueue_head(&mi->mi_log.ml_notif_wq); + + mi->mi_log.rl_size = buf_size / sizeof(*mi->mi_log.rl_ring_buf); + mi->mi_log.rl_ring_buf = kzalloc(buf_size, GFP_NOFS); + if (!mi->mi_log.rl_ring_buf) { + error = -ENOMEM; + goto err; + } + } return mi; @@ -48,47 +57,6 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, return ERR_PTR(error); } -int incfs_realloc_mount_info(struct mount_info *mi, - struct mount_options *options) -{ - void *new_buffer = NULL; - void *old_buffer; - size_t new_buffer_size = 0; - - if (options->read_log_pages != mi->mi_options.read_log_pages) { - struct read_log_state log_state; - /* - * Even though having two buffers allocated at once isn't - * usually good, allocating a multipage buffer under a spinlock - * is even worse, so let's optimize for the shorter lock - * duration. It's not end of the world if we fail to increase - * the buffer size anyway. 
- */ - if (options->read_log_pages > 0) { - new_buffer_size = PAGE_SIZE * options->read_log_pages; - new_buffer = kzalloc(new_buffer_size, GFP_NOFS); - if (!new_buffer) - return -ENOMEM; - } - - spin_lock(&mi->mi_log.rl_lock); - old_buffer = mi->mi_log.rl_ring_buf; - mi->mi_log.rl_ring_buf = new_buffer; - mi->mi_log.rl_size = new_buffer_size; - log_state = (struct read_log_state){ - .generation_id = mi->mi_log.rl_head.generation_id + 1, - }; - mi->mi_log.rl_head = log_state; - mi->mi_log.rl_tail = log_state; - spin_unlock(&mi->mi_log.rl_lock); - - kfree(old_buffer); - } - - mi->mi_options = *options; - return 0; -} - void incfs_free_mount_info(struct mount_info *mi) { if (!mi) @@ -100,8 +68,6 @@ void incfs_free_mount_info(struct mount_info *mi) mutex_destroy(&mi->mi_pending_reads_mutex); put_cred(mi->mi_owner); kfree(mi->mi_log.rl_ring_buf); - kfree(mi->log_xattr); - kfree(mi->pending_read_xattr); kfree(mi); } @@ -119,11 +85,11 @@ static void data_file_segment_destroy(struct data_file_segment *segment) struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) { - struct data_file *df = NULL; - struct backing_file_context *bfc = NULL; + struct data_file *df; + struct backing_file_context *bfc; int md_records; u64 size; - int error = 0; + int error; int i; if (!bf || !mi) @@ -150,8 +116,8 @@ struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) error = mutex_lock_interruptible(&bfc->bc_mutex); if (error) goto out; - error = incfs_read_file_header(bfc, &df->df_metadata_off, &df->df_id, - &size, &df->df_header_flags); + error = incfs_read_file_header(bfc, &df->df_metadata_off, + &df->df_id, &size); mutex_unlock(&bfc->bc_mutex); if (error) @@ -159,7 +125,7 @@ struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) df->df_size = size; if (size > 0) - df->df_data_block_count = get_blocks_count_for_size(size); + df->df_block_count = get_blocks_count_for_size(size); md_records = 
incfs_scan_metadata_chain(df); if (md_records < 0) @@ -194,7 +160,7 @@ int make_inode_ready_for_data_ops(struct mount_info *mi, struct file *backing_file) { struct inode_info *node = get_incfs_node(inode); - struct data_file *df = NULL; + struct data_file *df; int err = 0; inode_lock(inode); @@ -215,7 +181,7 @@ int make_inode_ready_for_data_ops(struct mount_info *mi, struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf) { - struct dir_file *dir = NULL; + struct dir_file *dir; if (!S_ISDIR(bf->f_inode->i_mode)) return ERR_PTR(-EBADF); @@ -248,120 +214,33 @@ static ssize_t decompress(struct mem_range src, struct mem_range dst) return result; } -static void log_read_one_record(struct read_log *rl, struct read_log_state *rs) -{ - union log_record *record = - (union log_record *)((u8 *)rl->rl_ring_buf + rs->next_offset); - size_t record_size; - - switch (record->full_record.type) { - case FULL: - rs->base_record = record->full_record; - record_size = sizeof(record->full_record); - break; - - case SAME_FILE: - rs->base_record.block_index = - record->same_file_record.block_index; - rs->base_record.absolute_ts_us += - record->same_file_record.relative_ts_us; - record_size = sizeof(record->same_file_record); - break; - - case SAME_FILE_NEXT_BLOCK: - ++rs->base_record.block_index; - rs->base_record.absolute_ts_us += - record->same_file_next_block.relative_ts_us; - record_size = sizeof(record->same_file_next_block); - break; - - case SAME_FILE_NEXT_BLOCK_SHORT: - ++rs->base_record.block_index; - rs->base_record.absolute_ts_us += - record->same_file_next_block_short.relative_ts_us; - record_size = sizeof(record->same_file_next_block_short); - break; - } - - rs->next_offset += record_size; - if (rs->next_offset > rl->rl_size - sizeof(*record)) { - rs->next_offset = 0; - ++rs->current_pass_no; - } - ++rs->current_record_no; -} - static void log_block_read(struct mount_info *mi, incfs_uuid_t *id, - int block_index) + int block_index, bool timed_out) { 
struct read_log *log = &mi->mi_log; - struct read_log_state *head, *tail; + struct read_log_state state; s64 now_us = ktime_to_us(ktime_get()); - s64 relative_us; - union log_record record; - size_t record_size; + struct read_log_record record = { + .file_id = *id, + .timestamp_us = now_us + }; - spin_lock(&log->rl_lock); - if (log->rl_size == 0) { - spin_unlock(&log->rl_lock); - return; - } + set_block_index(&record, block_index); + set_timed_out(&record, timed_out); - head = &log->rl_head; - tail = &log->rl_tail; - relative_us = now_us - head->base_record.absolute_ts_us; - - if (memcmp(id, &head->base_record.file_id, sizeof(incfs_uuid_t)) || - relative_us >= 1ll << 32) { - record.full_record = (struct full_record){ - .type = FULL, - .block_index = block_index, - .file_id = *id, - .absolute_ts_us = now_us, - }; - record_size = sizeof(struct full_record); - } else if (block_index != head->base_record.block_index + 1 || - relative_us >= 1 << 30) { - record.same_file_record = (struct same_file_record){ - .type = SAME_FILE, - .block_index = block_index, - .relative_ts_us = relative_us, - }; - record_size = sizeof(struct same_file_record); - } else if (relative_us >= 1 << 14) { - record.same_file_next_block = (struct same_file_next_block){ - .type = SAME_FILE_NEXT_BLOCK, - .relative_ts_us = relative_us, - }; - record_size = sizeof(struct same_file_next_block); - } else { - record.same_file_next_block_short = - (struct same_file_next_block_short){ - .type = SAME_FILE_NEXT_BLOCK_SHORT, - .relative_ts_us = relative_us, - }; - record_size = sizeof(struct same_file_next_block_short); - } + if (log->rl_size == 0) + return; - head->base_record.file_id = *id; - head->base_record.block_index = block_index; - head->base_record.absolute_ts_us = now_us; - - /* Advance tail beyond area we are going to overwrite */ - while (tail->current_pass_no < head->current_pass_no && - tail->next_offset < head->next_offset + record_size) - log_read_one_record(log, tail); - - memcpy(((u8 
*)log->rl_ring_buf) + head->next_offset, &record, - record_size); - head->next_offset += record_size; - if (head->next_offset > log->rl_size - sizeof(record)) { - head->next_offset = 0; - ++head->current_pass_no; + spin_lock(&log->rl_writer_lock); + state = READ_ONCE(log->rl_state); + log->rl_ring_buf[state.next_index] = record; + if (++state.next_index == log->rl_size) { + state.next_index = 0; + ++state.current_pass_no; } - ++head->current_record_no; + WRITE_ONCE(log->rl_state, state); + spin_unlock(&log->rl_writer_lock); - spin_unlock(&log->rl_lock); wake_up_all(&log->ml_notif_wq); } @@ -370,7 +249,7 @@ static int validate_hash_tree(struct file *bf, struct data_file *df, { u8 digest[INCFS_MAX_HASH_SIZE] = {}; struct mtree *tree = NULL; - struct incfs_df_signature *sig = NULL; + struct ondisk_signature *sig = NULL; struct mem_range calc_digest_rng; struct mem_range saved_digest_rng; struct mem_range root_hash_rng; @@ -393,8 +272,8 @@ static int validate_hash_tree(struct file *bf, struct data_file *df, return res; for (lvl = 0; lvl < tree->depth; lvl++) { - loff_t lvl_off = - tree->hash_level_suboffset[lvl] + sig->hash_offset; + loff_t lvl_off = tree->hash_level_suboffset[lvl] + + sig->mtree_offset; loff_t hash_block_off = lvl_off + round_down(hash_block_index * digest_size, INCFS_DATA_FILE_BLOCK_SIZE); @@ -442,6 +321,72 @@ static int validate_hash_tree(struct file *bf, struct data_file *df, return 0; } +static int revalidate_signature(struct file *bf, struct data_file *df) +{ + struct ondisk_signature *sig = df->df_signature; + struct mem_range root_hash = {}; + int result = 0; + u8 *sig_buf = NULL; + u8 *add_data_buf = NULL; + ssize_t read_res; + + /* File has no signature. */ + if (!sig || !df->df_hash_tree || sig->sig_size == 0) + return 0; + + /* Signature has already been validated. 
*/ + if (df->df_signature_validated) + return 0; + + add_data_buf = kzalloc(sig->add_data_size, GFP_NOFS); + if (!add_data_buf) { + result = -ENOMEM; + goto out; + } + + read_res = incfs_kread(bf, add_data_buf, sig->add_data_size, + sig->add_data_offset); + if (read_res < 0) { + result = read_res; + goto out; + } + if (read_res != sig->add_data_size) { + result = -EIO; + goto out; + } + + sig_buf = kzalloc(sig->sig_size, GFP_NOFS); + if (!sig_buf) { + result = -ENOMEM; + goto out; + } + + read_res = incfs_kread(bf, sig_buf, sig->sig_size, sig->sig_offset); + if (read_res < 0) { + result = read_res; + goto out; + } + if (read_res != sig->sig_size) { + result = -EIO; + goto out; + } + + root_hash = range(df->df_hash_tree->root_hash, + df->df_hash_tree->alg->digest_size); + + result = incfs_validate_pkcs7_signature( + range(sig_buf, sig->sig_size), + root_hash, + range(add_data_buf, sig->add_data_size)); + + if (result == 0) + df->df_signature_validated = true; +out: + kfree(sig_buf); + kfree(add_data_buf); + return result; +} + static struct data_file_segment *get_file_segment(struct data_file *df, int block_index) { @@ -456,28 +401,13 @@ static bool is_data_block_present(struct data_file_block *block) (block->db_stored_size != 0); } -static void convert_data_file_block(struct incfs_blockmap_entry *bme, - struct data_file_block *res_block) -{ - u16 flags = le16_to_cpu(bme->me_flags); - - res_block->db_backing_file_data_offset = - le16_to_cpu(bme->me_data_offset_hi); - res_block->db_backing_file_data_offset <<= 32; - res_block->db_backing_file_data_offset |= - le32_to_cpu(bme->me_data_offset_lo); - res_block->db_stored_size = le16_to_cpu(bme->me_data_size); - res_block->db_comp_alg = (flags & INCFS_BLOCK_COMPRESSED_LZ4) ? 
- COMPRESSION_LZ4 : - COMPRESSION_NONE; -} - static int get_data_file_block(struct data_file *df, int index, struct data_file_block *res_block) { struct incfs_blockmap_entry bme = {}; struct backing_file_context *bfc = NULL; loff_t blockmap_off = 0; + u16 flags = 0; int error = 0; if (!df || !res_block) @@ -486,184 +416,26 @@ static int get_data_file_block(struct data_file *df, int index, blockmap_off = df->df_blockmap_off; bfc = df->df_backing_file_context; - if (index < 0 || blockmap_off == 0) + if (index < 0 || index >= df->df_block_count || blockmap_off == 0) return -EINVAL; error = incfs_read_blockmap_entry(bfc, index, blockmap_off, &bme); if (error) return error; - convert_data_file_block(&bme, res_block); - return 0; -} - -static int check_room_for_one_range(u32 size, u32 size_out) -{ - if (size_out + sizeof(struct incfs_filled_range) > size) - return -ERANGE; - return 0; -} - -static int copy_one_range(struct incfs_filled_range *range, void __user *buffer, - u32 size, u32 *size_out) -{ - int error = check_room_for_one_range(size, *size_out); - if (error) - return error; - - if (copy_to_user(((char __user *)buffer) + *size_out, range, - sizeof(*range))) - return -EFAULT; - - *size_out += sizeof(*range); + flags = le16_to_cpu(bme.me_flags); + res_block->db_backing_file_data_offset = + le16_to_cpu(bme.me_data_offset_hi); + res_block->db_backing_file_data_offset <<= 32; + res_block->db_backing_file_data_offset |= + le32_to_cpu(bme.me_data_offset_lo); + res_block->db_stored_size = le16_to_cpu(bme.me_data_size); + res_block->db_comp_alg = (flags & INCFS_BLOCK_COMPRESSED_LZ4) ? 
+ COMPRESSION_LZ4 : + COMPRESSION_NONE; return 0; } -static int update_file_header_flags(struct data_file *df, u32 bits_to_reset, - u32 bits_to_set) -{ - int result; - u32 new_flags; - struct backing_file_context *bfc; - - if (!df) - return -EFAULT; - bfc = df->df_backing_file_context; - if (!bfc) - return -EFAULT; - - result = mutex_lock_interruptible(&bfc->bc_mutex); - if (result) - return result; - - new_flags = (df->df_header_flags & ~bits_to_reset) | bits_to_set; - if (new_flags != df->df_header_flags) { - df->df_header_flags = new_flags; - result = incfs_write_file_header_flags(bfc, new_flags); - } - - mutex_unlock(&bfc->bc_mutex); - - return result; -} - -#define READ_BLOCKMAP_ENTRIES 512 -int incfs_get_filled_blocks(struct data_file *df, - struct incfs_get_filled_blocks_args *arg) -{ - int error = 0; - bool in_range = false; - struct incfs_filled_range range; - void __user *buffer = u64_to_user_ptr(arg->range_buffer); - u32 size = arg->range_buffer_size; - u32 end_index = - arg->end_index ? 
arg->end_index : df->df_total_block_count; - u32 *size_out = &arg->range_buffer_size_out; - int i = READ_BLOCKMAP_ENTRIES - 1; - int entries_read = 0; - struct incfs_blockmap_entry *bme; - - *size_out = 0; - if (end_index > df->df_total_block_count) - end_index = df->df_total_block_count; - arg->total_blocks_out = df->df_total_block_count; - arg->data_blocks_out = df->df_data_block_count; - - if (df->df_header_flags & INCFS_FILE_COMPLETE) { - pr_debug("File marked full, fast get_filled_blocks"); - if (arg->start_index > end_index) { - arg->index_out = arg->start_index; - return 0; - } - arg->index_out = arg->start_index; - - error = check_room_for_one_range(size, *size_out); - if (error) - return error; - - range = (struct incfs_filled_range){ - .begin = arg->start_index, - .end = end_index, - }; - - error = copy_one_range(&range, buffer, size, size_out); - if (error) - return error; - arg->index_out = end_index; - return 0; - } - - bme = kzalloc(sizeof(*bme) * READ_BLOCKMAP_ENTRIES, - GFP_NOFS | __GFP_COMP); - if (!bme) - return -ENOMEM; - - for (arg->index_out = arg->start_index; arg->index_out < end_index; - ++arg->index_out) { - struct data_file_block dfb; - - if (++i == READ_BLOCKMAP_ENTRIES) { - entries_read = incfs_read_blockmap_entries( - df->df_backing_file_context, bme, - arg->index_out, READ_BLOCKMAP_ENTRIES, - df->df_blockmap_off); - if (entries_read < 0) { - error = entries_read; - break; - } - - i = 0; - } - - if (i >= entries_read) { - error = -EIO; - break; - } - - convert_data_file_block(bme + i, &dfb); - - if (is_data_block_present(&dfb) == in_range) - continue; - - if (!in_range) { - error = check_room_for_one_range(size, *size_out); - if (error) - break; - in_range = true; - range.begin = arg->index_out; - } else { - range.end = arg->index_out; - error = copy_one_range(&range, buffer, size, size_out); - if (error) { - /* there will be another try out of the loop, - * it will reset the index_out if it fails too - */ - break; - } - in_range = 
false; - } - } - - if (in_range) { - range.end = arg->index_out; - error = copy_one_range(&range, buffer, size, size_out); - if (error) - arg->index_out = range.begin; - } - - if (!error && in_range && arg->start_index == 0 && - end_index == df->df_total_block_count && - *size_out == sizeof(struct incfs_filled_range)) { - int result = - update_file_header_flags(df, 0, INCFS_FILE_COMPLETE); - /* Log failure only, since it's just a failed optimization */ - pr_debug("Marked file full with result %d", result); - } - - kfree(bme); - return error; -} - static bool is_read_done(struct pending_read *read) { return atomic_read_acquire(&read->done) != 0; @@ -763,7 +535,7 @@ static int wait_for_data_block(struct data_file *df, int block_index, if (!df || !res_block) return -EFAULT; - if (block_index < 0 || block_index >= df->df_data_block_count) + if (block_index < 0 || block_index >= df->df_block_count) return -EINVAL; if (df->df_blockmap_off <= 0) @@ -794,7 +566,8 @@ static int wait_for_data_block(struct data_file *df, int block_index, mi = df->df_mount_info; if (timeout_ms == 0) { - log_block_read(mi, &df->df_id, block_index); + log_block_read(mi, &df->df_id, block_index, + true /*timed out*/); return -ETIME; } @@ -813,7 +586,8 @@ static int wait_for_data_block(struct data_file *df, int block_index, if (wait_res == 0) { /* Wait has timed out */ - log_block_read(mi, &df->df_id, block_index); + log_block_read(mi, &df->df_id, block_index, + true /*timed out*/); return -ETIME; } if (wait_res < 0) { @@ -908,15 +682,22 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df, result = err; } + if (result > 0) { + int err = revalidate_signature(bf, df); + + if (err < 0) + result = err; + } + if (result >= 0) - log_block_read(mi, &df->df_id, index); + log_block_read(mi, &df->df_id, index, false /*timed out*/); out: return result; } int incfs_process_new_data_block(struct data_file *df, - struct incfs_fill_block *block, u8 *data) + struct 
incfs_new_data_block *block, u8 *data) { struct mount_info *mi = NULL; struct backing_file_context *bfc = NULL; @@ -931,7 +712,7 @@ int incfs_process_new_data_block(struct data_file *df, bfc = df->df_backing_file_context; mi = df->df_mount_info; - if (block->block_index >= df->df_data_block_count) + if (block->block_index >= df->df_block_count) return -ERANGE; segment = get_file_segment(df, block->block_index); @@ -973,7 +754,7 @@ int incfs_process_new_data_block(struct data_file *df, int incfs_read_file_signature(struct data_file *df, struct mem_range dst) { struct file *bf = df->df_backing_file_context->bc_file; - struct incfs_df_signature *sig; + struct ondisk_signature *sig; int read_res = 0; if (!dst.data) @@ -998,12 +779,12 @@ int incfs_read_file_signature(struct data_file *df, struct mem_range dst) } int incfs_process_new_hash_block(struct data_file *df, - struct incfs_fill_block *block, u8 *data) + struct incfs_new_data_block *block, u8 *data) { struct backing_file_context *bfc = NULL; struct mount_info *mi = NULL; struct mtree *hash_tree = NULL; - struct incfs_df_signature *sig = NULL; + struct ondisk_signature *sig = NULL; loff_t hash_area_base = 0; loff_t hash_area_size = 0; int error = 0; @@ -1022,11 +803,11 @@ int incfs_process_new_hash_block(struct data_file *df, hash_tree = df->df_hash_tree; sig = df->df_signature; - if (!hash_tree || !sig || sig->hash_offset == 0) + if (!hash_tree || !sig || sig->mtree_offset == 0) return -ENOTSUPP; - hash_area_base = sig->hash_offset; - hash_area_size = sig->hash_size; + hash_area_base = sig->mtree_offset; + hash_area_size = sig->mtree_size; if (hash_area_size < block->block_index * INCFS_DATA_FILE_BLOCK_SIZE + block->data_len) { /* Hash block goes beyond dedicated hash area of this file. 
*/ @@ -1037,7 +818,7 @@ int incfs_process_new_hash_block(struct data_file *df, if (!error) error = incfs_write_hash_block_to_backing_file( bfc, range(data, block->data_len), block->block_index, - hash_area_base, df->df_blockmap_off, df->df_size); + hash_area_base); mutex_unlock(&bfc->bc_mutex); return error; } @@ -1053,10 +834,9 @@ static int process_blockmap_md(struct incfs_blockmap *bm, if (!df) return -EFAULT; - if (df->df_data_block_count > block_count) + if (df->df_block_count != block_count) return -EBADMSG; - df->df_total_block_count = block_count; df->df_blockmap_off = base_off; return error; } @@ -1085,69 +865,58 @@ static int process_file_signature_md(struct incfs_file_signature *sg, { struct data_file *df = handler->context; struct mtree *hash_tree = NULL; + struct ondisk_signature *signature = NULL; int error = 0; - struct incfs_df_signature *signature = - kzalloc(sizeof(*signature), GFP_NOFS); - void *buf = NULL; - ssize_t read; - - if (!df || !df->df_backing_file_context || - !df->df_backing_file_context->bc_file) { - error = -ENOENT; - goto out; - } + loff_t base_tree_off = le64_to_cpu(sg->sg_hash_tree_offset); + u32 tree_size = le32_to_cpu(sg->sg_hash_tree_size); + loff_t sig_off = le64_to_cpu(sg->sg_sig_offset); + u32 sig_size = le32_to_cpu(sg->sg_sig_size); + loff_t add_data_off = le64_to_cpu(sg->sg_add_data_offset); + u32 add_data_size = le32_to_cpu(sg->sg_add_data_size); - signature->hash_offset = le64_to_cpu(sg->sg_hash_tree_offset); - signature->hash_size = le32_to_cpu(sg->sg_hash_tree_size); - signature->sig_offset = le64_to_cpu(sg->sg_sig_offset); - signature->sig_size = le32_to_cpu(sg->sg_sig_size); + if (!df) + return -ENOENT; - buf = kzalloc(signature->sig_size, GFP_NOFS); - if (!buf) { + signature = kzalloc(sizeof(*signature), GFP_NOFS); + if (!signature) { error = -ENOMEM; goto out; } - read = incfs_kread(df->df_backing_file_context->bc_file, buf, - signature->sig_size, signature->sig_offset); - if (read < 0) { - error = read; - goto 
out; - } + signature->add_data_offset = add_data_off; + signature->add_data_size = add_data_size; + signature->sig_offset = sig_off; + signature->sig_size = sig_size; + signature->mtree_offset = base_tree_off; + signature->mtree_size = tree_size; - if (read != signature->sig_size) { - error = -EINVAL; - goto out; - } - - hash_tree = incfs_alloc_mtree(range(buf, signature->sig_size), - df->df_data_block_count); + hash_tree = incfs_alloc_mtree(sg->sg_hash_alg, df->df_block_count, + range(sg->sg_root_hash, sizeof(sg->sg_root_hash))); if (IS_ERR(hash_tree)) { error = PTR_ERR(hash_tree); hash_tree = NULL; goto out; } - if (hash_tree->hash_tree_area_size != signature->hash_size) { + if (hash_tree->hash_tree_area_size != tree_size) { error = -EINVAL; goto out; } - if (signature->hash_size > 0 && - handler->md_record_offset <= signature->hash_offset) { + if (tree_size > 0 && handler->md_record_offset <= base_tree_off) { error = -EINVAL; goto out; } - if (handler->md_record_offset <= signature->sig_offset) { + if (handler->md_record_offset <= signature->add_data_offset || + handler->md_record_offset <= signature->sig_offset) { error = -EINVAL; goto out; } df->df_hash_tree = hash_tree; - hash_tree = NULL; df->df_signature = signature; - signature = NULL; out: - incfs_free_mtree(hash_tree); - kfree(signature); - kfree(buf); + if (error) { + incfs_free_mtree(hash_tree); + kfree(signature); + } return error; } @@ -1203,17 +972,6 @@ int incfs_scan_metadata_chain(struct data_file *df) result = records_count; } mutex_unlock(&bfc->bc_mutex); - - if (df->df_hash_tree) { - int hash_block_count = get_blocks_count_for_size( - df->df_hash_tree->hash_tree_area_size); - - if (df->df_data_block_count + hash_block_count != - df->df_total_block_count) - result = -EINVAL; - } else if (df->df_data_block_count != df->df_total_block_count) - result = -EINVAL; - out: kfree(handler); return result; @@ -1279,29 +1037,36 @@ struct read_log_state incfs_get_log_state(struct mount_info *mi) struct 
read_log *log = &mi->mi_log; struct read_log_state result; - spin_lock(&log->rl_lock); - result = log->rl_head; - spin_unlock(&log->rl_lock); + spin_lock(&log->rl_writer_lock); + result = READ_ONCE(log->rl_state); + spin_unlock(&log->rl_writer_lock); return result; } +static u64 calc_record_count(const struct read_log_state *state, int rl_size) +{ + return state->current_pass_no * (u64)rl_size + state->next_index; +} + int incfs_get_uncollected_logs_count(struct mount_info *mi, - const struct read_log_state *state) + struct read_log_state state) { struct read_log *log = &mi->mi_log; - u32 generation; - u64 head_no, tail_no; - - spin_lock(&log->rl_lock); - tail_no = log->rl_tail.current_record_no; - head_no = log->rl_head.current_record_no; - generation = log->rl_head.generation_id; - spin_unlock(&log->rl_lock); - - if (generation != state->generation_id) - return head_no - tail_no; - else - return head_no - max_t(u64, tail_no, state->current_record_no); + + u64 count = calc_record_count(&log->rl_state, log->rl_size) - + calc_record_count(&state, log->rl_size); + return min_t(int, count, log->rl_size); +} + +static void fill_pending_read_from_log_record( + struct incfs_pending_read_info *dest, const struct read_log_record *src, + struct read_log_state *state, u64 log_size) +{ + dest->file_id = src->file_id; + dest->block_index = get_block_index(src); + dest->serial_number = + state->current_pass_no * log_size + state->next_index; + dest->timestamp_us = src->timestamp_us; } int incfs_collect_logged_reads(struct mount_info *mi, @@ -1309,47 +1074,58 @@ int incfs_collect_logged_reads(struct mount_info *mi, struct incfs_pending_read_info *reads, int reads_size) { - int dst_idx; struct read_log *log = &mi->mi_log; - struct read_log_state *head, *tail; + struct read_log_state live_state = incfs_get_log_state(mi); + u64 read_count = calc_record_count(reader_state, log->rl_size); + u64 written_count = calc_record_count(&live_state, log->rl_size); + int dst_idx; - 
spin_lock(&log->rl_lock); - head = &log->rl_head; - tail = &log->rl_tail; + if (reader_state->next_index >= log->rl_size || + read_count > written_count) + return -ERANGE; - if (reader_state->generation_id != head->generation_id) { - pr_debug("read ptr is wrong generation: %u/%u", - reader_state->generation_id, head->generation_id); + if (read_count == written_count) + return 0; - *reader_state = (struct read_log_state){ - .generation_id = head->generation_id, - }; + if (read_count > written_count) { + /* This reader is somehow ahead of the writer. */ + pr_debug("incfs: Log reader is ahead of writer\n"); + *reader_state = live_state; } - if (reader_state->current_record_no < tail->current_record_no) { - pr_debug("read ptr is behind, moving: %u/%u -> %u/%u\n", - (u32)reader_state->next_offset, - (u32)reader_state->current_pass_no, - (u32)tail->next_offset, (u32)tail->current_pass_no); + if (written_count - read_count > log->rl_size) { + /* + * Reading pointer is too far behind, + * start from the record following the write pointer. 
+ */ + pr_debug("incfs: read pointer is behind, moving: %u/%u -> %u/%u / %u\n", + (u32)reader_state->next_index, + (u32)reader_state->current_pass_no, + (u32)live_state.next_index, + (u32)live_state.current_pass_no - 1, (u32)log->rl_size); - *reader_state = *tail; + *reader_state = (struct read_log_state){ + .next_index = live_state.next_index, + .current_pass_no = live_state.current_pass_no - 1, + }; } for (dst_idx = 0; dst_idx < reads_size; dst_idx++) { - if (reader_state->current_record_no == head->current_record_no) + if (reader_state->next_index == live_state.next_index && + reader_state->current_pass_no == live_state.current_pass_no) break; - log_read_one_record(log, reader_state); + fill_pending_read_from_log_record( + &reads[dst_idx], + &log->rl_ring_buf[reader_state->next_index], + reader_state, log->rl_size); - reads[dst_idx] = (struct incfs_pending_read_info){ - .file_id = reader_state->base_record.file_id, - .block_index = reader_state->base_record.block_index, - .serial_number = reader_state->current_record_no, - .timestamp_us = reader_state->base_record.absolute_ts_us - }; + reader_state->next_index++; + if (reader_state->next_index == log->rl_size) { + reader_state->next_index = 0; + reader_state->current_pass_no++; + } } - - spin_unlock(&log->rl_lock); return dst_idx; } diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index b7aecdd5bf4a..6722cef1608c 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -20,74 +20,63 @@ #define SEGMENTS_PER_FILE 3 -enum LOG_RECORD_TYPE { - FULL, - SAME_FILE, - SAME_FILE_NEXT_BLOCK, - SAME_FILE_NEXT_BLOCK_SHORT, -}; +struct read_log_record { + u32 bitfield; + + u64 timestamp_us; -struct full_record { - enum LOG_RECORD_TYPE type : 2; /* FULL */ - u32 block_index : 30; incfs_uuid_t file_id; - u64 absolute_ts_us; -} __packed; /* 28 bytes */ - -struct same_file_record { - enum LOG_RECORD_TYPE type : 2; /* SAME_FILE */ - u32 block_index : 30; - u32 relative_ts_us; /* max 2^32 us ~= 1 hour (1:11:30) */ -} 
__packed; /* 12 bytes */ - -struct same_file_next_block { - enum LOG_RECORD_TYPE type : 2; /* SAME_FILE_NEXT_BLOCK */ - u32 relative_ts_us : 30; /* max 2^30 us ~= 15 min (17:50) */ -} __packed; /* 4 bytes */ - -struct same_file_next_block_short { - enum LOG_RECORD_TYPE type : 2; /* SAME_FILE_NEXT_BLOCK_SHORT */ - u16 relative_ts_us : 14; /* max 2^14 us ~= 16 ms */ -} __packed; /* 2 bytes */ - -union log_record { - struct full_record full_record; - struct same_file_record same_file_record; - struct same_file_next_block same_file_next_block; - struct same_file_next_block_short same_file_next_block_short; -}; +} __packed; -struct read_log_state { - /* Log buffer generation id, incremented on configuration changes */ - u32 generation_id; +#define RLR_BLOCK_INDEX_MASK 0x7fff +#define RLR_TIMED_OUT_MASK 0x8000 - /* Offset in rl_ring_buf to write into. */ - u32 next_offset; +static inline u32 get_block_index(const struct read_log_record *rlr) +{ + return rlr->bitfield & RLR_BLOCK_INDEX_MASK; +} - /* Current number of writer passes over rl_ring_buf */ - u32 current_pass_no; +static inline void set_block_index(struct read_log_record *rlr, + u32 block_index) +{ + rlr->bitfield = (rlr->bitfield & ~RLR_BLOCK_INDEX_MASK) + | (block_index & RLR_BLOCK_INDEX_MASK); +} + +static inline bool get_timed_out(const struct read_log_record *rlr) +{ + return (rlr->bitfield & RLR_TIMED_OUT_MASK) == RLR_TIMED_OUT_MASK; +} + +static inline void set_timed_out(struct read_log_record *rlr, bool timed_out) +{ + if (timed_out) + rlr->bitfield |= RLR_TIMED_OUT_MASK; + else + rlr->bitfield &= ~RLR_TIMED_OUT_MASK; +} - /* Current full_record to diff against */ - struct full_record base_record; +struct read_log_state { + /* Next slot in rl_ring_buf to write to. 
*/ + u32 next_index; - /* Current record number counting from configuration change */ - u64 current_record_no; + /* Current number of writer pass over rl_ring_buf */ + u32 current_pass_no; }; /* A ring buffer to save records about data blocks which were recently read. */ struct read_log { - void *rl_ring_buf; - - int rl_size; + struct read_log_record *rl_ring_buf; - struct read_log_state rl_head; + struct read_log_state rl_state; - struct read_log_state rl_tail; + spinlock_t rl_writer_lock; - /* A lock to protect the above fields */ - spinlock_t rl_lock; + int rl_size; - /* A queue of waiters who want to be notified about reads */ + /* + * A queue of waiters who want to be notified about reads. + */ wait_queue_head_t ml_notif_wq; }; @@ -142,12 +131,6 @@ struct mount_info { /* Temporary buffer for read logger. */ struct read_log mi_log; - - void *log_xattr; - size_t log_xattr_size; - - void *pending_read_xattr; - size_t pending_read_xattr_size; }; struct data_file_block { @@ -220,20 +203,16 @@ struct data_file { /* File size in bytes */ loff_t df_size; - /* File header flags */ - u32 df_header_flags; - - /* File size in DATA_FILE_BLOCK_SIZE blocks */ - int df_data_block_count; - - /* Total number of blocks, data + hash */ - int df_total_block_count; + int df_block_count; /* File size in DATA_FILE_BLOCK_SIZE blocks */ struct file_attr n_attr; struct mtree *df_hash_tree; - struct incfs_df_signature *df_signature; + struct ondisk_signature *df_signature; + + /* True, if file signature has already been validated. 
*/ + bool df_signature_validated; }; struct dir_file { @@ -260,9 +239,6 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, struct mount_options *options, struct path *backing_dir_path); -int incfs_realloc_mount_info(struct mount_info *mi, - struct mount_options *options); - void incfs_free_mount_info(struct mount_info *mi); struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf); @@ -277,16 +253,14 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df, int index, int timeout_ms, struct mem_range tmp); -int incfs_get_filled_blocks(struct data_file *df, - struct incfs_get_filled_blocks_args *arg); - int incfs_read_file_signature(struct data_file *df, struct mem_range dst); int incfs_process_new_data_block(struct data_file *df, - struct incfs_fill_block *block, u8 *data); + struct incfs_new_data_block *block, u8 *data); int incfs_process_new_hash_block(struct data_file *df, - struct incfs_fill_block *block, u8 *data); + struct incfs_new_data_block *block, u8 *data); + bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number); @@ -305,7 +279,7 @@ int incfs_collect_logged_reads(struct mount_info *mi, int reads_size); struct read_log_state incfs_get_log_state(struct mount_info *mi); int incfs_get_uncollected_logs_count(struct mount_info *mi, - const struct read_log_state *state); + struct read_log_state state); static inline struct inode_info *get_incfs_node(struct inode *inode) { @@ -323,7 +297,7 @@ static inline struct inode_info *get_incfs_node(struct inode *inode) static inline struct data_file *get_incfs_data_file(struct file *f) { - struct inode_info *node = NULL; + struct inode_info *node; if (!f) return NULL; diff --git a/fs/incfs/format.c b/fs/incfs/format.c index c56e559b6893..db71f527cf36 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -13,7 +13,6 @@ #include #include "format.h" -#include "data_mgmt.h" struct backing_file_context *incfs_alloc_bfc(struct file 
*backing_file) { @@ -94,6 +93,7 @@ static int append_zeros(struct backing_file_context *bfc, size_t len) { loff_t file_size = 0; loff_t new_last_byte_offset = 0; + int res = 0; if (!bfc) return -EFAULT; @@ -110,18 +110,28 @@ static int append_zeros(struct backing_file_context *bfc, size_t len) */ file_size = incfs_get_end_offset(bfc->bc_file); new_last_byte_offset = file_size + len - 1; - return vfs_fallocate(bfc->bc_file, 0, new_last_byte_offset, 1); + res = vfs_fallocate(bfc->bc_file, 0, new_last_byte_offset, 1); + if (res) + return res; + + res = vfs_fsync_range(bfc->bc_file, file_size, file_size + len, 1); + return res; } static int write_to_bf(struct backing_file_context *bfc, const void *buf, - size_t count, loff_t pos) + size_t count, loff_t pos, bool sync) { - ssize_t res = incfs_kwrite(bfc->bc_file, buf, count, pos); + ssize_t res = 0; + res = incfs_kwrite(bfc->bc_file, buf, count, pos); if (res < 0) return res; if (res != count) return -EIO; + + if (sync) + return vfs_fsync_range(bfc->bc_file, pos, pos + count, 1); + return 0; } @@ -175,7 +185,7 @@ static int append_md_to_backing_file(struct backing_file_context *bfc, /* Write the metadata record to the end of the backing file */ record_offset = file_pos; new_md_offset = cpu_to_le64(record_offset); - result = write_to_bf(bfc, record, record_size, file_pos); + result = write_to_bf(bfc, record, record_size, file_pos, true); if (result) return result; @@ -196,7 +206,7 @@ static int append_md_to_backing_file(struct backing_file_context *bfc, fh_first_md_offset); } result = write_to_bf(bfc, &new_md_offset, sizeof(new_md_offset), - file_pos); + file_pos, true); if (result) return result; @@ -204,22 +214,12 @@ static int append_md_to_backing_file(struct backing_file_context *bfc, return result; } -int incfs_write_file_header_flags(struct backing_file_context *bfc, u32 flags) -{ - if (!bfc) - return -EFAULT; - - return write_to_bf(bfc, &flags, sizeof(flags), - offsetof(struct incfs_file_header, - 
fh_file_header_flags)); -} - /* * Reserve 0-filled space for the blockmap body, and append * incfs_blockmap metadata record pointing to it. */ int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, - u32 block_count) + u32 block_count, loff_t *map_base_off) { struct incfs_blockmap blockmap = {}; int result = 0; @@ -245,9 +245,12 @@ int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, /* Write blockmap metadata record pointing to the body written above. */ blockmap.m_base_offset = cpu_to_le64(file_end); result = append_md_to_backing_file(bfc, &blockmap.m_header); - if (result) + if (result) { /* Error, rollback file changes */ truncate_backing_file(bfc, file_end); + } else if (map_base_off) { + *map_base_off = file_end; + } return result; } @@ -280,7 +283,7 @@ int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, file_attr.fa_offset = cpu_to_le64(value_offset); file_attr.fa_crc = cpu_to_le32(crc); - result = write_to_bf(bfc, value.data, value.len, value_offset); + result = write_to_bf(bfc, value.data, value.len, value_offset, true); if (result) return result; @@ -296,7 +299,9 @@ int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, } int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, - struct mem_range sig, u32 tree_size) + u8 hash_alg, u32 tree_size, + struct mem_range root_hash, struct mem_range add_data, + struct mem_range sig) { struct incfs_file_signature sg = {}; int result = 0; @@ -306,6 +311,8 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, if (!bfc) return -EFAULT; + if (root_hash.len > sizeof(sg.sg_root_hash)) + return -E2BIG; LOCK_REQUIRED(bfc->bc_mutex); @@ -314,19 +321,32 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, sg.sg_header.h_md_entry_type = INCFS_MD_SIGNATURE; sg.sg_header.h_record_size = cpu_to_le16(sizeof(sg)); sg.sg_header.h_next_md_offset = cpu_to_le64(0); + 
sg.sg_hash_alg = hash_alg; if (sig.data != NULL && sig.len > 0) { loff_t pos = incfs_get_end_offset(bfc->bc_file); sg.sg_sig_size = cpu_to_le32(sig.len); sg.sg_sig_offset = cpu_to_le64(pos); - result = write_to_bf(bfc, sig.data, sig.len, pos); + result = write_to_bf(bfc, sig.data, sig.len, pos, false); + if (result) + goto err; + } + + if (add_data.len > 0) { + loff_t pos = incfs_get_end_offset(bfc->bc_file); + + sg.sg_add_data_size = cpu_to_le32(add_data.len); + sg.sg_add_data_offset = cpu_to_le64(pos); + + result = write_to_bf(bfc, add_data.data, + add_data.len, pos, false); if (result) goto err; } tree_area_pos = incfs_get_end_offset(bfc->bc_file); - if (tree_size > 0) { + if (hash_alg && tree_size > 0) { if (tree_size > 5 * INCFS_DATA_FILE_BLOCK_SIZE) { /* * If hash tree is big enough, it makes sense to @@ -349,13 +369,15 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, sg.sg_hash_tree_size = cpu_to_le32(tree_size); sg.sg_hash_tree_offset = cpu_to_le64(tree_area_pos); } + memcpy(sg.sg_root_hash, root_hash.data, root_hash.len); /* Write a hash tree metadata record pointing to the hash tree above. */ result = append_md_to_backing_file(bfc, &sg.sg_header); err: - if (result) + if (result) { /* Error, rollback file changes */ truncate_backing_file(bfc, rollback_pos); + } return result; } @@ -389,7 +411,7 @@ int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, if (file_pos != 0) return -EEXIST; - return write_to_bf(bfc, &fh, sizeof(fh), file_pos); + return write_to_bf(bfc, &fh, sizeof(fh), file_pos, true); } /* Write a given data block and update file's blockmap to point it. */ @@ -418,7 +440,7 @@ int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, } /* Write the block data at the end of the backing file. 
*/ - result = write_to_bf(bfc, block.data, block.len, data_offset); + result = write_to_bf(bfc, block.data, block.len, data_offset, false); if (result) return result; @@ -428,25 +450,18 @@ int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, bm_entry.me_data_size = cpu_to_le16((u16)block.len); bm_entry.me_flags = cpu_to_le16(flags); - return write_to_bf(bfc, &bm_entry, sizeof(bm_entry), - bm_entry_off); + result = write_to_bf(bfc, &bm_entry, sizeof(bm_entry), + bm_entry_off, false); + return result; } int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, - struct mem_range block, - int block_index, - loff_t hash_area_off, - loff_t bm_base_off, - loff_t file_size) + struct mem_range block, + int block_index, loff_t hash_area_off) { - struct incfs_blockmap_entry bm_entry = {}; - int result; loff_t data_offset = 0; loff_t file_end = 0; - loff_t bm_entry_off = - bm_base_off + - sizeof(struct incfs_blockmap_entry) * - (block_index + get_blocks_count_for_size(file_size)); + if (!bfc) return -EFAULT; @@ -460,16 +475,7 @@ int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, return -EINVAL; } - result = write_to_bf(bfc, block.data, block.len, data_offset); - if (result) - return result; - - bm_entry.me_data_offset_lo = cpu_to_le32((u32)data_offset); - bm_entry.me_data_offset_hi = cpu_to_le16((u16)(data_offset >> 32)); - bm_entry.me_data_size = cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE); - bm_entry.me_flags = cpu_to_le16(INCFS_BLOCK_HASH); - - return write_to_bf(bfc, &bm_entry, sizeof(bm_entry), bm_entry_off); + return write_to_bf(bfc, block.data, block.len, data_offset, false); } /* Initialize a new image in a given backing file. 
*/ @@ -499,19 +505,8 @@ int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index, loff_t bm_base_off, struct incfs_blockmap_entry *bm_entry) { - int error = incfs_read_blockmap_entries(bfc, bm_entry, block_index, 1, - bm_base_off); - - if (error < 0) - return error; - - if (error == 0) - return -EIO; - - if (error != 1) - return -EFAULT; - - return 0; + return incfs_read_blockmap_entries(bfc, bm_entry, block_index, 1, + bm_base_off); } int incfs_read_blockmap_entries(struct backing_file_context *bfc, @@ -535,12 +530,15 @@ int incfs_read_blockmap_entries(struct backing_file_context *bfc, bm_entry_off); if (result < 0) return result; - return result / sizeof(*entries); + if (result < bytes_to_read) + return -EIO; + return 0; } + int incfs_read_file_header(struct backing_file_context *bfc, loff_t *first_md_off, incfs_uuid_t *uuid, - u64 *file_size, u32 *flags) + u64 *file_size) { ssize_t bytes_read = 0; struct incfs_file_header fh = {}; @@ -574,8 +572,6 @@ int incfs_read_file_header(struct backing_file_context *bfc, *uuid = fh.fh_uuid; if (file_size) *file_size = le64_to_cpu(fh.fh_file_size); - if (flags) - *flags = le32_to_cpu(fh.fh_file_header_flags); return 0; } diff --git a/fs/incfs/format.h b/fs/incfs/format.h index 1a83349bb2eb..a86881482e19 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -121,10 +121,6 @@ enum incfs_metadata_type { INCFS_MD_SIGNATURE = 3 }; -enum incfs_file_header_flags { - INCFS_FILE_COMPLETE = 1 << 0, -}; - /* Header included at the beginning of all metadata records on the disk. */ struct incfs_md_header { __u8 h_md_entry_type; @@ -163,8 +159,8 @@ struct incfs_file_header { /* INCFS_DATA_FILE_BLOCK_SIZE */ __le16 fh_data_block_size; - /* File flags, from incfs_file_header_flags */ - __le32 fh_file_header_flags; + /* Padding, also reserved for future use. 
*/ + __le32 fh_dummy; /* Offset of the first metadata record */ __le64 fh_first_md_offset; @@ -182,7 +178,6 @@ struct incfs_file_header { enum incfs_block_map_entry_flags { INCFS_BLOCK_COMPRESSED_LZ4 = (1 << 0), - INCFS_BLOCK_HASH = (1 << 1), }; /* Block map entry pointing to an actual location of the data block. */ @@ -222,26 +217,26 @@ struct incfs_file_attr { __le32 fa_crc; } __packed; -/* Metadata record for file signature. Type = INCFS_MD_SIGNATURE */ +/* Metadata record for file attribute. Type = INCFS_MD_SIGNATURE */ struct incfs_file_signature { struct incfs_md_header sg_header; - __le32 sg_sig_size; /* The size of the signature. */ - - __le64 sg_sig_offset; /* Signature's offset in the backing file */ + __u8 sg_hash_alg; /* Value from incfs_hash_tree_algorithm */ __le32 sg_hash_tree_size; /* The size of the hash tree. */ __le64 sg_hash_tree_offset; /* Hash tree offset in the backing file */ -} __packed; -/* In memory version of above */ -struct incfs_df_signature { - u32 sig_size; - u64 sig_offset; - u32 hash_size; - u64 hash_offset; -}; + __u8 sg_root_hash[INCFS_MAX_HASH_SIZE]; + + __le32 sg_sig_size; /* The size of the pkcs7 signature. */ + + __le64 sg_sig_offset; /* pkcs7 signature's offset in the backing file */ + + __le32 sg_add_data_size; /* The size of the additional data. */ + + __le64 sg_add_data_offset; /* Additional data's offset */ +} __packed; /* State of the backing file. */ struct backing_file_context { @@ -258,6 +253,23 @@ struct backing_file_context { loff_t bc_last_md_record_offset; }; + +/* Backing file locations of things required for signature validation. */ +struct ondisk_signature { + + loff_t add_data_offset; /* Additional data's offset */ + + loff_t sig_offset; /* pkcs7 signature's offset in the backing file */ + + loff_t mtree_offset; /* Backing file offset of the hash tree. */ + + u32 add_data_size; /* The size of the additional data. */ + + u32 sig_size; /* The size of the pkcs7 signature. 
*/ + + u32 mtree_size; /* The size of the hash tree. */ +}; + struct metadata_handler { loff_t md_record_offset; loff_t md_prev_record_offset; @@ -289,7 +301,7 @@ void incfs_free_bfc(struct backing_file_context *bfc); /* Writing stuff */ int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, - u32 block_count); + u32 block_count, loff_t *map_base_off); int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, incfs_uuid_t *uuid, u64 file_size); @@ -300,19 +312,16 @@ int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, u16 flags); int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, - struct mem_range block, - int block_index, - loff_t hash_area_off, - loff_t bm_base_off, - loff_t file_size); + struct mem_range block, + int block_index, loff_t hash_area_off); int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, struct mem_range value, struct incfs_file_attr *attr); int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, - struct mem_range sig, u32 tree_size); - -int incfs_write_file_header_flags(struct backing_file_context *bfc, u32 flags); + u8 hash_alg, u32 tree_size, + struct mem_range root_hash, struct mem_range add_data, + struct mem_range sig); int incfs_make_empty_backing_file(struct backing_file_context *bfc, incfs_uuid_t *uuid, u64 file_size); @@ -320,7 +329,7 @@ int incfs_make_empty_backing_file(struct backing_file_context *bfc, /* Reading stuff */ int incfs_read_file_header(struct backing_file_context *bfc, loff_t *first_md_off, incfs_uuid_t *uuid, - u64 *file_size, u32 *flags); + u64 *file_size); int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index, loff_t bm_base_off, diff --git a/fs/incfs/integrity.c b/fs/incfs/integrity.c index d049988ef037..feb212c38945 100644 --- a/fs/incfs/integrity.c +++ b/fs/incfs/integrity.c @@ -10,6 +10,70 @@ #include "integrity.h" +int incfs_validate_pkcs7_signature(struct 
mem_range pkcs7_blob, + struct mem_range root_hash, struct mem_range add_data) +{ + struct pkcs7_message *pkcs7 = NULL; + const void *data = NULL; + size_t data_len = 0; + const char *p; + int err; + + pkcs7 = pkcs7_parse_message(pkcs7_blob.data, pkcs7_blob.len); + if (IS_ERR(pkcs7)) { + pr_debug("PKCS#7 parsing error. ptr=%p size=%ld err=%ld\n", + pkcs7_blob.data, pkcs7_blob.len, -PTR_ERR(pkcs7)); + return PTR_ERR(pkcs7); + } + + err = pkcs7_get_content_data(pkcs7, &data, &data_len, NULL); + if (err || data_len == 0 || data == NULL) { + pr_debug("PKCS#7 message does not contain data\n"); + err = -EBADMSG; + goto out; + } + + if (root_hash.len == 0) { + pr_debug("Root hash is empty.\n"); + err = -EBADMSG; + goto out; + } + + if (data_len != root_hash.len + add_data.len) { + pr_debug("PKCS#7 data size doesn't match arguments.\n"); + err = -EKEYREJECTED; + goto out; + } + + p = data; + if (memcmp(p, root_hash.data, root_hash.len) != 0) { + pr_debug("Root hash mismatch.\n"); + err = -EKEYREJECTED; + goto out; + } + p += root_hash.len; + if (memcmp(p, add_data.data, add_data.len) != 0) { + pr_debug("Additional data mismatch.\n"); + err = -EKEYREJECTED; + goto out; + } + + err = pkcs7_verify(pkcs7, VERIFYING_UNSPECIFIED_SIGNATURE); + if (err) + pr_debug("PKCS#7 signature verification error: %d\n", -err); + + /* + * RSA signature verification sometimes returns unexpected error codes + * when signature doesn't match. 
+ */ + if (err == -ERANGE || err == -EINVAL) + err = -EBADMSG; + +out: + pkcs7_free_message(pkcs7); + return err; +} + struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id) { static struct incfs_hash_alg sha256 = { @@ -49,90 +113,11 @@ struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id) return result; } -struct signature_info { - u32 version; - enum incfs_hash_tree_algorithm hash_algorithm; - u8 log2_blocksize; - struct mem_range salt; - struct mem_range root_hash; -}; - -static bool read_u32(u8 **p, u8 *top, u32 *result) -{ - if (*p + sizeof(u32) > top) - return false; - - *result = le32_to_cpu(*(__le32 *)*p); - *p += sizeof(u32); - return true; -} - -static bool read_u8(u8 **p, u8 *top, u8 *result) -{ - if (*p + sizeof(u8) > top) - return false; - - *result = *(u8 *)*p; - *p += sizeof(u8); - return true; -} - -static bool read_mem_range(u8 **p, u8 *top, struct mem_range *range) -{ - u32 len; - - if (!read_u32(p, top, &len) || *p + len > top) - return false; - - range->len = len; - range->data = *p; - *p += len; - return true; -} -static int incfs_parse_signature(struct mem_range signature, - struct signature_info *si) +struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id, + int data_block_count, + struct mem_range root_hash) { - u8 *p = signature.data; - u8 *top = signature.data + signature.len; - u32 hash_section_size; - - if (signature.len > INCFS_MAX_SIGNATURE_SIZE) - return -EINVAL; - - if (!read_u32(&p, top, &si->version) || - si->version != INCFS_SIGNATURE_VERSION) - return -EINVAL; - - if (!read_u32(&p, top, &hash_section_size) || - p + hash_section_size > top) - return -EINVAL; - top = p + hash_section_size; - - if (!read_u32(&p, top, &si->hash_algorithm) || - si->hash_algorithm != INCFS_HASH_TREE_SHA256) - return -EINVAL; - - if (!read_u8(&p, top, &si->log2_blocksize) || si->log2_blocksize != 12) - return -EINVAL; - - if (!read_mem_range(&p, top, &si->salt)) - return -EINVAL; - - if 
(!read_mem_range(&p, top, &si->root_hash)) - return -EINVAL; - - if (p != top) - return -EINVAL; - - return 0; -} - -struct mtree *incfs_alloc_mtree(struct mem_range signature, - int data_block_count) -{ - int error; - struct signature_info si; struct mtree *result = NULL; struct incfs_hash_alg *hash_alg = NULL; int hash_per_block; @@ -144,15 +129,11 @@ struct mtree *incfs_alloc_mtree(struct mem_range signature, if (data_block_count <= 0) return ERR_PTR(-EINVAL); - error = incfs_parse_signature(signature, &si); - if (error) - return ERR_PTR(error); - - hash_alg = incfs_get_hash_alg(si.hash_algorithm); + hash_alg = incfs_get_hash_alg(id); if (IS_ERR(hash_alg)) return ERR_PTR(PTR_ERR(hash_alg)); - if (si.root_hash.len < hash_alg->digest_size) + if (root_hash.len < hash_alg->digest_size) return ERR_PTR(-EINVAL); result = kzalloc(sizeof(*result), GFP_NOFS); @@ -192,7 +173,7 @@ struct mtree *incfs_alloc_mtree(struct mem_range signature, } /* Root hash is stored separately from the rest of the tree. 
*/ - memcpy(result->root_hash, si.root_hash.data, hash_alg->digest_size); + memcpy(result->root_hash, root_hash.data, hash_alg->digest_size); return result; err: @@ -217,20 +198,16 @@ int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, return -EINVAL; desc->tfm = alg->shash; - - if (data.len < INCFS_DATA_FILE_BLOCK_SIZE) { - int err; - void *buf = kzalloc(INCFS_DATA_FILE_BLOCK_SIZE, GFP_NOFS); - - if (!buf) - return -ENOMEM; - - memcpy(buf, data.data, data.len); - err = crypto_shash_digest(desc, buf, INCFS_DATA_FILE_BLOCK_SIZE, - digest.data); - kfree(buf); - return err; - } return crypto_shash_digest(desc, data.data, data.len, digest.data); } +void incfs_free_signature_info(struct signature_info *si) +{ + if (!si) + return; + kfree(si->root_hash.data); + kfree(si->additional_data.data); + kfree(si->signature.data); + kfree(si); +} + diff --git a/fs/incfs/integrity.h b/fs/incfs/integrity.h index cf79b64da736..da1c38486b2f 100644 --- a/fs/incfs/integrity.h +++ b/fs/incfs/integrity.h @@ -38,10 +38,21 @@ struct mtree { int depth; }; +struct signature_info { + struct mem_range root_hash; + + struct mem_range additional_data; + + struct mem_range signature; + + enum incfs_hash_tree_algorithm hash_alg; +}; + struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id); -struct mtree *incfs_alloc_mtree(struct mem_range signature, - int data_block_count); +struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id, + int data_block_count, + struct mem_range root_hash); void incfs_free_mtree(struct mtree *tree); @@ -53,4 +64,9 @@ size_t incfs_get_mtree_hash_count(enum incfs_hash_tree_algorithm alg, int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, struct mem_range digest); +int incfs_validate_pkcs7_signature(struct mem_range pkcs7_blob, + struct mem_range root_hash, struct mem_range add_data); + +void incfs_free_signature_info(struct signature_info *si); + #endif /* _INCFS_INTEGRITY_H */ diff --git 
a/fs/incfs/vfs.c b/fs/incfs/vfs.c index d2c82047dd7c..aebd2b02bd83 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -52,6 +52,8 @@ static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, static int file_open(struct inode *inode, struct file *file); static int file_release(struct inode *inode, struct file *file); +static ssize_t file_write(struct file *f, const char __user *buf, + size_t size, loff_t *offset); static int read_single_page(struct file *f, struct page *page); static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg); @@ -71,11 +73,8 @@ static struct inode *alloc_inode(struct super_block *sb); static void free_inode(struct inode *inode); static void evict_inode(struct inode *inode); -static int incfs_setattr(struct dentry *dentry, struct iattr *ia); static ssize_t incfs_getxattr(struct dentry *d, const char *name, void *value, size_t size); -static ssize_t incfs_setxattr(struct dentry *d, const char *name, - const void *value, size_t size, int flags); static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size); static int show_options(struct seq_file *, struct dentry *); @@ -102,8 +101,7 @@ static const struct inode_operations incfs_dir_inode_ops = { .rename = dir_rename_wrap, .unlink = dir_unlink, .link = dir_link, - .rmdir = dir_rmdir, - .setattr = incfs_setattr, + .rmdir = dir_rmdir }; static const struct file_operations incfs_dir_fops = { @@ -129,6 +127,7 @@ static const struct address_space_operations incfs_address_space_ops = { static const struct file_operations incfs_file_ops = { .open = file_open, .release = file_release, + .write = file_write, .read_iter = generic_file_read_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, @@ -137,11 +136,6 @@ static const struct file_operations incfs_file_ops = { .compat_ioctl = dispatch_ioctl }; -enum FILL_PERMISSION { - CANT_FILL = 0, - CAN_FILL = 1, -}; - static const struct file_operations incfs_pending_read_file_ops = { .read 
= pending_reads_read, .poll = pending_reads_poll, @@ -163,7 +157,7 @@ static const struct file_operations incfs_log_file_ops = { }; static const struct inode_operations incfs_file_inode_ops = { - .setattr = incfs_setattr, + .setattr = simple_setattr, .getattr = simple_getattr, .listxattr = incfs_listxattr }; @@ -175,18 +169,9 @@ static int incfs_handler_getxattr(const struct xattr_handler *xh, return incfs_getxattr(d, name, buffer, size); } -static int incfs_handler_setxattr(const struct xattr_handler *xh, - struct dentry *d, struct inode *inode, - const char *name, const void *buffer, - size_t size, int flags) -{ - return incfs_setxattr(d, name, buffer, size, flags); -} - static const struct xattr_handler incfs_xattr_handler = { .prefix = "", /* AKA all attributes */ .get = incfs_handler_getxattr, - .set = incfs_handler_setxattr, }; static const struct xattr_handler *incfs_xattr_ops[] = { @@ -351,8 +336,8 @@ static int inode_test(struct inode *inode, void *opaque) return (node->n_backing_inode == backing_inode) && inode->i_ino == search->ino; - } else - return inode->i_ino == search->ino; + } + return 1; } static int inode_set(struct inode *inode, void *opaque) @@ -374,7 +359,6 @@ static int inode_set(struct inode *inode, void *opaque) inode->i_mapping->a_ops = &incfs_address_space_ops; inode->i_op = &incfs_file_inode_ops; inode->i_fop = &incfs_file_ops; - inode->i_mode &= ~0222; } else if (S_ISDIR(inode->i_mode)) { inode->i_size = 0; inode->i_blocks = 1; @@ -470,6 +454,9 @@ static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, ssize_t result = 0; int i = 0; + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + if (!incfs_fresh_pending_reads_exist(mi, last_known_read_sn)) return 0; @@ -587,27 +574,22 @@ static ssize_t log_read(struct file *f, char __user *buf, size_t len, { struct log_file_state *log_state = f->private_data; struct mount_info *mi = get_mount_info(file_superblock(f)); + struct incfs_pending_read_info *reads_buf = + 
(struct incfs_pending_read_info *)__get_free_page(GFP_NOFS); + size_t reads_to_collect = len / sizeof(*reads_buf); + size_t reads_per_page = PAGE_SIZE / sizeof(*reads_buf); int total_reads_collected = 0; - int rl_size; ssize_t result = 0; - struct incfs_pending_read_info *reads_buf; - ssize_t reads_to_collect = len / sizeof(*reads_buf); - ssize_t reads_per_page = PAGE_SIZE / sizeof(*reads_buf); - rl_size = READ_ONCE(mi->mi_log.rl_size); - if (rl_size == 0) - return 0; - - reads_buf = (struct incfs_pending_read_info *)__get_free_page(GFP_NOFS); if (!reads_buf) return -ENOMEM; - reads_to_collect = min_t(ssize_t, rl_size, reads_to_collect); + reads_to_collect = min_t(size_t, mi->mi_log.rl_size, reads_to_collect); while (reads_to_collect > 0) { struct read_log_state next_state = READ_ONCE(log_state->state); int reads_collected = incfs_collect_logged_reads( mi, &next_state, reads_buf, - min_t(ssize_t, reads_to_collect, reads_per_page)); + min_t(size_t, reads_to_collect, reads_per_page)); if (reads_collected <= 0) { result = total_reads_collected ? 
total_reads_collected * @@ -646,7 +628,7 @@ static __poll_t log_poll(struct file *file, poll_table *wait) __poll_t ret = 0; poll_wait(file, &mi->mi_log.ml_notif_wq, wait); - count = incfs_get_uncollected_logs_count(mi, &log_state->state); + count = incfs_get_uncollected_logs_count(mi, log_state->state); if (count >= mi->mi_options.read_log_wakeup_count) ret = EPOLLIN | EPOLLRDNORM; @@ -807,6 +789,9 @@ static int read_single_page(struct file *f, struct page *page) size = df->df_size; timeout_ms = df->df_mount_info->mi_options.read_timeout_ms; + pr_debug("incfs: %s %s %lld\n", __func__, + f->f_path.dentry->d_name.name, offset); + if (offset < size) { struct mem_range tmp = { .len = 2 * INCFS_DATA_FILE_BLOCK_SIZE @@ -853,39 +838,107 @@ static char *file_id_to_str(incfs_uuid_t id) return result; } -static struct mem_range incfs_copy_signature_info_from_user(u8 __user *original, - u64 size) +static struct signature_info *incfs_copy_signature_info_from_user( + struct incfs_file_signature_info __user *original) { - u8 *result; + struct incfs_file_signature_info usr_si; + struct signature_info *result; + int error; if (!original) - return range(NULL, 0); + return NULL; + + if (!access_ok(VERIFY_READ, original, sizeof(usr_si))) + return ERR_PTR(-EFAULT); - if (size > INCFS_MAX_SIGNATURE_SIZE) - return range(ERR_PTR(-EFAULT), 0); + if (copy_from_user(&usr_si, original, sizeof(usr_si)) > 0) + return ERR_PTR(-EFAULT); - result = kzalloc(size, GFP_NOFS | __GFP_COMP); + result = kzalloc(sizeof(*result), GFP_NOFS); if (!result) - return range(ERR_PTR(-ENOMEM), 0); + return ERR_PTR(-ENOMEM); + + result->hash_alg = usr_si.hash_tree_alg; + + if (result->hash_alg) { + void *p = kzalloc(INCFS_MAX_HASH_SIZE, GFP_NOFS); + + if (!p) { + error = -ENOMEM; + goto err; + } + + /* TODO this sets the root_hash length to MAX_HASH_SIZE not + * the actual size. 
Fix, then set INCFS_MAX_HASH_SIZE back + * to 64 + */ + result->root_hash = range(p, INCFS_MAX_HASH_SIZE); + if (copy_from_user(p, u64_to_user_ptr(usr_si.root_hash), + result->root_hash.len) > 0) { + error = -EFAULT; + goto err; + } + } - if (copy_from_user(result, original, size)) { - kfree(result); - return range(ERR_PTR(-EFAULT), 0); + if (usr_si.additional_data_size > INCFS_MAX_FILE_ATTR_SIZE) { + error = -E2BIG; + goto err; } - return range(result, size); + if (usr_si.additional_data && usr_si.additional_data_size) { + void *p = kzalloc(usr_si.additional_data_size, GFP_NOFS); + + if (!p) { + error = -ENOMEM; + goto err; + } + result->additional_data = range(p, + usr_si.additional_data_size); + if (copy_from_user(p, u64_to_user_ptr(usr_si.additional_data), + result->additional_data.len) > 0) { + error = -EFAULT; + goto err; + } + } + + if (usr_si.signature_size > INCFS_MAX_SIGNATURE_SIZE) { + error = -E2BIG; + goto err; + } + + if (usr_si.signature && usr_si.signature_size) { + void *p = kzalloc(usr_si.signature_size, GFP_NOFS); + + if (!p) { + error = -ENOMEM; + goto err; + } + result->signature = range(p, usr_si.signature_size); + if (copy_from_user(p, u64_to_user_ptr(usr_si.signature), + result->signature.len) > 0) { + error = -EFAULT; + goto err; + } + } + + return result; + +err: + incfs_free_signature_info(result); + return ERR_PTR(-error); } static int init_new_file(struct mount_info *mi, struct dentry *dentry, - incfs_uuid_t *uuid, u64 size, struct mem_range attr, - u8 __user *user_signature_info, u64 signature_size) + incfs_uuid_t *uuid, u64 size, struct mem_range attr, + struct incfs_file_signature_info __user *fsi) { struct path path = {}; struct file *new_file; int error = 0; struct backing_file_context *bfc = NULL; u32 block_count; - struct mem_range raw_signature = { NULL }; + struct mem_range mem_range = {NULL}; + struct signature_info *si = NULL; struct mtree *hash_tree = NULL; if (!mi || !dentry || !uuid) @@ -896,8 +949,7 @@ static int 
init_new_file(struct mount_info *mi, struct dentry *dentry, .mnt = mi->mi_backing_dir_path.mnt, .dentry = dentry }; - new_file = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE, - mi->mi_owner); + new_file = dentry_open(&path, O_RDWR | O_NOATIME, mi->mi_owner); if (IS_ERR(new_file)) { error = PTR_ERR(new_file); @@ -905,7 +957,6 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, } bfc = incfs_alloc_bfc(new_file); - fput(new_file); if (IS_ERR(bfc)) { error = PTR_ERR(bfc); bfc = NULL; @@ -917,6 +968,19 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, if (error) goto out; + block_count = (u32)get_blocks_count_for_size(size); + error = incfs_write_blockmap_to_backing_file(bfc, block_count, NULL); + if (error) + goto out; + + /* This fill has data, reserve space for the block map. */ + if (block_count > 0) { + error = incfs_write_blockmap_to_backing_file( + bfc, block_count, NULL); + if (error) + goto out; + } + if (attr.data && attr.len) { error = incfs_write_file_attr_to_backing_file(bfc, attr, NULL); @@ -924,46 +988,54 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, goto out; } - block_count = (u32)get_blocks_count_for_size(size); - - if (user_signature_info) { - raw_signature = incfs_copy_signature_info_from_user( - user_signature_info, signature_size); + if (fsi) { + si = incfs_copy_signature_info_from_user(fsi); - if (IS_ERR(raw_signature.data)) { - error = PTR_ERR(raw_signature.data); - raw_signature.data = NULL; + if (IS_ERR(si)) { + error = PTR_ERR(si); + si = NULL; goto out; } - hash_tree = incfs_alloc_mtree(raw_signature, block_count); - if (IS_ERR(hash_tree)) { - error = PTR_ERR(hash_tree); - hash_tree = NULL; - goto out; - } + if (si->hash_alg) { + hash_tree = incfs_alloc_mtree(si->hash_alg, block_count, + si->root_hash); + if (IS_ERR(hash_tree)) { + error = PTR_ERR(hash_tree); + hash_tree = NULL; + goto out; + } - error = incfs_write_signature_to_backing_file( - bfc, 
raw_signature, hash_tree->hash_tree_area_size); - if (error) - goto out; + /* TODO This code seems wrong when len is zero - we + * should error out?? + */ + if (si->signature.len > 0) + error = incfs_validate_pkcs7_signature( + si->signature, + si->root_hash, + si->additional_data); + if (error) + goto out; - block_count += get_blocks_count_for_size( - hash_tree->hash_tree_area_size); - } + error = incfs_write_signature_to_backing_file(bfc, + si->hash_alg, + hash_tree->hash_tree_area_size, + si->root_hash, si->additional_data, + si->signature); - if (block_count) - error = incfs_write_blockmap_to_backing_file(bfc, block_count); + if (error) + goto out; + } + } - if (error) - goto out; out: if (bfc) { mutex_unlock(&bfc->bc_mutex); incfs_free_bfc(bfc); } incfs_free_mtree(hash_tree); - kfree(raw_signature.data); + incfs_free_signature_info(si); + kfree(mem_range.data); if (error) pr_debug("incfs: %s error: %d\n", __func__, error); @@ -1108,7 +1180,10 @@ static long ioctl_create_file(struct mount_info *mi, error = -EFAULT; goto out; } - + if (!access_ok(VERIFY_READ, usr_args, sizeof(args))) { + error = -EFAULT; + goto out; + } if (copy_from_user(&args, usr_args, sizeof(args)) > 0) { error = -EFAULT; goto out; @@ -1221,7 +1296,7 @@ static long ioctl_create_file(struct mount_info *mi, goto delete_index_file; } - /* Save the file's attribute as an xattr */ + /* Save the file's attrubute as an xattr */ if (args.file_attr_len && args.file_attr) { if (args.file_attr_len > INCFS_MAX_FILE_ATTR_SIZE) { error = -E2BIG; @@ -1234,6 +1309,12 @@ static long ioctl_create_file(struct mount_info *mi, goto delete_index_file; } + if (!access_ok(VERIFY_READ, u64_to_user_ptr(args.file_attr), + args.file_attr_len)) { + error = -EFAULT; + goto delete_index_file; + } + if (copy_from_user(attr_value, u64_to_user_ptr(args.file_attr), args.file_attr_len) > 0) { @@ -1252,9 +1333,9 @@ static long ioctl_create_file(struct mount_info *mi, /* Initializing a newly created file. 
*/ error = init_new_file(mi, index_file_dentry, &args.file_id, args.size, - range(attr_value, args.file_attr_len), - (u8 __user *)args.signature_info, - args.signature_size); + range(attr_value, args.file_attr_len), + (struct incfs_file_signature_info __user *) + args.signature_info); if (error) goto delete_index_file; @@ -1282,123 +1363,6 @@ static long ioctl_create_file(struct mount_info *mi, return error; } -static long ioctl_fill_blocks(struct file *f, void __user *arg) -{ - struct incfs_fill_blocks __user *usr_fill_blocks = arg; - struct incfs_fill_blocks fill_blocks; - struct incfs_fill_block __user *usr_fill_block_array; - struct data_file *df = get_incfs_data_file(f); - const ssize_t data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE; - u8 *data_buf = NULL; - ssize_t error = 0; - int i = 0; - - if (!df) - return -EBADF; - - if ((uintptr_t)f->private_data != CAN_FILL) - return -EPERM; - - if (copy_from_user(&fill_blocks, usr_fill_blocks, sizeof(fill_blocks))) - return -EFAULT; - - usr_fill_block_array = u64_to_user_ptr(fill_blocks.fill_blocks); - data_buf = (u8 *)__get_free_pages(GFP_NOFS | __GFP_COMP, - get_order(data_buf_size)); - if (!data_buf) - return -ENOMEM; - - for (i = 0; i < fill_blocks.count; i++) { - struct incfs_fill_block fill_block = {}; - - if (copy_from_user(&fill_block, &usr_fill_block_array[i], - sizeof(fill_block)) > 0) { - error = -EFAULT; - break; - } - - if (fill_block.data_len > data_buf_size) { - error = -E2BIG; - break; - } - - if (copy_from_user(data_buf, u64_to_user_ptr(fill_block.data), - fill_block.data_len) > 0) { - error = -EFAULT; - break; - } - fill_block.data = 0; /* To make sure nobody uses it. 
*/ - if (fill_block.flags & INCFS_BLOCK_FLAGS_HASH) { - error = incfs_process_new_hash_block(df, &fill_block, - data_buf); - } else { - error = incfs_process_new_data_block(df, &fill_block, - data_buf); - } - if (error) - break; - } - - if (data_buf) - free_pages((unsigned long)data_buf, get_order(data_buf_size)); - - /* - * Only report the error if no records were processed, otherwise - * just return how many were processed successfully. - */ - if (i == 0) - return error; - - return i; -} - -static long ioctl_permit_fill(struct file *f, void __user *arg) -{ - struct incfs_permit_fill __user *usr_permit_fill = arg; - struct incfs_permit_fill permit_fill; - long error = 0; - struct file *file = NULL; - - if (f->f_op != &incfs_pending_read_file_ops) - return -EPERM; - - if (copy_from_user(&permit_fill, usr_permit_fill, sizeof(permit_fill))) - return -EFAULT; - - file = fget(permit_fill.file_descriptor); - if (IS_ERR(file)) - return PTR_ERR(file); - - if (file->f_op != &incfs_file_ops) { - error = -EPERM; - goto out; - } - - if (file->f_inode->i_sb != f->f_inode->i_sb) { - error = -EPERM; - goto out; - } - - switch ((uintptr_t)file->private_data) { - case CANT_FILL: - file->private_data = (void *)CAN_FILL; - break; - - case CAN_FILL: - pr_debug("CAN_FILL already set"); - break; - - default: - pr_warn("Invalid file private data"); - error = -EFAULT; - goto out; - } - -out: - fput(file); - return error; -} - static long ioctl_read_file_signature(struct file *f, void __user *arg) { struct incfs_get_file_sig_args __user *args_usr_ptr = arg; @@ -1412,14 +1376,20 @@ static long ioctl_read_file_signature(struct file *f, void __user *arg) if (!df) return -EINVAL; + if (!access_ok(VERIFY_READ, args_usr_ptr, sizeof(args))) + return -EFAULT; if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) return -EINVAL; + if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(args.file_signature), + args.file_signature_buf_size)) + return -EFAULT; + sig_buf_size = args.file_signature_buf_size; 
if (sig_buf_size > INCFS_MAX_SIGNATURE_SIZE) return -E2BIG; - sig_buffer = kzalloc(sig_buf_size, GFP_NOFS | __GFP_COMP); + sig_buffer = kzalloc(sig_buf_size, GFP_NOFS); if (!sig_buffer) return -ENOMEM; @@ -1447,30 +1417,6 @@ static long ioctl_read_file_signature(struct file *f, void __user *arg) return error; } -static long ioctl_get_filled_blocks(struct file *f, void __user *arg) -{ - struct incfs_get_filled_blocks_args __user *args_usr_ptr = arg; - struct incfs_get_filled_blocks_args args = {}; - struct data_file *df = get_incfs_data_file(f); - int error; - - if (!df) - return -EINVAL; - - if ((uintptr_t)f->private_data != CAN_FILL) - return -EPERM; - - if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) - return -EINVAL; - - error = incfs_get_filled_blocks(df, &args); - - if (copy_to_user(args_usr_ptr, &args, sizeof(args))) - return -EFAULT; - - return error; -} - static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) { struct mount_info *mi = get_mount_info(file_superblock(f)); @@ -1478,14 +1424,8 @@ static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) switch (req) { case INCFS_IOC_CREATE_FILE: return ioctl_create_file(mi, (void __user *)arg); - case INCFS_IOC_FILL_BLOCKS: - return ioctl_fill_blocks(f, (void __user *)arg); - case INCFS_IOC_PERMIT_FILL: - return ioctl_permit_fill(f, (void __user *)arg); case INCFS_IOC_READ_FILE_SIGNATURE: return ioctl_read_file_signature(f, (void __user *)arg); - case INCFS_IOC_GET_FILLED_BLOCKS: - return ioctl_get_filled_blocks(f, (void __user *)arg); default: return -EINVAL; } @@ -1692,7 +1632,6 @@ static int final_file_delete(struct mount_info *mi, if (d_really_is_positive(index_file_dentry)) error = incfs_unlink(index_file_dentry); out: - dput(index_file_dentry); if (error) pr_debug("incfs: delete_file_from_index err:%d\n", error); return error; @@ -1905,8 +1844,8 @@ static int file_open(struct inode *inode, struct file *file) int err = 0; 
get_incfs_backing_path(file->f_path.dentry, &backing_path); - backing_file = dentry_open( - &backing_path, O_RDWR | O_NOATIME | O_LARGEFILE, mi->mi_owner); + backing_file = dentry_open(&backing_path, O_RDWR | O_NOATIME, + mi->mi_owner); path_put(&backing_path); if (IS_ERR(backing_file)) { @@ -1915,10 +1854,9 @@ static int file_open(struct inode *inode, struct file *file) goto out; } - if (S_ISREG(inode->i_mode)) { + if (S_ISREG(inode->i_mode)) err = make_inode_ready_for_data_ops(mi, inode, backing_file); - file->private_data = (void *)CANT_FILL; - } else if (S_ISDIR(inode->i_mode)) { + else if (S_ISDIR(inode->i_mode)) { struct dir_file *dir = NULL; dir = incfs_open_dir_file(mi, backing_file); @@ -1953,6 +1891,77 @@ static int file_release(struct inode *inode, struct file *file) return 0; } +static ssize_t file_write(struct file *f, const char __user *buf, + size_t size, loff_t *offset) +{ + struct data_file *df = get_incfs_data_file(f); + const ssize_t data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE; + size_t block_count = size / sizeof(struct incfs_new_data_block); + struct incfs_new_data_block __user *usr_blocks = + (struct incfs_new_data_block __user *)buf; + u8 *data_buf = NULL; + ssize_t error = 0; + int i = 0; + + if (!df) + return -EBADF; + + if (!access_ok(VERIFY_READ, usr_blocks, size)) + return -EFAULT; + + data_buf = (u8 *)__get_free_pages(GFP_NOFS, get_order(data_buf_size)); + if (!data_buf) + return -ENOMEM; + + for (i = 0; i < block_count; i++) { + struct incfs_new_data_block block = {}; + + if (copy_from_user(&block, &usr_blocks[i], sizeof(block)) > 0) { + error = -EFAULT; + break; + } + + if (block.data_len > data_buf_size) { + error = -E2BIG; + break; + } + if (!access_ok(VERIFY_READ, u64_to_user_ptr(block.data), + block.data_len)) { + error = -EFAULT; + break; + } + if (copy_from_user(data_buf, u64_to_user_ptr(block.data), + block.data_len) > 0) { + error = -EFAULT; + break; + } + block.data = 0; /* To make sure nobody uses it. 
*/ + if (block.flags & INCFS_BLOCK_FLAGS_HASH) { + error = incfs_process_new_hash_block(df, &block, + data_buf); + } else { + error = incfs_process_new_data_block(df, &block, + data_buf); + } + if (error) + break; + } + + if (data_buf) + free_pages((unsigned long)data_buf, get_order(data_buf_size)); + *offset = 0; + + /* + * Only report the error if no records were processed, otherwise + * just return how many were processed successfully. + */ + if (i == 0) + return error; + + return i * sizeof(struct incfs_new_data_block); +} + + static int dentry_revalidate(struct dentry *d, unsigned int flags) { struct path backing_path = {}; @@ -1995,7 +2004,6 @@ static void dentry_release(struct dentry *d) if (di) path_put(&di->backing_path); - kfree(d->d_fsdata); d->d_fsdata = NULL; } @@ -2036,117 +2044,15 @@ static void evict_inode(struct inode *inode) clear_inode(inode); } -static int incfs_setattr(struct dentry *dentry, struct iattr *ia) -{ - struct dentry_info *di = get_incfs_dentry(dentry); - struct dentry *backing_dentry; - struct inode *backing_inode; - int error; - - if (ia->ia_valid & ATTR_SIZE) - return -EINVAL; - - if (!di) - return -EINVAL; - backing_dentry = di->backing_path.dentry; - if (!backing_dentry) - return -EINVAL; - - backing_inode = d_inode(backing_dentry); - - /* incfs files are readonly, but the backing files must be writeable */ - if (S_ISREG(backing_inode->i_mode)) { - if ((ia->ia_valid & ATTR_MODE) && (ia->ia_mode & 0222)) - return -EINVAL; - - ia->ia_mode |= 0222; - } - - inode_lock(d_inode(backing_dentry)); - error = notify_change(backing_dentry, ia, NULL); - inode_unlock(d_inode(backing_dentry)); - - if (error) - return error; - - if (S_ISREG(backing_inode->i_mode)) - ia->ia_mode &= ~0222; - - return simple_setattr(dentry, ia); -} - static ssize_t incfs_getxattr(struct dentry *d, const char *name, void *value, size_t size) { struct dentry_info *di = get_incfs_dentry(d); - struct mount_info *mi = get_mount_info(d->d_sb); - char *stored_value; - 
size_t stored_size; - - if (di && di->backing_path.dentry) - return vfs_getxattr(di->backing_path.dentry, name, value, size); - - if (strcmp(name, "security.selinux")) - return -ENODATA; - - if (!strcmp(d->d_iname, INCFS_PENDING_READS_FILENAME)) { - stored_value = mi->pending_read_xattr; - stored_size = mi->pending_read_xattr_size; - } else if (!strcmp(d->d_iname, INCFS_LOG_FILENAME)) { - stored_value = mi->log_xattr; - stored_size = mi->log_xattr_size; - } else { - return -ENODATA; - } - - if (!stored_value) - return -ENODATA; - - if (stored_size > size) - return -E2BIG; - - memcpy(value, stored_value, stored_size); - return stored_size; - -} - -static ssize_t incfs_setxattr(struct dentry *d, const char *name, - const void *value, size_t size, int flags) -{ - struct dentry_info *di = get_incfs_dentry(d); - struct mount_info *mi = get_mount_info(d->d_sb); - void **stored_value; - size_t *stored_size; - - if (di && di->backing_path.dentry) - return vfs_setxattr(di->backing_path.dentry, name, value, size, - flags); - - if (strcmp(name, "security.selinux")) - return -ENODATA; - - if (size > INCFS_MAX_FILE_ATTR_SIZE) - return -E2BIG; - - if (!strcmp(d->d_iname, INCFS_PENDING_READS_FILENAME)) { - stored_value = &mi->pending_read_xattr; - stored_size = &mi->pending_read_xattr_size; - } else if (!strcmp(d->d_iname, INCFS_LOG_FILENAME)) { - stored_value = &mi->log_xattr; - stored_size = &mi->log_xattr_size; - } else { + if (!di || !di->backing_path.dentry) return -ENODATA; - } - - kfree (*stored_value); - *stored_value = kzalloc(size, GFP_NOFS); - if (!*stored_value) - return -ENOMEM; - memcpy(*stored_value, value, size); - *stored_size = size; - return 0; + return vfs_getxattr(di->backing_path.dentry, name, value, size); } static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size) @@ -2246,7 +2152,7 @@ struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, path_put(&backing_dir_path); sb->s_flags |= SB_ACTIVE; - pr_debug("incfs: mount\n"); 
+ pr_debug("infs: mount\n"); return dget(sb->s_root); err: sb->s_fs_info = NULL; @@ -2267,11 +2173,12 @@ static int incfs_remount_fs(struct super_block *sb, int *flags, char *data) if (err) return err; - err = incfs_realloc_mount_info(mi, &options); - if (err) - return err; + if (mi->mi_options.read_timeout_ms != options.read_timeout_ms) { + mi->mi_options.read_timeout_ms = options.read_timeout_ms; + pr_debug("incfs: new timeout_ms=%d", options.read_timeout_ms); + } - pr_debug("incfs: remount\n"); + pr_debug("infs: remount\n"); return 0; } @@ -2279,7 +2186,7 @@ void incfs_kill_sb(struct super_block *sb) { struct mount_info *mi = sb->s_fs_info; - pr_debug("incfs: unmount\n"); + pr_debug("infs: unmount\n"); incfs_free_mount_info(mi); generic_shutdown_super(sb); } diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 13c3d5173e14..787049031cca 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -35,8 +35,6 @@ #define INCFS_XATTR_METADATA_NAME (XATTR_USER_PREFIX "incfs.metadata") #define INCFS_MAX_SIGNATURE_SIZE 8096 -#define INCFS_SIGNATURE_VERSION 2 -#define INCFS_SIGNATURE_SECTIONS 2 #define INCFS_IOCTL_BASE_CODE 'g' @@ -48,49 +46,7 @@ /* Read file signature */ #define INCFS_IOC_READ_FILE_SIGNATURE \ - _IOR(INCFS_IOCTL_BASE_CODE, 31, struct incfs_get_file_sig_args) - -/* - * Fill in one or more data block. 
This may only be called on a handle - * passed as a parameter to INCFS_IOC_PERMIT_FILLING - * - * Returns number of blocks filled in, or error if none were - */ -#define INCFS_IOC_FILL_BLOCKS \ - _IOR(INCFS_IOCTL_BASE_CODE, 32, struct incfs_fill_blocks) - -/* - * Permit INCFS_IOC_FILL_BLOCKS on the given file descriptor - * May only be called on .pending_reads file - * - * Returns 0 on success or error - */ -#define INCFS_IOC_PERMIT_FILL \ - _IOW(INCFS_IOCTL_BASE_CODE, 33, struct incfs_permit_fill) - -/* - * Fills buffer with ranges of populated blocks - * - * Returns 0 if all ranges written - * error otherwise - * - * Either way, range_buffer_size_out is set to the number - * of bytes written. Should be set to 0 by caller. The ranges - * filled are valid, but if an error was returned there might - * be more ranges to come. - * - * Ranges are ranges of filled blocks: - * - * 1 2 7 9 - * - * means blocks 1, 2, 7, 8, 9 are filled, 0, 3, 4, 5, 6 and 10 on - * are not - * - * If hashing is enabled for the file, the hash blocks are simply - * treated as though they immediately followed the data blocks. - */ -#define INCFS_IOC_GET_FILLED_BLOCKS \ - _IOR(INCFS_IOCTL_BASE_CODE, 34, struct incfs_get_filled_blocks_args) + _IOWR(INCFS_IOCTL_BASE_CODE, 31, struct incfs_get_file_sig_args) enum incfs_compression_alg { COMPRESSION_NONE = 0, @@ -125,9 +81,10 @@ struct incfs_pending_read_info { }; /* - * Description of a data or hash block to add to a data file. + * A struct to be written into a control file to load a data or hash + * block to a data file. */ -struct incfs_fill_block { +struct incfs_new_data_block { /* Index of a data block. 
*/ __u32 block_index; @@ -150,42 +107,63 @@ struct incfs_fill_block { /* Values from enum incfs_block_flags */ __u8 flags; + /* Reserved - must be 0 */ __u16 reserved1; + /* Reserved - must be 0 */ __u32 reserved2; + /* Reserved - must be 0 */ __aligned_u64 reserved3; }; -/* - * Description of a number of blocks to add to a data file - * - * Argument for INCFS_IOC_FILL_BLOCKS - */ -struct incfs_fill_blocks { - /* Number of blocks */ - __u64 count; - - /* A pointer to an array of incfs_fill_block structs */ - __aligned_u64 fill_blocks; -}; - -/* - * Permit INCFS_IOC_FILL_BLOCKS on the given file descriptor - * May only be called on .pending_reads file - * - * Argument for INCFS_IOC_PERMIT_FILL - */ -struct incfs_permit_fill { - /* File to permit fills on */ - __u32 file_descriptor; -}; - enum incfs_hash_tree_algorithm { INCFS_HASH_TREE_NONE = 0, INCFS_HASH_TREE_SHA256 = 1 }; +struct incfs_file_signature_info { + /* + * A pointer to file's root hash (if determined != 0) + * Actual hash size determined by hash_tree_alg. + * Size of the buffer should be at least INCFS_MAX_HASH_SIZE + * + * Equivalent to: u8 *root_hash; + */ + __aligned_u64 root_hash; + + /* + * A pointer to additional data that was attached to the root hash + * before signing. + * + * Equivalent to: u8 *additional_data; + */ + __aligned_u64 additional_data; + + /* Size of additional data. */ + __u32 additional_data_size; + + /* Reserved - must be 0 */ + __u32 reserved1; + + /* + * A pointer to pkcs7 signature DER blob. + * + * Equivalent to: u8 *signature; + */ + __aligned_u64 signature; + + + /* Size of pkcs7 signature DER blob */ + __u32 signature_size; + + /* Reserved - must be 0 */ + __u32 reserved2; + + /* Value from incfs_hash_tree_algorithm */ + __u8 hash_tree_alg; +}; + /* * Create a new file or directory. 
*/ @@ -203,8 +181,10 @@ struct incfs_new_file_args { */ __u16 mode; + /* Reserved - must be 0 */ __u16 reserved1; + /* Reserved - must be 0 */ __u32 reserved2; /* @@ -237,33 +217,16 @@ struct incfs_new_file_args { */ __u32 file_attr_len; + /* Reserved - must be 0 */ __u32 reserved4; - /* - * Points to an APK V4 Signature data blob - * Signature must have two sections - * Format is: - * u32 version - * u32 size_of_hash_info_section - * u8 hash_info_section[] - * u32 size_of_signing_info_section - * u8 signing_info_section[] - * - * Note that incfs does not care about what is in signing_info_section - * - * hash_info_section has following format: - * u32 hash_algorithm; // Must be SHA256 == 1 - * u8 log2_blocksize; // Must be 12 for 4096 byte blocks - * u32 salt_size; - * u8 salt[]; - * u32 hash_size; - * u8 root_hash[]; - */ + /* struct incfs_file_signature_info *signature_info; */ __aligned_u64 signature_info; - /* Size of signature_info */ - __aligned_u64 signature_size; + /* Reserved - must be 0 */ + __aligned_u64 reserved5; + /* Reserved - must be 0 */ __aligned_u64 reserved6; }; @@ -289,46 +252,4 @@ struct incfs_get_file_sig_args { __u32 file_signature_len_out; }; -struct incfs_filled_range { - __u32 begin; - __u32 end; -}; - -/* - * Request ranges of filled blocks - * Argument for INCFS_IOC_GET_FILLED_BLOCKS - */ -struct incfs_get_filled_blocks_args { - /* - * A buffer to populate with ranges of filled blocks - * - * Equivalent to struct incfs_filled_ranges *range_buffer - */ - __aligned_u64 range_buffer; - - /* Size of range_buffer */ - __u32 range_buffer_size; - - /* Start index to read from */ - __u32 start_index; - - /* - * End index to read to. 0 means read to end. 
This is a range, - * so incfs will read from start_index to end_index - 1 - */ - __u32 end_index; - - /* Actual number of blocks in file */ - __u32 total_blocks_out; - - /* The number of data blocks in file */ - __u32 data_blocks_out; - - /* Number of bytes written to range buffer */ - __u32 range_buffer_size_out; - - /* Sector scanned up to, if the call was interrupted */ - __u32 index_out; -}; - #endif /* _UAPI_LINUX_INCREMENTALFS_H */ diff --git a/tools/testing/selftests/filesystems/incfs/Makefile b/tools/testing/selftests/filesystems/incfs/Makefile index 5b2e627ce883..1f13573d3617 100644 --- a/tools/testing/selftests/filesystems/incfs/Makefile +++ b/tools/testing/selftests/filesystems/incfs/Makefile @@ -1,11 +1,18 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -CFLAGS += -I../.. -I../../../../.. +CFLAGS += -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -lssl -lcrypto -llz4 +CFLAGS += -I../../../../../usr/include/ +CFLAGS += -I../../../../include/uapi/ +CFLAGS += -I../../../../lib -LDLIBS := -llz4 -lcrypto EXTRA_SOURCES := utils.c -TEST_GEN_PROGS := incfs_test +CFLAGS += $(EXTRA_SOURCES) -$(TEST_GEN_PROGS): $(EXTRA_SOURCES) +TEST_GEN_PROGS := incfs_test include ../../lib.mk + +$(OUTPUT)incfs_test: incfs_test.c $(EXTRA_SOURCES) +all: $(OUTPUT)incfs_test + +clean: + rm -rf $(OUTPUT)incfs_test *.o diff --git a/tools/testing/selftests/filesystems/incfs/config b/tools/testing/selftests/filesystems/incfs/config new file mode 100644 index 000000000000..b6749837a318 --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/config @@ -0,0 +1 @@ +CONFIG_INCREMENTAL_FS=y \ No newline at end of file diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 6809399eac97..dd70e019dc4c 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -2,31 +2,31 @@ /* * Copyright 2018 Google LLC 
*/ -#include -#include -#include -#include -#include -#include -#include -#include #include -#include #include - -#include -#include #include +#include +#include +#include +#include +#include #include #include - +#include +#include +#include +#include +#include #include #include -#include +#include "../../kselftest.h" +#include "lz4.h" #include "utils.h" +#define __packed __attribute__((__packed__)) + #define TEST_FAILURE 1 #define TEST_SUCCESS 0 #define INCFS_MAX_MTREE_LEVELS 8 @@ -69,6 +69,101 @@ struct linux_dirent64 { char d_name[0]; } __packed; +/* + * The certificate below and the private key were created by calling: + * openssl req -x509 -newkey rsa:4096 -keyout private.key -out cert.crt + * -days 1000 -sha256 -nodes -outform PEM -subj + * "/C=US/ST=WA/L=Kirkland/O=Example/OU=Org/CN=www.example.com" + */ +char x509_cert[] = +"-----BEGIN CERTIFICATE-----\n" +"MIIFvzCCA6egAwIBAgIUXpwqelEljm6BBllRQGHLrls2MYgwDQYJKoZIhvcNAQEL\n" +"BQAwbzELMAkGA1UEBhMCVVMxEzARBgNVBAgMCldhc2hpbmd0b24xETAPBgNVBAcM\n" +"CEtpcmtsYW5kMRAwDgYDVQQKDAdFeGFtcGxlMQwwCgYDVQQLDANPcmcxGDAWBgNV\n" +"BAMMD3d3dy5leGFtcGxlLmNvbTAeFw0xOTA4MDgyMzA3MDZaFw0yMjA1MDQyMzA3\n" +"MDZaMG8xCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApXYXNoaW5ndG9uMREwDwYDVQQH\n" +"DAhLaXJrbGFuZDEQMA4GA1UECgwHRXhhbXBsZTEMMAoGA1UECwwDT3JnMRgwFgYD\n" +"VQQDDA93d3cuZXhhbXBsZS5jb20wggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK\n" +"AoICAQC1LuFW/lDV/GflqFMz7RDvFFgWld982ZuDJRaK55JNj+MI4RZNL61PDw43\n" +"NeeJtqUoVxSLS9wHURjSjD/CV5GudUOnzGfbwFlLko+jhYRT4HNFS+5ys1FEJLtA\n" +"uYcY4P9GHQEXYUX+ue82A2kJ91oY6G3vCQYJFiGteb6TRDICmug31x4pBfB8rOdt\n" +"4/NXS/Dn+S0/mJlxw34IKfqrlFjzUziRZtAWWqDcfxFDUizSggkdXIUq4GY38RAD\n" +"qGewNNCab3ClJDP7/M32BhSNgsIKhgtSTM2+ocfvBhwup+BjV6UbL21DPAshlolV\n" +"gSL1HM2jin5bi4bpFMreY0LXwFih87/6AVSfQHY9TZrombVZnMxvB7NG1NCSwDBT\n" +"qjjFb3oiSMugJzY+MhISM754m46fwUyHZ1ylWCLJEU8kQ5A1q9vvqMcaDa4uTGP3\n" +"UgC6SyVmZxG2o+AO6m8TRTCtqHN41mPTM9HK4T1UyuzVpykSc2LlYkKE517SyEiV\n" +"XDmotNb2myXNYHHTjRYNxkq75Lbii2I4Q4z8XtDngaIrhZqACKSqIt2CocGjx61S\n" 
+"oxKWi+LGa7B4NaCMjz1LnaOIsXn1rJDRnUWL49T42g4kOi/5QaC2JDygfefw1hAb\n" +"uxkq9EYUDg+w9broltiBf4rKAnw8JMySARnyPZbj0lhZK3va5wIDAQABo1MwUTAd\n" +"BgNVHQ4EFgQUo6JN3gY2yGbzOTNj8Al7hNB3rw0wHwYDVR0jBBgwFoAUo6JN3gY2\n" +"yGbzOTNj8Al7hNB3rw0wDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOC\n" +"AgEAQb3pJqOzM4whfNVdpEOswd1EApcWNM1ps9iTlEEjDoRv9F7F1PW0uXCIpk3B\n" +"j5JgCmIxAcPnzj42rduRSx421hHMZhbAIWI/JL4ZSF64qlG0YrmJDXlJgSMoyst5\n" +"biUqeWgO7Js5udPt3zhkeA62z3hGM6dE5B3k7gHTaKKtK17+UeR9imZKsOK8GBnM\n" +"rxMPI6XghxxAK2OQ/r09DHDiyf/GxgOE46oknfXfMPx3HaSvDKrZUTZ+UvVbM5c2\n" +"5eXOgH5UO/e4llLknJK7CoP/R6G7pV44iT4t4t9FMnvCYvavAHwfR+6z5vTF3o8a\n" +"wd80fC8z1vfLsIPLROdzBl9rGCvv536fPiEA677CM1AZkjfT0a9DVzrE1NDvuCUF\n" +"0KgEdiNwux+hO6dbTyiS38yPT6TbpoWJptJmFhFkC4hGvUgoX/TI0covSyf74VRH\n" +"k3BHojOBMYiX1K66xoN7fhlGK8cith3L0XXPB8CgSEUPWURvm8RCaGuX2T3FZomF\n" +"BCnNpN+WNnN3Yf4OkjtuvtxxktUU7pfVLsUxrdpo/ph4rWm6U83VT/Zlq92aF4vW\n" +"QJ+7uraQFip7e+Gy9g3UJINm3B7b1C4ch/Z/upCZESOI/23sVGzkfTgOrS+23i6/\n" +"Vi9YW75zySC2FCa1AWMS1NmS5qfDSycJUgD6YvOUg0C54ZI=\n" +"-----END CERTIFICATE-----"; + +char private_key[] = +"-----BEGIN PRIVATE KEY-----\n" +"MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQC1LuFW/lDV/Gfl\n" +"qFMz7RDvFFgWld982ZuDJRaK55JNj+MI4RZNL61PDw43NeeJtqUoVxSLS9wHURjS\n" +"jD/CV5GudUOnzGfbwFlLko+jhYRT4HNFS+5ys1FEJLtAuYcY4P9GHQEXYUX+ue82\n" +"A2kJ91oY6G3vCQYJFiGteb6TRDICmug31x4pBfB8rOdt4/NXS/Dn+S0/mJlxw34I\n" +"KfqrlFjzUziRZtAWWqDcfxFDUizSggkdXIUq4GY38RADqGewNNCab3ClJDP7/M32\n" +"BhSNgsIKhgtSTM2+ocfvBhwup+BjV6UbL21DPAshlolVgSL1HM2jin5bi4bpFMre\n" +"Y0LXwFih87/6AVSfQHY9TZrombVZnMxvB7NG1NCSwDBTqjjFb3oiSMugJzY+MhIS\n" +"M754m46fwUyHZ1ylWCLJEU8kQ5A1q9vvqMcaDa4uTGP3UgC6SyVmZxG2o+AO6m8T\n" +"RTCtqHN41mPTM9HK4T1UyuzVpykSc2LlYkKE517SyEiVXDmotNb2myXNYHHTjRYN\n" +"xkq75Lbii2I4Q4z8XtDngaIrhZqACKSqIt2CocGjx61SoxKWi+LGa7B4NaCMjz1L\n" +"naOIsXn1rJDRnUWL49T42g4kOi/5QaC2JDygfefw1hAbuxkq9EYUDg+w9broltiB\n" +"f4rKAnw8JMySARnyPZbj0lhZK3va5wIDAQABAoICAQCMKul/0J2e/ncub6t2t4dr\n" 
+"PnTrfCT6xKqPqciny4Ee6hr9So1jR2gvink380bd/mQFMmEdZqGhM3cdpAzLf82f\n" +"hu7BSNxsYIF0er0PB4MZFMJ4sMaXC+zp5/TJnP5MG/zBND0c5k8tQpEyWy8O28Jj\n" +"FKW/0F5P90Q0ncP20EJUS50tXgniOMsU2Prtw/UE6yZDgD0mPxsurMu66ycXSFwM\n" +"WqyfqEeBk7lw/AjR6Sft71W31lTbl+DclG0MN2OIKUPcxiwCRmDFKI36MDgERk1x\n" +"sMPfdrWRLj2ryDFTUuLAWBTOVEGWS0RdRsWWVaJCuHbKd6FLl0TW2xQbOfWDTjYC\n" +"Ps31ejh163qdbk7OGOZIbd83fP3jsyL+4eNzhUpeXMKhfG58mFIv4yhdZIUOpuL6\n" +"aqnoU9z9wEsJKj/SrKr3nw6tuTnmbXgNjun9LfTFmqqDRBYd0Okiprw6jHNM1jgA\n" +"GG0kC/K7r89jKymVDABwGMFCS33ynR1Tb6zG+cqgNMPw19Fy3uQuW21CjqSzCOyP\n" +"aEVCEUZeP+ofql5+7ZKi6Dj+EdTfeKt2ihgheHZZoaYSINb8tsnKbdJhwBfW9PFT\n" +"aT/hu3bnO2FPC8H2NGOqxOEeel9ALU4SFu1pOknEhiL3/mNfOQ+KgrSRDtNRlcL0\n" +"cto05J90u0cmqwWKlshfaQKCAQEA5dcklxs4ezyzt28NcsiyS02oZ+9TkQp6pCXV\n" +"kx7AwhivAmVTlJ+c6BegA5EPd7A1gknM3+EKzGpoBOqmlF45G57phVIAphAp4oCH\n" +"UOVtIQgM8p4EU2gtX+uNOopdYlpBQnWimXaHA2sOD9/yTbZ03j/McRH6D15+iCld\n" +"3880GHdZaYYbQmHoSDg39LRRO1bdS3WC0oKBD2gPi3K0b9RaZSwKzuVrmlvrLURj\n" +"WMZfmkGl4BsITfuoTxbWFVncG3Kb9eYkYUFZy4M2G/s849PS/HjrN7BvgpanjtVp\n" +"1/39APQfAYfUuBPbKYnb6F8dE0pb5cVd4uMZklAeTb3bXjOO9QKCAQEAyc4CxWXr\n" +"bG6Do5dGpWudQ7ucq00MR0T3MHQIu5XTn6BsPHAJ9ZgrQw9C24PXm2VEjjsrMs5T\n" +"rHNF9oeO39s25Za1iyJ+893icqA3h3ivCUOOoVE54BkuJK6REhkXPD5G1ubmxeBz\n" +"MKNehlpd/eSbJJArkzKFZ8sBtLt8i9VFhRnXSpDAbiMpCbjW+bem9MWdLmkenSnu\n" +"OUbnqYcJhFBCvOT7ZCHFCDNUNPfHcaReSY2EYjw0ZqtqAZD0Q+DL+RkLz7l1+/bF\n" +"eEwNjmjFTcwRyawqf38D4miU0H6ca16FkeSlbmM5p3HdwZK2HVYYz3FSwhox6Ebd\n" +"n6in42qfL4Ug6wKCAQAh9IDRWhIkErmyNdPUy1WbzmM8x5ye5t9rdLNywq5TfnYM\n" +"co/AezwhBax8GmgglIWzM9fykzqXLHklkMz/SlRBgl6ZdZ3m6qhlb/uNtfdDU/8l\n" +"sLaO4+sgKpp4tYxKRW8ytFJLPbmAhcZUDg+r73KgiuhXJAK/VoR29TWLJP9bRfaN\n" +"omRQkEpSsQuDOUhu7cxPo5KqKuGKNyNkxJNnmgWowLLwEfCtozrBO0M6EER7c4tf\n" +"6l51tuIMnSEPknD0FSB5WYCyZYcwi7fotlsuhVK8PdjyJzyyHDOw5FJ4uGsyQt55\n" +"yWlhsH1GS7mTQMn42Zlt/pR6OnbCqNdxQMUxy4gpAoIBAFvMbs5E0pb8nr0n72cI\n" +"UP2itl3mKpOw95D+94n9WcrfOt0zShSCKAvVQWCB1O5HXqwklj4CRWXI+iZu+7sx\n" 
+"CQPfTq3//ygH4x6paxkg+N6J8LPJMz6Rtb/R+QP2je9FlQvk9U1GEKArcLBFI0R/\n" +"XWOAgZHwBWd1nU0NjFY/qeQmIR02Q5LWQ7C8eG4X8MafriSShO6RSGCdtHwVhWq+\n" +"59ztfL3L7skQMFn37K3xS0LCMVpOcLfTeeFEgxjthVvG3OydPOJlGubiEbiaSEZf\n" +"cif/PUXKDYZMdIVzUsw0ryXykJ5qXKuizHFlv5oQtDCJKFBLgjBbLC2YluaIdekz\n" +"8gkCggEBAJWxS7EuB/qL7fOz0o3HRy0plR3qbwZ0pLoCz0Ii7WxraBS1yQwmxif1\n" +"Rgv89GyFqg1yQl3CSrMiw7oC9WxxxuiEZDO18c4KO3NTv9K4itN9OPQVBTHmEhod\n" +"KWcyP4/W/Sfuae77PyclSqUsAARRrKYn2fpLTS5ibaU0QZgHmdPgYDUrPr+6PHKK\n" +"ZfQKU2uBfuo6zoMbMmFi3UYG49j9rv4d6v+44vS1MPHV9JK/LD8YfBhgx8Pg/u6D\n" +"nUgipS48pkGjJr2u2Vu7Mx70vqz0Yf2neyyDbdLtkYauC4w7YKPTD0yzDJyGuAeB\n" +"GyPbW1yZa5vE302a1Cr0Cd7RC4AFAAw=\n" +"-----END PRIVATE KEY-----"; + struct test_files_set get_test_files_set(void) { static struct test_file files[] = { @@ -195,7 +290,7 @@ char *bin2hex(char *dst, const void *src, size_t count) return dst; } -static char *get_index_filename(const char *mnt_dir, incfs_uuid_t id) +static char *get_index_filename(char *mnt_dir, incfs_uuid_t id) { char path[FILENAME_MAX]; char str_id[1 + 2 * sizeof(id)]; @@ -206,43 +301,15 @@ static char *get_index_filename(const char *mnt_dir, incfs_uuid_t id) return strdup(path); } -int open_file_by_id(const char *mnt_dir, incfs_uuid_t id, bool use_ioctl) +int open_file_by_id(char *mnt_dir, incfs_uuid_t id) { char *path = get_index_filename(mnt_dir, id); - int cmd_fd = open_commands_file(mnt_dir); - int fd = open(path, O_RDWR | O_CLOEXEC); - struct incfs_permit_fill permit_fill = { - .file_descriptor = fd, - }; - int error = 0; + int fd = open(path, O_RDWR); + free(path); if (fd < 0) { print_error("Can't open file by id."); - error = -errno; - goto out; - } - - if (use_ioctl && ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { - print_error("Failed to call PERMIT_FILL"); - error = -errno; - goto out; - } - - if (ioctl(fd, INCFS_IOC_PERMIT_FILL, &permit_fill) != -1 || - errno != EPERM) { - print_error( - "Successfully called PERMIT_FILL on non pending_read file"); return 
-errno; - goto out; - } - -out: - free(path); - close(cmd_fd); - - if (error) { - close(fd); - return error; } return fd; @@ -276,18 +343,20 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, uint8_t *data_buf = malloc(data_buf_size); uint8_t *current_data = data_buf; uint8_t *data_end = data_buf + data_buf_size; - struct incfs_fill_block *block_buf = - calloc(block_count, sizeof(struct incfs_fill_block)); - struct incfs_fill_blocks fill_blocks = { - .count = block_count, - .fill_blocks = ptr_to_u64(block_buf), - }; + struct incfs_new_data_block *block_buf = + calloc(block_count, sizeof(*block_buf)); ssize_t write_res = 0; - int fd = -1; + int fd; int error = 0; int i = 0; int blocks_written = 0; + fd = open_file_by_id(mnt_dir, file->id); + if (fd <= 0) { + error = -errno; + goto out; + } + for (i = 0; i < block_count; i++) { int block_index = blocks[i]; bool compress = (file->index + block_index) % 2 == 0; @@ -335,33 +404,17 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, block_buf[i].block_index = block_index; block_buf[i].data_len = block_size; block_buf[i].data = ptr_to_u64(current_data); + block_buf[i].compression = + compress ? 
COMPRESSION_LZ4 : COMPRESSION_NONE; current_data += block_size; } if (!error) { - fd = open_file_by_id(mnt_dir, file->id, false); - if (fd < 0) { - error = -errno; - goto out; - } - write_res = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); - if (write_res >= 0) { - ksft_print_msg("Wrote to file via normal fd error\n"); - error = -EPERM; - goto out; - } - - close(fd); - fd = open_file_by_id(mnt_dir, file->id, true); - if (fd < 0) { - error = -errno; - goto out; - } - write_res = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + write_res = write(fd, block_buf, sizeof(*block_buf) * i); if (write_res < 0) error = -errno; else - blocks_written = write_res; + blocks_written = write_res / sizeof(*block_buf); } if (error) { ksft_print_msg( @@ -446,7 +499,7 @@ static loff_t read_whole_file(char *filename) loff_t bytes_read = 0; uint8_t buff[16 * 1024]; - fd = open(filename, O_RDONLY | O_CLOEXEC); + fd = open(filename, O_RDONLY); if (fd <= 0) return fd; @@ -478,7 +531,7 @@ static int read_test_file(uint8_t *buf, size_t len, char *filename, size_t bytes_to_read = len; off_t offset = ((off_t)block_idx) * INCFS_DATA_FILE_BLOCK_SIZE; - fd = open(filename, O_RDONLY | O_CLOEXEC); + fd = open(filename, O_RDONLY); if (fd <= 0) return fd; @@ -667,6 +720,8 @@ static int build_mtree(struct test_file *file) int tree_lvl_index[INCFS_MAX_MTREE_LEVELS] = {}; int tree_lvl_count[INCFS_MAX_MTREE_LEVELS] = {}; int levels_count = 0; + char data_to_sign[256] = {}; + int sig_data_size; int i, level; if (file->size == 0) @@ -693,9 +748,8 @@ static int build_mtree(struct test_file *file) if (block_count == 1) { int seed = get_file_block_seed(file->index, 0); - memset(data, 0, INCFS_DATA_FILE_BLOCK_SIZE); rnd_buf((uint8_t *)data, file->size, seed); - sha256(data, INCFS_DATA_FILE_BLOCK_SIZE, file->root_hash); + sha256(data, file->size, file->root_hash); return 0; } @@ -710,13 +764,11 @@ static int build_mtree(struct test_file *file) int seed = get_file_block_seed(file->index, i); char *hash_ptr = 
file->mtree[block_index].data + block_off; - if (file->size - offset < block_size) { + if (file->size - offset < block_size) block_size = file->size - offset; - memset(data, 0, INCFS_DATA_FILE_BLOCK_SIZE); - } rnd_buf((uint8_t *)data, block_size, seed); - sha256(data, INCFS_DATA_FILE_BLOCK_SIZE, hash_ptr); + sha256(data, block_size, hash_ptr); } /* Build higher levels of hash tree. */ @@ -740,6 +792,19 @@ static int build_mtree(struct test_file *file) sha256(file->mtree[0].data, INCFS_DATA_FILE_BLOCK_SIZE, file->root_hash); + /* Calculating digital signature */ + snprintf(file->sig.add_data, sizeof(file->sig.add_data), "%ld", + file->size); + memcpy(data_to_sign, file->root_hash, SHA256_DIGEST_SIZE); + memcpy(data_to_sign + SHA256_DIGEST_SIZE, file->sig.add_data, + strlen(file->sig.add_data)); + sig_data_size = SHA256_DIGEST_SIZE + strlen(file->sig.add_data); + if (!sign_pkcs7(data_to_sign, sig_data_size, private_key, x509_cert, + &file->sig.data, &file->sig.size)) { + ksft_print_msg("Signing failed.\n"); + return -EINVAL; + } + return 0; } @@ -748,21 +813,21 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) int err; int i; int fd; - struct incfs_fill_blocks fill_blocks = { - .count = file->mtree_block_count, - }; - struct incfs_fill_block *fill_block_array = - calloc(fill_blocks.count, sizeof(struct incfs_fill_block)); - if (fill_blocks.count == 0) + size_t blocks_size = + file->mtree_block_count * sizeof(struct incfs_new_data_block); + struct incfs_new_data_block *blocks = NULL; + char *file_path; + + if (blocks_size == 0) return 0; - if (!fill_block_array) + blocks = malloc(blocks_size); + if (!blocks) return -ENOMEM; - fill_blocks.fill_blocks = ptr_to_u64(fill_block_array); - for (i = 0; i < fill_blocks.count; i++) { - fill_block_array[i] = (struct incfs_fill_block){ + for (i = 0; i < file->mtree_block_count; i++) { + blocks[i] = (struct incfs_new_data_block){ .block_index = i, .data_len = INCFS_DATA_FILE_BLOCK_SIZE, .data = 
ptr_to_u64(file->mtree[i].data), @@ -770,28 +835,18 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) }; } - fd = open_file_by_id(mount_dir, file->id, false); + file_path = concat_file_name(mount_dir, file->name); + fd = open(file_path, O_RDWR); + free(file_path); if (fd < 0) { err = errno; goto failure; } - err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + err = write(fd, blocks, blocks_size); close(fd); - if (err >= 0) { - err = -EPERM; - goto failure; - } - - fd = open_file_by_id(mount_dir, file->id, true); - if (fd < 0) { - err = errno; - goto failure; - } - err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); - close(fd); - if (err < fill_blocks.count) + if (err < blocks_size) err = errno; else { err = 0; @@ -799,7 +854,7 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) } failure: - free(fill_block_array); + free(blocks); return err; } @@ -911,7 +966,7 @@ static bool iterate_directory(char *dir_to_iterate, bool root, int file_count) int i; /* Test directory iteration */ - int fd = open(dir_to_iterate, O_RDONLY | O_DIRECTORY | O_CLOEXEC); + int fd = open(dir_to_iterate, O_RDONLY | O_DIRECTORY); if (fd < 0) { print_error("Can't open directory\n"); @@ -1112,7 +1167,7 @@ static int basic_file_ops_test(char *mount_dir) char *path = concat_file_name(mount_dir, file->name); int fd; - fd = open(path, O_RDWR | O_CLOEXEC); + fd = open(path, O_RDWR); free(path); if (fd <= 0) { print_error("Can't open file"); @@ -1219,6 +1274,13 @@ static int dynamic_files_and_data_test(char *mount_dir) if (i == missing_file_idx) continue; + res = load_hash_tree(mount_dir, file); + if (res) { + ksft_print_msg("Can't load hashes for %s. 
error: %s\n", + file->name, strerror(-res)); + goto failure; + } + res = emit_test_file_data(mount_dir, file); if (res) { ksft_print_msg("Error %s emiting data for %s.\n", @@ -1417,6 +1479,7 @@ static int work_after_remount_test(char *mount_dir) /* Write first half of the data into the command file. (stage 1) */ for (i = 0; i < file_num_stage1; i++) { struct test_file *file = &test.files[i]; + int res; build_mtree(file); if (emit_file(cmd_fd, NULL, file->name, &file->id, @@ -1425,7 +1488,14 @@ static int work_after_remount_test(char *mount_dir) if (emit_test_file_data(mount_dir, file)) goto failure; - } + + res = load_hash_tree(mount_dir, file); + if (res) { + ksft_print_msg("Can't load hashes for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } +} /* Unmount and mount again, to see that data is persistent. */ close(cmd_fd); @@ -1812,6 +1882,162 @@ static int multiple_providers_test(char *mount_dir) return TEST_FAILURE; } +static int signature_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + int i = 0; + unsigned char sig_buf[INCFS_MAX_SIGNATURE_SIZE]; + char *backing_dir; + int cmd_fd = -1; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. (10s wait time) */ + if (mount_fs(mount_dir, backing_dir, 10000) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Write hashes and data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int res; + + build_mtree(file); + + res = crypto_emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, file->root_hash, + file->sig.data, file->sig.size, file->sig.add_data); + + if (res) { + ksft_print_msg("Emit failed for %s. 
error: %s\n", + file->name, strerror(-res)); + goto failure; + } + + if (emit_test_file_data(mount_dir, file)) + goto failure; + + res = load_hash_tree(mount_dir, file); + if (res) { + ksft_print_msg("Can't load hashes for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } + } + + /* Validate data */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int sig_len; + char *path; + int fd; + + if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + + path = concat_file_name(mount_dir, file->name); + fd = open(path, O_RDWR); + free(path); + if (fd < 0) { + print_error("Can't open file"); + goto failure; + } + + sig_len = get_file_signature(fd, sig_buf, ARRAY_SIZE(sig_buf)); + + if (close(fd)) { + print_error("Can't close file"); + goto failure; + } + + if (sig_len < 0) { + ksft_print_msg("Can't load signature %s. error: %s\n", + file->name, strerror(-sig_len)); + goto failure; + } + + if (sig_len != file->sig.size || + memcmp(sig_buf, file->sig.data, sig_len)) { + ksft_print_msg("Signature mismatch %s.\n", + file->name); + goto failure; + } + } + + /* Unmount and mount again, to make sure the signature is persistent. 
*/ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Validate data again */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int sig_len; + char *path; + int fd; + + if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + + path = concat_file_name(mount_dir, file->name); + fd = open(path, O_RDWR); + free(path); + if (fd < 0) { + print_error("Can't open file"); + goto failure; + } + + sig_len = get_file_signature(fd, sig_buf, ARRAY_SIZE(sig_buf)); + + if (close(fd)) { + print_error("Can't close file"); + goto failure; + } + + if (sig_len < 0) { + ksft_print_msg("Can't load signature %s. error: %s\n", + file->name, strerror(-sig_len)); + goto failure; + } + if (sig_len != file->sig.size || + memcmp(sig_buf, file->sig.data, sig_len)) { + ksft_print_msg("Signature mismatch %s.\n", + file->name); + goto failure; + } + } + + /* Final unmount */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + static int hash_tree_test(char *mount_dir) { char *backing_dir; @@ -1840,8 +2066,8 @@ static int hash_tree_test(char *mount_dir) build_mtree(file); res = crypto_emit_file(cmd_fd, NULL, file->name, &file->id, - file->size, file->root_hash, - file->sig.add_data); + file->size, file->root_hash, + file->sig.data, file->sig.size, file->sig.add_data); if (i == corrupted_file_idx) { /* Corrupt third blocks hash */ @@ -1932,88 +2158,48 @@ static int hash_tree_test(char *mount_dir) return TEST_FAILURE; } -enum expected_log { FULL_LOG, NO_LOG, PARTIAL_LOG }; - -static int validate_logs(char *mount_dir, int log_fd, struct test_file *file, - enum 
expected_log expected_log) +static int validate_logs(char *mount_dir, int log_fd, struct test_file *file) { uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; - struct incfs_pending_read_info prs[2048] = {}; + struct incfs_pending_read_info prs[100] = {}; int prs_size = ARRAY_SIZE(prs); int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; - int expected_read_block_cnt; int res; int read_count; - int i, j; + int i; char *filename = concat_file_name(mount_dir, file->name); int fd; - fd = open(filename, O_RDONLY | O_CLOEXEC); + fd = open(filename, O_RDONLY); free(filename); if (fd <= 0) return TEST_FAILURE; if (block_cnt > prs_size) block_cnt = prs_size; - expected_read_block_cnt = block_cnt; for (i = 0; i < block_cnt; i++) { res = pread(fd, data, sizeof(data), INCFS_DATA_FILE_BLOCK_SIZE * i); - - /* Make some read logs of type SAME_FILE_NEXT_BLOCK */ - if (i % 10 == 0) - usleep(20000); - - /* Skip some blocks to make logs of type SAME_FILE */ - if (i % 10 == 5) { - ++i; - --expected_read_block_cnt; - } - if (res <= 0) goto failure; } - read_count = wait_for_pending_reads( - log_fd, expected_log == NO_LOG ? 
10 : 0, prs, prs_size); - if (expected_log == NO_LOG) { - if (read_count == 0) - goto success; - if (read_count < 0) - ksft_print_msg("Error reading logged reads %s.\n", - strerror(-read_count)); - else - ksft_print_msg("Somehow read empty logs.\n"); - goto failure; - } - + read_count = wait_for_pending_reads(log_fd, 0, prs, prs_size); if (read_count < 0) { ksft_print_msg("Error reading logged reads %s.\n", strerror(-read_count)); goto failure; } - i = 0; - if (expected_log == PARTIAL_LOG) { - if (read_count == 0) { - ksft_print_msg("No logs %s.\n", file->name); - goto failure; - } - - for (i = 0, j = 0; j < expected_read_block_cnt - read_count; - i++, j++) - if (i % 10 == 5) - ++i; - - } else if (read_count != expected_read_block_cnt) { + if (read_count != block_cnt) { ksft_print_msg("Bad log read count %s %d %d.\n", file->name, - read_count, expected_read_block_cnt); + read_count, block_cnt); goto failure; } - for (j = 0; j < read_count; i++, j++) { - struct incfs_pending_read_info *read = &prs[j]; + for (i = 0; i < read_count; i++) { + struct incfs_pending_read_info *read = &prs[i]; if (!same_id(&read->file_id, &file->id)) { ksft_print_msg("Bad log read ino %s\n", file->name); @@ -2026,8 +2212,8 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file, goto failure; } - if (j != 0) { - unsigned long psn = prs[j - 1].serial_number; + if (i != 0) { + unsigned long psn = prs[i - 1].serial_number; if (read->serial_number != psn + 1) { ksft_print_msg("Bad log read sn %s %d %d.\n", @@ -2042,12 +2228,7 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file, file->name); goto failure; } - - if (i % 10 == 5) - ++i; } - -success: close(fd); return TEST_SUCCESS; @@ -2061,14 +2242,14 @@ static int read_log_test(char *mount_dir) struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; int i = 0; - int cmd_fd = -1, log_fd = -1, drop_caches = -1; + int cmd_fd = -1, log_fd = -1; char *backing_dir; 
backing_dir = create_backing_dir(mount_dir); if (!backing_dir) goto failure; - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) goto failure; cmd_fd = open_commands_file(mount_dir); @@ -2076,7 +2257,7 @@ static int read_log_test(char *mount_dir) goto failure; log_fd = open_log_file(mount_dir); - if (log_fd < 0) + if (cmd_fd < 0) ksft_print_msg("Can't open log file.\n"); /* Write data. */ @@ -2095,7 +2276,7 @@ static int read_log_test(char *mount_dir) for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (validate_logs(mount_dir, log_fd, file, FULL_LOG)) + if (validate_logs(mount_dir, log_fd, file)) goto failure; } @@ -2108,7 +2289,7 @@ static int read_log_test(char *mount_dir) goto failure; } - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) goto failure; cmd_fd = open_commands_file(mount_dir); @@ -2116,557 +2297,35 @@ static int read_log_test(char *mount_dir) goto failure; log_fd = open_log_file(mount_dir); - if (log_fd < 0) + if (cmd_fd < 0) ksft_print_msg("Can't open log file.\n"); /* Validate data again */ for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (validate_logs(mount_dir, log_fd, file, FULL_LOG)) + if (validate_logs(mount_dir, log_fd, file)) goto failure; } - /* - * Unmount and mount again with no read log to make sure poll - * doesn't crash - */ + /* Final unmount */ close(cmd_fd); close(log_fd); + free(backing_dir); if (umount(mount_dir) != 0) { print_error("Can't unmout FS"); goto failure; } - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=0", - false) != 0) - goto failure; - - log_fd = open_log_file(mount_dir); - if (log_fd < 0) - ksft_print_msg("Can't open log file.\n"); + return TEST_SUCCESS; - /* Validate data again - note should fail this time */ - for (i = 0; i < file_num; i++) { - struct test_file *file = 
&test.files[i]; - - if (validate_logs(mount_dir, log_fd, file, NO_LOG)) - goto failure; - } - - /* - * Remount and check that logs start working again - */ - drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY | O_CLOEXEC); - if (drop_caches == -1) - goto failure; - i = write(drop_caches, "3", 1); - close(drop_caches); - if (i != 1) - goto failure; - - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=1", - true) != 0) - goto failure; - - /* Validate data again */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_logs(mount_dir, log_fd, file, PARTIAL_LOG)) - goto failure; - } - - /* - * Remount and check that logs start working again - */ - drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY | O_CLOEXEC); - if (drop_caches == -1) - goto failure; - i = write(drop_caches, "3", 1); - close(drop_caches); - if (i != 1) - goto failure; - - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=4", - true) != 0) - goto failure; - - /* Validate data again */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_logs(mount_dir, log_fd, file, FULL_LOG)) - goto failure; - } - - /* Final unmount */ - close(log_fd); - free(backing_dir); - if (umount(mount_dir) != 0) { - print_error("Can't unmout FS"); - goto failure; - } - - return TEST_SUCCESS; - -failure: - close(cmd_fd); - close(log_fd); - free(backing_dir); - umount(mount_dir); - return TEST_FAILURE; -} - -static int emit_partial_test_file_data(char *mount_dir, struct test_file *file) -{ - int i, j; - int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; - int *block_indexes = NULL; - int result = 0; - int blocks_written = 0; - - if (file->size == 0) - return 0; - - /* Emit 2 blocks, skip 2 blocks etc*/ - block_indexes = calloc(block_cnt, sizeof(*block_indexes)); - for (i = 0, j = 0; i < block_cnt; ++i) - if ((i & 2) == 0) { - block_indexes[j] = i; - ++j; - } - - for (i = 0; i < j; i += 
blocks_written) { - blocks_written = emit_test_blocks(mount_dir, file, - block_indexes + i, j - i); - if (blocks_written < 0) { - result = blocks_written; - goto out; - } - if (blocks_written == 0) { - result = -EIO; - goto out; - } - } -out: - free(block_indexes); - return result; -} - -static int validate_ranges(const char *mount_dir, struct test_file *file) -{ - int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; - char *filename = concat_file_name(mount_dir, file->name); - int fd; - struct incfs_filled_range ranges[128]; - struct incfs_get_filled_blocks_args fba = { - .range_buffer = ptr_to_u64(ranges), - .range_buffer_size = sizeof(ranges), - }; - int error = TEST_SUCCESS; - int i; - int range_cnt; - int cmd_fd = -1; - struct incfs_permit_fill permit_fill; - - fd = open(filename, O_RDONLY | O_CLOEXEC); - free(filename); - if (fd <= 0) - return TEST_FAILURE; - - error = ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); - if (error != -1 || errno != EPERM) { - ksft_print_msg("INCFS_IOC_GET_FILLED_BLOCKS not blocked\n"); - error = -EPERM; - goto out; - } - - cmd_fd = open_commands_file(mount_dir); - permit_fill.file_descriptor = fd; - if (ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { - print_error("INCFS_IOC_PERMIT_FILL failed"); - return -EPERM; - goto out; - } - - error = ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); - if (error && errno != ERANGE) - goto out; - - if (error && errno == ERANGE && block_cnt < 509) - goto out; - - if (!error && block_cnt >= 509) { - error = -ERANGE; - goto out; - } - - if (fba.total_blocks_out != block_cnt) { - error = -EINVAL; - goto out; - } - - if (fba.data_blocks_out != block_cnt) { - error = -EINVAL; - goto out; - } - - range_cnt = (block_cnt + 3) / 4; - if (range_cnt > 128) - range_cnt = 128; - if (range_cnt != fba.range_buffer_size_out / sizeof(*ranges)) { - error = -ERANGE; - goto out; - } - - error = TEST_SUCCESS; - for (i = 0; i < fba.range_buffer_size_out / sizeof(*ranges) - 1; ++i) - if 
(ranges[i].begin != i * 4 || ranges[i].end != i * 4 + 2) { - error = -EINVAL; - goto out; - } - - if (ranges[i].begin != i * 4 || - (ranges[i].end != i * 4 + 1 && ranges[i].end != i * 4 + 2)) { - error = -EINVAL; - goto out; - } - - for (i = 0; i < 64; ++i) { - fba.start_index = i * 2; - fba.end_index = i * 2 + 2; - error = ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); - if (error) - goto out; - - if (fba.total_blocks_out != block_cnt) { - error = -EINVAL; - goto out; - } - - if (fba.start_index >= block_cnt) { - if (fba.index_out != fba.start_index) { - error = -EINVAL; - goto out; - } - - break; - } - - if (i % 2) { - if (fba.range_buffer_size_out != 0) { - error = -EINVAL; - goto out; - } - } else { - if (fba.range_buffer_size_out != sizeof(*ranges)) { - error = -EINVAL; - goto out; - } - - if (ranges[0].begin != i * 2) { - error = -EINVAL; - goto out; - } - - if (ranges[0].end != i * 2 + 1 && - ranges[0].end != i * 2 + 2) { - error = -EINVAL; - goto out; - } - } - } - -out: - close(fd); - close(cmd_fd); - return error; -} - -static int get_blocks_test(char *mount_dir) -{ - char *backing_dir; - int cmd_fd = -1; - int i; - struct test_files_set test = get_test_files_set(); - const int file_num = test.files_count; - - backing_dir = create_backing_dir(mount_dir); - if (!backing_dir) - goto failure; - - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) - goto failure; - - cmd_fd = open_commands_file(mount_dir); - if (cmd_fd < 0) - goto failure; - - /* Write data. 
*/ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (emit_file(cmd_fd, NULL, file->name, &file->id, file->size, - NULL)) - goto failure; - - if (emit_partial_test_file_data(mount_dir, file)) - goto failure; - } - - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_ranges(mount_dir, file)) - goto failure; - - /* - * The smallest files are filled completely, so this checks that - * the fast get_filled_blocks path is not causing issues - */ - if (validate_ranges(mount_dir, file)) - goto failure; - } - - close(cmd_fd); - umount(mount_dir); - free(backing_dir); - return TEST_SUCCESS; - -failure: - close(cmd_fd); - umount(mount_dir); - free(backing_dir); - return TEST_FAILURE; -} - -static int emit_partial_test_file_hash(char *mount_dir, struct test_file *file) -{ - int err; - int fd; - struct incfs_fill_blocks fill_blocks = { - .count = 1, - }; - struct incfs_fill_block *fill_block_array = - calloc(fill_blocks.count, sizeof(struct incfs_fill_block)); - uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; - - if (file->size <= 4096 / 32 * 4096) - return 0; - - if (fill_blocks.count == 0) - return 0; - - if (!fill_block_array) - return -ENOMEM; - fill_blocks.fill_blocks = ptr_to_u64(fill_block_array); - - rnd_buf(data, sizeof(data), 0); - - fill_block_array[0] = - (struct incfs_fill_block){ .block_index = 1, - .data_len = - INCFS_DATA_FILE_BLOCK_SIZE, - .data = ptr_to_u64(data), - .flags = INCFS_BLOCK_FLAGS_HASH }; - - fd = open_file_by_id(mount_dir, file->id, true); - if (fd < 0) { - err = errno; - goto failure; - } - - err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); - close(fd); - if (err < fill_blocks.count) - err = errno; - else - err = 0; - -failure: - free(fill_block_array); - return err; -} - -static int validate_hash_ranges(const char *mount_dir, struct test_file *file) -{ - int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; - char *filename = concat_file_name(mount_dir, 
file->name); - int fd; - struct incfs_filled_range ranges[128]; - struct incfs_get_filled_blocks_args fba = { - .range_buffer = ptr_to_u64(ranges), - .range_buffer_size = sizeof(ranges), - }; - int error = TEST_SUCCESS; - int file_blocks = (file->size + INCFS_DATA_FILE_BLOCK_SIZE - 1) / - INCFS_DATA_FILE_BLOCK_SIZE; - int cmd_fd = -1; - struct incfs_permit_fill permit_fill; - - if (file->size <= 4096 / 32 * 4096) - return 0; - - fd = open(filename, O_RDONLY | O_CLOEXEC); - free(filename); - if (fd <= 0) - return TEST_FAILURE; - - error = ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); - if (error != -1 || errno != EPERM) { - ksft_print_msg("INCFS_IOC_GET_FILLED_BLOCKS not blocked\n"); - error = -EPERM; - goto out; - } - - cmd_fd = open_commands_file(mount_dir); - permit_fill.file_descriptor = fd; - if (ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { - print_error("INCFS_IOC_PERMIT_FILL failed"); - return -EPERM; - goto out; - } - - error = ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); - if (error) - goto out; - - if (fba.total_blocks_out <= block_cnt) { - error = -EINVAL; - goto out; - } - - if (fba.data_blocks_out != block_cnt) { - error = -EINVAL; - goto out; - } - - if (fba.range_buffer_size_out != sizeof(struct incfs_filled_range)) { - error = -EINVAL; - goto out; - } - - if (ranges[0].begin != file_blocks + 1 || - ranges[0].end != file_blocks + 2) { - error = -EINVAL; - goto out; - } - -out: - close(cmd_fd); - close(fd); - return error; -} - -static int get_hash_blocks_test(char *mount_dir) -{ - char *backing_dir; - int cmd_fd = -1; - int i; - struct test_files_set test = get_test_files_set(); - const int file_num = test.files_count; - - backing_dir = create_backing_dir(mount_dir); - if (!backing_dir) - goto failure; - - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) - goto failure; - - cmd_fd = open_commands_file(mount_dir); - if (cmd_fd < 0) - goto failure; - - for (i = 0; i < file_num; i++) { - struct test_file *file = 
&test.files[i]; - - if (crypto_emit_file(cmd_fd, NULL, file->name, &file->id, - file->size, file->root_hash, - file->sig.add_data)) - goto failure; - - if (emit_partial_test_file_hash(mount_dir, file)) - goto failure; - } - - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_hash_ranges(mount_dir, file)) - goto failure; - } - - close(cmd_fd); - umount(mount_dir); - free(backing_dir); - return TEST_SUCCESS; - -failure: - close(cmd_fd); - umount(mount_dir); - free(backing_dir); - return TEST_FAILURE; -} - -static int large_file(char *mount_dir) -{ - char *backing_dir; - int cmd_fd = -1; - int i; - int result = TEST_FAILURE; - uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; - int block_count = 3LL * 1024 * 1024 * 1024 / INCFS_DATA_FILE_BLOCK_SIZE; - struct incfs_fill_block *block_buf = - calloc(block_count, sizeof(struct incfs_fill_block)); - struct incfs_fill_blocks fill_blocks = { - .count = block_count, - .fill_blocks = ptr_to_u64(block_buf), - }; - incfs_uuid_t id; - int fd; - - backing_dir = create_backing_dir(mount_dir); - if (!backing_dir) - goto failure; - - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) - goto failure; - - cmd_fd = open_commands_file(mount_dir); - if (cmd_fd < 0) - goto failure; - - if (emit_file(cmd_fd, NULL, "very_large_file", &id, - (uint64_t)block_count * INCFS_DATA_FILE_BLOCK_SIZE, - NULL) < 0) - goto failure; - - for (i = 0; i < block_count; i++) { - block_buf[i].compression = COMPRESSION_NONE; - block_buf[i].block_index = i; - block_buf[i].data_len = INCFS_DATA_FILE_BLOCK_SIZE; - block_buf[i].data = ptr_to_u64(data); - } - - fd = open_file_by_id(mount_dir, id, true); - if (fd < 0) - goto failure; - - if (ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks) != block_count) - goto failure; - - if (emit_file(cmd_fd, NULL, "very_very_large_file", &id, 1LL << 40, - NULL) < 0) - goto failure; - - result = TEST_SUCCESS; - -failure: - close(fd); - close(cmd_fd); - return result; -} 
+failure: + close(cmd_fd); + close(log_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} static char *setup_mount_dir() { @@ -2702,7 +2361,7 @@ int main(int argc, char *argv[]) // NOTE - this abuses the concept of randomness - do *not* ever do this // on a machine for production use - the device will think it has good // randomness when it does not. - fd = open("/dev/urandom", O_WRONLY | O_CLOEXEC); + fd = open("/dev/urandom", O_WRONLY); count = 4096; for (int i = 0; i < 128; ++i) ioctl(fd, RNDADDTOENTCNT, &count); @@ -2733,11 +2392,9 @@ int main(int argc, char *argv[]) MAKE_TEST(work_after_remount_test), MAKE_TEST(child_procs_waiting_for_data_test), MAKE_TEST(multiple_providers_test), + MAKE_TEST(signature_test), MAKE_TEST(hash_tree_test), MAKE_TEST(read_log_test), - MAKE_TEST(get_blocks_test), - MAKE_TEST(get_hash_blocks_test), - MAKE_TEST(large_file), }; #undef MAKE_TEST @@ -2758,7 +2415,7 @@ int main(int argc, char *argv[]) rmdir(mount_dir); if (fails > 0) - ksft_exit_fail(); + ksft_exit_pass(); else ksft_exit_pass(); return 0; diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c index e194f63ba922..08b8452ad0bc 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.c +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -2,31 +2,28 @@ /* * Copyright 2018 Google LLC */ +#include +#include #include -#include +#include +#include #include -#include -#include -#include -#include #include - #include #include -#include -#include - +#include +#include +#include +#include +#include +#include +#include #include #include #include "utils.h" -#ifndef __S_IFREG -#define __S_IFREG S_IFREG -#endif - -int mount_fs(const char *mount_dir, const char *backing_dir, - int read_timeout_ms) +int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms) { static const char fs_name[] = INCFS_NAME; char mount_options[512]; @@ -42,107 +39,190 @@ int mount_fs(const char *mount_dir, 
const char *backing_dir, return result; } -int mount_fs_opt(const char *mount_dir, const char *backing_dir, - const char *opt, bool remount) +int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt) { static const char fs_name[] = INCFS_NAME; int result; - result = mount(backing_dir, mount_dir, fs_name, - remount ? MS_REMOUNT : 0, opt); + result = mount(backing_dir, mount_dir, fs_name, 0, opt); if (result != 0) perror("Error mounting fs."); return result; } -struct hash_section { - uint32_t algorithm; - uint8_t log2_blocksize; - uint32_t salt_size; - /* no salt */ - uint32_t hash_size; - uint8_t hash[SHA256_DIGEST_SIZE]; -} __packed; - -struct signature_blob { - uint32_t version; - uint32_t hash_section_size; - struct hash_section hash_section; - uint32_t signing_section_size; - uint8_t signing_section[]; -} __packed; - -size_t format_signature(void **buf, const char *root_hash, const char *add_data) +int unlink_node(int fd, int parent_ino, char *filename) { - size_t size = sizeof(struct signature_blob) + strlen(add_data) + 1; - struct signature_blob *sb = malloc(size); - - *sb = (struct signature_blob){ - .version = INCFS_SIGNATURE_VERSION, - .hash_section_size = sizeof(struct hash_section), - .hash_section = - (struct hash_section){ - .algorithm = INCFS_HASH_TREE_SHA256, - .log2_blocksize = 12, - .salt_size = 0, - .hash_size = SHA256_DIGEST_SIZE, - }, - .signing_section_size = sizeof(uint32_t) + strlen(add_data) + 1, - }; + return 0; +} + + +static EVP_PKEY *deserialize_private_key(const char *pem_key) +{ + BIO *bio = NULL; + EVP_PKEY *pkey = NULL; + int len = strlen(pem_key); + + bio = BIO_new_mem_buf(pem_key, len); + if (!bio) + return NULL; + + pkey = PEM_read_bio_PrivateKey(bio, NULL, NULL, NULL); + BIO_free(bio); + return pkey; +} + +static X509 *deserialize_cert(const char *pem_cert) +{ + BIO *bio = NULL; + X509 *cert = NULL; + int len = strlen(pem_cert); + + bio = BIO_new_mem_buf(pem_cert, len); + if (!bio) + return NULL; + + cert = 
PEM_read_bio_X509(bio, NULL, NULL, NULL); + BIO_free(bio); + return cert; +} + +bool sign_pkcs7(const void *data_to_sign, size_t data_size, + char *pkey_pem, char *cert_pem, + void **sig_ret, size_t *sig_size_ret) +{ + /* + * PKCS#7 signing flags: + * + * - PKCS7_BINARY signing binary data, so skip MIME translation + * + * - PKCS7_NOATTR omit extra authenticated attributes, such as + * SMIMECapabilities + * + * - PKCS7_PARTIAL PKCS7_sign() creates a handle only, then + * PKCS7_sign_add_signer() can add a signer later. + * This is necessary to change the message digest + * algorithm from the default of SHA-1. Requires + * OpenSSL 1.0.0 or later. + */ + int pkcs7_flags = PKCS7_BINARY | PKCS7_NOATTR | PKCS7_PARTIAL; + void *sig; + size_t sig_size; + BIO *bio = NULL; + PKCS7 *p7 = NULL; + EVP_PKEY *pkey = NULL; + X509 *cert = NULL; + bool ok = false; + + const EVP_MD *md = EVP_sha256(); + + pkey = deserialize_private_key(pkey_pem); + if (!pkey) { + printf("deserialize_private_key failed\n"); + goto out; + } + + cert = deserialize_cert(cert_pem); + if (!cert) { + printf("deserialize_cert failed\n"); + goto out; + } + + bio = BIO_new_mem_buf(data_to_sign, data_size); + if (!bio) + goto out; + + p7 = PKCS7_sign(NULL, NULL, NULL, bio, pkcs7_flags); + if (!p7) { + printf("failed to initialize PKCS#7 signature object\n"); + goto out; + } + + if (!PKCS7_sign_add_signer(p7, cert, pkey, md, pkcs7_flags)) { + printf("failed to add signer to PKCS#7 signature object\n"); + goto out; + } + + if (PKCS7_final(p7, bio, pkcs7_flags) != 1) { + printf("failed to finalize PKCS#7 signature\n"); + goto out; + } + + BIO_free(bio); + bio = BIO_new(BIO_s_mem()); + if (!bio) { + printf("out of memory\n"); + goto out; + } + + if (i2d_PKCS7_bio(bio, p7) != 1) { + printf("failed to DER-encode PKCS#7 signature object\n"); + goto out; + } - memcpy(sb->hash_section.hash, root_hash, SHA256_DIGEST_SIZE); - memcpy((char *)sb->signing_section, add_data, strlen(add_data) + 1); - *buf = sb; - return size; 
+ sig_size = BIO_get_mem_data(bio, &sig); + *sig_ret = malloc(sig_size); + memcpy(*sig_ret, sig, sig_size); + *sig_size_ret = sig_size; + ok = true; +out: + PKCS7_free(p7); + BIO_free(bio); + return ok; } -int crypto_emit_file(int fd, const char *dir, const char *filename, - incfs_uuid_t *id_out, size_t size, const char *root_hash, - const char *add_data) +int crypto_emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, + size_t size, const char *root_hash, char *sig, size_t sig_size, + char *add_data) { int mode = __S_IFREG | 0555; - void *signature; - int error = 0; + struct incfs_file_signature_info sig_info = { + .hash_tree_alg = root_hash + ? INCFS_HASH_TREE_SHA256 + : 0, + .root_hash = ptr_to_u64(root_hash), + .additional_data = ptr_to_u64(add_data), + .additional_data_size = strlen(add_data), + .signature = ptr_to_u64(sig), + .signature_size = sig_size, + }; struct incfs_new_file_args args = { .size = size, .mode = mode, .file_name = ptr_to_u64(filename), .directory_path = ptr_to_u64(dir), + .signature_info = ptr_to_u64(&sig_info), .file_attr = 0, .file_attr_len = 0 }; - args.signature_size = format_signature(&signature, root_hash, add_data); - args.signature_info = ptr_to_u64(signature); - md5(filename, strlen(filename), (char *)args.file_id.bytes); - if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) != 0) { - error = -errno; - goto out; - } + if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) != 0) + return -errno; *id_out = args.file_id; - -out: - free(signature); - return error; + return 0; } -int emit_file(int fd, const char *dir, const char *filename, - incfs_uuid_t *id_out, size_t size, const char *attr) + +int emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, + size_t size, char *attr) { int mode = __S_IFREG | 0555; - struct incfs_new_file_args args = { .size = size, - .mode = mode, - .file_name = ptr_to_u64(filename), - .directory_path = ptr_to_u64(dir), - .signature_info = ptr_to_u64(NULL), - .signature_size = 0, - .file_attr = 
ptr_to_u64(attr), - .file_attr_len = - attr ? strlen(attr) : 0 }; + struct incfs_file_signature_info sig_info = { + .hash_tree_alg = 0, + .root_hash = ptr_to_u64(NULL) + }; + struct incfs_new_file_args args = { + .size = size, + .mode = mode, + .file_name = ptr_to_u64(filename), + .directory_path = ptr_to_u64(dir), + .signature_info = ptr_to_u64(&sig_info), + .file_attr = ptr_to_u64(attr), + .file_attr_len = attr ? strlen(attr) : 0 + }; md5(filename, strlen(filename), (char *)args.file_id.bytes); @@ -170,7 +250,7 @@ int get_file_signature(int fd, unsigned char *buf, int buf_size) return -errno; } -loff_t get_file_size(const char *name) +loff_t get_file_size(char *name) { struct stat st; @@ -179,27 +259,27 @@ loff_t get_file_size(const char *name) return -ENOENT; } -int open_commands_file(const char *mount_dir) +int open_commands_file(char *mount_dir) { char cmd_file[255]; int cmd_fd; snprintf(cmd_file, ARRAY_SIZE(cmd_file), "%s/%s", mount_dir, INCFS_PENDING_READS_FILENAME); - cmd_fd = open(cmd_file, O_RDONLY | O_CLOEXEC); + cmd_fd = open(cmd_file, O_RDONLY); if (cmd_fd < 0) perror("Can't open commands file"); return cmd_fd; } -int open_log_file(const char *mount_dir) +int open_log_file(char *mount_dir) { char cmd_file[255]; int cmd_fd; snprintf(cmd_file, ARRAY_SIZE(cmd_file), "%s/.log", mount_dir); - cmd_fd = open(cmd_file, O_RDWR | O_CLOEXEC); + cmd_fd = open(cmd_file, O_RDWR); if (cmd_fd < 0) perror("Can't open log file"); return cmd_fd; @@ -278,7 +358,7 @@ int delete_dir_tree(const char *dir_path) return result; } -void sha256(const char *data, size_t dsize, char *hash) +void sha256(char *data, size_t dsize, char *hash) { SHA256_CTX ctx; @@ -287,7 +367,7 @@ void sha256(const char *data, size_t dsize, char *hash) SHA256_Final((unsigned char *)hash, &ctx); } -void md5(const char *data, size_t dsize, char *hash) +void md5(char *data, size_t dsize, char *hash) { MD5_CTX ctx; diff --git a/tools/testing/selftests/filesystems/incfs/utils.h 
b/tools/testing/selftests/filesystems/incfs/utils.h index 9af63e4e922c..9c9ba3c5f70a 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.h +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -5,12 +5,10 @@ #include #include -#include +#include "../../include/uapi/linux/incrementalfs.h" #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) -#define __packed __attribute__((__packed__)) - #ifdef __LP64__ #define ptr_to_u64(p) ((__u64)p) #else @@ -19,11 +17,9 @@ #define SHA256_DIGEST_SIZE 32 -int mount_fs(const char *mount_dir, const char *backing_dir, - int read_timeout_ms); +int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms); -int mount_fs_opt(const char *mount_dir, const char *backing_dir, - const char *opt, bool remount); +int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt); int get_file_bmap(int cmd_fd, int ino, unsigned char *buf, int buf_size); @@ -32,26 +28,32 @@ int get_file_signature(int fd, unsigned char *buf, int buf_size); int emit_node(int fd, char *filename, int *ino_out, int parent_ino, size_t size, mode_t mode, char *attr); -int emit_file(int fd, const char *dir, const char *filename, - incfs_uuid_t *id_out, size_t size, const char *attr); +int emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, + size_t size, char *attr); + +int crypto_emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, + size_t size, const char *root_hash, char *sig, size_t sig_size, + char *add_data); -int crypto_emit_file(int fd, const char *dir, const char *filename, - incfs_uuid_t *id_out, size_t size, const char *root_hash, - const char *add_data); +int unlink_node(int fd, int parent_ino, char *filename); -loff_t get_file_size(const char *name); +loff_t get_file_size(char *name); -int open_commands_file(const char *mount_dir); +int open_commands_file(char *mount_dir); -int open_log_file(const char *mount_dir); +int open_log_file(char *mount_dir); int wait_for_pending_reads(int fd, int timeout_ms, 
struct incfs_pending_read_info *prs, int prs_count); char *concat_file_name(const char *dir, char *file); -void sha256(const char *data, size_t dsize, char *hash); +void sha256(char *data, size_t dsize, char *hash); + +void md5(char *data, size_t dsize, char *hash); -void md5(const char *data, size_t dsize, char *hash); +bool sign_pkcs7(const void *data_to_sign, size_t data_size, + char *pkey_pem, char *cert_pem, + void **sig_ret, size_t *sig_size_ret); int delete_dir_tree(const char *path); -- GitLab From b73e822d12ecbea7cad3742c46fd1be17aa141c8 Mon Sep 17 00:00:00 2001 From: Srinivasarao P Date: Thu, 30 Jul 2020 01:04:27 +0530 Subject: [PATCH 1277/1278] Reverting crypto patches c57952b UPSTREAM: ubifs: wire up FS_IOC_GET_ENCRYPTION_NONCE 379237b UPSTREAM: f2fs: wire up FS_IOC_GET_ENCRYPTION_NONCE 10e5acf UPSTREAM: ext4: wire up FS_IOC_GET_ENCRYPTION_NONCE 63bf273 ANDROID: scsi: ufs: add ->map_sg_crypto() variant op 10d4512 FROMLIST: f2fs: Handle casefolding with Encryption 4efb7e2 ANDROID: fscrypt: fall back to filesystem-layer crypto when needed a14fa7b ANDROID: block: require drivers to declare supported crypto key type(s) 5578bea ANDROID: block: make blk_crypto_start_using_mode() properly check for support e9c80bd UPSTREAM: fscrypt: add FS_IOC_GET_ENCRYPTION_NONCE ioctl 9e469e7 UPSTREAM: fscrypt: don't evict dirty inodes after removing key 53f2446 fscrypt: don't evict dirty inodes after removing key 207be96 FROMLIST: fscrypt: Have filesystems handle their d_ops 06ab740 ANDROID: dm: Add wrapped key support in dm-default-key 23e670a ANDROID: dm: add support for passing through derive_raw_secret 166fda7 ANDROID: block: Prevent crypto fallback for wrapped keys fe6e855 fscrypt: improve format of no-key names 216d8ca fscrypt: clarify what is meant by a per-file key 7e25032 fscrypt: derive dirhash key for casefolded directories e16d849 fscrypt: don't allow v1 policies with casefolding 0bc68c1 fscrypt: add "fscrypt_" prefix to fname_encrypt() 85b9c3e fscrypt: don't 
print name of busy file when removing key 9c5c8c5 fscrypt: document gfp_flags for bounce page allocation bee5bd5 fscrypt: optimize fscrypt_zeroout_range() 1c88eea fscrypt: remove redundant bi_status check 04f5184 fscrypt: Allow modular crypto algorithms 737ae90 fscrypt: include in UAPI header 8842133 fscrypt: don't check for ENOKEY from fscrypt_get_encryption_info() b21b79d fscrypt: remove fscrypt_is_direct_key_policy() 19b132b fscrypt: move fscrypt_valid_enc_modes() to policy.c add6ac4 fscrypt: check for appropriate use of DIRECT_KEY flag earlier 2454b5b fscrypt: split up fscrypt_supported_policy() by policy version bfa4ca6 fscrypt: introduce fscrypt_needs_contents_encryption() 3871977 fscrypt: move fscrypt_d_revalidate() to fname.c 39a0acc fscrypt: constify inode parameter to filename encryption functions 3942229 fscrypt: constify struct fscrypt_hkdf parameter to fscrypt_hkdf_expand() a7b6398 fscrypt: verify that the crypto_skcipher has the correct ivsize 9c1b3af fscrypt: use crypto_skcipher_driver_name() 3529026 fscrypt: support passing a keyring key to FS_IOC_ADD_ENCRYPTION_KEY Change-Id: Ib1abe832e16d5f40bfcc9e34bdccbb063b37dbbc Signed-off-by: Srinivasarao P --- Documentation/filesystems/fscrypt.rst | 86 ++----- block/blk-crypto-fallback.c | 50 ++-- block/blk-crypto-internal.h | 9 - block/blk-crypto.c | 55 +---- block/keyslot-manager.c | 30 +-- drivers/md/dm-default-key.c | 29 +-- drivers/md/dm.c | 80 +------ drivers/scsi/ufs/ufshcd-crypto.c | 12 +- drivers/scsi/ufs/ufshcd-crypto.h | 8 - drivers/scsi/ufs/ufshcd.c | 2 +- drivers/scsi/ufs/ufshcd.h | 1 - fs/crypto/Kconfig | 22 +- fs/crypto/bio.c | 177 ++++----------- fs/crypto/crypto.c | 57 ++++- fs/crypto/fname.c | 314 ++++++-------------------- fs/crypto/fscrypt_private.h | 87 ++++--- fs/crypto/hkdf.c | 2 +- fs/crypto/hooks.c | 48 +--- fs/crypto/inline_crypt.c | 74 ++---- fs/crypto/keyring.c | 160 +++---------- fs/crypto/keysetup.c | 141 +++++------- fs/crypto/keysetup_v1.c | 21 +- fs/crypto/policy.c | 191 
+++++----------- fs/ext4/Kconfig | 1 - fs/ext4/dir.c | 9 +- fs/ext4/ioctl.c | 6 - fs/ext4/namei.c | 1 - fs/ext4/super.c | 5 + fs/f2fs/Kconfig | 1 - fs/f2fs/dir.c | 74 +++--- fs/f2fs/f2fs.h | 14 +- fs/f2fs/file.c | 11 - fs/f2fs/hash.c | 25 +- fs/f2fs/inline.c | 9 +- fs/f2fs/namei.c | 1 - fs/f2fs/super.c | 7 + fs/inode.c | 3 +- fs/libfs.c | 50 ---- fs/ubifs/Kconfig | 1 - fs/ubifs/dir.c | 20 +- fs/ubifs/ioctl.c | 4 - include/linux/bio-crypt-ctx.h | 3 - include/linux/blk-crypto.h | 18 +- include/linux/fs.h | 2 - include/linux/fscrypt.h | 134 ++++++----- include/linux/keyslot-manager.h | 14 +- include/uapi/linux/fscrypt.h | 15 +- 47 files changed, 618 insertions(+), 1466 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index dc444b8d3704..471a511c7508 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -234,8 +234,8 @@ HKDF is more flexible, is nonreversible, and evenly distributes entropy from the master key. HKDF is also standardized and widely used by other software, whereas the AES-128-ECB based KDF is ad-hoc. -Per-file encryption keys ------------------------- +Per-file keys +------------- Since each master key can protect many files, it is necessary to "tweak" the encryption of each file so that the same plaintext in two @@ -268,9 +268,9 @@ is greater than that of an AES-256-XTS key. Therefore, to improve performance and save memory, for Adiantum a "direct key" configuration is supported. When the user has enabled this by setting FSCRYPT_POLICY_FLAG_DIRECT_KEY in the fscrypt policy, -per-file encryption keys are not used. Instead, whenever any data -(contents or filenames) is encrypted, the file's 16-byte nonce is -included in the IV. Moreover: +per-file keys are not used. Instead, whenever any data (contents or +filenames) is encrypted, the file's 16-byte nonce is included in the +IV. 
Moreover: - For v1 encryption policies, the encryption is done directly with the master key. Because of this, users **must not** use the same master @@ -302,16 +302,6 @@ For master keys used for v2 encryption policies, a unique 16-byte "key identifier" is also derived using the KDF. This value is stored in the clear, since it is needed to reliably identify the key itself. -Dirhash keys ------------- - -For directories that are indexed using a secret-keyed dirhash over the -plaintext filenames, the KDF is also used to derive a 128-bit -SipHash-2-4 key per directory in order to hash filenames. This works -just like deriving a per-file encryption key, except that a different -KDF context is used. Currently, only casefolded ("case-insensitive") -encrypted directories use this style of hashing. - Encryption modes and usage ========================== @@ -335,11 +325,11 @@ used. Adiantum is a (primarily) stream cipher-based mode that is fast even on CPUs without dedicated crypto instructions. It's also a true wide-block mode, unlike XTS. It can also eliminate the need to derive -per-file encryption keys. However, it depends on the security of two -primitives, XChaCha12 and AES-256, rather than just one. See the -paper "Adiantum: length-preserving encryption for entry-level -processors" (https://eprint.iacr.org/2018/720.pdf) for more details. -To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast +per-file keys. However, it depends on the security of two primitives, +XChaCha12 and AES-256, rather than just one. See the paper +"Adiantum: length-preserving encryption for entry-level processors" +(https://eprint.iacr.org/2018/720.pdf) for more details. To use +Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast implementations of ChaCha and NHPoly1305 should be enabled, e.g. CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM. 
@@ -523,9 +513,7 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors: - ``EEXIST``: the file is already encrypted with an encryption policy different from the one specified - ``EINVAL``: an invalid encryption policy was specified (invalid - version, mode(s), or flags; or reserved bits were set); or a v1 - encryption policy was specified but the directory has the casefold - flag enabled (casefolding is incompatible with v1 policies). + version, mode(s), or flags; or reserved bits were set) - ``ENOKEY``: a v2 encryption policy was specified, but the key with the specified ``master_key_identifier`` has not been added, nor does the process have the CAP_FOWNER capability in the initial user @@ -633,17 +621,6 @@ from a passphrase or other low-entropy user credential. FS_IOC_GET_ENCRYPTION_PWSALT is deprecated. Instead, prefer to generate and manage any needed salt(s) in userspace. -Getting a file's encryption nonce ---------------------------------- - -Since Linux v5.7, the ioctl FS_IOC_GET_ENCRYPTION_NONCE is supported. -On encrypted files and directories it gets the inode's 16-byte nonce. -On unencrypted files and directories, it fails with ENODATA. - -This ioctl can be useful for automated tests which verify that the -encryption is being done correctly. It is not needed for normal use -of fscrypt. - Adding keys ----------- @@ -661,8 +638,7 @@ follows:: struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; - __u32 key_id; - __u32 __reserved[8]; + __u32 __reserved[9]; __u8 raw[]; }; @@ -679,12 +655,6 @@ follows:: } u; }; - struct fscrypt_provisioning_key_payload { - __u32 type; - __u32 __reserved; - __u8 raw[]; - }; - :c:type:`struct fscrypt_add_key_arg` must be zeroed, then initialized as follows: @@ -707,26 +677,9 @@ as follows: ``Documentation/security/keys/core.rst``). - ``raw_size`` must be the size of the ``raw`` key provided, in bytes. 
- Alternatively, if ``key_id`` is nonzero, this field must be 0, since - in that case the size is implied by the specified Linux keyring key. - -- ``key_id`` is 0 if the raw key is given directly in the ``raw`` - field. Otherwise ``key_id`` is the ID of a Linux keyring key of - type "fscrypt-provisioning" whose payload is a :c:type:`struct - fscrypt_provisioning_key_payload` whose ``raw`` field contains the - raw key and whose ``type`` field matches ``key_spec.type``. Since - ``raw`` is variable-length, the total size of this key's payload - must be ``sizeof(struct fscrypt_provisioning_key_payload)`` plus the - raw key size. The process must have Search permission on this key. - - Most users should leave this 0 and specify the raw key directly. - The support for specifying a Linux keyring key is intended mainly to - allow re-adding keys after a filesystem is unmounted and re-mounted, - without having to store the raw keys in userspace memory. - ``raw`` is a variable-length field which must contain the actual - key, ``raw_size`` bytes long. Alternatively, if ``key_id`` is - nonzero, then this field is unused. + key, ``raw_size`` bytes long. For v2 policy keys, the kernel keeps track of which user (identified by effective user ID) added the key, and only allows the key to be @@ -748,16 +701,11 @@ FS_IOC_ADD_ENCRYPTION_KEY can fail with the following errors: - ``EACCES``: FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR was specified, but the caller does not have the CAP_SYS_ADMIN capability in the initial - user namespace; or the raw key was specified by Linux key ID but the - process lacks Search permission on the key. 
+ user namespace - ``EDQUOT``: the key quota for this user would be exceeded by adding the key - ``EINVAL``: invalid key size or key specifier type, or reserved bits were set -- ``EKEYREJECTED``: the raw key was specified by Linux key ID, but the - key has the wrong type -- ``ENOKEY``: the raw key was specified by Linux key ID, but no key - exists with that ID - ``ENOTTY``: this type of filesystem does not implement encryption - ``EOPNOTSUPP``: the kernel was not configured with encryption support for this filesystem, or the filesystem superblock has not @@ -1160,8 +1108,8 @@ The context structs contain the same information as the corresponding policy structs (see `Setting an encryption policy`_), except that the context structs also contain a nonce. The nonce is randomly generated by the kernel and is used as KDF input or as a tweak to cause -different files to be encrypted differently; see `Per-file encryption -keys`_ and `DIRECT_KEY policies`_. +different files to be encrypted differently; see `Per-file keys`_ and +`DIRECT_KEY policies`_. Data path changes ----------------- @@ -1213,7 +1161,7 @@ filesystem-specific hash(es) needed for directory lookups. This allows the filesystem to still, with a high degree of confidence, map the filename given in ->lookup() back to a particular directory entry that was previously listed by readdir(). See :c:type:`struct -fscrypt_nokey_name` in the source for more details. +fscrypt_digested_name` in the source for more details. Note that the precise way that filenames are presented to userspace without the key is subject to change in the future. It is only meant diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index ad83e1077ba3..b8e9ae1c1d5b 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -487,13 +487,21 @@ bool blk_crypto_queue_decrypt_bio(struct bio *bio) return false; } -/* - * Prepare blk-crypto-fallback for the specified crypto mode. 
- * Returns -ENOPKG if the needed crypto API support is missing. +/** + * blk_crypto_start_using_mode() - Start using a crypto algorithm on a device + * @mode_num: the blk_crypto_mode we want to allocate ciphers for. + * @data_unit_size: the data unit size that will be used + * @q: the request queue for the device + * + * Upper layers must call this function to ensure that a the crypto API fallback + * has transforms for this algorithm, if they become necessary. + * + * Return: 0 on success and -err on error. */ -int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num) +int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, + unsigned int data_unit_size, + struct request_queue *q) { - const char *cipher_str = blk_crypto_modes[mode_num].cipher_str; struct blk_crypto_keyslot *slotp; unsigned int i; int err = 0; @@ -506,20 +514,25 @@ int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num) if (likely(smp_load_acquire(&tfms_inited[mode_num]))) return 0; + /* + * If the keyslot manager of the request queue supports this + * crypto mode, then we don't need to allocate this mode. 
+ */ + if (keyslot_manager_crypto_mode_supported(q->ksm, mode_num, + data_unit_size)) + return 0; + mutex_lock(&tfms_init_lock); if (likely(tfms_inited[mode_num])) goto out; for (i = 0; i < blk_crypto_num_keyslots; i++) { slotp = &blk_crypto_keyslots[i]; - slotp->tfms[mode_num] = crypto_alloc_skcipher(cipher_str, 0, 0); + slotp->tfms[mode_num] = crypto_alloc_skcipher( + blk_crypto_modes[mode_num].cipher_str, + 0, 0); if (IS_ERR(slotp->tfms[mode_num])) { err = PTR_ERR(slotp->tfms[mode_num]); - if (err == -ENOENT) { - pr_warn_once("Missing crypto API support for \"%s\"\n", - cipher_str); - err = -ENOPKG; - } slotp->tfms[mode_num] = NULL; goto out_free_tfms; } @@ -545,6 +558,7 @@ int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num) mutex_unlock(&tfms_init_lock); return err; } +EXPORT_SYMBOL_GPL(blk_crypto_start_using_mode); int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key) { @@ -557,12 +571,6 @@ int blk_crypto_fallback_submit_bio(struct bio **bio_ptr) struct bio_crypt_ctx *bc = bio->bi_crypt_context; struct bio_fallback_crypt_ctx *f_ctx; - if (bc->bc_key->is_hw_wrapped) { - pr_warn_once("HW wrapped key cannot be used with fallback.\n"); - bio->bi_status = BLK_STS_NOTSUPP; - return -EOPNOTSUPP; - } - if (!tfms_inited[bc->bc_key->crypto_mode]) { bio->bi_status = BLK_STS_IOERR; return -EIO; @@ -600,11 +608,9 @@ int __init blk_crypto_fallback_init(void) crypto_mode_supported[i] = 0xFFFFFFFF; crypto_mode_supported[BLK_ENCRYPTION_MODE_INVALID] = 0; - blk_crypto_ksm = keyslot_manager_create( - NULL, blk_crypto_num_keyslots, - &blk_crypto_ksm_ll_ops, - BLK_CRYPTO_FEATURE_STANDARD_KEYS, - crypto_mode_supported, NULL); + blk_crypto_ksm = keyslot_manager_create(NULL, blk_crypto_num_keyslots, + &blk_crypto_ksm_ll_ops, + crypto_mode_supported, NULL); if (!blk_crypto_ksm) return -ENOMEM; diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h index 4da998c803f2..40d826b743da 100644 --- a/block/blk-crypto-internal.h +++ 
b/block/blk-crypto-internal.h @@ -19,8 +19,6 @@ extern const struct blk_crypto_mode blk_crypto_modes[]; #ifdef CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK -int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num); - int blk_crypto_fallback_submit_bio(struct bio **bio_ptr); bool blk_crypto_queue_decrypt_bio(struct bio *bio); @@ -31,13 +29,6 @@ bool bio_crypt_fallback_crypted(const struct bio_crypt_ctx *bc); #else /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */ -static inline int -blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num) -{ - pr_warn_once("crypto API fallback is disabled\n"); - return -ENOPKG; -} - static inline bool bio_crypt_fallback_crypted(const struct bio_crypt_ctx *bc) { return false; diff --git a/block/blk-crypto.c b/block/blk-crypto.c index f56bbec1132f..a8de0d9680e0 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -109,8 +109,7 @@ int blk_crypto_submit_bio(struct bio **bio_ptr) /* Get device keyslot if supported */ if (keyslot_manager_crypto_mode_supported(q->ksm, bc->bc_key->crypto_mode, - bc->bc_key->data_unit_size, - bc->bc_key->is_hw_wrapped)) { + bc->bc_key->data_unit_size)) { err = bio_crypt_ctx_acquire_keyslot(bc, q->ksm); if (!err) return 0; @@ -176,9 +175,7 @@ bool blk_crypto_endio(struct bio *bio) * @raw_key_size: Size of raw key. Must be at least the required size for the * chosen @crypto_mode; see blk_crypto_modes[]. (It's allowed * to be longer than the mode's actual key size, in order to - * support inline encryption hardware that accepts wrapped keys. - * @is_hw_wrapped has to be set for such keys) - * @is_hw_wrapped: Denotes @raw_key is wrapped. + * support inline encryption hardware that accepts wrapped keys.) 
* @crypto_mode: identifier for the encryption algorithm to use * @data_unit_size: the data unit size to use for en/decryption * @@ -187,7 +184,6 @@ bool blk_crypto_endio(struct bio *bio) */ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, unsigned int raw_key_size, - bool is_hw_wrapped, enum blk_crypto_mode_num crypto_mode, unsigned int data_unit_size) { @@ -202,14 +198,9 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, BUILD_BUG_ON(BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE < BLK_CRYPTO_MAX_KEY_SIZE); mode = &blk_crypto_modes[crypto_mode]; - if (is_hw_wrapped) { - if (raw_key_size < mode->keysize || - raw_key_size > BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE) - return -EINVAL; - } else { - if (raw_key_size != mode->keysize) - return -EINVAL; - } + if (raw_key_size < mode->keysize || + raw_key_size > BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE) + return -EINVAL; if (!is_power_of_2(data_unit_size)) return -EINVAL; @@ -218,7 +209,6 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, blk_key->data_unit_size = data_unit_size; blk_key->data_unit_size_bits = ilog2(data_unit_size); blk_key->size = raw_key_size; - blk_key->is_hw_wrapped = is_hw_wrapped; memcpy(blk_key->raw, raw_key, raw_key_size); /* @@ -233,38 +223,6 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, } EXPORT_SYMBOL_GPL(blk_crypto_init_key); -/** - * blk_crypto_start_using_mode() - Start using blk-crypto on a device - * @crypto_mode: the crypto mode that will be used - * @data_unit_size: the data unit size that will be used - * @is_hw_wrapped_key: whether the key will be hardware-wrapped - * @q: the request queue for the device - * - * Upper layers must call this function to ensure that either the hardware - * supports the needed crypto settings, or the crypto API fallback has - * transforms for the needed mode allocated and ready to go. 
- * - * Return: 0 on success; -ENOPKG if the hardware doesn't support the crypto - * settings and blk-crypto-fallback is either disabled or the needed - * algorithm is disabled in the crypto API; or another -errno code. - */ -int blk_crypto_start_using_mode(enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size, - bool is_hw_wrapped_key, - struct request_queue *q) -{ - if (keyslot_manager_crypto_mode_supported(q->ksm, crypto_mode, - data_unit_size, - is_hw_wrapped_key)) - return 0; - if (is_hw_wrapped_key) { - pr_warn_once("hardware doesn't support wrapped keys\n"); - return -EOPNOTSUPP; - } - return blk_crypto_fallback_start_using_mode(crypto_mode); -} -EXPORT_SYMBOL_GPL(blk_crypto_start_using_mode); - /** * blk_crypto_evict_key() - Evict a key from any inline encryption hardware * it may have been programmed into @@ -285,8 +243,7 @@ int blk_crypto_evict_key(struct request_queue *q, { if (q->ksm && keyslot_manager_crypto_mode_supported(q->ksm, key->crypto_mode, - key->data_unit_size, - key->is_hw_wrapped)) + key->data_unit_size)) return keyslot_manager_evict_key(q->ksm, key); return blk_crypto_fallback_evict_key(key); diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c index fe7dff3cae79..0b6dd460645e 100644 --- a/block/keyslot-manager.c +++ b/block/keyslot-manager.c @@ -44,7 +44,6 @@ struct keyslot { struct keyslot_manager { unsigned int num_slots; struct keyslot_mgmt_ll_ops ksm_ll_ops; - unsigned int features; unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX]; void *ll_priv_data; @@ -137,8 +136,6 @@ static inline void keyslot_manager_hw_exit(struct keyslot_manager *ksm) * @ksm_ll_ops: The struct keyslot_mgmt_ll_ops for the device that this keyslot * manager will use to perform operations like programming and * evicting keys. - * @features: The supported features as a bitmask of BLK_CRYPTO_FEATURE_* flags. - * Most drivers should set BLK_CRYPTO_FEATURE_STANDARD_KEYS here. 
* @crypto_mode_supported: Array of size BLK_ENCRYPTION_MODE_MAX of * bitmasks that represents whether a crypto mode * and data unit size are supported. The i'th bit @@ -158,7 +155,6 @@ struct keyslot_manager *keyslot_manager_create( struct device *dev, unsigned int num_slots, const struct keyslot_mgmt_ll_ops *ksm_ll_ops, - unsigned int features, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data) { @@ -180,7 +176,6 @@ struct keyslot_manager *keyslot_manager_create( ksm->num_slots = num_slots; ksm->ksm_ll_ops = *ksm_ll_ops; - ksm->features = features; memcpy(ksm->crypto_mode_supported, crypto_mode_supported, sizeof(ksm->crypto_mode_supported)); ksm->ll_priv_data = ll_priv_data; @@ -387,24 +382,23 @@ void keyslot_manager_put_slot(struct keyslot_manager *ksm, unsigned int slot) } /** - * keyslot_manager_crypto_mode_supported() - Find out if a crypto_mode / - * data unit size / is_hw_wrapped_key - * combination is supported by a ksm. + * keyslot_manager_crypto_mode_supported() - Find out if a crypto_mode/data + * unit size combination is supported + * by a ksm. * @ksm: The keyslot manager to check * @crypto_mode: The crypto mode to check for. * @data_unit_size: The data_unit_size for the mode. - * @is_hw_wrapped_key: Whether a hardware-wrapped key will be used. * * Calls and returns the result of the crypto_mode_supported function specified * by the ksm. * * Context: Process context. - * Return: Whether or not this ksm supports the specified crypto settings. + * Return: Whether or not this ksm supports the specified crypto_mode/ + * data_unit_size combo. 
*/ bool keyslot_manager_crypto_mode_supported(struct keyslot_manager *ksm, enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size, - bool is_hw_wrapped_key) + unsigned int data_unit_size) { if (!ksm) return false; @@ -412,13 +406,6 @@ bool keyslot_manager_crypto_mode_supported(struct keyslot_manager *ksm, return false; if (WARN_ON(!is_power_of_2(data_unit_size))) return false; - if (is_hw_wrapped_key) { - if (!(ksm->features & BLK_CRYPTO_FEATURE_WRAPPED_KEYS)) - return false; - } else { - if (!(ksm->features & BLK_CRYPTO_FEATURE_STANDARD_KEYS)) - return false; - } return ksm->crypto_mode_supported[crypto_mode] & data_unit_size; } @@ -534,7 +521,6 @@ EXPORT_SYMBOL_GPL(keyslot_manager_destroy); * keyslot_manager_create_passthrough() - Create a passthrough keyslot manager * @dev: Device for runtime power management (NULL if none) * @ksm_ll_ops: The struct keyslot_mgmt_ll_ops - * @features: Bitmask of BLK_CRYPTO_FEATURE_* flags * @crypto_mode_supported: Bitmasks for supported encryption modes * @ll_priv_data: Private data passed as is to the functions in ksm_ll_ops. 
* @@ -552,7 +538,6 @@ EXPORT_SYMBOL_GPL(keyslot_manager_destroy); struct keyslot_manager *keyslot_manager_create_passthrough( struct device *dev, const struct keyslot_mgmt_ll_ops *ksm_ll_ops, - unsigned int features, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data) { @@ -563,7 +548,6 @@ struct keyslot_manager *keyslot_manager_create_passthrough( return NULL; ksm->ksm_ll_ops = *ksm_ll_ops; - ksm->features = features; memcpy(ksm->crypto_mode_supported, crypto_mode_supported, sizeof(ksm->crypto_mode_supported)); ksm->ll_priv_data = ll_priv_data; @@ -592,13 +576,11 @@ void keyslot_manager_intersect_modes(struct keyslot_manager *parent, if (child) { unsigned int i; - parent->features &= child->features; for (i = 0; i < ARRAY_SIZE(child->crypto_mode_supported); i++) { parent->crypto_mode_supported[i] &= child->crypto_mode_supported[i]; } } else { - parent->features = 0; memset(parent->crypto_mode_supported, 0, sizeof(parent->crypto_mode_supported)); } diff --git a/drivers/md/dm-default-key.c b/drivers/md/dm-default-key.c index 3d0bd0645f7a..43a30c076aa6 100644 --- a/drivers/md/dm-default-key.c +++ b/drivers/md/dm-default-key.c @@ -9,7 +9,7 @@ #define DM_MSG_PREFIX "default-key" -#define DM_DEFAULT_KEY_MAX_WRAPPED_KEY_SIZE 128 +#define DM_DEFAULT_KEY_MAX_KEY_SIZE 64 #define SECTOR_SIZE (1 << SECTOR_SHIFT) @@ -49,7 +49,6 @@ struct default_key_c { unsigned int sector_size; unsigned int sector_bits; struct blk_crypto_key key; - bool is_hw_wrapped; }; static const struct dm_default_key_cipher * @@ -85,7 +84,7 @@ static int default_key_ctr_optional(struct dm_target *ti, struct default_key_c *dkc = ti->private; struct dm_arg_set as; static const struct dm_arg _args[] = { - {0, 4, "Invalid number of feature args"}, + {0, 3, "Invalid number of feature args"}, }; unsigned int opt_params; const char *opt_string; @@ -118,8 +117,6 @@ static int default_key_ctr_optional(struct dm_target *ti, } } else if (!strcmp(opt_string, "iv_large_sectors")) { 
iv_large_sectors = true; - } else if (!strcmp(opt_string, "wrappedkey_v0")) { - dkc->is_hw_wrapped = true; } else { ti->error = "Invalid feature arguments"; return -EINVAL; @@ -147,8 +144,7 @@ static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct default_key_c *dkc; const struct dm_default_key_cipher *cipher; - u8 raw_key[DM_DEFAULT_KEY_MAX_WRAPPED_KEY_SIZE]; - unsigned int raw_key_size; + u8 raw_key[DM_DEFAULT_KEY_MAX_KEY_SIZE]; unsigned long long tmpll; char dummy; int err; @@ -180,15 +176,12 @@ static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) } /* */ - raw_key_size = strlen(argv[1]); - if (raw_key_size > 2 * DM_DEFAULT_KEY_MAX_WRAPPED_KEY_SIZE || - raw_key_size % 2) { - ti->error = "Invalid keysize"; + if (strlen(argv[1]) != 2 * cipher->key_size) { + ti->error = "Incorrect key size for cipher"; err = -EINVAL; goto bad; } - raw_key_size /= 2; - if (hex2bin(raw_key, argv[1], raw_key_size) != 0) { + if (hex2bin(raw_key, argv[1], cipher->key_size) != 0) { ti->error = "Malformed key string"; err = -EINVAL; goto bad; @@ -233,15 +226,13 @@ static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) } err = blk_crypto_init_key(&dkc->key, raw_key, cipher->key_size, - dkc->is_hw_wrapped, cipher->mode_num, - dkc->sector_size); + cipher->mode_num, dkc->sector_size); if (err) { ti->error = "Error initializing blk-crypto key"; goto bad; } err = blk_crypto_start_using_mode(cipher->mode_num, dkc->sector_size, - dkc->is_hw_wrapped, dkc->dev->bdev->bd_queue); if (err) { ti->error = "Error starting to use blk-crypto"; @@ -328,8 +319,6 @@ static void default_key_status(struct dm_target *ti, status_type_t type, num_feature_args += !!ti->num_discard_bios; if (dkc->sector_size != SECTOR_SIZE) num_feature_args += 2; - if (dkc->is_hw_wrapped) - num_feature_args += 1; if (num_feature_args != 0) { DMEMIT(" %d", num_feature_args); if (ti->num_discard_bios) @@ -338,8 +327,6 @@ static void 
default_key_status(struct dm_target *ti, status_type_t type, DMEMIT(" sector_size:%u", dkc->sector_size); DMEMIT(" iv_large_sectors"); } - if (dkc->is_hw_wrapped) - DMEMIT(" wrappedkey_v0"); } break; } @@ -385,7 +372,7 @@ static void default_key_io_hints(struct dm_target *ti, static struct target_type default_key_target = { .name = "default-key", - .version = {2, 1, 0}, + .version = {2, 0, 0}, .module = THIS_MODULE, .ctr = default_key_ctr, .dtr = default_key_dtr, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e4f0b67cd015..0271ca072453 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2088,98 +2088,22 @@ static int dm_keyslot_evict(struct keyslot_manager *ksm, return args.err; } -struct dm_derive_raw_secret_args { - const u8 *wrapped_key; - unsigned int wrapped_key_size; - u8 *secret; - unsigned int secret_size; - int err; -}; - -static int dm_derive_raw_secret_callback(struct dm_target *ti, - struct dm_dev *dev, sector_t start, - sector_t len, void *data) -{ - struct dm_derive_raw_secret_args *args = data; - struct request_queue *q = dev->bdev->bd_queue; - - if (!args->err) - return 0; - - if (!q->ksm) { - args->err = -EOPNOTSUPP; - return 0; - } - - args->err = keyslot_manager_derive_raw_secret(q->ksm, args->wrapped_key, - args->wrapped_key_size, - args->secret, - args->secret_size); - /* Try another device in case this fails. */ - return 0; -} - -/* - * Retrieve the raw_secret from the underlying device. 
Given that - * only only one raw_secret can exist for a particular wrappedkey, - * retrieve it only from the first device that supports derive_raw_secret() - */ -static int dm_derive_raw_secret(struct keyslot_manager *ksm, - const u8 *wrapped_key, - unsigned int wrapped_key_size, - u8 *secret, unsigned int secret_size) -{ - struct mapped_device *md = keyslot_manager_private(ksm); - struct dm_derive_raw_secret_args args = { - .wrapped_key = wrapped_key, - .wrapped_key_size = wrapped_key_size, - .secret = secret, - .secret_size = secret_size, - .err = -EOPNOTSUPP, - }; - struct dm_table *t; - int srcu_idx; - int i; - struct dm_target *ti; - - t = dm_get_live_table(md, &srcu_idx); - if (!t) - return -EOPNOTSUPP; - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); - if (!ti->type->iterate_devices) - continue; - ti->type->iterate_devices(ti, dm_derive_raw_secret_callback, - &args); - if (!args.err) - break; - } - dm_put_live_table(md, srcu_idx); - return args.err; -} - static struct keyslot_mgmt_ll_ops dm_ksm_ll_ops = { .keyslot_evict = dm_keyslot_evict, - .derive_raw_secret = dm_derive_raw_secret, }; static int dm_init_inline_encryption(struct mapped_device *md) { - unsigned int features; unsigned int mode_masks[BLK_ENCRYPTION_MODE_MAX]; /* - * Initially declare support for all crypto settings. Anything - * unsupported by a child device will be removed later when calculating - * the device restrictions. + * Start out with all crypto mode support bits set. Any unsupported + * bits will be cleared later when calculating the device restrictions. 
*/ - features = BLK_CRYPTO_FEATURE_STANDARD_KEYS | - BLK_CRYPTO_FEATURE_WRAPPED_KEYS; memset(mode_masks, 0xFF, sizeof(mode_masks)); md->queue->ksm = keyslot_manager_create_passthrough(NULL, &dm_ksm_ll_ops, - features, mode_masks, md); if (!md->queue->ksm) return -ENOMEM; diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c index d62ab7a9faff..e3de448c9bbe 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.c +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -336,9 +336,7 @@ int ufshcd_hba_init_crypto_spec(struct ufs_hba *hba, ufshcd_clear_all_keyslots(hba); hba->ksm = keyslot_manager_create(hba->dev, ufshcd_num_keyslots(hba), - ksm_ops, - BLK_CRYPTO_FEATURE_STANDARD_KEYS, - crypto_modes_supported, hba); + ksm_ops, crypto_modes_supported, hba); if (!hba->ksm) { err = -ENOMEM; @@ -458,14 +456,6 @@ int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, return ufshcd_prepare_lrbp_crypto_spec(hba, cmd, lrbp); } -int ufshcd_map_sg_crypto(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) -{ - if (hba->crypto_vops && hba->crypto_vops->map_sg_crypto) - return hba->crypto_vops->map_sg_crypto(hba, lrbp); - - return 0; -} - int ufshcd_complete_lrbp_crypto(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp) diff --git a/drivers/scsi/ufs/ufshcd-crypto.h b/drivers/scsi/ufs/ufshcd-crypto.h index f223a06fbf93..95f37c9f7672 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.h +++ b/drivers/scsi/ufs/ufshcd-crypto.h @@ -80,8 +80,6 @@ int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp); -int ufshcd_map_sg_crypto(struct ufs_hba *hba, struct ufshcd_lrb *lrbp); - int ufshcd_complete_lrbp_crypto(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp); @@ -135,12 +133,6 @@ static inline int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, return 0; } -static inline int ufshcd_map_sg_crypto(struct ufs_hba *hba, - struct ufshcd_lrb *lrbp) -{ - return 0; -} - static inline bool ufshcd_lrbp_crypto_enabled(struct 
ufshcd_lrb *lrbp) { return false; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index b04967823764..64050a44ae9b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2023,7 +2023,7 @@ static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) lrbp->utr_descriptor_ptr->prd_table_length = 0; } - return ufshcd_map_sg_crypto(hba, lrbp); + return 0; } /** diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 52b5a070afa8..73124e85dada 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -354,7 +354,6 @@ struct ufs_hba_crypto_variant_ops { int (*prepare_lrbp_crypto)(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp); - int (*map_sg_crypto)(struct ufs_hba *hba, struct ufshcd_lrb *lrbp); int (*complete_lrbp_crypto)(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp); diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index 97c0a113f4cc..0701bb90f99c 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -1,8 +1,13 @@ config FS_ENCRYPTION bool "FS Encryption (Per-file encryption)" select CRYPTO - select CRYPTO_HASH - select CRYPTO_BLKCIPHER + select CRYPTO_AES + select CRYPTO_CBC + select CRYPTO_ECB + select CRYPTO_XTS + select CRYPTO_CTS + select CRYPTO_SHA512 + select CRYPTO_HMAC select KEYS help Enable encryption of files and directories. This @@ -11,19 +16,6 @@ config FS_ENCRYPTION decrypted pages in the page cache. Currently Ext4, F2FS and UBIFS make use of this feature. -# Filesystems supporting encryption must select this if FS_ENCRYPTION. This -# allows the algorithms to be built as modules when all the filesystems are. 
-config FS_ENCRYPTION_ALGS - tristate - select CRYPTO_AES - select CRYPTO_CBC - select CRYPTO_CTS - select CRYPTO_ECB - select CRYPTO_HMAC - select CRYPTO_SHA256 - select CRYPTO_SHA512 - select CRYPTO_XTS - config FS_ENCRYPTION_INLINE_CRYPT bool "Enable fscrypt to use inline crypto" depends on FS_ENCRYPTION && BLK_INLINE_ENCRYPTION diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 0242088d213f..f62375d39b0f 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -41,154 +41,63 @@ void fscrypt_decrypt_bio(struct bio *bio) } EXPORT_SYMBOL(fscrypt_decrypt_bio); -static int fscrypt_zeroout_range_inlinecrypt(const struct inode *inode, - pgoff_t lblk, - sector_t pblk, unsigned int len) -{ - const unsigned int blockbits = inode->i_blkbits; - const unsigned int blocks_per_page_bits = PAGE_SHIFT - blockbits; - const unsigned int blocks_per_page = 1 << blocks_per_page_bits; - unsigned int i; - struct bio *bio; - int ret, err; - - /* This always succeeds since __GFP_DIRECT_RECLAIM is set. */ - bio = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); - - do { - bio_set_dev(bio, inode->i_sb->s_bdev); - bio->bi_iter.bi_sector = pblk << (blockbits - 9); - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - fscrypt_set_bio_crypt_ctx(bio, inode, lblk, GFP_NOFS); - - i = 0; - do { - unsigned int blocks_this_page = - min(len, blocks_per_page); - unsigned int bytes_this_page = - blocks_this_page << blockbits; - - ret = bio_add_page(bio, ZERO_PAGE(0), - bytes_this_page, 0); - if (WARN_ON(ret != bytes_this_page)) { - err = -EIO; - goto out; - } - lblk += blocks_this_page; - pblk += blocks_this_page; - len -= blocks_this_page; - } while (++i != BIO_MAX_PAGES && len != 0); - - err = submit_bio_wait(bio); - if (err) - goto out; - bio_reset(bio); - } while (len != 0); - err = 0; -out: - bio_put(bio); - return err; -} - -/** - * fscrypt_zeroout_range() - zero out a range of blocks in an encrypted file - * @inode: the file's inode - * @lblk: the first file logical block to zero out - * @pblk: the first filesystem 
physical block to zero out - * @len: number of blocks to zero out - * - * Zero out filesystem blocks in an encrypted regular file on-disk, i.e. write - * ciphertext blocks which decrypt to the all-zeroes block. The blocks must be - * both logically and physically contiguous. It's also assumed that the - * filesystem only uses a single block device, ->s_bdev. - * - * Note that since each block uses a different IV, this involves writing a - * different ciphertext to each block; we can't simply reuse the same one. - * - * Return: 0 on success; -errno on failure. - */ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, - sector_t pblk, unsigned int len) + sector_t pblk, unsigned int len) { const unsigned int blockbits = inode->i_blkbits; const unsigned int blocksize = 1 << blockbits; - const unsigned int blocks_per_page_bits = PAGE_SHIFT - blockbits; - const unsigned int blocks_per_page = 1 << blocks_per_page_bits; - struct page *pages[16]; /* write up to 16 pages at a time */ - unsigned int nr_pages; - unsigned int i; - unsigned int offset; + const bool inlinecrypt = fscrypt_inode_uses_inline_crypto(inode); + struct page *ciphertext_page; struct bio *bio; - int ret, err; - - if (len == 0) - return 0; - - if (fscrypt_inode_uses_inline_crypto(inode)) - return fscrypt_zeroout_range_inlinecrypt(inode, lblk, pblk, - len); - - BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_PAGES); - nr_pages = min_t(unsigned int, ARRAY_SIZE(pages), - (len + blocks_per_page - 1) >> blocks_per_page_bits); - - /* - * We need at least one page for ciphertext. Allocate the first one - * from a mempool, with __GFP_DIRECT_RECLAIM set so that it can't fail. - * - * Any additional page allocations are allowed to fail, as they only - * help performance, and waiting on the mempool for them could deadlock. - */ - for (i = 0; i < nr_pages; i++) { - pages[i] = fscrypt_alloc_bounce_page(i == 0 ? 
GFP_NOFS : - GFP_NOWAIT | __GFP_NOWARN); - if (!pages[i]) - break; + int ret, err = 0; + + if (inlinecrypt) { + ciphertext_page = ZERO_PAGE(0); + } else { + ciphertext_page = fscrypt_alloc_bounce_page(GFP_NOWAIT); + if (!ciphertext_page) + return -ENOMEM; } - nr_pages = i; - if (WARN_ON(nr_pages <= 0)) - return -EINVAL; - /* This always succeeds since __GFP_DIRECT_RECLAIM is set. */ - bio = bio_alloc(GFP_NOFS, nr_pages); + while (len--) { + if (!inlinecrypt) { + err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk, + ZERO_PAGE(0), ciphertext_page, + blocksize, 0, GFP_NOFS); + if (err) + goto errout; + } + + bio = bio_alloc(GFP_NOWAIT, 1); + if (!bio) { + err = -ENOMEM; + goto errout; + } + fscrypt_set_bio_crypt_ctx(bio, inode, lblk, GFP_NOIO); - do { bio_set_dev(bio, inode->i_sb->s_bdev); bio->bi_iter.bi_sector = pblk << (blockbits - 9); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - - i = 0; - offset = 0; - do { - err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk, - ZERO_PAGE(0), pages[i], - blocksize, offset, GFP_NOFS); - if (err) - goto out; - lblk++; - pblk++; - len--; - offset += blocksize; - if (offset == PAGE_SIZE || len == 0) { - ret = bio_add_page(bio, pages[i++], offset, 0); - if (WARN_ON(ret != offset)) { - err = -EIO; - goto out; - } - offset = 0; - } - } while (i != nr_pages && len != 0); - + ret = bio_add_page(bio, ciphertext_page, blocksize, 0); + if (WARN_ON(ret != blocksize)) { + /* should never happen! 
*/ + bio_put(bio); + err = -EIO; + goto errout; + } err = submit_bio_wait(bio); + if (err == 0 && bio->bi_status) + err = -EIO; + bio_put(bio); if (err) - goto out; - bio_reset(bio); - } while (len != 0); + goto errout; + lblk++; + pblk++; + } err = 0; -out: - bio_put(bio); - for (i = 0; i < nr_pages; i++) - fscrypt_free_bounce_page(pages[i]); +errout: + if (!inlinecrypt) + fscrypt_free_bounce_page(ciphertext_page); return err; } EXPORT_SYMBOL(fscrypt_zeroout_range); diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index ed6ea28dbdad..41b4fe15b4b6 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include "fscrypt_private.h" @@ -137,7 +139,7 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, * multiple of the filesystem's block size. * @offs: Byte offset within @page of the first block to encrypt. Must be * a multiple of the filesystem's block size. - * @gfp_flags: Memory allocation flags. See details below. + * @gfp_flags: Memory allocation flags * * A new bounce page is allocated, and the specified block(s) are encrypted into * it. In the bounce page, the ciphertext block(s) will be located at the same @@ -147,11 +149,6 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, * * This is for use by the filesystem's ->writepages() method. * - * The bounce page allocation is mempool-backed, so it will always succeed when - * @gfp_flags includes __GFP_DIRECT_RECLAIM, e.g. when it's GFP_NOFS. However, - * only the first page of each bio can be allocated this way. To prevent - * deadlocks, for any additional pages a mask like GFP_NOWAIT must be used. 
- * * Return: the new encrypted bounce page on success; an ERR_PTR() on failure */ struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, @@ -288,6 +285,54 @@ int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, } EXPORT_SYMBOL(fscrypt_decrypt_block_inplace); +/* + * Validate dentries in encrypted directories to make sure we aren't potentially + * caching stale dentries after a key has been added. + */ +static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct dentry *dir; + int err; + int valid; + + /* + * Plaintext names are always valid, since fscrypt doesn't support + * reverting to ciphertext names without evicting the directory's inode + * -- which implies eviction of the dentries in the directory. + */ + if (!(dentry->d_flags & DCACHE_ENCRYPTED_NAME)) + return 1; + + /* + * Ciphertext name; valid if the directory's key is still unavailable. + * + * Although fscrypt forbids rename() on ciphertext names, we still must + * use dget_parent() here rather than use ->d_parent directly. That's + * because a corrupted fs image may contain directory hard links, which + * the VFS handles by moving the directory's dentry tree in the dcache + * each time ->lookup() finds the directory and it already has a dentry + * elsewhere. Thus ->d_parent can be changing, and we must safely grab + * a reference to some ->d_parent to prevent it from being freed. + */ + + if (flags & LOOKUP_RCU) + return -ECHILD; + + dir = dget_parent(dentry); + err = fscrypt_get_encryption_info(d_inode(dir)); + valid = !fscrypt_has_encryption_key(d_inode(dir)); + dput(dir); + + if (err < 0) + return err; + + return valid; +} + +const struct dentry_operations fscrypt_d_ops = { + .d_revalidate = fscrypt_d_revalidate, +}; + /** * fscrypt_initialize() - allocate major buffers for fs encryption. 
* @cop_flags: fscrypt operations flags diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 63bfe5e8accd..3aafddaab703 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -11,88 +11,10 @@ * This has not yet undergone a rigorous security audit. */ -#include #include -#include -#include #include #include "fscrypt_private.h" -/** - * struct fscrypt_nokey_name - identifier for directory entry when key is absent - * - * When userspace lists an encrypted directory without access to the key, the - * filesystem must present a unique "no-key name" for each filename that allows - * it to find the directory entry again if requested. Naively, that would just - * mean using the ciphertext filenames. However, since the ciphertext filenames - * can contain illegal characters ('\0' and '/'), they must be encoded in some - * way. We use base64. But that can cause names to exceed NAME_MAX (255 - * bytes), so we also need to use a strong hash to abbreviate long names. - * - * The filesystem may also need another kind of hash, the "dirhash", to quickly - * find the directory entry. Since filesystems normally compute the dirhash - * over the on-disk filename (i.e. the ciphertext), it's not computable from - * no-key names that abbreviate the ciphertext using the strong hash to fit in - * NAME_MAX. It's also not computable if it's a keyed hash taken over the - * plaintext (but it may still be available in the on-disk directory entry); - * casefolded directories use this type of dirhash. At least in these cases, - * each no-key name must include the name's dirhash too. - * - * To meet all these requirements, we base64-encode the following - * variable-length structure. It contains the dirhash, or 0's if the filesystem - * didn't provide one; up to 149 bytes of the ciphertext name; and for - * ciphertexts longer than 149 bytes, also the SHA-256 of the remaining bytes. 
- * - * This ensures that each no-key name contains everything needed to find the - * directory entry again, contains only legal characters, doesn't exceed - * NAME_MAX, is unambiguous unless there's a SHA-256 collision, and that we only - * take the performance hit of SHA-256 on very long filenames (which are rare). - */ -struct fscrypt_nokey_name { - u32 dirhash[2]; - u8 bytes[149]; - u8 sha256[SHA256_DIGEST_SIZE]; -}; /* 189 bytes => 252 bytes base64-encoded, which is <= NAME_MAX (255) */ - -/* - * Decoded size of max-size nokey name, i.e. a name that was abbreviated using - * the strong hash and thus includes the 'sha256' field. This isn't simply - * sizeof(struct fscrypt_nokey_name), as the padding at the end isn't included. - */ -#define FSCRYPT_NOKEY_NAME_MAX offsetofend(struct fscrypt_nokey_name, sha256) - -static struct crypto_shash *sha256_hash_tfm; - -static int fscrypt_do_sha256(const u8 *data, unsigned int data_len, u8 *result) -{ - struct crypto_shash *tfm = READ_ONCE(sha256_hash_tfm); - - if (unlikely(!tfm)) { - struct crypto_shash *prev_tfm; - - tfm = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(tfm)) { - fscrypt_err(NULL, - "Error allocating SHA-256 transform: %ld", - PTR_ERR(tfm)); - return PTR_ERR(tfm); - } - prev_tfm = cmpxchg(&sha256_hash_tfm, NULL, tfm); - if (prev_tfm) { - crypto_free_shash(tfm); - tfm = prev_tfm; - } - } - { - SHASH_DESC_ON_STACK(desc, tfm); - - desc->tfm = tfm; - desc->flags = 0; - - return crypto_shash_digest(desc, data, data_len, result); - } -} - static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) { if (str->len == 1 && str->name[0] == '.') @@ -105,19 +27,19 @@ static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) } /** - * fscrypt_fname_encrypt() - encrypt a filename + * fname_encrypt() - encrypt a filename * * The output buffer must be at least as large as the input buffer. * Any extra space is filled with NUL padding before encryption. 
* * Return: 0 on success, -errno on failure */ -int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname, - u8 *out, unsigned int olen) +int fname_encrypt(struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); - const struct fscrypt_info *ci = inode->i_crypt_info; + struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_key.tfm; union fscrypt_iv iv; struct scatterlist sg; @@ -163,14 +85,14 @@ int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname, * * Return: 0 on success, -errno on failure */ -static int fname_decrypt(const struct inode *inode, - const struct fscrypt_str *iname, - struct fscrypt_str *oname) +static int fname_decrypt(struct inode *inode, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; - const struct fscrypt_info *ci = inode->i_crypt_info; + struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_key.tfm; union fscrypt_iv iv; int res; @@ -284,7 +206,9 @@ int fscrypt_fname_alloc_buffer(const struct inode *inode, u32 max_encrypted_len, struct fscrypt_str *crypto_str) { - const u32 max_encoded_len = BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX); + const u32 max_encoded_len = + max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE), + 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))); u32 max_presented_len; max_presented_len = max(max_encoded_len, max_encrypted_len); @@ -317,21 +241,19 @@ EXPORT_SYMBOL(fscrypt_fname_free_buffer); * * The caller must have allocated sufficient memory for the @oname string. * - * If the key is available, we'll decrypt the disk name. Otherwise, we'll - * encode it for presentation in fscrypt_nokey_name format. - * See struct fscrypt_nokey_name for details. 
+ * If the key is available, we'll decrypt the disk name; otherwise, we'll encode + * it for presentation. Short names are directly base64-encoded, while long + * names are encoded in fscrypt_digested_name format. * * Return: 0 on success, -errno on failure */ -int fscrypt_fname_disk_to_usr(const struct inode *inode, - u32 hash, u32 minor_hash, - const struct fscrypt_str *iname, - struct fscrypt_str *oname) +int fscrypt_fname_disk_to_usr(struct inode *inode, + u32 hash, u32 minor_hash, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) { const struct qstr qname = FSTR_TO_QSTR(iname); - struct fscrypt_nokey_name nokey_name; - u32 size; /* size of the unencoded no-key name */ - int err; + struct fscrypt_digested_name digested_name; if (fscrypt_is_dot_dotdot(&qname)) { oname->name[0] = '.'; @@ -346,37 +268,24 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode, if (fscrypt_has_encryption_key(inode)) return fname_decrypt(inode, iname, oname); - /* - * Sanity check that struct fscrypt_nokey_name doesn't have padding - * between fields and that its encoded size never exceeds NAME_MAX. 
- */ - BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, dirhash) != - offsetof(struct fscrypt_nokey_name, bytes)); - BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, bytes) != - offsetof(struct fscrypt_nokey_name, sha256)); - BUILD_BUG_ON(BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX) > NAME_MAX); - - if (hash) { - nokey_name.dirhash[0] = hash; - nokey_name.dirhash[1] = minor_hash; - } else { - nokey_name.dirhash[0] = 0; - nokey_name.dirhash[1] = 0; + if (iname->len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) { + oname->len = base64_encode(iname->name, iname->len, + oname->name); + return 0; } - if (iname->len <= sizeof(nokey_name.bytes)) { - memcpy(nokey_name.bytes, iname->name, iname->len); - size = offsetof(struct fscrypt_nokey_name, bytes[iname->len]); + if (hash) { + digested_name.hash = hash; + digested_name.minor_hash = minor_hash; } else { - memcpy(nokey_name.bytes, iname->name, sizeof(nokey_name.bytes)); - /* Compute strong hash of remaining part of name. */ - err = fscrypt_do_sha256(&iname->name[sizeof(nokey_name.bytes)], - iname->len - sizeof(nokey_name.bytes), - nokey_name.sha256); - if (err) - return err; - size = FSCRYPT_NOKEY_NAME_MAX; + digested_name.hash = 0; + digested_name.minor_hash = 0; } - oname->len = base64_encode((const u8 *)&nokey_name, size, oname->name); + memcpy(digested_name.digest, + FSCRYPT_FNAME_DIGEST(iname->name, iname->len), + FSCRYPT_FNAME_DIGEST_SIZE); + oname->name[0] = '_'; + oname->len = 1 + base64_encode((const u8 *)&digested_name, + sizeof(digested_name), oname->name + 1); return 0; } EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); @@ -397,7 +306,8 @@ EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); * get the disk_name. * * Else, for keyless @lookup operations, @iname is the presented ciphertext, so - * we decode it to get the fscrypt_nokey_name. Non-@lookup operations will be + * we decode it to get either the ciphertext disk_name (for short names) or the + * fscrypt_digested_name (for long names). 
Non-@lookup operations will be * impossible in this case, so we fail them with ENOKEY. * * If successful, fscrypt_free_filename() must be called later to clean up. @@ -407,8 +317,8 @@ EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { - struct fscrypt_nokey_name *nokey_name; int ret; + int digested; memset(fname, 0, sizeof(struct fscrypt_name)); fname->usr_fname = iname; @@ -432,8 +342,8 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, if (!fname->crypto_buf.name) return -ENOMEM; - ret = fscrypt_fname_encrypt(dir, iname, fname->crypto_buf.name, - fname->crypto_buf.len); + ret = fname_encrypt(dir, iname, fname->crypto_buf.name, + fname->crypto_buf.len); if (ret) goto errout; fname->disk_name.name = fname->crypto_buf.name; @@ -448,31 +358,40 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, * We don't have the key and we are doing a lookup; decode the * user-supplied name */ + if (iname->name[0] == '_') { + if (iname->len != + 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))) + return -ENOENT; + digested = 1; + } else { + if (iname->len > + BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE)) + return -ENOENT; + digested = 0; + } - if (iname->len > BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX)) - return -ENOENT; - - fname->crypto_buf.name = kmalloc(FSCRYPT_NOKEY_NAME_MAX, GFP_KERNEL); + fname->crypto_buf.name = + kmalloc(max_t(size_t, FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE, + sizeof(struct fscrypt_digested_name)), + GFP_KERNEL); if (fname->crypto_buf.name == NULL) return -ENOMEM; - ret = base64_decode(iname->name, iname->len, fname->crypto_buf.name); - if (ret < (int)offsetof(struct fscrypt_nokey_name, bytes[1]) || - (ret > offsetof(struct fscrypt_nokey_name, sha256) && - ret != FSCRYPT_NOKEY_NAME_MAX)) { + ret = base64_decode(iname->name + digested, iname->len - digested, + fname->crypto_buf.name); + if (ret < 0) { ret = 
-ENOENT; goto errout; } fname->crypto_buf.len = ret; - - nokey_name = (void *)fname->crypto_buf.name; - fname->hash = nokey_name->dirhash[0]; - fname->minor_hash = nokey_name->dirhash[1]; - if (ret != FSCRYPT_NOKEY_NAME_MAX) { - /* The full ciphertext filename is available. */ - fname->disk_name.name = nokey_name->bytes; - fname->disk_name.len = - ret - offsetof(struct fscrypt_nokey_name, bytes); + if (digested) { + const struct fscrypt_digested_name *n = + (const void *)fname->crypto_buf.name; + fname->hash = n->hash; + fname->minor_hash = n->minor_hash; + } else { + fname->disk_name.name = fname->crypto_buf.name; + fname->disk_name.len = fname->crypto_buf.len; } return 0; @@ -481,106 +400,3 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, return ret; } EXPORT_SYMBOL(fscrypt_setup_filename); - -/** - * fscrypt_match_name() - test whether the given name matches a directory entry - * @fname: the name being searched for - * @de_name: the name from the directory entry - * @de_name_len: the length of @de_name in bytes - * - * Normally @fname->disk_name will be set, and in that case we simply compare - * that to the name stored in the directory entry. The only exception is that - * if we don't have the key for an encrypted directory and the name we're - * looking for is very long, then we won't have the full disk_name and instead - * we'll need to match against a fscrypt_nokey_name that includes a strong hash. - * - * Return: %true if the name matches, otherwise %false. 
- */ -bool fscrypt_match_name(const struct fscrypt_name *fname, - const u8 *de_name, u32 de_name_len) -{ - const struct fscrypt_nokey_name *nokey_name = - (const void *)fname->crypto_buf.name; - u8 sha256[SHA256_DIGEST_SIZE]; - - if (likely(fname->disk_name.name)) { - if (de_name_len != fname->disk_name.len) - return false; - return !memcmp(de_name, fname->disk_name.name, de_name_len); - } - if (de_name_len <= sizeof(nokey_name->bytes)) - return false; - if (memcmp(de_name, nokey_name->bytes, sizeof(nokey_name->bytes))) - return false; - if (fscrypt_do_sha256(&de_name[sizeof(nokey_name->bytes)], - de_name_len - sizeof(nokey_name->bytes), sha256)) - return false; - return !memcmp(sha256, nokey_name->sha256, sizeof(sha256)); -} -EXPORT_SYMBOL_GPL(fscrypt_match_name); - -/** - * fscrypt_fname_siphash() - calculate the SipHash of a filename - * @dir: the parent directory - * @name: the filename to calculate the SipHash of - * - * Given a plaintext filename @name and a directory @dir which uses SipHash as - * its dirhash method and has had its fscrypt key set up, this function - * calculates the SipHash of that name using the directory's secret dirhash key. - * - * Return: the SipHash of @name using the hash key of @dir - */ -u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name) -{ - const struct fscrypt_info *ci = dir->i_crypt_info; - - WARN_ON(!ci->ci_dirhash_key_initialized); - - return siphash(name->name, name->len, &ci->ci_dirhash_key); -} -EXPORT_SYMBOL_GPL(fscrypt_fname_siphash); - -/* - * Validate dentries in encrypted directories to make sure we aren't potentially - * caching stale dentries after a key has been added. 
- */ -int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) -{ - struct dentry *dir; - int err; - int valid; - - /* - * Plaintext names are always valid, since fscrypt doesn't support - * reverting to ciphertext names without evicting the directory's inode - * -- which implies eviction of the dentries in the directory. - */ - if (!(dentry->d_flags & DCACHE_ENCRYPTED_NAME)) - return 1; - - /* - * Ciphertext name; valid if the directory's key is still unavailable. - * - * Although fscrypt forbids rename() on ciphertext names, we still must - * use dget_parent() here rather than use ->d_parent directly. That's - * because a corrupted fs image may contain directory hard links, which - * the VFS handles by moving the directory's dentry tree in the dcache - * each time ->lookup() finds the directory and it already has a dentry - * elsewhere. Thus ->d_parent can be changing, and we must safely grab - * a reference to some ->d_parent to prevent it from being freed. - */ - - if (flags & LOOKUP_RCU) - return -ECHILD; - - dir = dget_parent(dentry); - err = fscrypt_get_encryption_info(d_inode(dir)); - valid = !fscrypt_has_encryption_key(d_inode(dir)); - dput(dir); - - if (err < 0) - return err; - - return valid; -} -EXPORT_SYMBOL(fscrypt_d_revalidate); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 0753681cb156..5d7b20fead86 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -12,7 +12,6 @@ #define _FSCRYPT_PRIVATE_H #include -#include #include #include @@ -78,26 +77,6 @@ static inline int fscrypt_context_size(const union fscrypt_context *ctx) return 0; } -/* Check whether an fscrypt_context has a recognized version number and size */ -static inline bool fscrypt_context_is_valid(const union fscrypt_context *ctx, - int ctx_size) -{ - return ctx_size >= 1 && ctx_size == fscrypt_context_size(ctx); -} - -/* Retrieve the context's nonce, assuming the context was already validated */ -static inline const u8 
*fscrypt_context_nonce(const union fscrypt_context *ctx) -{ - switch (ctx->version) { - case FSCRYPT_CONTEXT_V1: - return ctx->v1.nonce; - case FSCRYPT_CONTEXT_V2: - return ctx->v2.nonce; - } - WARN_ON(1); - return NULL; -} - #undef fscrypt_policy union fscrypt_policy { u8 version; @@ -159,6 +138,12 @@ fscrypt_policy_flags(const union fscrypt_policy *policy) BUG(); } +static inline bool +fscrypt_is_direct_key_policy(const union fscrypt_policy *policy) +{ + return fscrypt_policy_flags(policy) & FSCRYPT_POLICY_FLAG_DIRECT_KEY; +} + /** * For encrypted symlinks, the ciphertext length is stored at the beginning * of the string in little-endian format. @@ -233,14 +218,6 @@ struct fscrypt_info { */ struct fscrypt_direct_key *ci_direct_key; - /* - * This inode's hash key for filenames. This is a 128-bit SipHash-2-4 - * key. This is only set for directories that use a keyed dirhash over - * the plaintext filenames -- currently just casefolded directories. - */ - siphash_key_t ci_dirhash_key; - bool ci_dirhash_key_initialized; - /* The encryption policy used by this inode */ union fscrypt_policy ci_policy; @@ -253,6 +230,24 @@ typedef enum { FS_ENCRYPT, } fscrypt_direction_t; +static inline bool fscrypt_valid_enc_modes(u32 contents_mode, + u32 filenames_mode) +{ + if (contents_mode == FSCRYPT_MODE_AES_128_CBC && + filenames_mode == FSCRYPT_MODE_AES_128_CTS) + return true; + + if (contents_mode == FSCRYPT_MODE_AES_256_XTS && + filenames_mode == FSCRYPT_MODE_AES_256_CTS) + return true; + + if (contents_mode == FSCRYPT_MODE_ADIANTUM && + filenames_mode == FSCRYPT_MODE_ADIANTUM) + return true; + + return false; +} + /* crypto.c */ extern struct kmem_cache *fscrypt_info_cachep; extern int fscrypt_initialize(unsigned int cop_flags); @@ -262,6 +257,7 @@ extern int fscrypt_crypt_block(const struct inode *inode, unsigned int len, unsigned int offs, gfp_t gfp_flags); extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags); +extern const struct dentry_operations fscrypt_d_ops; 
extern void __printf(3, 4) __cold fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...); @@ -289,9 +285,8 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, const struct fscrypt_info *ci); /* fname.c */ -extern int fscrypt_fname_encrypt(const struct inode *inode, - const struct qstr *iname, - u8 *out, unsigned int olen); +extern int fname_encrypt(struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen); extern bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, u32 *encrypted_len_ret); @@ -313,12 +308,11 @@ extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, * output doesn't reveal another. */ #define HKDF_CONTEXT_KEY_IDENTIFIER 1 -#define HKDF_CONTEXT_PER_FILE_ENC_KEY 2 +#define HKDF_CONTEXT_PER_FILE_KEY 2 #define HKDF_CONTEXT_DIRECT_KEY 3 #define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4 -#define HKDF_CONTEXT_DIRHASH_KEY 5 -extern int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, +extern int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, u8 *okm, unsigned int okmlen); @@ -326,8 +320,7 @@ extern void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf); /* inline_crypt.c */ #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT -extern int fscrypt_select_encryption_impl(struct fscrypt_info *ci, - bool is_hw_wrapped_key); +extern void fscrypt_select_encryption_impl(struct fscrypt_info *ci); static inline bool fscrypt_using_inline_encryption(const struct fscrypt_info *ci) @@ -339,7 +332,6 @@ extern int fscrypt_prepare_inline_crypt_key( struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, - bool is_hw_wrapped, const struct fscrypt_info *ci); extern void fscrypt_destroy_inline_crypt_key( @@ -371,10 +363,8 @@ fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ -static inline int fscrypt_select_encryption_impl(struct 
fscrypt_info *ci, - bool is_hw_wrapped_key) +static inline void fscrypt_select_encryption_impl(struct fscrypt_info *ci) { - return 0; } static inline bool fscrypt_using_inline_encryption( @@ -386,7 +376,6 @@ static inline bool fscrypt_using_inline_encryption( static inline int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, - bool is_hw_wrapped, const struct fscrypt_info *ci) { WARN_ON(1); @@ -579,18 +568,20 @@ struct fscrypt_mode { extern struct fscrypt_mode fscrypt_modes[]; +static inline bool +fscrypt_mode_supports_direct_key(const struct fscrypt_mode *mode) +{ + return mode->ivsize >= offsetofend(union fscrypt_iv, nonce); +} + extern int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, - bool is_hw_wrapped, const struct fscrypt_info *ci); extern void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key); -extern int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, - const u8 *raw_key); - -extern int fscrypt_derive_dirhash_key(struct fscrypt_info *ci, - const struct fscrypt_master_key *mk); +extern int fscrypt_set_derived_key(struct fscrypt_info *ci, + const u8 *derived_key); /* keysetup_v1.c */ diff --git a/fs/crypto/hkdf.c b/fs/crypto/hkdf.c index fd7f67628561..2c026009c6e7 100644 --- a/fs/crypto/hkdf.c +++ b/fs/crypto/hkdf.c @@ -113,7 +113,7 @@ int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, * adds to its application-specific info strings to guarantee that it doesn't * accidentally repeat an info string when using HKDF for different purposes.) 
*/ -int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, +int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, u8 *okm, unsigned int okmlen) { diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index a6396bf721ac..30b1ca661249 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -4,8 +4,6 @@ * Encryption hooks for higher-level filesystem operations. */ -#include - #include "fscrypt_private.h" /** @@ -117,53 +115,12 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_ENCRYPTED_NAME; spin_unlock(&dentry->d_lock); + d_set_d_op(dentry, &fscrypt_d_ops); } return err; } EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); -/** - * fscrypt_prepare_setflags() - prepare to change flags with FS_IOC_SETFLAGS - * @inode: the inode on which flags are being changed - * @oldflags: the old flags - * @flags: the new flags - * - * The caller should be holding i_rwsem for write. - * - * Return: 0 on success; -errno if the flags change isn't allowed or if - * another error occurs. - */ -int fscrypt_prepare_setflags(struct inode *inode, - unsigned int oldflags, unsigned int flags) -{ - struct fscrypt_info *ci; - struct fscrypt_master_key *mk; - int err; - - /* - * When the CASEFOLD flag is set on an encrypted directory, we must - * derive the secret key needed for the dirhash. This is only possible - * if the directory uses a v2 encryption policy. 
- */ - if (IS_ENCRYPTED(inode) && (flags & ~oldflags & FS_CASEFOLD_FL)) { - err = fscrypt_require_key(inode); - if (err) - return err; - ci = inode->i_crypt_info; - if (ci->ci_policy.version != FSCRYPT_POLICY_V2) - return -EINVAL; - mk = ci->ci_master_key->payload.data[0]; - down_read(&mk->mk_secret_sem); - if (is_master_key_secret_present(&mk->mk_secret)) - err = fscrypt_derive_dirhash_key(ci, mk); - else - err = -ENOKEY; - up_read(&mk->mk_secret_sem); - return err; - } - return 0; -} - int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link) @@ -230,8 +187,7 @@ int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, ciphertext_len = disk_link->len - sizeof(*sd); sd->len = cpu_to_le16(ciphertext_len); - err = fscrypt_fname_encrypt(inode, &iname, sd->encrypted_path, - ciphertext_len); + err = fname_encrypt(inode, &iname, sd->encrypted_path, ciphertext_len); if (err) goto err_free_sd; diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index e1bbaeff1c43..92c471d3db73 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -26,94 +26,44 @@ struct fscrypt_blk_crypto_key { struct request_queue *devs[]; }; -static int fscrypt_get_num_devices(struct super_block *sb) -{ - if (sb->s_cop->get_num_devices) - return sb->s_cop->get_num_devices(sb); - return 1; -} - -static void fscrypt_get_devices(struct super_block *sb, int num_devs, - struct request_queue **devs) -{ - if (num_devs == 1) - devs[0] = bdev_get_queue(sb->s_bdev); - else - sb->s_cop->get_devices(sb, devs); -} - /* Enable inline encryption for this file if supported. 
*/ -int fscrypt_select_encryption_impl(struct fscrypt_info *ci, - bool is_hw_wrapped_key) +void fscrypt_select_encryption_impl(struct fscrypt_info *ci) { const struct inode *inode = ci->ci_inode; struct super_block *sb = inode->i_sb; - enum blk_crypto_mode_num crypto_mode = ci->ci_mode->blk_crypto_mode; - struct request_queue **devs; - int num_devs; - int i; /* The file must need contents encryption, not filenames encryption */ if (!S_ISREG(inode->i_mode)) - return 0; + return; /* blk-crypto must implement the needed encryption algorithm */ - if (crypto_mode == BLK_ENCRYPTION_MODE_INVALID) - return 0; + if (ci->ci_mode->blk_crypto_mode == BLK_ENCRYPTION_MODE_INVALID) + return; /* The filesystem must be mounted with -o inlinecrypt */ if (!sb->s_cop->inline_crypt_enabled || !sb->s_cop->inline_crypt_enabled(sb)) - return 0; - - /* - * The needed encryption settings must be supported either by - * blk-crypto-fallback, or by hardware on all the filesystem's devices. - */ - - if (IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) && - !is_hw_wrapped_key) { - ci->ci_inlinecrypt = true; - return 0; - } - - num_devs = fscrypt_get_num_devices(sb); - devs = kmalloc_array(num_devs, sizeof(*devs), GFP_NOFS); - if (!devs) - return -ENOMEM; - - fscrypt_get_devices(sb, num_devs, devs); - - for (i = 0; i < num_devs; i++) { - if (!keyslot_manager_crypto_mode_supported(devs[i]->ksm, - crypto_mode, - sb->s_blocksize, - is_hw_wrapped_key)) - goto out_free_devs; - } + return; ci->ci_inlinecrypt = true; -out_free_devs: - kfree(devs); - return 0; } int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, - bool is_hw_wrapped, const struct fscrypt_info *ci) { const struct inode *inode = ci->ci_inode; struct super_block *sb = inode->i_sb; enum blk_crypto_mode_num crypto_mode = ci->ci_mode->blk_crypto_mode; - int num_devs; + int num_devs = 1; int queue_refs = 0; struct fscrypt_blk_crypto_key *blk_key; int err; int i; - num_devs 
= fscrypt_get_num_devices(sb); + if (sb->s_cop->get_num_devices) + num_devs = sb->s_cop->get_num_devices(sb); if (WARN_ON(num_devs < 1)) return -EINVAL; @@ -122,13 +72,16 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, return -ENOMEM; blk_key->num_devs = num_devs; - fscrypt_get_devices(sb, num_devs, blk_key->devs); + if (num_devs == 1) + blk_key->devs[0] = bdev_get_queue(sb->s_bdev); + else + sb->s_cop->get_devices(sb, blk_key->devs); BUILD_BUG_ON(FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE > BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE); err = blk_crypto_init_key(&blk_key->base, raw_key, raw_key_size, - is_hw_wrapped, crypto_mode, sb->s_blocksize); + crypto_mode, sb->s_blocksize); if (err) { fscrypt_err(inode, "error %d initializing blk-crypto key", err); goto fail; @@ -150,7 +103,6 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, queue_refs++; err = blk_crypto_start_using_mode(crypto_mode, sb->s_blocksize, - is_hw_wrapped, blk_key->devs[i]); if (err) { fscrypt_err(inode, diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 0081fd48e96f..7facb5993b03 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -465,111 +465,6 @@ static int add_master_key(struct super_block *sb, return err; } -static int fscrypt_provisioning_key_preparse(struct key_preparsed_payload *prep) -{ - const struct fscrypt_provisioning_key_payload *payload = prep->data; - - BUILD_BUG_ON(FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE < FSCRYPT_MAX_KEY_SIZE); - - if (prep->datalen < sizeof(*payload) + FSCRYPT_MIN_KEY_SIZE || - prep->datalen > sizeof(*payload) + FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE) - return -EINVAL; - - if (payload->type != FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR && - payload->type != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) - return -EINVAL; - - if (payload->__reserved) - return -EINVAL; - - prep->payload.data[0] = kmemdup(payload, prep->datalen, GFP_KERNEL); - if (!prep->payload.data[0]) - return -ENOMEM; - - prep->quotalen = prep->datalen; - return 0; -} - 
-static void fscrypt_provisioning_key_free_preparse( - struct key_preparsed_payload *prep) -{ - kzfree(prep->payload.data[0]); -} - -static void fscrypt_provisioning_key_describe(const struct key *key, - struct seq_file *m) -{ - seq_puts(m, key->description); - if (key_is_positive(key)) { - const struct fscrypt_provisioning_key_payload *payload = - key->payload.data[0]; - - seq_printf(m, ": %u [%u]", key->datalen, payload->type); - } -} - -static void fscrypt_provisioning_key_destroy(struct key *key) -{ - kzfree(key->payload.data[0]); -} - -static struct key_type key_type_fscrypt_provisioning = { - .name = "fscrypt-provisioning", - .preparse = fscrypt_provisioning_key_preparse, - .free_preparse = fscrypt_provisioning_key_free_preparse, - .instantiate = generic_key_instantiate, - .describe = fscrypt_provisioning_key_describe, - .destroy = fscrypt_provisioning_key_destroy, -}; - -/* - * Retrieve the raw key from the Linux keyring key specified by 'key_id', and - * store it into 'secret'. - * - * The key must be of type "fscrypt-provisioning" and must have the field - * fscrypt_provisioning_key_payload::type set to 'type', indicating that it's - * only usable with fscrypt with the particular KDF version identified by - * 'type'. We don't use the "logon" key type because there's no way to - * completely restrict the use of such keys; they can be used by any kernel API - * that accepts "logon" keys and doesn't require a specific service prefix. - * - * The ability to specify the key via Linux keyring key is intended for cases - * where userspace needs to re-add keys after the filesystem is unmounted and - * re-mounted. Most users should just provide the raw key directly instead. 
- */ -static int get_keyring_key(u32 key_id, u32 type, - struct fscrypt_master_key_secret *secret) -{ - key_ref_t ref; - struct key *key; - const struct fscrypt_provisioning_key_payload *payload; - int err; - - ref = lookup_user_key(key_id, 0, KEY_NEED_SEARCH); - if (IS_ERR(ref)) - return PTR_ERR(ref); - key = key_ref_to_ptr(ref); - - if (key->type != &key_type_fscrypt_provisioning) - goto bad_key; - payload = key->payload.data[0]; - - /* Don't allow fscrypt v1 keys to be used as v2 keys and vice versa. */ - if (payload->type != type) - goto bad_key; - - secret->size = key->datalen - sizeof(*payload); - memcpy(secret->raw, payload->raw, secret->size); - err = 0; - goto out_put; - -bad_key: - err = -EKEYREJECTED; -out_put: - key_ref_put(ref); - return err; -} - /* Size of software "secret" derived from hardware-wrapped key */ #define RAW_SECRET_SIZE 32 @@ -617,28 +512,20 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg) if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved))) return -EINVAL; + BUILD_BUG_ON(FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE < + FSCRYPT_MAX_KEY_SIZE); + + if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE || + arg.raw_size > + ((arg.__flags & __FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED) ? + FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE : FSCRYPT_MAX_KEY_SIZE)) + return -EINVAL; + memset(&secret, 0, sizeof(secret)); - if (arg.key_id) { - if (arg.raw_size != 0) - return -EINVAL; - err = get_keyring_key(arg.key_id, arg.key_spec.type, &secret); - if (err) - goto out_wipe_secret; - err = -EINVAL; - if (!(arg.__flags & __FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED) && - secret.size > FSCRYPT_MAX_KEY_SIZE) - goto out_wipe_secret; - } else { - if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE || - arg.raw_size > - ((arg.__flags & __FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED) ? 
- FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE : FSCRYPT_MAX_KEY_SIZE)) - return -EINVAL; - secret.size = arg.raw_size; - err = -EFAULT; - if (copy_from_user(secret.raw, uarg->raw, secret.size)) - goto out_wipe_secret; - } + secret.size = arg.raw_size; + err = -EFAULT; + if (copy_from_user(secret.raw, uarg->raw, secret.size)) + goto out_wipe_secret; switch (arg.key_spec.type) { case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR: @@ -809,6 +696,9 @@ static int check_for_busy_inodes(struct super_block *sb, struct list_head *pos; size_t busy_count = 0; unsigned long ino; + struct dentry *dentry; + char _path[256]; + char *path = NULL; spin_lock(&mk->mk_decrypted_inodes_lock); @@ -827,14 +717,22 @@ static int check_for_busy_inodes(struct super_block *sb, struct fscrypt_info, ci_master_key_link)->ci_inode; ino = inode->i_ino; + dentry = d_find_alias(inode); } spin_unlock(&mk->mk_decrypted_inodes_lock); + if (dentry) { + path = dentry_path(dentry, _path, sizeof(_path)); + dput(dentry); + } + if (IS_ERR_OR_NULL(path)) + path = "(unknown)"; + fscrypt_warn(NULL, - "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu", + "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu (%s)", sb->s_id, busy_count, master_key_spec_type(&mk->mk_spec), master_key_spec_len(&mk->mk_spec), (u8 *)&mk->mk_spec.u, - ino); + ino, path); return -EBUSY; } @@ -1132,14 +1030,8 @@ int __init fscrypt_init_keyring(void) if (err) goto err_unregister_fscrypt; - err = register_key_type(&key_type_fscrypt_provisioning); - if (err) - goto err_unregister_fscrypt_user; - return 0; -err_unregister_fscrypt_user: - unregister_key_type(&key_type_fscrypt_user); err_unregister_fscrypt: unregister_key_type(&key_type_fscrypt); return err; diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 6b98ca38fc44..f87daf215ac9 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -92,11 +92,8 @@ fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, * first time a 
mode is used. */ pr_info("fscrypt: %s using implementation \"%s\"\n", - mode->friendly_name, crypto_skcipher_driver_name(tfm)); - } - if (WARN_ON(crypto_skcipher_ivsize(tfm) != mode->ivsize)) { - err = -EINVAL; - goto err_free_tfm; + mode->friendly_name, + crypto_skcipher_alg(tfm)->base.cra_driver_name); } crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize); @@ -117,15 +114,15 @@ fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, */ int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, - bool is_hw_wrapped, const struct fscrypt_info *ci) + const struct fscrypt_info *ci) { struct crypto_skcipher *tfm; if (fscrypt_using_inline_encryption(ci)) return fscrypt_prepare_inline_crypt_key(prep_key, - raw_key, raw_key_size, is_hw_wrapped, ci); + raw_key, raw_key_size, ci); - if (WARN_ON(is_hw_wrapped || raw_key_size != ci->ci_mode->keysize)) + if (WARN_ON(raw_key_size != ci->ci_mode->keysize)) return -EINVAL; tfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, ci->ci_inode); @@ -146,18 +143,18 @@ void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key) fscrypt_destroy_inline_crypt_key(prep_key); } -/* Given a per-file encryption key, set up the file's crypto transform object */ -int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key) +/* Given the per-file key, set up the file's crypto transform object */ +int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key) { ci->ci_owns_key = true; - return fscrypt_prepare_key(&ci->ci_key, raw_key, ci->ci_mode->keysize, - false /*is_hw_wrapped*/, ci); + return fscrypt_prepare_key(&ci->ci_key, derived_key, + ci->ci_mode->keysize, ci); } -static int setup_per_mode_enc_key(struct fscrypt_info *ci, - struct fscrypt_master_key *mk, - struct fscrypt_prepared_key *keys, - u8 hkdf_context, bool include_fs_uuid) +static int setup_per_mode_key(struct 
fscrypt_info *ci, + struct fscrypt_master_key *mk, + struct fscrypt_prepared_key *keys, + u8 hkdf_context, bool include_fs_uuid) { static DEFINE_MUTEX(mode_key_setup_mutex); const struct inode *inode = ci->ci_inode; @@ -202,7 +199,7 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci, } } err = fscrypt_prepare_key(prep_key, mk->mk_secret.raw, - mk->mk_secret.size, true, ci); + mk->mk_secret.size, ci); if (err) goto out_unlock; } else { @@ -221,7 +218,7 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci, if (err) goto out_unlock; err = fscrypt_prepare_key(prep_key, mode_key, mode->keysize, - false /*is_hw_wrapped*/, ci); + ci); memzero_explicit(mode_key, mode->keysize); if (err) goto out_unlock; @@ -234,24 +231,10 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci, return err; } -int fscrypt_derive_dirhash_key(struct fscrypt_info *ci, - const struct fscrypt_master_key *mk) -{ - int err; - - err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, HKDF_CONTEXT_DIRHASH_KEY, - ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE, - (u8 *)&ci->ci_dirhash_key, - sizeof(ci->ci_dirhash_key)); - if (err) - return err; - ci->ci_dirhash_key_initialized = true; - return 0; -} - static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, struct fscrypt_master_key *mk) { + u8 derived_key[FSCRYPT_MAX_KEY_SIZE]; int err; if (mk->mk_secret.is_hw_wrapped && @@ -263,15 +246,21 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { /* - * DIRECT_KEY: instead of deriving per-file encryption keys, the - * per-file nonce will be included in all the IVs. But unlike - * v1 policies, for v2 policies in this case we don't encrypt - * with the master key directly but rather derive a per-mode - * encryption key. This ensures that the master key is - * consistently used only for HKDF, avoiding key reuse issues. 
+ * DIRECT_KEY: instead of deriving per-file keys, the per-file + * nonce will be included in all the IVs. But unlike v1 + * policies, for v2 policies in this case we don't encrypt with + * the master key directly but rather derive a per-mode key. + * This ensures that the master key is consistently used only + * for HKDF, avoiding key reuse issues. */ - err = setup_per_mode_enc_key(ci, mk, mk->mk_direct_keys, - HKDF_CONTEXT_DIRECT_KEY, false); + if (!fscrypt_mode_supports_direct_key(ci->ci_mode)) { + fscrypt_warn(ci->ci_inode, + "Direct key flag not allowed with %s", + ci->ci_mode->friendly_name); + return -EINVAL; + } + return setup_per_mode_key(ci, mk, mk->mk_direct_keys, + HKDF_CONTEXT_DIRECT_KEY, false); } else if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) { /* @@ -280,34 +269,21 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, * the IVs. This format is optimized for use with inline * encryption hardware compliant with the UFS or eMMC standards. */ - err = setup_per_mode_enc_key(ci, mk, mk->mk_iv_ino_lblk_64_keys, - HKDF_CONTEXT_IV_INO_LBLK_64_KEY, - true); - } else { - u8 derived_key[FSCRYPT_MAX_KEY_SIZE]; - - err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, - HKDF_CONTEXT_PER_FILE_ENC_KEY, - ci->ci_nonce, - FS_KEY_DERIVATION_NONCE_SIZE, - derived_key, ci->ci_mode->keysize); - if (err) - return err; - - err = fscrypt_set_per_file_enc_key(ci, derived_key); - memzero_explicit(derived_key, ci->ci_mode->keysize); + return setup_per_mode_key(ci, mk, mk->mk_iv_ino_lblk_64_keys, + HKDF_CONTEXT_IV_INO_LBLK_64_KEY, + true); } + + err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, + HKDF_CONTEXT_PER_FILE_KEY, + ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE, + derived_key, ci->ci_mode->keysize); if (err) return err; - /* Derive a secret dirhash key for directories that need it. 
*/ - if (S_ISDIR(ci->ci_inode->i_mode) && IS_CASEFOLDED(ci->ci_inode)) { - err = fscrypt_derive_dirhash_key(ci, mk); - if (err) - return err; - } - - return 0; + err = fscrypt_set_derived_key(ci, derived_key); + memzero_explicit(derived_key, ci->ci_mode->keysize); + return err; } /* @@ -328,6 +304,8 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, struct fscrypt_key_specifier mk_spec; int err; + fscrypt_select_encryption_impl(ci); + switch (ci->ci_policy.version) { case FSCRYPT_POLICY_V1: mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR; @@ -352,10 +330,6 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, ci->ci_policy.version != FSCRYPT_POLICY_V1) return PTR_ERR(key); - err = fscrypt_select_encryption_impl(ci, false); - if (err) - return err; - /* * As a legacy fallback for v1 policies, search for the key in * the current task's subscribed keyrings too. Don't move this @@ -390,10 +364,6 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, goto out_release_key; } - err = fscrypt_select_encryption_impl(ci, mk->mk_secret.is_hw_wrapped); - if (err) - goto out_release_key; - switch (ci->ci_policy.version) { case FSCRYPT_POLICY_V1: err = fscrypt_setup_v1_file_key(ci, mk->mk_secret.raw); @@ -500,8 +470,20 @@ int fscrypt_get_encryption_info(struct inode *inode) goto out; } - memcpy(crypt_info->ci_nonce, fscrypt_context_nonce(&ctx), - FS_KEY_DERIVATION_NONCE_SIZE); + switch (ctx.version) { + case FSCRYPT_CONTEXT_V1: + memcpy(crypt_info->ci_nonce, ctx.v1.nonce, + FS_KEY_DERIVATION_NONCE_SIZE); + break; + case FSCRYPT_CONTEXT_V2: + memcpy(crypt_info->ci_nonce, ctx.v2.nonce, + FS_KEY_DERIVATION_NONCE_SIZE); + break; + default: + WARN_ON(1); + res = -EINVAL; + goto out; + } if (!fscrypt_supported_policy(&crypt_info->ci_policy, inode)) { res = -EINVAL; @@ -601,15 +583,6 @@ int fscrypt_drop_inode(struct inode *inode) return 0; mk = ci->ci_master_key->payload.data[0]; - /* - * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all 
inodes - * protected by the key were cleaned by sync_filesystem(). But if - * userspace is still using the files, inodes can be dirtied between - * then and now. We mustn't lose any writes, so skip dirty inodes here. - */ - if (inode->i_state & I_DIRTY_ALL) - return 0; - /* * Note: since we aren't holding ->mk_secret_sem, the result here can * immediately become outdated. But there's no correctness problem with diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 3f7bb48f7317..47591c54dc3d 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -9,7 +9,7 @@ * This file implements compatibility functions for the original encryption * policy version ("v1"), including: * - * - Deriving per-file encryption keys using the AES-128-ECB based KDF + * - Deriving per-file keys using the AES-128-ECB based KDF * (rather than the new method of using HKDF-SHA512) * * - Retrieving fscrypt master keys from process-subscribed keyrings @@ -234,7 +234,7 @@ fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key) refcount_set(&dk->dk_refcount, 1); dk->dk_mode = ci->ci_mode; err = fscrypt_prepare_key(&dk->dk_key, raw_key, ci->ci_mode->keysize, - false /*is_hw_wrapped*/, ci); + ci); if (err) goto err_free_dk; memcpy(dk->dk_descriptor, ci->ci_policy.v1.master_key_descriptor, @@ -252,8 +252,23 @@ fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key) static int setup_v1_file_key_direct(struct fscrypt_info *ci, const u8 *raw_master_key) { + const struct fscrypt_mode *mode = ci->ci_mode; struct fscrypt_direct_key *dk; + if (!fscrypt_mode_supports_direct_key(mode)) { + fscrypt_warn(ci->ci_inode, + "Direct key mode not allowed with %s", + mode->friendly_name); + return -EINVAL; + } + + if (ci->ci_policy.v1.contents_encryption_mode != + ci->ci_policy.v1.filenames_encryption_mode) { + fscrypt_warn(ci->ci_inode, + "Direct key mode not allowed with different contents and filenames modes"); + return -EINVAL; + } + dk = 
fscrypt_get_direct_key(ci, raw_master_key); if (IS_ERR(dk)) return PTR_ERR(dk); @@ -282,7 +297,7 @@ static int setup_v1_file_key_derived(struct fscrypt_info *ci, if (err) goto out; - err = fscrypt_set_per_file_enc_key(ci, derived_key); + err = fscrypt_set_derived_key(ci, derived_key); out: kzfree(derived_key); return err; diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 10ccf945020c..96f528071bed 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -29,43 +29,6 @@ bool fscrypt_policies_equal(const union fscrypt_policy *policy1, return !memcmp(policy1, policy2, fscrypt_policy_size(policy1)); } -static bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode) -{ - if (contents_mode == FSCRYPT_MODE_AES_256_XTS && - filenames_mode == FSCRYPT_MODE_AES_256_CTS) - return true; - - if (contents_mode == FSCRYPT_MODE_AES_128_CBC && - filenames_mode == FSCRYPT_MODE_AES_128_CTS) - return true; - - if (contents_mode == FSCRYPT_MODE_ADIANTUM && - filenames_mode == FSCRYPT_MODE_ADIANTUM) - return true; - - return false; -} - -static bool supported_direct_key_modes(const struct inode *inode, - u32 contents_mode, u32 filenames_mode) -{ - const struct fscrypt_mode *mode; - - if (contents_mode != filenames_mode) { - fscrypt_warn(inode, - "Direct key flag not allowed with different contents and filenames modes"); - return false; - } - mode = &fscrypt_modes[contents_mode]; - - if (mode->ivsize < offsetofend(union fscrypt_iv, nonce)) { - fscrypt_warn(inode, "Direct key flag not allowed with %s", - mode->friendly_name); - return false; - } - return true; -} - static bool supported_iv_ino_lblk_64_policy( const struct fscrypt_policy_v2 *policy, const struct inode *inode) @@ -100,82 +63,13 @@ static bool supported_iv_ino_lblk_64_policy( return true; } -static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy, - const struct inode *inode) -{ - if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, - policy->filenames_encryption_mode)) 
{ - fscrypt_warn(inode, - "Unsupported encryption modes (contents %d, filenames %d)", - policy->contents_encryption_mode, - policy->filenames_encryption_mode); - return false; - } - - if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK | - FSCRYPT_POLICY_FLAG_DIRECT_KEY)) { - fscrypt_warn(inode, "Unsupported encryption flags (0x%02x)", - policy->flags); - return false; - } - - if ((policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) && - !supported_direct_key_modes(inode, policy->contents_encryption_mode, - policy->filenames_encryption_mode)) - return false; - - if (IS_CASEFOLDED(inode)) { - /* With v1, there's no way to derive dirhash keys. */ - fscrypt_warn(inode, - "v1 policies can't be used on casefolded directories"); - return false; - } - - return true; -} - -static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy, - const struct inode *inode) -{ - if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, - policy->filenames_encryption_mode)) { - fscrypt_warn(inode, - "Unsupported encryption modes (contents %d, filenames %d)", - policy->contents_encryption_mode, - policy->filenames_encryption_mode); - return false; - } - - if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) { - fscrypt_warn(inode, "Unsupported encryption flags (0x%02x)", - policy->flags); - return false; - } - - if ((policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) && - !supported_direct_key_modes(inode, policy->contents_encryption_mode, - policy->filenames_encryption_mode)) - return false; - - if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) && - !supported_iv_ino_lblk_64_policy(policy, inode)) - return false; - - if (memchr_inv(policy->__reserved, 0, sizeof(policy->__reserved))) { - fscrypt_warn(inode, "Reserved bits set in encryption policy"); - return false; - } - - return true; -} - /** * fscrypt_supported_policy - check whether an encryption policy is supported * * Given an encryption policy, check whether all its encryption modes and other - * settings are 
supported by this kernel on the given inode. (But we don't - * currently don't check for crypto API support here, so attempting to use an - * algorithm not configured into the crypto API will still fail later.) + * settings are supported by this kernel. (But we don't currently don't check + * for crypto API support here, so attempting to use an algorithm not configured + * into the crypto API will still fail later.) * * Return: %true if supported, else %false */ @@ -183,10 +77,60 @@ bool fscrypt_supported_policy(const union fscrypt_policy *policy_u, const struct inode *inode) { switch (policy_u->version) { - case FSCRYPT_POLICY_V1: - return fscrypt_supported_v1_policy(&policy_u->v1, inode); - case FSCRYPT_POLICY_V2: - return fscrypt_supported_v2_policy(&policy_u->v2, inode); + case FSCRYPT_POLICY_V1: { + const struct fscrypt_policy_v1 *policy = &policy_u->v1; + + if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, + policy->filenames_encryption_mode)) { + fscrypt_warn(inode, + "Unsupported encryption modes (contents %d, filenames %d)", + policy->contents_encryption_mode, + policy->filenames_encryption_mode); + return false; + } + + if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK | + FSCRYPT_POLICY_FLAG_DIRECT_KEY)) { + fscrypt_warn(inode, + "Unsupported encryption flags (0x%02x)", + policy->flags); + return false; + } + + return true; + } + case FSCRYPT_POLICY_V2: { + const struct fscrypt_policy_v2 *policy = &policy_u->v2; + + if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, + policy->filenames_encryption_mode)) { + fscrypt_warn(inode, + "Unsupported encryption modes (contents %d, filenames %d)", + policy->contents_encryption_mode, + policy->filenames_encryption_mode); + return false; + } + + if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) { + fscrypt_warn(inode, + "Unsupported encryption flags (0x%02x)", + policy->flags); + return false; + } + + if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) && + 
!supported_iv_ino_lblk_64_policy(policy, inode)) + return false; + + if (memchr_inv(policy->__reserved, 0, + sizeof(policy->__reserved))) { + fscrypt_warn(inode, + "Reserved bits set in encryption policy"); + return false; + } + + return true; + } } return false; } @@ -258,7 +202,7 @@ int fscrypt_policy_from_context(union fscrypt_policy *policy_u, { memset(policy_u, 0, sizeof(*policy_u)); - if (!fscrypt_context_is_valid(ctx_u, ctx_size)) + if (ctx_size <= 0 || ctx_size != fscrypt_context_size(ctx_u)) return -EINVAL; switch (ctx_u->version) { @@ -481,25 +425,6 @@ int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *uarg) } EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_policy_ex); -/* FS_IOC_GET_ENCRYPTION_NONCE: retrieve file's encryption nonce for testing */ -int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg) -{ - struct inode *inode = file_inode(filp); - union fscrypt_context ctx; - int ret; - - ret = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); - if (ret < 0) - return ret; - if (!fscrypt_context_is_valid(&ctx, ret)) - return -EINVAL; - if (copy_to_user(arg, fscrypt_context_nonce(&ctx), - FS_KEY_DERIVATION_NONCE_SIZE)) - return -EFAULT; - return 0; -} -EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_nonce); - /** * fscrypt_has_permitted_context() - is a file's encryption policy permitted * within its directory? diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 39cd2c054339..e2cfd33fd759 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -37,7 +37,6 @@ config EXT4_FS select CRC16 select CRYPTO select CRYPTO_CRC32C - select FS_ENCRYPTION_ALGS if FS_ENCRYPTION help This is the next generation of the ext3 filesystem. 
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ee766e3bed8b..e4d13c6ac931 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -117,7 +117,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) if (IS_ENCRYPTED(inode)) { err = fscrypt_get_encryption_info(inode); - if (err) + if (err && err != -ENOKEY) return err; } @@ -664,3 +664,10 @@ const struct file_operations ext4_dir_operations = { .open = ext4_dir_open, .release = ext4_release_dir, }; + +#ifdef CONFIG_UNICODE +const struct dentry_operations ext4_dentry_ops = { + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, +}; +#endif diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 3e5ca2107998..e7c7a6737a46 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1100,11 +1100,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EOPNOTSUPP; return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); - case FS_IOC_GET_ENCRYPTION_NONCE: - if (!ext4_has_feature_encrypt(sb)) - return -EOPNOTSUPP; - return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); - case EXT4_IOC_FSGETXATTR: { struct fsxattr fa; @@ -1248,7 +1243,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_REMOVE_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: case FS_IOC_GET_ENCRYPTION_KEY_STATUS: - case FS_IOC_GET_ENCRYPTION_NONCE: case EXT4_IOC_SHUTDOWN: case FS_IOC_GETFSMAP: case FS_IOC_ENABLE_VERITY: diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b134add5a5ad..0e6a7cb9e9cf 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1608,7 +1608,6 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir, struct buffer_head *bh; err = ext4_fname_prepare_lookup(dir, dentry, &fname); - generic_set_encrypted_ci_d_ops(dir, dentry); if (err == -ENOENT) return NULL; if (err) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 782d4a26aace..34fbd95d9b3d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4490,6 +4490,11 @@ 
static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount4; } +#ifdef CONFIG_UNICODE + if (sb->s_encoding) + sb->s_d_op = &ext4_dentry_ops; +#endif + sb->s_root = d_make_root(root); if (!sb->s_root) { ext4_msg(sb, KERN_ERR, "get root dentry failed"); diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 1940a6574b66..675af7cd29d3 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -5,7 +5,6 @@ config F2FS_FS select CRYPTO select CRYPTO_CRC32 select F2FS_FS_XATTR if FS_ENCRYPTION - select FS_ENCRYPTION_ALGS if FS_ENCRYPTION help F2FS is based on Log-structured File System (LFS), which supports versatile "flash-friendly" features. The design has been focused on diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 7fd0b08d7518..0898fff69259 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -108,52 +108,34 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, * Test whether a case-insensitive directory entry matches the filename * being searched for. * - * Only called for encrypted names if the key is available. - * * Returns: 0 if the directory entry matches, more than 0 if it * doesn't match or less than zero on error. 
*/ -static int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, - u8 *de_name, size_t de_name_len, bool quick) +int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, + const struct qstr *entry, bool quick) { const struct super_block *sb = parent->i_sb; const struct unicode_map *um = sb->s_encoding; - struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len); - struct qstr entry = QSTR_INIT(de_name, de_name_len); int ret; - if (IS_ENCRYPTED(parent)) { - const struct fscrypt_str encrypted_name = - FSTR_INIT(de_name, de_name_len); - - decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); - if (!decrypted_name.name) - return -ENOMEM; - ret = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name, - &decrypted_name); - if (ret < 0) - goto out; - entry.name = decrypted_name.name; - entry.len = decrypted_name.len; - } - if (quick) - ret = utf8_strncasecmp_folded(um, name, &entry); + ret = utf8_strncasecmp_folded(um, name, entry); else - ret = utf8_strncasecmp(um, name, &entry); + ret = utf8_strncasecmp(um, name, entry); + if (ret < 0) { /* Handle invalid character sequence as either an error * or as an opaque byte sequence. 
*/ if (sb_has_enc_strict_mode(sb)) - ret = -EINVAL; - else if (name->len != entry.len) - ret = 1; - else - ret = !!memcmp(name->name, entry.name, entry.len); + return -EINVAL; + + if (name->len != entry->len) + return 1; + + return !!memcmp(name->name, entry->name, name->len); } -out: - kfree(decrypted_name.name); + return ret; } @@ -191,24 +173,24 @@ static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, { #ifdef CONFIG_UNICODE struct inode *parent = d->inode; - u8 *name; - int len; + struct super_block *sb = parent->i_sb; + struct qstr entry; #endif if (de->hash_code != namehash) return false; #ifdef CONFIG_UNICODE - name = d->filename[bit_pos]; - len = le16_to_cpu(de->name_len); + entry.name = d->filename[bit_pos]; + entry.len = de->name_len; - if (needs_casefold(parent)) { + if (sb->s_encoding && IS_CASEFOLDED(parent)) { if (cf_str->name) { struct qstr cf = {.name = cf_str->name, .len = cf_str->len}; - return !f2fs_ci_compare(parent, &cf, name, len, true); + return !f2fs_ci_compare(parent, &cf, &entry, true); } - return !f2fs_ci_compare(parent, fname->usr_fname, name, len, + return !f2fs_ci_compare(parent, fname->usr_fname, &entry, false); } #endif @@ -632,13 +614,13 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, - f2fs_hash_t dentry_hash, struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; unsigned int level; unsigned int current_depth; unsigned long bidx, block; + f2fs_hash_t dentry_hash; unsigned int nbucket, nblock; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; @@ -648,6 +630,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, level = 0; slots = GET_DENTRY_SLOTS(new_name->len); + dentry_hash = f2fs_dentry_hash(dir, new_name, NULL); current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { @@ -733,19 +716,17 @@ 
int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode) { struct qstr new_name; - f2fs_hash_t dentry_hash; int err = -EAGAIN; new_name.name = fname_name(fname); new_name.len = fname_len(fname); if (f2fs_has_inline_dentry(dir)) - err = f2fs_add_inline_entry(dir, &new_name, fname, + err = f2fs_add_inline_entry(dir, &new_name, fname->usr_fname, inode, ino, mode); - dentry_hash = f2fs_dentry_hash(dir, &new_name, fname); if (err == -EAGAIN) err = f2fs_add_regular_entry(dir, &new_name, fname->usr_fname, - dentry_hash, inode, ino, mode); + inode, ino, mode); f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); return err; @@ -1018,7 +999,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) if (IS_ENCRYPTED(inode)) { err = fscrypt_get_encryption_info(inode); - if (err) + if (err && err != -ENOKEY) goto out; err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr); @@ -1094,3 +1075,10 @@ const struct file_operations f2fs_dir_operations = { .compat_ioctl = f2fs_compat_ioctl, #endif }; + +#ifdef CONFIG_UNICODE +const struct dentry_operations f2fs_dentry_ops = { + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, +}; +#endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e3304b6ba778..dd4a2cd5762e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3137,6 +3137,11 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); +extern int f2fs_ci_compare(const struct inode *parent, + const struct qstr *name, + const struct qstr *entry, + bool quick); + /* * dir.c */ @@ -3170,7 +3175,7 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos); int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, f2fs_hash_t dentry_hash, + const struct qstr *orig_name, 
struct inode *inode, nid_t ino, umode_t mode); int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode); @@ -3203,7 +3208,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); * hash.c */ f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, - const struct qstr *name_info, const struct fscrypt_name *fname); + const struct qstr *name_info, struct fscrypt_name *fname); /* * node.c @@ -3683,6 +3688,9 @@ static inline void update_sit_info(struct f2fs_sb_info *sbi) {} #endif extern const struct file_operations f2fs_dir_operations; +#ifdef CONFIG_UNICODE +extern const struct dentry_operations f2fs_dentry_ops; +#endif extern const struct file_operations f2fs_file_operations; extern const struct inode_operations f2fs_file_inode_operations; extern const struct address_space_operations f2fs_dblock_aops; @@ -3713,7 +3721,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage); int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, - const struct fscrypt_name *fname, + const struct qstr *orig_name, struct inode *inode, nid_t ino, umode_t mode); void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 311a36cba330..b0c432c0fbb2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2444,14 +2444,6 @@ static int f2fs_ioc_get_encryption_key_status(struct file *filp, return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); } -static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg) -{ - if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) - return -EOPNOTSUPP; - - return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); -} - static int f2fs_ioc_gc(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -3419,8 +3411,6 @@ long f2fs_ioctl(struct file 
*filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_remove_encryption_key_all_users(filp, arg); case FS_IOC_GET_ENCRYPTION_KEY_STATUS: return f2fs_ioc_get_encryption_key_status(filp, arg); - case FS_IOC_GET_ENCRYPTION_NONCE: - return f2fs_ioc_get_encryption_nonce(filp, arg); case F2FS_IOC_GARBAGE_COLLECT: return f2fs_ioc_gc(filp, arg); case F2FS_IOC_GARBAGE_COLLECT_RANGE: @@ -3600,7 +3590,6 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_REMOVE_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: case FS_IOC_GET_ENCRYPTION_KEY_STATUS: - case FS_IOC_GET_ENCRYPTION_NONCE: case F2FS_IOC_GARBAGE_COLLECT: case F2FS_IOC_GARBAGE_COLLECT_RANGE: case F2FS_IOC_WRITE_CHECKPOINT: diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 8f7ee4362312..28acb24e7a7a 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -68,9 +68,8 @@ static void str2hashbuf(const unsigned char *msg, size_t len, *buf++ = pad; } -static f2fs_hash_t __f2fs_dentry_hash(const struct inode *dir, - const struct qstr *name_info, - const struct fscrypt_name *fname) +static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, + struct fscrypt_name *fname) { __u32 hash; f2fs_hash_t f2fs_hash; @@ -80,17 +79,12 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct inode *dir, size_t len = name_info->len; /* encrypted bigname case */ - if (fname && fname->is_ciphertext_name) + if (fname && !fname->disk_name.name) return cpu_to_le32(fname->hash); if (is_dot_dotdot(name_info)) return 0; - if (IS_CASEFOLDED(dir) && IS_ENCRYPTED(dir)) { - f2fs_hash = cpu_to_le32(fscrypt_fname_siphash(dir, name_info)); - return f2fs_hash; - } - /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; buf[1] = 0xefcdab89; @@ -112,7 +106,7 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct inode *dir, } f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, - const struct qstr *name_info, const struct fscrypt_name *fname) + const struct qstr 
*name_info, struct fscrypt_name *fname) { #ifdef CONFIG_UNICODE struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); @@ -120,30 +114,27 @@ f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, int r, dlen; unsigned char *buff; struct qstr folded; - const struct qstr *name = fname ? fname->usr_fname : name_info; if (!name_info->len || !IS_CASEFOLDED(dir)) goto opaque_seq; - if (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir)) - goto opaque_seq; - buff = f2fs_kzalloc(sbi, sizeof(char) * PATH_MAX, GFP_KERNEL); if (!buff) return -ENOMEM; - dlen = utf8_casefold(um, name, buff, PATH_MAX); + + dlen = utf8_casefold(um, name_info, buff, PATH_MAX); if (dlen < 0) { kvfree(buff); goto opaque_seq; } folded.name = buff; folded.len = dlen; - r = __f2fs_dentry_hash(dir, &folded, fname); + r = __f2fs_dentry_hash(&folded, fname); kvfree(buff); return r; opaque_seq: #endif - return __f2fs_dentry_hash(dir, name_info, fname); + return __f2fs_dentry_hash(name_info, fname); } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index c68a32369f44..cabc7a69cee4 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -483,8 +483,8 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) ino = le32_to_cpu(de->ino); fake_mode = f2fs_get_de_type(de) << S_SHIFT; - err = f2fs_add_regular_entry(dir, &new_name, NULL, - de->hash_code, NULL, ino, fake_mode); + err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL, + ino, fake_mode); if (err) goto punch_dentry_pages; @@ -596,7 +596,7 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) } int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, - const struct fscrypt_name *fname, + const struct qstr *orig_name, struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); @@ -607,7 +607,6 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(new_name->len); struct page *page = NULL; - 
const struct qstr *orig_name = fname->usr_fname; int err = 0; ipage = f2fs_get_node_page(sbi, dir->i_ino); @@ -638,7 +637,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_wait_on_page_writeback(ipage, NODE, true, true); - name_hash = f2fs_dentry_hash(dir, new_name, fname); + name_hash = f2fs_dentry_hash(dir, new_name, NULL); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); set_page_dirty(ipage); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a8959c64bf3a..23d6cccdb4c1 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -492,7 +492,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } err = fscrypt_prepare_lookup(dir, dentry, &fname); - generic_set_encrypted_ci_d_ops(dir, dentry); if (err == -ENOENT) goto out_splice; if (err) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ce4d1930f369..e3e37be7f214 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3309,6 +3309,12 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) struct unicode_map *encoding; __u16 encoding_flags; + if (f2fs_sb_has_encrypt(sbi)) { + f2fs_err(sbi, + "Can't mount with encoding and encryption"); + return -EINVAL; + } + if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info, &encoding_flags)) { f2fs_err(sbi, @@ -3331,6 +3337,7 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) sbi->sb->s_encoding = encoding; sbi->sb->s_encoding_flags = encoding_flags; + sbi->sb->s_d_op = &f2fs_dentry_ops; } #else if (f2fs_sb_has_casefold(sbi)) { diff --git a/fs/inode.c b/fs/inode.c index 8c25e0df7a0e..4e30a37ef712 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -2167,7 +2166,7 @@ int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; - return fscrypt_prepare_setflags(inode, oldflags, flags); + return 0; } EXPORT_SYMBOL(vfs_ioc_setflags_prepare); diff --git 
a/fs/libfs.c b/fs/libfs.c index 4f2ac9ac0c9a..f66eb521d4f8 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1281,54 +1281,4 @@ int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) return ret; } EXPORT_SYMBOL(generic_ci_d_hash); - -static const struct dentry_operations generic_ci_dentry_ops = { - .d_hash = generic_ci_d_hash, - .d_compare = generic_ci_d_compare, -}; -#endif - -#ifdef CONFIG_FS_ENCRYPTION -static const struct dentry_operations generic_encrypted_dentry_ops = { - .d_revalidate = fscrypt_d_revalidate, -}; -#endif - -#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION) -static const struct dentry_operations generic_encrypted_ci_dentry_ops = { - .d_hash = generic_ci_d_hash, - .d_compare = generic_ci_d_compare, - .d_revalidate = fscrypt_d_revalidate, -}; -#endif - -/** - * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry - * @dir: parent of dentry whose ops to set - * @dentry: detnry to set ops on - * - * This function sets the dentry ops for the given dentry to handle both - * casefolding and encryption of the dentry name. 
- */ -void generic_set_encrypted_ci_d_ops(struct inode *dir, struct dentry *dentry) -{ -#ifdef CONFIG_FS_ENCRYPTION - if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) { -#ifdef CONFIG_UNICODE - if (dir->i_sb->s_encoding) { - d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops); - return; - } #endif - d_set_d_op(dentry, &generic_encrypted_dentry_ops); - return; - } -#endif -#ifdef CONFIG_UNICODE - if (dir->i_sb->s_encoding) { - d_set_d_op(dentry, &generic_ci_dentry_ops); - return; - } -#endif -} -EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops); diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index fe221d7d99d6..dfc6fdf019d7 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -7,7 +7,6 @@ config UBIFS_FS select CRYPTO if UBIFS_FS_ZLIB select CRYPTO_LZO if UBIFS_FS_LZO select CRYPTO_DEFLATE if UBIFS_FS_ZLIB - select FS_ENCRYPTION_ALGS if FS_ENCRYPTION depends on MTD_UBI help UBIFS is a file system for flash devices which works on top of UBI. diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 7d5c2cf95353..26ac11d0eb4b 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -208,7 +208,6 @@ static int dbg_check_name(const struct ubifs_info *c, return 0; } -static void ubifs_set_d_ops(struct inode *dir, struct dentry *dentry); static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -222,7 +221,6 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); err = fscrypt_prepare_lookup(dir, dentry, &nm); - ubifs_set_d_ops(dir, dentry); if (err == -ENOENT) return d_splice_alias(NULL, dentry); if (err) @@ -539,7 +537,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) if (encrypted) { err = fscrypt_get_encryption_info(dir); - if (err) + if (err && err != -ENOKEY) return err; err = fscrypt_fname_alloc_buffer(dir, UBIFS_MAX_NLEN, &fstr); @@ -1686,19 +1684,3 @@ const struct file_operations ubifs_dir_operations = { .compat_ioctl = 
ubifs_compat_ioctl, #endif }; - -#ifdef CONFIG_FS_ENCRYPTION -static const struct dentry_operations ubifs_encrypted_dentry_ops = { - .d_revalidate = fscrypt_d_revalidate, -}; -#endif - -static void ubifs_set_d_ops(struct inode *dir, struct dentry *dentry) -{ -#ifdef CONFIG_FS_ENCRYPTION - if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) { - d_set_d_op(dentry, &ubifs_encrypted_dentry_ops); - return; - } -#endif -} diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index daa59b98c8b4..203067d39855 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -229,9 +229,6 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_GET_ENCRYPTION_KEY_STATUS: return fscrypt_ioctl_get_key_status(file, (void __user *)arg); - case FS_IOC_GET_ENCRYPTION_NONCE: - return fscrypt_ioctl_get_nonce(file, (void __user *)arg); - default: return -ENOTTY; } @@ -254,7 +251,6 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_REMOVE_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: case FS_IOC_GET_ENCRYPTION_KEY_STATUS: - case FS_IOC_GET_ENCRYPTION_NONCE: break; default: return -ENOIOCTLCMD; diff --git a/include/linux/bio-crypt-ctx.h b/include/linux/bio-crypt-ctx.h index 8456a409fc21..ab22dbe7b880 100644 --- a/include/linux/bio-crypt-ctx.h +++ b/include/linux/bio-crypt-ctx.h @@ -31,8 +31,6 @@ enum blk_crypto_mode_num { * @data_unit_size_bits: log2 of data_unit_size * @size: size of this key in bytes (determined by @crypto_mode) * @hash: hash of this key, for keyslot manager use only - * @is_hw_wrapped: @raw points to a wrapped key to be used by an inline - * encryption hardware that accepts wrapped keys. * @raw: the raw bytes of this key. Only the first @size bytes are used. 
* * A blk_crypto_key is immutable once created, and many bios can reference it at @@ -44,7 +42,6 @@ struct blk_crypto_key { unsigned int data_unit_size_bits; unsigned int size; unsigned int hash; - bool is_hw_wrapped; u8 raw[BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE]; }; diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 7dc478a8c3ed..485cee0b92dd 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -18,15 +18,9 @@ bool blk_crypto_endio(struct bio *bio); int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, unsigned int raw_key_size, - bool is_hw_wrapped, enum blk_crypto_mode_num crypto_mode, unsigned int data_unit_size); -int blk_crypto_start_using_mode(enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size, - bool is_hw_wrapped_key, - struct request_queue *q); - int blk_crypto_evict_key(struct request_queue *q, const struct blk_crypto_key *key); @@ -46,10 +40,22 @@ static inline bool blk_crypto_endio(struct bio *bio) #ifdef CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK +int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, + unsigned int data_unit_size, + struct request_queue *q); + int blk_crypto_fallback_init(void); #else /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */ +static inline int +blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, + unsigned int data_unit_size, + struct request_queue *q) +{ + return 0; +} + static inline int blk_crypto_fallback_init(void) { return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index a15c86a92a08..31a6ae9e28c8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3212,8 +3212,6 @@ static inline bool needs_casefold(const struct inode *dir) return 0; } #endif -extern void generic_set_encrypted_ci_d_ops(struct inode *dir, - struct dentry *dentry); #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 2a9cd6c5f361..6ac092cc4067 100644 
--- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -76,21 +76,6 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return READ_ONCE(inode->i_crypt_info) != NULL; } -/** - * fscrypt_needs_contents_encryption() - check whether an inode needs - * contents encryption - * - * Return: %true iff the inode is an encrypted regular file and the kernel was - * built with fscrypt support. - * - * If you need to know whether the encrypt bit is set even when the kernel was - * built without fscrypt support, you must use IS_ENCRYPTED() directly instead. - */ -static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) -{ - return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); -} - static inline bool fscrypt_dummy_context_enabled(struct inode *inode) { return inode->i_sb->s_cop->dummy_context && @@ -138,13 +123,11 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) } extern void fscrypt_free_bounce_page(struct page *bounce_page); -extern int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* policy.c */ extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); extern int fscrypt_ioctl_get_policy(struct file *, void __user *); extern int fscrypt_ioctl_get_policy_ex(struct file *, void __user *); -extern int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg); extern int fscrypt_has_permitted_context(struct inode *, struct inode *); extern int fscrypt_inherit_context(struct inode *, struct inode *, void *, bool); @@ -176,14 +159,82 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, struct fscrypt_str *); extern void fscrypt_fname_free_buffer(struct fscrypt_str *); -extern int fscrypt_fname_disk_to_usr(const struct inode *inode, - u32 hash, u32 minor_hash, - const struct fscrypt_str *iname, - struct fscrypt_str *oname); -extern bool fscrypt_match_name(const struct fscrypt_name 
*fname, - const u8 *de_name, u32 de_name_len); -extern u64 fscrypt_fname_siphash(const struct inode *dir, - const struct qstr *name); +extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, + const struct fscrypt_str *, struct fscrypt_str *); + +#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 + +/* Extracts the second-to-last ciphertext block; see explanation below */ +#define FSCRYPT_FNAME_DIGEST(name, len) \ + ((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \ + FS_CRYPTO_BLOCK_SIZE)) + +#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE + +/** + * fscrypt_digested_name - alternate identifier for an on-disk filename + * + * When userspace lists an encrypted directory without access to the key, + * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE + * bytes are shown in this abbreviated form (base64-encoded) rather than as the + * full ciphertext (base64-encoded). This is necessary to allow supporting + * filenames up to NAME_MAX bytes, since base64 encoding expands the length. + * + * To make it possible for filesystems to still find the correct directory entry + * despite not knowing the full on-disk name, we encode any filesystem-specific + * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups, + * followed by the second-to-last ciphertext block of the filename. Due to the + * use of the CBC-CTS encryption mode, the second-to-last ciphertext block + * depends on the full plaintext. (Note that ciphertext stealing causes the + * last two blocks to appear "flipped".) This makes accidental collisions very + * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they + * share the same filesystem-specific hashes. + * + * However, this scheme isn't immune to intentional collisions, which can be + * created by anyone able to create arbitrary plaintext filenames and view them + * without the key. 
Making the "digest" be a real cryptographic hash like + * SHA-256 over the full ciphertext would prevent this, although it would be + * less efficient and harder to implement, especially since the filesystem would + * need to calculate it for each directory entry examined during a search. + */ +struct fscrypt_digested_name { + u32 hash; + u32 minor_hash; + u8 digest[FSCRYPT_FNAME_DIGEST_SIZE]; +}; + +/** + * fscrypt_match_name() - test whether the given name matches a directory entry + * @fname: the name being searched for + * @de_name: the name from the directory entry + * @de_name_len: the length of @de_name in bytes + * + * Normally @fname->disk_name will be set, and in that case we simply compare + * that to the name stored in the directory entry. The only exception is that + * if we don't have the key for an encrypted directory and a filename in it is + * very long, then we won't have the full disk_name and we'll instead need to + * match against the fscrypt_digested_name. + * + * Return: %true if the name matches, otherwise %false. 
+ */ +static inline bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + if (unlikely(!fname->disk_name.name)) { + const struct fscrypt_digested_name *n = + (const void *)fname->crypto_buf.name; + if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_')) + return false; + if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) + return false; + return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len), + n->digest, FSCRYPT_FNAME_DIGEST_SIZE); + } + + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); +} /* bio.c */ extern void fscrypt_decrypt_bio(struct bio *); @@ -201,8 +252,6 @@ extern int __fscrypt_prepare_rename(struct inode *old_dir, unsigned int flags); extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname); -extern int fscrypt_prepare_setflags(struct inode *inode, - unsigned int oldflags, unsigned int flags); extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link); @@ -219,11 +268,6 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return false; } -static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) -{ - return false; -} - static inline bool fscrypt_dummy_context_enabled(struct inode *inode) { return false; @@ -303,11 +347,6 @@ static inline int fscrypt_ioctl_get_policy_ex(struct file *filp, return -EOPNOTSUPP; } -static inline int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg) -{ - return -EOPNOTSUPP; -} - static inline int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) { @@ -412,7 +451,7 @@ static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) return; } -static inline int fscrypt_fname_disk_to_usr(const struct inode *inode, +static inline int fscrypt_fname_disk_to_usr(struct inode *inode, u32 hash, u32 
minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname) @@ -429,13 +468,6 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); } -static inline u64 fscrypt_fname_siphash(const struct inode *dir, - const struct qstr *name) -{ - WARN_ON_ONCE(1); - return 0; -} - /* bio.c */ static inline void fscrypt_decrypt_bio(struct bio *bio) { @@ -478,13 +510,6 @@ static inline int __fscrypt_prepare_lookup(struct inode *dir, return -EOPNOTSUPP; } -static inline int fscrypt_prepare_setflags(struct inode *inode, - unsigned int oldflags, - unsigned int flags) -{ - return 0; -} - static inline int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, @@ -677,9 +702,8 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * filenames are presented in encrypted form. Therefore, we'll try to set up * the directory's encryption key, but even without it the lookup can continue. * - * After calling this function, a filesystem should ensure that it's dentry - * operations contain fscrypt_d_revalidate if DCACHE_ENCRYPTED_NAME was set, - * so that the dentry can be invalidated if the key is later added. + * This also installs a custom ->d_revalidate() method which will invalidate the + * dentry if it was created without the key and the key is later added. * * Return: 0 on success; -ENOENT if key is unavailable but the filename isn't a * correctly formed encoded ciphertext name, so a negative dentry should be diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h index cd65bea927db..2f4aac2851bf 100644 --- a/include/linux/keyslot-manager.h +++ b/include/linux/keyslot-manager.h @@ -8,15 +8,6 @@ #include -/* Inline crypto feature bits. Must set at least one. 
*/ -enum { - /* Support for standard software-specified keys */ - BLK_CRYPTO_FEATURE_STANDARD_KEYS = BIT(0), - - /* Support for hardware-wrapped keys */ - BLK_CRYPTO_FEATURE_WRAPPED_KEYS = BIT(1), -}; - #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct keyslot_manager; @@ -54,7 +45,6 @@ struct keyslot_manager *keyslot_manager_create( struct device *dev, unsigned int num_slots, const struct keyslot_mgmt_ll_ops *ksm_ops, - unsigned int features, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data); @@ -67,8 +57,7 @@ void keyslot_manager_put_slot(struct keyslot_manager *ksm, unsigned int slot); bool keyslot_manager_crypto_mode_supported(struct keyslot_manager *ksm, enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size, - bool is_hw_wrapped_key); + unsigned int data_unit_size); int keyslot_manager_evict_key(struct keyslot_manager *ksm, const struct blk_crypto_key *key); @@ -82,7 +71,6 @@ void keyslot_manager_destroy(struct keyslot_manager *ksm); struct keyslot_manager *keyslot_manager_create_passthrough( struct device *dev, const struct keyslot_mgmt_ll_ops *ksm_ops, - unsigned int features, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data); diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index 320a6381576e..7d150d800abc 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -8,7 +8,6 @@ #ifndef _UAPI_LINUX_FSCRYPT_H #define _UAPI_LINUX_FSCRYPT_H -#include #include /* Encryption policy flags */ @@ -110,22 +109,11 @@ struct fscrypt_key_specifier { } u; }; -/* - * Payload of Linux keyring key of type "fscrypt-provisioning", referenced by - * fscrypt_add_key_arg::key_id as an alternative to fscrypt_add_key_arg::raw. 
- */ -struct fscrypt_provisioning_key_payload { - __u32 type; - __u32 __reserved; - __u8 raw[]; -}; - /* Struct passed to FS_IOC_ADD_ENCRYPTION_KEY */ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; - __u32 key_id; - __u32 __reserved[7]; + __u32 __reserved[8]; /* N.B.: "temporary" flag, not reserved upstream */ #define __FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001 __u32 __flags; @@ -166,7 +154,6 @@ struct fscrypt_get_key_status_arg { #define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg) #define FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS _IOWR('f', 25, struct fscrypt_remove_key_arg) #define FS_IOC_GET_ENCRYPTION_KEY_STATUS _IOWR('f', 26, struct fscrypt_get_key_status_arg) -#define FS_IOC_GET_ENCRYPTION_NONCE _IOR('f', 27, __u8[16]) /**********************************************************************/ -- GitLab From d6fb207052dc6c3fd1a3928a978096d66fd50bfd Mon Sep 17 00:00:00 2001 From: Srinivasarao P Date: Tue, 4 Aug 2020 14:21:14 +0530 Subject: [PATCH 1278/1278] Revert "clk: qcom: rcg2: Don't crash if our parent can't be found; return an error" This reverts commit ef7339a38eccc07dbbc6a4dde0c0d4d3be157a87. Change-Id: I4ca79e90de2ab00a7d679ca038661f5605c06947 Signed-off-by: Srinivasarao P --- drivers/clk/qcom/clk-rcg2.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index 6091d9b6a27b..2e23f8754f8e 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -210,9 +210,6 @@ static int _freq_tbl_determine_rate(struct clk_hw *hw, const struct freq_tbl *f, clk_flags = clk_hw_get_flags(hw); p = clk_hw_get_parent_by_index(hw, index); - if (!p) - return -EINVAL; - if (clk_flags & CLK_SET_RATE_PARENT) { if (f->pre_div) { if (!rate) -- GitLab