Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 79257514 authored by Linus Torvalds
Browse files
Pull fsnotify updates from Jan Kara:
 "Amir's patches to implement superblock fanotify watches, Xiaoming's
  patch to enable reporting of thread IDs in fanotify events instead of
  TGIDs (sadly the patch got mis-attributed to Amir and I've noticed
  only now), and a fix of possible oops on umount caused by fsnotify
  infrastructure"

* tag 'for_v4.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  fsnotify: Fix busy inodes during unmount
  fs: group frequently accessed fields of struct super_block together
  fanotify: support reporting thread id instead of process id
  fanotify: add BUILD_BUG_ON() to count the bits of fanotify constants
  fsnotify: convert runtime BUG_ON() to BUILD_BUG_ON()
  fanotify: deprecate uapi FAN_ALL_* constants
  fanotify: simplify handling of FAN_ONDIR
  fsnotify: generalize handling of extra event flags
  fanotify: fix collision of internal and uapi mark flags
  fanotify: store fanotify_init() flags in group's fanotify_data
  fanotify: add API to attach/detach super block mark
  fsnotify: send path type events to group with super block marks
  fsnotify: add super block object type
parents 7da4221b 721fb6fb
Loading
Loading
Loading
Loading
+11 −6
Original line number Diff line number Diff line
@@ -25,7 +25,7 @@ static bool should_merge(struct fsnotify_event *old_fsn,
	old = FANOTIFY_E(old_fsn);
	new = FANOTIFY_E(new_fsn);

	if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid &&
	if (old_fsn->inode == new_fsn->inode && old->pid == new->pid &&
	    old->path.mnt == new->path.mnt &&
	    old->path.dentry == new->path.dentry)
		return true;
@@ -131,8 +131,8 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info,
	    !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
		return false;

	if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask &
				 ~marks_ignored_mask)
	if (event_mask & FANOTIFY_OUTGOING_EVENTS &
	    marks_mask & ~marks_ignored_mask)
		return true;

	return false;
@@ -171,7 +171,10 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
		goto out;
init: __maybe_unused
	fsnotify_init_event(&event->fse, inode, mask);
	event->tgid = get_pid(task_tgid(current));
	if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
		event->pid = get_pid(task_pid(current));
	else
		event->pid = get_pid(task_tgid(current));
	if (path) {
		event->path = *path;
		path_get(&event->path);
@@ -205,6 +208,8 @@ static int fanotify_handle_event(struct fsnotify_group *group,
	BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
	BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);

	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 10);

	if (!fanotify_should_send_event(iter_info, mask, data, data_type))
		return 0;

@@ -236,7 +241,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
	ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
	if (ret) {
		/* Permission events shouldn't be merged */
		BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);
		BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
		/* Our event wasn't used in the end. Free it. */
		fsnotify_destroy_event(group, fsn_event);

@@ -268,7 +273,7 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event)

	event = FANOTIFY_E(fsn_event);
	path_put(&event->path);
	put_pid(event->tgid);
	put_pid(event->pid);
	if (fanotify_is_perm_event(fsn_event->mask)) {
		kmem_cache_free(fanotify_perm_event_cachep,
				FANOTIFY_PE(fsn_event));
+2 −2
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@ struct fanotify_event_info {
	 * during this object's lifetime
	 */
	struct path path;
	struct pid *tgid;
	struct pid *pid;
};

/*
@@ -44,7 +44,7 @@ FANOTIFY_PE(struct fsnotify_event *fse)
static inline bool fanotify_is_perm_event(u32 mask)
{
	return IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS) &&
		mask & FAN_ALL_PERM_EVENTS;
		mask & FANOTIFY_PERM_EVENTS;
}

static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
+58 −45
Original line number Diff line number Diff line
@@ -131,8 +131,8 @@ static int fill_event_metadata(struct fsnotify_group *group,
	metadata->metadata_len = FAN_EVENT_METADATA_LEN;
	metadata->vers = FANOTIFY_METADATA_VERSION;
	metadata->reserved = 0;
	metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS;
	metadata->pid = pid_vnr(event->tgid);
	metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS;
	metadata->pid = pid_vnr(event->pid);
	if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW))
		metadata->fd = FAN_NOFD;
	else {
@@ -191,7 +191,7 @@ static int process_access_response(struct fsnotify_group *group,
	if (fd < 0)
		return -EINVAL;

	if ((response & FAN_AUDIT) && !group->fanotify_data.audit)
	if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
		return -EINVAL;

	event = dequeue_event(group, fd);
@@ -395,7 +395,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
	 */
	while (!fsnotify_notify_queue_is_empty(group)) {
		fsn_event = fsnotify_remove_first_event(group);
		if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) {
		if (!(fsn_event->mask & FANOTIFY_PERM_EVENTS)) {
			spin_unlock(&group->notification_lock);
			fsnotify_destroy_event(group, fsn_event);
			spin_lock(&group->notification_lock);
@@ -506,18 +506,10 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,

	spin_lock(&fsn_mark->lock);
	if (!(flags & FAN_MARK_IGNORED_MASK)) {
		__u32 tmask = fsn_mark->mask & ~mask;

		if (flags & FAN_MARK_ONDIR)
			tmask &= ~FAN_ONDIR;

		oldmask = fsn_mark->mask;
		fsn_mark->mask = tmask;
		fsn_mark->mask &= ~mask;
	} else {
		__u32 tmask = fsn_mark->ignored_mask & ~mask;
		if (flags & FAN_MARK_ONDIR)
			tmask &= ~FAN_ONDIR;
		fsn_mark->ignored_mask = tmask;
		fsn_mark->ignored_mask &= ~mask;
	}
	*destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
	spin_unlock(&fsn_mark->lock);
@@ -563,6 +555,13 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
				    mask, flags);
}

/*
 * Super block flavour of the mark-removal helpers: forwards @group, @mask
 * and @flags to fanotify_remove_mark(), pointing it at the mark list hanging
 * off @sb (sb->s_fsnotify_marks). The callee's result is returned as-is.
 */
static int fanotify_remove_sb_mark(struct fsnotify_group *group,
				   struct super_block *sb, __u32 mask,
				   unsigned int flags)
{
	int ret;

	ret = fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, flags);
	return ret;
}

static int fanotify_remove_inode_mark(struct fsnotify_group *group,
				      struct inode *inode, __u32 mask,
				      unsigned int flags)
@@ -579,19 +578,10 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,

	spin_lock(&fsn_mark->lock);
	if (!(flags & FAN_MARK_IGNORED_MASK)) {
		__u32 tmask = fsn_mark->mask | mask;

		if (flags & FAN_MARK_ONDIR)
			tmask |= FAN_ONDIR;

		oldmask = fsn_mark->mask;
		fsn_mark->mask = tmask;
		fsn_mark->mask |= mask;
	} else {
		__u32 tmask = fsn_mark->ignored_mask | mask;
		if (flags & FAN_MARK_ONDIR)
			tmask |= FAN_ONDIR;

		fsn_mark->ignored_mask = tmask;
		fsn_mark->ignored_mask |= mask;
		if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
			fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
	}
@@ -658,6 +648,14 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
				 FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags);
}

/*
 * Super block flavour of the mark-add helpers: forwards @group, @mask and
 * @flags to fanotify_add_mark(), pointing it at the mark list hanging off
 * @sb (sb->s_fsnotify_marks) and tagging the object type as
 * FSNOTIFY_OBJ_TYPE_SB. The callee's result is returned as-is.
 */
static int fanotify_add_sb_mark(struct fsnotify_group *group,
				struct super_block *sb, __u32 mask,
				unsigned int flags)
{
	int ret;

	ret = fanotify_add_mark(group, &sb->s_fsnotify_marks,
				FSNOTIFY_OBJ_TYPE_SB, mask, flags);
	return ret;
}

static int fanotify_add_inode_mark(struct fsnotify_group *group,
				   struct inode *inode, __u32 mask,
				   unsigned int flags)
@@ -686,16 +684,16 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
	struct user_struct *user;
	struct fanotify_event_info *oevent;

	pr_debug("%s: flags=%d event_f_flags=%d\n",
	pr_debug("%s: flags=%x event_f_flags=%x\n",
		 __func__, flags, event_f_flags);

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

#ifdef CONFIG_AUDITSYSCALL
	if (flags & ~(FAN_ALL_INIT_FLAGS | FAN_ENABLE_AUDIT))
	if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT))
#else
	if (flags & ~FAN_ALL_INIT_FLAGS)
	if (flags & ~FANOTIFY_INIT_FLAGS)
#endif
		return -EINVAL;

@@ -731,6 +729,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
	}

	group->fanotify_data.user = user;
	group->fanotify_data.flags = flags;
	atomic_inc(&user->fanotify_listeners);
	group->memcg = get_mem_cgroup_from_mm(current->mm);

@@ -746,7 +745,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
	group->fanotify_data.f_flags = event_f_flags;
	init_waitqueue_head(&group->fanotify_data.access_waitq);
	INIT_LIST_HEAD(&group->fanotify_data.access_list);
	switch (flags & FAN_ALL_CLASS_BITS) {
	switch (flags & FANOTIFY_CLASS_BITS) {
	case FAN_CLASS_NOTIF:
		group->priority = FS_PRIO_0;
		break;
@@ -783,7 +782,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
		fd = -EPERM;
		if (!capable(CAP_AUDIT_WRITE))
			goto out_destroy_group;
		group->fanotify_data.audit = true;
	}

	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
@@ -805,7 +803,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
	struct fsnotify_group *group;
	struct fd f;
	struct path path;
	u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD;
	u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
	int ret;

	pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
@@ -815,8 +814,18 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
	if (mask & ((__u64)0xffffffff << 32))
		return -EINVAL;

	if (flags & ~FAN_ALL_MARK_FLAGS)
	if (flags & ~FANOTIFY_MARK_FLAGS)
		return -EINVAL;

	switch (mark_type) {
	case FAN_MARK_INODE:
	case FAN_MARK_MOUNT:
	case FAN_MARK_FILESYSTEM:
		break;
	default:
		return -EINVAL;
	}

	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
	case FAN_MARK_ADD:		/* fallthrough */
	case FAN_MARK_REMOVE:
@@ -824,20 +833,15 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
			return -EINVAL;
		break;
	case FAN_MARK_FLUSH:
		if (flags & ~(FAN_MARK_MOUNT | FAN_MARK_FLUSH))
		if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH))
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (mask & FAN_ONDIR) {
		flags |= FAN_MARK_ONDIR;
		mask &= ~FAN_ONDIR;
	}

	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
		valid_mask |= FAN_ALL_PERM_EVENTS;
		valid_mask |= FANOTIFY_PERM_EVENTS;

	if (mask & ~valid_mask)
		return -EINVAL;
@@ -857,14 +861,16 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
	 * allowed to set permissions events.
	 */
	ret = -EINVAL;
	if (mask & FAN_ALL_PERM_EVENTS &&
	if (mask & FANOTIFY_PERM_EVENTS &&
	    group->priority == FS_PRIO_0)
		goto fput_and_out;

	if (flags & FAN_MARK_FLUSH) {
		ret = 0;
		if (flags & FAN_MARK_MOUNT)
		if (mark_type == FAN_MARK_MOUNT)
			fsnotify_clear_vfsmount_marks_by_group(group);
		else if (mark_type == FAN_MARK_FILESYSTEM)
			fsnotify_clear_sb_marks_by_group(group);
		else
			fsnotify_clear_inode_marks_by_group(group);
		goto fput_and_out;
@@ -875,7 +881,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
		goto fput_and_out;

	/* inode held in place by reference to path; group by fget on fd */
	if (!(flags & FAN_MARK_MOUNT))
	if (mark_type == FAN_MARK_INODE)
		inode = path.dentry->d_inode;
	else
		mnt = path.mnt;
@@ -883,14 +889,18 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
	/* create/update an inode mark */
	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
	case FAN_MARK_ADD:
		if (flags & FAN_MARK_MOUNT)
		if (mark_type == FAN_MARK_MOUNT)
			ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
		else if (mark_type == FAN_MARK_FILESYSTEM)
			ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, flags);
		else
			ret = fanotify_add_inode_mark(group, inode, mask, flags);
		break;
	case FAN_MARK_REMOVE:
		if (flags & FAN_MARK_MOUNT)
		if (mark_type == FAN_MARK_MOUNT)
			ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags);
		else if (mark_type == FAN_MARK_FILESYSTEM)
			ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, flags);
		else
			ret = fanotify_remove_inode_mark(group, inode, mask, flags);
		break;
@@ -934,6 +944,9 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
 */
static int __init fanotify_user_setup(void)
{
	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 7);
	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);

	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
					 SLAB_PANIC|SLAB_ACCOUNT);
	fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
+6 −23
Original line number Diff line number Diff line
@@ -131,37 +131,20 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)

		seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
			   mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
	} else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) {
		struct super_block *sb = fsnotify_conn_sb(mark->connector);

		seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
			   sb->s_dev, mflags, mark->mask, mark->ignored_mask);
	}
}

void fanotify_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct fsnotify_group *group = f->private_data;
	unsigned int flags = 0;

	switch (group->priority) {
	case FS_PRIO_0:
		flags |= FAN_CLASS_NOTIF;
		break;
	case FS_PRIO_1:
		flags |= FAN_CLASS_CONTENT;
		break;
	case FS_PRIO_2:
		flags |= FAN_CLASS_PRE_CONTENT;
		break;
	}

	if (group->max_events == UINT_MAX)
		flags |= FAN_UNLIMITED_QUEUE;

	if (group->fanotify_data.max_marks == UINT_MAX)
		flags |= FAN_UNLIMITED_MARKS;

	if (group->fanotify_data.audit)
		flags |= FAN_ENABLE_AUDIT;

	seq_printf(m, "fanotify flags:%x event-flags:%x\n",
		   flags, group->fanotify_data.f_flags);
		   group->fanotify_data.flags, group->fanotify_data.f_flags);

	show_fdinfo(m, f, fanotify_fdinfo);
}
+27 −15
Original line number Diff line number Diff line
@@ -48,7 +48,7 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
 * Called during unmount with no locks held, so needs to be safe against
 * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
 */
void fsnotify_unmount_inodes(struct super_block *sb)
static void fsnotify_unmount_inodes(struct super_block *sb)
{
	struct inode *inode, *iput_inode = NULL;

@@ -96,6 +96,15 @@ void fsnotify_unmount_inodes(struct super_block *sb)

	if (iput_inode)
		iput(iput_inode);
	/* Wait for outstanding inode references from connectors */
	wait_var_event(&sb->s_fsnotify_inode_refs,
		       !atomic_long_read(&sb->s_fsnotify_inode_refs));
}

/*
 * Drop the fsnotify state associated with super block @sb.
 *
 * Runs fsnotify_unmount_inodes() on @sb first and only then
 * fsnotify_clear_marks_by_sb(); the two calls are made unconditionally and
 * in this order.
 *
 * NOTE(review): presumably invoked from the super block shutdown path in
 * place of a direct fsnotify_unmount_inodes() call (which becomes static in
 * this change) - confirm against the callers.
 */
void fsnotify_sb_delete(struct super_block *sb)
{
	fsnotify_unmount_inodes(sb);
	fsnotify_clear_marks_by_sb(sb);
}

/*
@@ -190,7 +199,7 @@ static int send_to_group(struct inode *to_tell,
			 struct fsnotify_iter_info *iter_info)
{
	struct fsnotify_group *group = NULL;
	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
	__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
	__u32 marks_mask = 0;
	__u32 marks_ignored_mask = 0;
	struct fsnotify_mark *mark;
@@ -319,15 +328,17 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
	     const unsigned char *file_name, u32 cookie)
{
	struct fsnotify_iter_info iter_info = {};
	struct mount *mnt;
	struct super_block *sb = NULL;
	struct mount *mnt = NULL;
	__u32 mnt_or_sb_mask = 0;
	int ret = 0;
	/* global tests shouldn't care about events on child only the specific event */
	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
	__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);

	if (data_is == FSNOTIFY_EVENT_PATH)
	if (data_is == FSNOTIFY_EVENT_PATH) {
		mnt = real_mount(((const struct path *)data)->mnt);
	else
		mnt = NULL;
		sb = mnt->mnt.mnt_sb;
		mnt_or_sb_mask = mnt->mnt_fsnotify_mask | sb->s_fsnotify_mask;
	}

	/*
	 * Optimization: srcu_read_lock() has a memory barrier which can
@@ -337,16 +348,15 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
	 * need SRCU to keep them "alive".
	 */
	if (!to_tell->i_fsnotify_marks &&
	    (!mnt || !mnt->mnt_fsnotify_marks))
	    (!mnt || (!mnt->mnt_fsnotify_marks && !sb->s_fsnotify_marks)))
		return 0;
	/*
	 * if this is a modify event we may need to clear the ignored masks
	 * otherwise return if neither the inode nor the vfsmount care about
	 * otherwise return if neither the inode nor the vfsmount/sb care about
	 * this type of event.
	 */
	if (!(mask & FS_MODIFY) &&
	    !(test_mask & to_tell->i_fsnotify_mask) &&
	    !(mnt && test_mask & mnt->mnt_fsnotify_mask))
	    !(test_mask & (to_tell->i_fsnotify_mask | mnt_or_sb_mask)))
		return 0;

	iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
@@ -356,11 +366,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
	if (mnt) {
		iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] =
			fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
		iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] =
			fsnotify_first_mark(&sb->s_fsnotify_marks);
	}

	/*
	 * We need to merge inode & vfsmount mark lists so that inode mark
	 * ignore masks are properly reflected for mount mark notifications.
	 * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
	 * ignore masks are properly reflected for mount/sb mark notifications.
	 * That's why this traversal is so complicated...
	 */
	while (fsnotify_iter_select_report_types(&iter_info)) {
@@ -386,7 +398,7 @@ static __init int fsnotify_init(void)
{
	int ret;

	BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
	BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23);

	ret = init_srcu_struct(&fsnotify_mark_srcu);
	if (ret)
Loading