ipc/sem.c: fix complex_count vs. simple op race (5864a2fd) · Commits · e / devices / android_kernel_fairphone_FP4

include/linux/sem.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -21,6 +21,7 @@ struct sem_array {
		struct list_head list_id; /* undo requests on this array */
		int sem_nsems; /* no. of semaphores in array */
		int complex_count; /* pending complex operations */
		bool complex_mode; /* no parallel simple ops */
		};

		#ifdef CONFIG_SYSVIPC

ipc/sem.c

+83 −55

Original line number	Diff line number	Diff line
		@@ -162,14 +162,21 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);

		/*
		* Locking:
		* a) global sem_lock() for read/write
		* sem_undo.id_next,
		* sem_array.complex_count,
		* sem_array.pending{_alter,_cont},
		* sem_array.sem_undo: global sem_lock() for read/write
		* sem_undo.proc_next: only "current" is allowed to read/write that field.
		* sem_array.complex_mode
		* sem_array.pending{_alter,_const},
		* sem_array.sem_undo
		*
		* b) global or semaphore sem_lock() for read/write:
		* sem_array.sem_base[i].pending_{const,alter}:
		* global or semaphore sem_lock() for read/write
		* sem_array.complex_mode (for read)
		*
		* c) special:
		* sem_undo_list.list_proc:
		* * undo_list->lock for write
		* * rcu for read
		*/

		#define sc_semmsl sem_ctls[0]
		@@ -260,30 +267,61 @@ static void sem_rcu_free(struct rcu_head *head)
		}

		/*
		* Wait until all currently ongoing simple ops have completed.
		* Enter the mode suitable for non-simple operations:
		* Caller must own sem_perm.lock.
		* New simple ops cannot start, because simple ops first check
		* that sem_perm.lock is free.
		* that a) sem_perm.lock is free and b) complex_count is 0.
		*/
		static void sem_wait_array(struct sem_array *sma)
		static void complexmode_enter(struct sem_array *sma)
		{
		int i;
		struct sem *sem;

		if (sma->complex_count) {
		/* The thread that increased sma->complex_count waited on
		* all sem->lock locks. Thus we don't need to wait again.
		*/
		if (sma->complex_mode) {
		/* We are already in complex_mode. Nothing to do */
		return;
		}

		/* We need a full barrier after setting complex_mode:
		* The write to complex_mode must be visible
		* before we read the first sem->lock spinlock state.
		*/
		smp_store_mb(sma->complex_mode, true);

		for (i = 0; i < sma->sem_nsems; i++) {
		sem = sma->sem_base + i;
		spin_unlock_wait(&sem->lock);
		}
		/*
		* spin_unlock_wait() is not a memory barrier, it is only a
		* control barrier. The code must pair with spin_unlock(&sem->lock),
		* thus just the control barrier is insufficient.
		*
		* smp_rmb() is sufficient, as writes cannot pass the control barrier.
		*/
		smp_rmb();
		}

		/*
		* Attempt to switch the array back out of complex mode so that
		* simple operations are permitted again.
		* The caller must hold sem_perm.lock.
		*/
		static void complexmode_tryleave(struct sem_array *sma)
		{
			/*
			* A non-zero complex_count means complex operations are
			* still sleeping on this array; in that case complex mode
			* has to stay enabled and nothing is changed here.
			*/
			if (!sma->complex_count) {
				/*
				* A simple operation may start right after
				* complex_mode becomes false. The release store
				* makes every memory write done by the current
				* operation visible before the flag is cleared.
				*/
				smp_store_release(&sma->complex_mode, false);
			}
		}

		#define SEM_GLOBAL_LOCK (-1)
		/*
		* If the request contains only one semaphore operation, and there are
		* no complex transactions pending, lock only the semaphore involved.
		@@ -300,57 +338,43 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
		/* Complex operation - acquire a full lock */
		ipc_lock_object(&sma->sem_perm);

		/* And wait until all simple ops that are processed
		* right now have dropped their locks.
		*/
		sem_wait_array(sma);
		return -1;
		/* Prevent parallel simple ops */
		complexmode_enter(sma);
		return SEM_GLOBAL_LOCK;
		}

		/*
		* Only one semaphore affected - try to optimize locking.
		* The rules are:
		* - optimized locking is possible if no complex operation
		* Optimized locking is possible if no complex operation
		* is either enqueued or processed right now.
		* - The test for enqueued complex ops is simple:
		* sma->complex_count != 0
		* - Testing for complex ops that are processed right now is
		* a bit more difficult. Complex ops acquire the full lock
		* and first wait that the running simple ops have completed.
		* (see above)
		* Thus: If we own a simple lock and the global lock is free
		* and complex_count is now 0, then it will stay 0 and
		* thus just locking sem->lock is sufficient.
		*
		* Both facts are tracked by complex_mode.
		*/
		sem = sma->sem_base + sops->sem_num;

		if (sma->complex_count == 0) {
		/*
		* Initial check for complex_mode. Just an optimization,
		* no locking, no memory barrier.
		*/
		if (!sma->complex_mode) {
		/*
		* It appears that no complex operation is around.
		* Acquire the per-semaphore lock.
		*/
		spin_lock(&sem->lock);

		/* Then check that the global lock is free */
		if (!spin_is_locked(&sma->sem_perm.lock)) {
		/*
		* We need a memory barrier with acquire semantics,
		* otherwise we can race with another thread that does:
		* complex_count++;
		* spin_unlock(sem_perm.lock);
		* See 51d7d5205d33
		* ("powerpc: Add smp_mb() to arch_spin_is_locked()"):
		* A full barrier is required: the write of sem->lock
		* must be visible before the read is executed
		*/
		smp_acquire__after_ctrl_dep();
		smp_mb();

		/*
		* Now repeat the test of complex_count:
		* It can't change anymore until we drop sem->lock.
		* Thus: if it is now 0, then it will stay 0.
		*/
		if (sma->complex_count == 0) {
		if (!smp_load_acquire(&sma->complex_mode)) {
		/* fast path successful! */
		return sops->sem_num;
		}
		}
		spin_unlock(&sem->lock);
		}

		@@ -369,15 +393,16 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
		/* Not a false alarm, thus complete the sequence for a
		* full lock.
		*/
		sem_wait_array(sma);
		return -1;
		complexmode_enter(sma);
		return SEM_GLOBAL_LOCK;
		}
		}

		static inline void sem_unlock(struct sem_array *sma, int locknum)
		{
		if (locknum == -1) {
		if (locknum == SEM_GLOBAL_LOCK) {
		unmerge_queues(sma);
		complexmode_tryleave(sma);
		ipc_unlock_object(&sma->sem_perm);
		} else {
		struct sem *sem = sma->sem_base + locknum;
		@@ -529,6 +554,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
		}

		sma->complex_count = 0;
		sma->complex_mode = true; /* dropped by sem_unlock below */
		INIT_LIST_HEAD(&sma->pending_alter);
		INIT_LIST_HEAD(&sma->pending_const);
		INIT_LIST_HEAD(&sma->list_id);
		@@ -2184,10 +2210,10 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
		/*
		* The proc interface isn't aware of sem_lock(), it calls
		* ipc_lock_object() directly (in sysvipc_find_ipc).
		* In order to stay compatible with sem_lock(), we must wait until
		* all simple semop() calls have left their critical regions.
		* In order to stay compatible with sem_lock(), we must
		* enter / leave complex_mode.
		*/
		sem_wait_array(sma);
		complexmode_enter(sma);

		sem_otime = get_semotime(sma);

		@@ -2204,6 +2230,8 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
		sem_otime,
		sma->sem_ctime);

		complexmode_tryleave(sma);

		return 0;
		}
		#endif