Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0cfb6aee authored by Guillaume Knispel's avatar Guillaume Knispel Committed by Linus Torvalds
Browse files

ipc: optimize semget/shmget/msgget for lots of keys

ipc_findkey() used to scan all objects to look for the wanted key.  This
is slow when using a high number of keys.  This change adds an rhashtable
of kern_ipc_perm objects in ipc_ids, so that one lookup cease to be O(n).

This change gives a 865% improvement of benchmark reaim.jobs_per_min on a
56 threads Intel(R) Xeon(R) CPU E5-2695 v3 @ 2.30GHz with 256G memory [1]

Other (more micro) benchmark results, by the author: On an i5 laptop, the
following loop executed right after a reboot took, without and with this
change:

    for (int i = 0, k=0x424242; i < KEYS; ++i)
        semget(k++, 1, IPC_CREAT | 0600);

                 total       total          max single  max single
   KEYS        without        with        call without   call with

      1            3.5         4.9   µs            3.5         4.9
     10            7.6         8.6   µs            3.7         4.7
     32           16.2        15.9   µs            4.3         5.3
    100           72.9        41.8   µs            3.7         4.7
   1000        5,630.0       502.0   µs             *           *
  10000    1,340,000.0     7,240.0   µs             *           *
  31900   17,600,000.0    22,200.0   µs             *           *

 *: unreliable measure: high variance

The duration for a lookup-only usage was obtained by the same loop once
the keys are present:

                 total       total          max single  max single
   KEYS        without        with        call without   call with

      1            2.1         2.5   µs            2.1         2.5
     10            4.5         4.8   µs            2.2         2.3
     32           13.0        10.8   µs            2.3         2.8
    100           82.9        25.1   µs             *          2.3
   1000        5,780.0       217.0   µs             *           *
  10000    1,470,000.0     2,520.0   µs             *           *
  31900   17,400,000.0     7,810.0   µs             *           *

Finally, executing each semget() in a new process gave, when still
summing only the durations of these syscalls:

creation:
                 total       total
   KEYS        without        with

      1            3.7         5.0   µs
     10           32.9        36.7   µs
     32          125.0       109.0   µs
    100          523.0       353.0   µs
   1000       20,300.0     3,280.0   µs
  10000    2,470,000.0    46,700.0   µs
  31900   27,800,000.0   219,000.0   µs

lookup-only:
                 total       total
   KEYS        without        with

      1            2.5         2.7   µs
     10           25.4        24.4   µs
     32          106.0        72.6   µs
    100          591.0       352.0   µs
   1000       22,400.0     2,250.0   µs
  10000    2,510,000.0    25,700.0   µs
  31900   28,200,000.0   115,000.0   µs

[1] http://lkml.kernel.org/r/20170814060507.GE23258@yexl-desktop

Link: http://lkml.kernel.org/r/20170815194954.ck32ta2z35yuzpwp@debix


Signed-off-by: default avatarGuillaume Knispel <guillaume.knispel@supersonicimagine.com>
Reviewed-by: default avatarMarc Pardo <marc.pardo@supersonicimagine.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: Guillaume Knispel <guillaume.knispel@supersonicimagine.com>
Cc: Marc Pardo <marc.pardo@supersonicimagine.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent e4243b80
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@

#include <linux/spinlock.h>
#include <linux/uidgid.h>
#include <linux/rhashtable.h>
#include <uapi/linux/ipc.h>
#include <linux/refcount.h>

@@ -22,6 +23,8 @@ struct kern_ipc_perm {
	unsigned long	seq;
	void		*security;

	struct rhash_head khtnode;

	struct rcu_head rcu;
	refcount_t refcount;
} ____cacheline_aligned_in_smp __randomize_layout;
+3 −0
Original line number Diff line number Diff line
@@ -8,15 +8,18 @@
#include <linux/nsproxy.h>
#include <linux/ns_common.h>
#include <linux/refcount.h>
#include <linux/rhashtable.h>

struct user_namespace;

struct ipc_ids {
	int in_use;
	unsigned short seq;
	bool tables_initialized;
	struct rw_semaphore rwsem;
	struct idr ipcs_idr;
	int next_id;
	struct rhashtable key_ht;
};

struct ipc_namespace {
+6 −4
Original line number Diff line number Diff line
@@ -1011,7 +1011,7 @@ SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
}


void msg_init_ns(struct ipc_namespace *ns)
int msg_init_ns(struct ipc_namespace *ns)
{
	ns->msg_ctlmax = MSGMAX;
	ns->msg_ctlmnb = MSGMNB;
@@ -1019,7 +1019,7 @@ void msg_init_ns(struct ipc_namespace *ns)

	atomic_set(&ns->msg_bytes, 0);
	atomic_set(&ns->msg_hdrs, 0);
	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
	return ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
}

#ifdef CONFIG_IPC_NS
@@ -1027,6 +1027,7 @@ void msg_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &msg_ids(ns), freeque);
	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
	rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
}
#endif

@@ -1058,11 +1059,12 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
}
#endif

void __init msg_init(void)
int __init msg_init(void)
{
	msg_init_ns(&init_ipc_ns);
	const int err = msg_init_ns(&init_ipc_ns);

	ipc_init_proc_interface("sysvipc/msg",
				"       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
				IPC_MSG_IDS, sysvipc_msg_proc_show);
	return err;
}
+16 −4
Original line number Diff line number Diff line
@@ -54,16 +54,28 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
	ns->user_ns = get_user_ns(user_ns);
	ns->ucounts = ucounts;

	err = mq_init_ns(ns);
	err = sem_init_ns(ns);
	if (err)
		goto fail_put;
	err = msg_init_ns(ns);
	if (err)
		goto fail_destroy_sem;
	err = shm_init_ns(ns);
	if (err)
		goto fail_destroy_msg;

	sem_init_ns(ns);
	msg_init_ns(ns);
	shm_init_ns(ns);
	err = mq_init_ns(ns);
	if (err)
		goto fail_destroy_shm;

	return ns;

fail_destroy_shm:
	shm_exit_ns(ns);
fail_destroy_msg:
	msg_exit_ns(ns);
fail_destroy_sem:
	sem_exit_ns(ns);
fail_put:
	put_user_ns(ns->user_ns);
	ns_free_inum(&ns->ns);
+7 −4
Original line number Diff line number Diff line
@@ -183,14 +183,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#define sc_semopm	sem_ctls[2]
#define sc_semmni	sem_ctls[3]

void sem_init_ns(struct ipc_namespace *ns)
int sem_init_ns(struct ipc_namespace *ns)
{
	ns->sc_semmsl = SEMMSL;
	ns->sc_semmns = SEMMNS;
	ns->sc_semopm = SEMOPM;
	ns->sc_semmni = SEMMNI;
	ns->used_sems = 0;
	ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
	return ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
}

#ifdef CONFIG_IPC_NS
@@ -198,15 +198,18 @@ void sem_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &sem_ids(ns), freeary);
	idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
	rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
}
#endif

void __init sem_init(void)
int __init sem_init(void)
{
	sem_init_ns(&init_ipc_ns);
	const int err = sem_init_ns(&init_ipc_ns);

	ipc_init_proc_interface("sysvipc/sem",
				"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
				IPC_SEM_IDS, sysvipc_sem_proc_show);
	return err;
}

/**
Loading