Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ef53d9c5 authored by Srinivasa D S's avatar Srinivasa D S Committed by Linus Torvalds
Browse files

kprobes: improve kretprobe scalability with hashed locking



Currently list of kretprobe instances are stored in kretprobe object (as
used_instances,free_instances) and in kretprobe hash table.  We have one
global kretprobe lock to serialise the access to these lists.  This causes
only one kretprobe handler to execute at a time.  Hence affects system
performance, particularly on SMP systems and when return probe is set on
lot of functions (like on all systemcalls).

Solution proposed here gives fine-grain locks that performs better on SMP
system compared to present kretprobe implementation.

Solution:

 1) Instead of having one global lock to protect kretprobe instances
    present in kretprobe object and kretprobe hash table.  We will have
    two locks, one lock for protecting kretprobe hash table and another
    lock for kretporbe object.

 2) We hold lock present in kretprobe object while we modify kretprobe
    instance in kretprobe object and we hold per-hash-list lock while
    modifying kretprobe instances present in that hash list.  To prevent
    deadlock, we never grab a per-hash-list lock while holding a kretprobe
    lock.

 3) We can remove used_instances from struct kretprobe, as we can
    track used instances of kretprobe instances using kretprobe hash
    table.

Time duration for kernel compilation ("make -j 8") on a 8-way ppc64 system
with return probes set on all systemcalls looks like this.

cacheline              non-cacheline             Un-patched kernel
aligned patch 	       aligned patch
===============================================================================
real    9m46.784s       9m54.412s                  10m2.450s
user    40m5.715s       40m7.142s                  40m4.273s
sys     2m57.754s       2m58.583s                  3m17.430s
===========================================================

Time duration for kernel compilation ("make -j 8) on the same system, when
kernel is not probed.
=========================
real    9m26.389s
user    40m8.775s
sys     2m7.283s
=========================

Signed-off-by: default avatarSrinivasa DS <srinivasa@in.ibm.com>
Signed-off-by: default avatarJim Keniston <jkenisto@us.ibm.com>
Acked-by: default avatarAnanth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 53a9600c
Loading
Loading
Loading
Loading
+2 −4
Original line number Diff line number Diff line
@@ -296,8 +296,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;

	INIT_HLIST_HEAD(&empty_rp);
	spin_lock_irqsave(&kretprobe_lock, flags);
	head = kretprobe_inst_table_head(current);
	kretprobe_hash_lock(current, &head, &flags);

	/*
	 * It is possible to have multiple instances associated with a given
@@ -337,7 +336,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
	}

	kretprobe_assert(ri, orig_ret_address, trampoline_address);
	spin_unlock_irqrestore(&kretprobe_lock, flags);
	kretprobe_hash_unlock(current, &flags);

	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
		hlist_del(&ri->hlist);
@@ -347,7 +346,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
	return (void *)orig_ret_address;
}

/* Called with kretprobe_lock held. */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
				      struct pt_regs *regs)
{
+2 −4
Original line number Diff line number Diff line
@@ -429,8 +429,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
		((struct fnptr *)kretprobe_trampoline)->ip;

	INIT_HLIST_HEAD(&empty_rp);
	spin_lock_irqsave(&kretprobe_lock, flags);
	head = kretprobe_inst_table_head(current);
	kretprobe_hash_lock(current, &head, &flags);

	/*
	 * It is possible to have multiple instances associated with a given
@@ -485,7 +484,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
	kretprobe_assert(ri, orig_ret_address, trampoline_address);

	reset_current_kprobe();
	spin_unlock_irqrestore(&kretprobe_lock, flags);
	kretprobe_hash_unlock(current, &flags);
	preempt_enable_no_resched();

	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
@@ -500,7 +499,6 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
	return 1;
}

/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
				      struct pt_regs *regs)
{
+2 −4
Original line number Diff line number Diff line
@@ -144,7 +144,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
	kcb->kprobe_saved_msr = regs->msr;
}

/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
				      struct pt_regs *regs)
{
@@ -312,8 +311,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;

	INIT_HLIST_HEAD(&empty_rp);
	spin_lock_irqsave(&kretprobe_lock, flags);
	head = kretprobe_inst_table_head(current);
	kretprobe_hash_lock(current, &head, &flags);

	/*
	 * It is possible to have multiple instances associated with a given
@@ -352,7 +350,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
	regs->nip = orig_ret_address;

	reset_current_kprobe();
	spin_unlock_irqrestore(&kretprobe_lock, flags);
	kretprobe_hash_unlock(current, &flags);
	preempt_enable_no_resched();

	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+2 −4
Original line number Diff line number Diff line
@@ -270,7 +270,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
	__ctl_store(kcb->kprobe_saved_ctl, 9, 11);
}

/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
					struct pt_regs *regs)
{
@@ -377,8 +376,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;

	INIT_HLIST_HEAD(&empty_rp);
	spin_lock_irqsave(&kretprobe_lock, flags);
	head = kretprobe_inst_table_head(current);
	kretprobe_hash_lock(current, &head, &flags);

	/*
	 * It is possible to have multiple instances associated with a given
@@ -417,7 +415,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
	regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;

	reset_current_kprobe();
	spin_unlock_irqrestore(&kretprobe_lock, flags);
	kretprobe_hash_unlock(current, &flags);
	preempt_enable_no_resched();

	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+5 −6
Original line number Diff line number Diff line
@@ -478,9 +478,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
	return 0;
}

/* Called with kretprobe_lock held.  The value stored in the return
 * address register is actually 2 instructions before where the
 * callee will return to.  Sequences usually look something like this
/* The value stored in the return address register is actually 2
 * instructions before where the callee will return to.
 * Sequences usually look something like this
 *
 *		call	some_function	<--- return register points here
 *		 nop			<--- call delay slot
@@ -512,8 +512,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;

	INIT_HLIST_HEAD(&empty_rp);
	spin_lock_irqsave(&kretprobe_lock, flags);
	head = kretprobe_inst_table_head(current);
	kretprobe_hash_lock(current, &head, &flags);

	/*
	 * It is possible to have multiple instances associated with a given
@@ -553,7 +552,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
	regs->tnpc = orig_ret_address + 4;

	reset_current_kprobe();
	spin_unlock_irqrestore(&kretprobe_lock, flags);
	kretprobe_hash_unlock(current, &flags);
	preempt_enable_no_resched();

	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
Loading