Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8dbc450f authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'sparc-vdso'



sparc: VDSO improvements

I started out on these changes with the goal of improving perf
annotations when the VDSO is in use.  Due to lack of inlining the
helper functions are typically hit when profiling instead of
__vdso_gettimeofday() or __vdso_vclock_gettime().

The only symbols available by default are the dynamic symbols,
which therefore don't cover the helper functions.

So the perf output looks terrible, because the symbols cannot be
resolved and all show up as "Unknown".

The sparc VDSO code forces no inlining because of the way the
simplistic %tick register read code patching works.  So fixing that
was the first order of business.  Tricks were taken from how x86
implements alternates.  The crucial factor is that if you want to
refer to locations (for the original and patch instruction(s)) you
have to do so in a way that is resolvable at link time even for a
shared object.  So you have to do this by storing PC-relative
values, and not in executable sections.

Next, we sanitize the Makefile so that the cflags et al. make more
sense.  And LDFLAGS are now actually applied to invocations of LD instead
of CC.

We also add some sanity checking, specifically in a post-link check
that makes sure we don't have any unexpected unresolved symbols in the
VDSO.  This is essential because the dynamic linker cannot resolve
symbols in the VDSO because it cannot write to it.

Finally, some very minor optimizations are performed to the
vclock_gettime.c code.  One thing which is tricky with this code on
sparc is that struct timeval and struct timespec are laid out
differently on 64-bit.  This is because, unlike other architectures,
sparc defined suseconds_t as 'int' even on 64-bit.  This is why we
have all of the "union tstv_t" business and the weird assignments
in __vdso_gettimeofday().

Performance-wise we do gain some cycles here; specifically, here
are cycle counts for a user application calling gettimeofday():

	no-VDSO		VDSO-orig	VDSO-new
================================================
64-bit	853 cycles	112 cycles	125 cycles
32-bit	849 cycles	134 cycles	141 cycles

These results are with current glibc sources.

To get better we'd need to implement this in assembler, and I might
just do that at some point.

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 46b83064 19832d24
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -121,8 +121,12 @@ struct thread_info {
}

/* how to get the thread information struct from C */
#ifndef BUILD_VDSO
register struct thread_info *current_thread_info_reg asm("g6");
#define current_thread_info()	(current_thread_info_reg)
#else
extern struct thread_info *current_thread_info(void);
#endif

/* thread information allocation */
#if PAGE_SHIFT == 13
+3 −3
Original line number Diff line number Diff line
@@ -8,10 +8,10 @@
struct vdso_image {
	void *data;
	unsigned long size;   /* Always a multiple of PAGE_SIZE */

	unsigned long tick_patch, tick_patch_len;

	long sym_vvar_start;  /* Negative offset to the vvar area */
	long sym_vread_tick; /* Start of vread_tick section */
	long sym_vread_tick_patch_start; /* Start of tick read */
	long sym_vread_tick_patch_end;   /* End of tick read */
};

#ifdef CONFIG_SPARC64
+0 −3
Original line number Diff line number Diff line
@@ -53,8 +53,6 @@

DEFINE_SPINLOCK(rtc_lock);

unsigned int __read_mostly vdso_fix_stick;

#ifdef CONFIG_SMP
unsigned long profile_pc(struct pt_regs *regs)
{
@@ -838,7 +836,6 @@ void __init time_init_early(void)
		} else {
			init_tick_ops(&tick_operations);
			clocksource_tick.archdata.vclock_mode = VCLOCK_TICK;
			vdso_fix_stick = 1;
		}
	} else {
		init_tick_ops(&stick_operations);
+17 −16
Original line number Diff line number Diff line
@@ -33,10 +33,8 @@ targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)

CPPFLAGS_vdso.lds += -P -C

VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
			-Wl,--no-undefined \
			-Wl,-z,max-page-size=8192 -Wl,-z,common-page-size=8192 \
			$(DISABLE_LTO)
VDSO_LDFLAGS_vdso.lds = -m elf64_sparc -soname linux-vdso.so.1 --no-undefined \
			-z max-page-size=8192 -z common-page-size=8192

$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
	$(call if_changed,vdso)
@@ -54,13 +52,14 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
# Don't omit frame pointers for ease of userspace debugging, but do
# optimize sibling calls.
#
CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables \
       -m64 -ffixed-g2 -ffixed-g3 -fcall-used-g4 -fcall-used-g5 -ffixed-g6 \
       -ffixed-g7 $(filter -g%,$(KBUILD_CFLAGS)) \
       $(call cc-option, -fno-stack-protector) -fno-omit-frame-pointer \
       -foptimize-sibling-calls -DBUILD_VDSO
CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables -m64 \
       $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
       -fno-omit-frame-pointer -foptimize-sibling-calls \
       -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO

$(vobjs): KBUILD_CFLAGS += $(CFL)
SPARC_REG_CFLAGS = -ffixed-g4 -ffixed-g5 -fcall-used-g5 -fcall-used-g7

$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)

#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
@@ -73,7 +72,7 @@ $(obj)/%.so: $(obj)/%.so.dbg
	$(call if_changed,objcopy)

CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf32_sparc,-soname=linux-gate.so.1
VDSO_LDFLAGS_vdso32.lds = -m elf32_sparc -soname linux-gate.so.1

#This makes sure the $(obj) subdirectory exists even though vdso32/
#is not a kbuild sub-make subdirectory
@@ -91,7 +90,8 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 := $(filter-out -mcmodel=medlow,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m32 -msoft-float -fpic -mno-app-regs -ffixed-g7
KBUILD_CFLAGS_32 := $(filter-out $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m32 -msoft-float -fpic
KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
@@ -109,12 +109,13 @@ $(obj)/vdso32.so.dbg: FORCE \
# The DSO images are built using a special linker script.
#
quiet_cmd_vdso = VDSO    $@
      cmd_vdso = $(CC) -nostdlib -o $@ \
      cmd_vdso = $(LD) -nostdlib -o $@ \
		       $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
		       -T $(filter %.lds,$^) $(filter %.o,$^) && \
		sh $(srctree)/$(src)/checkundef.sh '$(OBJDUMP)' '$@'

VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
	$(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic
VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
	$(call ld-option, --build-id) -Bsymbolic
GCOV_PROFILE := n

#
+10 −0
Original line number Diff line number Diff line
#!/bin/sh
# Post-link sanity check: fail if the linked image has undefined symbols.
#
# Usage: checkundef.sh <objdump> <file>
#   $1  objdump command to run
#   $2  linked object to inspect
#
# Exit status: 0 when no undefined symbol is found, 1 otherwise (a message
# naming <file> is printed on stderr).
objdump="$1"
file="$2"
# objdump -t reports undefined symbols with the section name "*UND*".
# Match it as a fixed string (-F): as a regular expression the leading '*'
# is literal and the trailing 'D*' means "zero or more D", and the previous
# pattern '*UUND*' (doubled 'U') could never match, so the check always
# passed.  Entries containing "#scratch" are deliberately ignored
# (presumably SPARC scratch-register markers -- confirm).
$objdump -t "$file" | grep -F '*UND*' | grep -v '#scratch' > /dev/null 2>&1
# $? is the status of the last grep: 1 means nothing survived the filters,
# i.e. there are no unexpected undefined symbols.
if [ $? -eq 1 ]; then
    exit 0
else
    echo "$file: undefined symbols found" >&2
    exit 1
fi
Loading