Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 954e482b authored by Fenghua Yu, committed by H. Peter Anvin
Browse files

x86/copy_user_generic: Optimize copy_user_generic with CPU erms feature



According to the Intel 64 and IA-32 SDM and the Optimization Reference Manual,
beginning with Ivybridge, REP string operations using MOVSB and STOSB can
provide both flexible and high-performance REP string operations in cases like
memory copy. Enhancement availability is indicated by CPUID.7.0.EBX[9]
(Enhanced REP MOVSB/STOSB).

If the CPU's ERMS feature is detected, patch copy_user_generic with the
enhanced fast string version of copy_user_generic.

A few new macros are defined to reduce duplicate code in ALTERNATIVE and
ALTERNATIVE_2.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1337908785-14015-1-git-send-email-fenghua.yu@intel.com


Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
parent 2f747590
Loading
Loading
Loading
Loading
+59 −15
Original line number Original line Diff line number Diff line
@@ -75,22 +75,53 @@ static inline int alternatives_text_reserved(void *start, void *end)
}
}
#endif	/* CONFIG_SMP */
#endif	/* CONFIG_SMP */


/*
 * Emit the original instruction(s) bracketed by the numeric local labels
 * 661 (placed before) and 662 (placed after), so that later directives can
 * refer to the start and the length of the original code.
 */
#define OLDINSTR(oldinstr)	"661:\n\t" oldinstr "\n662:\n"

/*
 * Label names marking the beginning ("663" + number) and the end
 * ("664" + number) of a replacement sequence. The number is stringified and
 * appended, so replacement 1 uses 6631/6641, replacement 2 uses 6632/6642.
 */
#define b_replacement(number)	"663"#number
#define e_replacement(number)	"664"#number

/*
 * Assembler expressions for code lengths:
 *  - alt_slen: length of the original code, i.e. label 662 minus label 661
 *    (both referenced with the "b" suffix).
 *  - alt_rlen(number): length of replacement <number>, i.e. its end label
 *    minus its begin label (both referenced with the "f" suffix).
 */
#define alt_slen "662b-661b"
#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"

/*
 * Emit one alternative-instruction record describing replacement <number>
 * for <feature>. The record consists of, in order:
 *   .long  offset of the original code (label 661), relative to this field
 *   .long  offset of the replacement's begin label, relative to this field
 *   .word  the feature bit (<feature>, stringified)
 *   .byte  length of the original code (alt_slen)
 *   .byte  length of the replacement (alt_rlen(number))
 * The caller is responsible for switching to the desired section first.
 */
#define ALTINSTR_ENTRY(feature, number)					      \
	" .long 661b - .\n"				/* label           */ \
	" .long " b_replacement(number)"f - .\n"	/* new instruction */ \
	" .word " __stringify(feature) "\n"		/* feature bit     */ \
	" .byte " alt_slen "\n"				/* source len      */ \
	" .byte " alt_rlen(number) "\n"			/* replacement len */

/*
 * Emit a single byte whose value is 0xff + (replacement length) -
 * (original length) for replacement <number>.
 * NOTE(review): presumably this serves as a build-time check of the
 * "rlen <= slen" condition named in the inline comment (the value would no
 * longer fit in a byte if the replacement were longer) -- confirm against
 * assembler behavior.
 */
#define DISCARD_ENTRY(number)				/* rlen <= slen */    \
	" .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"

/*
 * Emit the replacement instruction(s) <newinstr> bracketed by the begin and
 * end labels of replacement <number>, which is what alt_rlen(number) and
 * ALTINSTR_ENTRY refer to. The <feature> argument is accepted but not used
 * in the expansion.
 */
#define ALTINSTR_REPLACEMENT(newinstr, feature, number)	/* replacement */     \
	b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"

/* alternative assembly primitive: */
/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, newinstr, feature)			\
#define ALTERNATIVE(oldinstr, newinstr, feature)			\
									\
	OLDINSTR(oldinstr)						\
      "661:\n\t" oldinstr "\n662:\n"					\
	".section .altinstructions,\"a\"\n"				\
	".section .altinstructions,\"a\"\n"				\
      "	 .long 661b - .\n"			/* label           */	\
	ALTINSTR_ENTRY(feature, 1)					\
      "	 .long 663f - .\n"			/* new instruction */	\
      "	 .word " __stringify(feature) "\n"	/* feature bit     */	\
      "	 .byte 662b-661b\n"			/* sourcelen       */	\
      "	 .byte 664f-663f\n"			/* replacementlen  */	\
	".previous\n"							\
	".previous\n"							\
	".section .discard,\"aw\",@progbits\n"				\
	".section .discard,\"aw\",@progbits\n"				\
      "	 .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */	\
	DISCARD_ENTRY(1)						\
	".previous\n"							\
	".previous\n"							\
	".section .altinstr_replacement, \"ax\"\n"			\
	".section .altinstr_replacement, \"ax\"\n"			\
      "663:\n\t" newinstr "\n664:\n"		/* replacement     */	\
	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\
	".previous"

/*
 * Alternative assembly primitive with two candidate replacements.
 *
 * Expands to an assembler string that:
 *  1. emits <oldinstr> in place (between labels 661 and 662);
 *  2. adds two records to .altinstructions, first for <feature1>
 *     (replacement 1), then for <feature2> (replacement 2);
 *  3. adds the two matching length-check bytes to .discard;
 *  4. places <newinstr1> and <newinstr2> in .altinstr_replacement;
 * and returns to the previous section after each step.
 *
 * Both records point at the same original code, so both replacements are
 * measured against the length of <oldinstr>.
 */
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
	OLDINSTR(oldinstr)						\
	".section .altinstructions,\"a\"\n"				\
	ALTINSTR_ENTRY(feature1, 1)					\
	ALTINSTR_ENTRY(feature2, 2)					\
	".previous\n"							\
	".section .discard,\"aw\",@progbits\n"				\
	DISCARD_ENTRY(1)						\
	DISCARD_ENTRY(2)						\
	".previous\n"							\
	".section .altinstr_replacement, \"ax\"\n"			\
	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
	".previous"
	".previous"


/*
/*
@@ -139,6 +170,19 @@ static inline int alternatives_text_reserved(void *start, void *end)
	asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
	asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
		: output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
		: output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)


/*
 * Like alternative_call, but there are two features and respective functions.
 * If CPU has feature2, function2 is used.
 * Otherwise, if CPU has feature1, function1 is used.
 * Otherwise, old function is used.
 *
 * oldfunc, newfunc1, newfunc2: call targets; each is passed to the asm
 *	statement as an "i" operand named [old], [new1] and [new2].
 * feature1, feature2: feature bits selecting newfunc1 and newfunc2.
 * output: operand list placed in the asm output section.
 * input: optional extra operands appended after the three call targets in
 *	the asm input section.
 */
#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2,   \
			   output, input...)				      \
	asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
		"call %P[new2]", feature2)				      \
		: output : [old] "i" (oldfunc), [new1] "i" (newfunc1),	      \
		[new2] "i" (newfunc2), ## input)

/*
/*
 * use this macro(s) if you need more than one output parameter
 * use this macro(s) if you need more than one output parameter
 * in alternative_io
 * in alternative_io
+10 −1
Original line number Original line Diff line number Diff line
@@ -17,6 +17,8 @@


/* Handles exceptions in both to and from, but doesn't do access_ok */
/* Handles exceptions in both to and from, but doesn't do access_ok */
__must_check unsigned long
__must_check unsigned long
copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
__must_check unsigned long
copy_user_generic_string(void *to, const void *from, unsigned len);
copy_user_generic_string(void *to, const void *from, unsigned len);
__must_check unsigned long
__must_check unsigned long
copy_user_generic_unrolled(void *to, const void *from, unsigned len);
copy_user_generic_unrolled(void *to, const void *from, unsigned len);
@@ -26,9 +28,16 @@ copy_user_generic(void *to, const void *from, unsigned len)
{
{
	unsigned ret;
	unsigned ret;


	alternative_call(copy_user_generic_unrolled,
	/*
	 * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
	 * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
	 * Otherwise, use copy_user_generic_unrolled.
	 */
	alternative_call_2(copy_user_generic_unrolled,
			 copy_user_generic_string,
			 copy_user_generic_string,
			 X86_FEATURE_REP_GOOD,
			 X86_FEATURE_REP_GOOD,
			 copy_user_enhanced_fast_string,
			 X86_FEATURE_ERMS,
			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
				     "=d" (len)),
				     "=d" (len)),
			 "1" (to), "2" (from), "3" (len)
			 "1" (to), "2" (from), "3" (len)
+1 −0
Original line number Original line Diff line number Diff line
@@ -28,6 +28,7 @@ EXPORT_SYMBOL(__put_user_8);


EXPORT_SYMBOL(copy_user_generic_string);
EXPORT_SYMBOL(copy_user_generic_string);
EXPORT_SYMBOL(copy_user_generic_unrolled);
EXPORT_SYMBOL(copy_user_generic_unrolled);
EXPORT_SYMBOL(copy_user_enhanced_fast_string);
EXPORT_SYMBOL(__copy_user_nocache);
EXPORT_SYMBOL(__copy_user_nocache);
EXPORT_SYMBOL(_copy_from_user);
EXPORT_SYMBOL(_copy_from_user);
EXPORT_SYMBOL(_copy_to_user);
EXPORT_SYMBOL(_copy_to_user);