Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4612c715 authored by Denys Vlasenko's avatar Denys Vlasenko Committed by Brian Norris
Browse files

mtd: cfi: deinline large functions

With this .config: http://busybox.net/~vda/kernel_config

,
after uninlining these functions have sizes and callsite counts
as follows:

cfi_udelay(): 74 bytes, 26 callsites
cfi_send_gen_cmd(): 153 bytes, 95 callsites
cfi_build_cmd(): 274 bytes, 123 callsites
cfi_build_cmd_addr(): 49 bytes, 15 callsites
cfi_merge_status(): 230 bytes, 3 callsites

Reduction in code size is about 50,000:

    text     data      bss       dec     hex filename
85842882 22294584 20627456 128764922 7accbfa vmlinux.before
85789648 22294616 20627456 128711720 7abfc28 vmlinux

Signed-off-by: default avatarDenys Vlasenko <dvlasenk@redhat.com>
CC: Dan Carpenter <dan.carpenter@oracle.com>
CC: Jingoo Han <jg1.han@samsung.com>
CC: Brian Norris <computersforpeace@gmail.com>
CC: Aaron Sierra <asierra@xes-inc.com>
CC: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
CC: David Woodhouse <David.Woodhouse@intel.com>
CC: linux-mtd@lists.infradead.org
CC: linux-kernel@vger.kernel.org
Signed-off-by: default avatarBrian Norris <computersforpeace@gmail.com>
parent c3c263a8
Loading
Loading
Loading
Loading
+188 −0
Original line number Diff line number Diff line
@@ -23,6 +23,194 @@
#include <linux/mtd/map.h>
#include <linux/mtd/cfi.h>

void cfi_udelay(int us)
{
	if (us >= 1000) {
		msleep((us+999)/1000);
	} else {
		udelay(us);
		cond_resched();
	}
}
EXPORT_SYMBOL(cfi_udelay);

/*
 * Returns the command address according to the given geometry.
 */
uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs,
				struct map_info *map, struct cfi_private *cfi)
{
	unsigned bankwidth = map_bankwidth(map);
	unsigned interleave = cfi_interleave(cfi);
	unsigned type = cfi->device_type;
	uint32_t addr;

	addr = (cmd_ofs * type) * interleave;

	/* Modify the unlock address if we are in compatibility mode.
	 * For 16bit devices on 8 bit busses
	 * and 32bit devices on 16 bit busses
	 * set the low bit of the alternating bit sequence of the address.
	 */
	if (((type * interleave) > bankwidth) && ((cmd_ofs & 0xff) == 0xaa))
		addr |= (type >> 1)*interleave;

	return  addr;
}
EXPORT_SYMBOL(cfi_build_cmd_addr);

/*
 * Transforms the CFI command for the given geometry (bus width & interleave).
 * It looks too long to be inline, but in the common case it should almost all
 * get optimised away.
 */
map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cfi_private *cfi)
{
	map_word val = { {0} };
	int wordwidth, words_per_bus, chip_mode, chips_per_word;
	unsigned long onecmd;
	int i;

	/* We do it this way to give the compiler a fighting chance
	   of optimising away all the crap for 'bankwidth' larger than
	   an unsigned long, in the common case where that support is
	   disabled */
	if (map_bankwidth_is_large(map)) {
		wordwidth = sizeof(unsigned long);
		words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. normally 1
	} else {
		wordwidth = map_bankwidth(map);
		words_per_bus = 1;
	}

	chip_mode = map_bankwidth(map) / cfi_interleave(cfi);
	chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map);

	/* First, determine what the bit-pattern should be for a single
	   device, according to chip mode and endianness... */
	switch (chip_mode) {
	default: BUG();
	case 1:
		onecmd = cmd;
		break;
	case 2:
		onecmd = cpu_to_cfi16(map, cmd);
		break;
	case 4:
		onecmd = cpu_to_cfi32(map, cmd);
		break;
	}

	/* Now replicate it across the size of an unsigned long, or
	   just to the bus width as appropriate */
	switch (chips_per_word) {
	default: BUG();
#if BITS_PER_LONG >= 64
	case 8:
		onecmd |= (onecmd << (chip_mode * 32));
#endif
	case 4:
		onecmd |= (onecmd << (chip_mode * 16));
	case 2:
		onecmd |= (onecmd << (chip_mode * 8));
	case 1:
		;
	}

	/* And finally, for the multi-word case, replicate it
	   in all words in the structure */
	for (i=0; i < words_per_bus; i++) {
		val.x[i] = onecmd;
	}

	return val;
}
EXPORT_SYMBOL(cfi_build_cmd);

unsigned long cfi_merge_status(map_word val, struct map_info *map,
					   struct cfi_private *cfi)
{
	int wordwidth, words_per_bus, chip_mode, chips_per_word;
	unsigned long onestat, res = 0;
	int i;

	/* We do it this way to give the compiler a fighting chance
	   of optimising away all the crap for 'bankwidth' larger than
	   an unsigned long, in the common case where that support is
	   disabled */
	if (map_bankwidth_is_large(map)) {
		wordwidth = sizeof(unsigned long);
		words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. normally 1
	} else {
		wordwidth = map_bankwidth(map);
		words_per_bus = 1;
	}

	chip_mode = map_bankwidth(map) / cfi_interleave(cfi);
	chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map);

	onestat = val.x[0];
	/* Or all status words together */
	for (i=1; i < words_per_bus; i++) {
		onestat |= val.x[i];
	}

	res = onestat;
	switch(chips_per_word) {
	default: BUG();
#if BITS_PER_LONG >= 64
	case 8:
		res |= (onestat >> (chip_mode * 32));
#endif
	case 4:
		res |= (onestat >> (chip_mode * 16));
	case 2:
		res |= (onestat >> (chip_mode * 8));
	case 1:
		;
	}

	/* Last, determine what the bit-pattern should be for a single
	   device, according to chip mode and endianness... */
	switch (chip_mode) {
	case 1:
		break;
	case 2:
		res = cfi16_to_cpu(map, res);
		break;
	case 4:
		res = cfi32_to_cpu(map, res);
		break;
	default: BUG();
	}
	return res;
}
EXPORT_SYMBOL(cfi_merge_status);

/*
 * Sends a CFI command to a bank of flash for the given geometry.
 *
 * Returns the offset in flash where the command was written.
 * If prev_val is non-null, it will be set to the value at the command address,
 * before the command was written.
 */
uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t base,
				struct map_info *map, struct cfi_private *cfi,
				int type, map_word *prev_val)
{
	map_word val;
	uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, map, cfi);
	val = cfi_build_cmd(cmd, map, cfi);

	if (prev_val)
		*prev_val = map_read(map, addr);

	map_write(map, val, addr);

	return addr - base;
}
EXPORT_SYMBOL(cfi_send_gen_cmd);

int __xipram cfi_qry_present(struct map_info *map, __u32 base,
			     struct cfi_private *cfi)
{
+8 −180
Original line number Diff line number Diff line
@@ -296,183 +296,19 @@ struct cfi_private {
	struct flchip chips[0];  /* per-chip data structure for each chip */
};

/*
 * Returns the command address according to the given geometry.
 */
static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs,
				struct map_info *map, struct cfi_private *cfi)
{
	unsigned bankwidth = map_bankwidth(map);
	unsigned interleave = cfi_interleave(cfi);
	unsigned type = cfi->device_type;
	uint32_t addr;
uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs,
				struct map_info *map, struct cfi_private *cfi);

	addr = (cmd_ofs * type) * interleave;

	/* Modify the unlock address if we are in compatibility mode.
	 * For 16bit devices on 8 bit busses
	 * and 32bit devices on 16 bit busses
	 * set the low bit of the alternating bit sequence of the address.
	 */
	if (((type * interleave) > bankwidth) && ((cmd_ofs & 0xff) == 0xaa))
		addr |= (type >> 1)*interleave;

	return  addr;
}

/*
 * Transforms the CFI command for the given geometry (bus width & interleave).
 * It looks too long to be inline, but in the common case it should almost all
 * get optimised away.
 */
static inline map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cfi_private *cfi)
{
	map_word val = { {0} };
	int wordwidth, words_per_bus, chip_mode, chips_per_word;
	unsigned long onecmd;
	int i;

	/* We do it this way to give the compiler a fighting chance
	   of optimising away all the crap for 'bankwidth' larger than
	   an unsigned long, in the common case where that support is
	   disabled */
	if (map_bankwidth_is_large(map)) {
		wordwidth = sizeof(unsigned long);
		words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. normally 1
	} else {
		wordwidth = map_bankwidth(map);
		words_per_bus = 1;
	}

	chip_mode = map_bankwidth(map) / cfi_interleave(cfi);
	chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map);

	/* First, determine what the bit-pattern should be for a single
	   device, according to chip mode and endianness... */
	switch (chip_mode) {
	default: BUG();
	case 1:
		onecmd = cmd;
		break;
	case 2:
		onecmd = cpu_to_cfi16(map, cmd);
		break;
	case 4:
		onecmd = cpu_to_cfi32(map, cmd);
		break;
	}

	/* Now replicate it across the size of an unsigned long, or
	   just to the bus width as appropriate */
	switch (chips_per_word) {
	default: BUG();
#if BITS_PER_LONG >= 64
	case 8:
		onecmd |= (onecmd << (chip_mode * 32));
#endif
	case 4:
		onecmd |= (onecmd << (chip_mode * 16));
	case 2:
		onecmd |= (onecmd << (chip_mode * 8));
	case 1:
		;
	}

	/* And finally, for the multi-word case, replicate it
	   in all words in the structure */
	for (i=0; i < words_per_bus; i++) {
		val.x[i] = onecmd;
	}

	return val;
}
map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cfi_private *cfi);
#define CMD(x)  cfi_build_cmd((x), map, cfi)


static inline unsigned long cfi_merge_status(map_word val, struct map_info *map,
					   struct cfi_private *cfi)
{
	int wordwidth, words_per_bus, chip_mode, chips_per_word;
	unsigned long onestat, res = 0;
	int i;

	/* We do it this way to give the compiler a fighting chance
	   of optimising away all the crap for 'bankwidth' larger than
	   an unsigned long, in the common case where that support is
	   disabled */
	if (map_bankwidth_is_large(map)) {
		wordwidth = sizeof(unsigned long);
		words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. normally 1
	} else {
		wordwidth = map_bankwidth(map);
		words_per_bus = 1;
	}

	chip_mode = map_bankwidth(map) / cfi_interleave(cfi);
	chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map);

	onestat = val.x[0];
	/* Or all status words together */
	for (i=1; i < words_per_bus; i++) {
		onestat |= val.x[i];
	}

	res = onestat;
	switch(chips_per_word) {
	default: BUG();
#if BITS_PER_LONG >= 64
	case 8:
		res |= (onestat >> (chip_mode * 32));
#endif
	case 4:
		res |= (onestat >> (chip_mode * 16));
	case 2:
		res |= (onestat >> (chip_mode * 8));
	case 1:
		;
	}

	/* Last, determine what the bit-pattern should be for a single
	   device, according to chip mode and endianness... */
	switch (chip_mode) {
	case 1:
		break;
	case 2:
		res = cfi16_to_cpu(map, res);
		break;
	case 4:
		res = cfi32_to_cpu(map, res);
		break;
	default: BUG();
	}
	return res;
}

unsigned long cfi_merge_status(map_word val, struct map_info *map,
					   struct cfi_private *cfi);
#define MERGESTATUS(x) cfi_merge_status((x), map, cfi)


/*
 * Sends a CFI command to a bank of flash for the given geometry.
 *
 * Returns the offset in flash where the command was written.
 * If prev_val is non-null, it will be set to the value at the command address,
 * before the command was written.
 */
static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t base,
uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t base,
				struct map_info *map, struct cfi_private *cfi,
				int type, map_word *prev_val)
{
	map_word val;
	uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, map, cfi);
	val = cfi_build_cmd(cmd, map, cfi);

	if (prev_val)
		*prev_val = map_read(map, addr);

	map_write(map, val, addr);

	return addr - base;
}
				int type, map_word *prev_val);

static inline uint8_t cfi_read_query(struct map_info *map, uint32_t addr)
{
@@ -506,15 +342,7 @@ static inline uint16_t cfi_read_query16(struct map_info *map, uint32_t addr)
	}
}

static inline void cfi_udelay(int us)
{
	if (us >= 1000) {
		msleep((us+999)/1000);
	} else {
		udelay(us);
		cond_resched();
	}
}
void cfi_udelay(int us);

int __xipram cfi_qry_present(struct map_info *map, __u32 base,
			     struct cfi_private *cfi);