Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 529bf6be, authored by Dipankar Sarma, committed by Linus Torvalds
Browse files

[PATCH] fix file counting



I have benchmarked this on an x86_64 NUMA system and see no significant
performance difference on kernbench.  Tested on both x86_64 and powerpc.

The way we do file struct accounting is not very suitable for batched
freeing.  For scalability reasons, file accounting was
constructor/destructor based.  This meant that nr_files was decremented
only when the object was removed from the slab cache.  This is susceptible
to slab fragmentation.  With RCU based file structure, consequent batched
freeing and a test program like Serge's, we just speed this up and end up
with a very fragmented slab -

llm22:~ # cat /proc/sys/fs/file-nr
587730  0       758844

At the same time, I see only 2000+ objects in the filp cache.  The following
patch fixes this problem.

This patch changes the file counting by removing the filp_count_lock.
Instead we use a separate percpu counter, nr_files, for now and all
accesses to it are through get_nr_files() api.  In the sysctl handler for
nr_files, we populate files_stat.nr_files before returning to user.

Counting files as and when they are created and destroyed (as opposed to
inside slab) allows us to correctly count open files with RCU.

Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 21a1ea9e
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -1736,7 +1736,7 @@ void __init vfs_caches_init(unsigned long mempages)
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);


	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);


	dcache_init(mempages);
	dcache_init(mempages);
	inode_init(mempages);
	inode_init(mempages);
+55 −32
Original line number Original line Diff line number Diff line
@@ -5,6 +5,7 @@
 *  Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
 *  Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
 */
 */


#include <linux/config.h>
#include <linux/string.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/file.h>
@@ -19,52 +20,67 @@
#include <linux/capability.h>
#include <linux/capability.h>
#include <linux/cdev.h>
#include <linux/cdev.h>
#include <linux/fsnotify.h>
#include <linux/fsnotify.h>
#include <linux/sysctl.h>
#include <linux/percpu_counter.h>

#include <asm/atomic.h>


/* sysctl tunables... */
/* sysctl tunables... */
struct files_stat_struct files_stat = {
struct files_stat_struct files_stat = {
	.max_files = NR_FILE
	.max_files = NR_FILE
};
};


EXPORT_SYMBOL(files_stat); /* Needed by unix.o */

/* public. Not pretty! */
/* public. Not pretty! */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);


static DEFINE_SPINLOCK(filp_count_lock);
static struct percpu_counter nr_files __cacheline_aligned_in_smp;


/* slab constructors and destructors are called from arbitrary
static inline void file_free_rcu(struct rcu_head *head)
 * context and must be fully threaded - use a local spinlock
 * to protect files_stat.nr_files
 */
void filp_ctor(void *objp, struct kmem_cache *cachep, unsigned long cflags)
{
{
	if ((cflags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	struct file *f =  container_of(head, struct file, f_u.fu_rcuhead);
	    SLAB_CTOR_CONSTRUCTOR) {
	kmem_cache_free(filp_cachep, f);
		unsigned long flags;
		spin_lock_irqsave(&filp_count_lock, flags);
		files_stat.nr_files++;
		spin_unlock_irqrestore(&filp_count_lock, flags);
}
}

static inline void file_free(struct file *f)
{
	percpu_counter_dec(&nr_files);
	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
}
}


void filp_dtor(void *objp, struct kmem_cache *cachep, unsigned long dflags)
/*
 * Return the total number of open files in the system
 */
static int get_nr_files(void)
{
{
	unsigned long flags;
	return percpu_counter_read_positive(&nr_files);
	spin_lock_irqsave(&filp_count_lock, flags);
	files_stat.nr_files--;
	spin_unlock_irqrestore(&filp_count_lock, flags);
}
}


static inline void file_free_rcu(struct rcu_head *head)
/*
 * Return the maximum number of open files in the system
 */
int get_max_files(void)
{
{
	struct file *f =  container_of(head, struct file, f_u.fu_rcuhead);
	return files_stat.max_files;
	kmem_cache_free(filp_cachep, f);
}
}
EXPORT_SYMBOL_GPL(get_max_files);


static inline void file_free(struct file *f)
/*
 * Handle nr_files sysctl
 */
#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
int proc_nr_files(ctl_table *table, int write, struct file *filp,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
{
{
	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
	files_stat.nr_files = get_nr_files();
	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
}
#else
int proc_nr_files(ctl_table *table, int write, struct file *filp,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
}
#endif


/* Find an unused file structure and return a pointer to it.
/* Find an unused file structure and return a pointer to it.
 * Returns NULL, if there are no more free file structures or
 * Returns NULL, if there are no more free file structures or
@@ -78,14 +94,20 @@ struct file *get_empty_filp(void)
	/*
	/*
	 * Privileged users can go above max_files
	 * Privileged users can go above max_files
	 */
	 */
	if (files_stat.nr_files >= files_stat.max_files &&
	if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
				!capable(CAP_SYS_ADMIN))
		/*
		 * percpu_counters are inaccurate.  Do an expensive check before
		 * we go and fail.
		 */
		if (percpu_counter_sum(&nr_files) >= files_stat.max_files)
			goto over;
			goto over;
	}


	f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
	f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
	if (f == NULL)
	if (f == NULL)
		goto fail;
		goto fail;


	percpu_counter_inc(&nr_files);
	memset(f, 0, sizeof(*f));
	memset(f, 0, sizeof(*f));
	if (security_file_alloc(f))
	if (security_file_alloc(f))
		goto fail_sec;
		goto fail_sec;
@@ -101,10 +123,10 @@ struct file *get_empty_filp(void)


over:
over:
	/* Ran out of filps - report that */
	/* Ran out of filps - report that */
	if (files_stat.nr_files > old_max) {
	if (get_nr_files() > old_max) {
		printk(KERN_INFO "VFS: file-max limit %d reached\n",
		printk(KERN_INFO "VFS: file-max limit %d reached\n",
					files_stat.max_files);
					get_max_files());
		old_max = files_stat.nr_files;
		old_max = get_nr_files();
	}
	}
	goto fail;
	goto fail;


@@ -276,4 +298,5 @@ void __init files_init(unsigned long mempages)
	if (files_stat.max_files < NR_FILE)
	if (files_stat.max_files < NR_FILE)
		files_stat.max_files = NR_FILE;
		files_stat.max_files = NR_FILE;
	files_defer_init();
	files_defer_init();
	percpu_counter_init(&nr_files);
} 
} 
+0 −2
Original line number Original line Diff line number Diff line
@@ -60,8 +60,6 @@ extern void put_filp(struct file *);
extern int get_unused_fd(void);
extern int get_unused_fd(void);
extern void FASTCALL(put_unused_fd(unsigned int fd));
extern void FASTCALL(put_unused_fd(unsigned int fd));
struct kmem_cache;
struct kmem_cache;
extern void filp_ctor(void * objp, struct kmem_cache *cachep, unsigned long cflags);
extern void filp_dtor(void * objp, struct kmem_cache *cachep, unsigned long dflags);


extern struct file ** alloc_fd_array(int);
extern struct file ** alloc_fd_array(int);
extern void free_fd_array(struct file **, int);
extern void free_fd_array(struct file **, int);
+1 −0
Original line number Original line Diff line number Diff line
@@ -35,6 +35,7 @@ struct files_stat_struct {
	int max_files;		/* tunable */
	int max_files;		/* tunable */
};
};
extern struct files_stat_struct files_stat;
extern struct files_stat_struct files_stat;
extern int get_max_files(void);


struct inodes_stat_t {
struct inodes_stat_t {
	int nr_inodes;
	int nr_inodes;
+4 −1
Original line number Original line Diff line number Diff line
@@ -50,6 +50,9 @@
#include <asm/uaccess.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/processor.h>


extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
                     void __user *buffer, size_t *lenp, loff_t *ppos);

#if defined(CONFIG_SYSCTL)
#if defined(CONFIG_SYSCTL)


/* External variables not in a header file. */
/* External variables not in a header file. */
@@ -943,7 +946,7 @@ static ctl_table fs_table[] = {
		.data		= &files_stat,
		.data		= &files_stat,
		.maxlen		= 3*sizeof(int),
		.maxlen		= 3*sizeof(int),
		.mode		= 0444,
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
		.proc_handler	= &proc_nr_files,
	},
	},
	{
	{
		.ctl_name	= FS_MAXFILE,
		.ctl_name	= FS_MAXFILE,
Loading