Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7339ff83 authored by Robin Holt, committed by Linus Torvalds
Browse files

[PATCH] Add tmpfs options for memory placement policies



Anything that writes into a tmpfs filesystem is liable to disproportionately
decrease the available memory on a particular node.  Since there's no telling
what sort of application (e.g.  dd/cp/cat) might be dropping large files
there, this lets the admin choose the appropriate default behavior for their
site's situation.

Introduce a tmpfs mount option which allows specifying a memory policy and
a second option to specify the nodelist for that policy.  With the default
policy, tmpfs will behave as it does today.  This patch adds support for
preferred, bind, and interleave policies.

The default policy will cause pages to be added to tmpfs files on the node
which is doing the writing.  Some jobs expect a single process to create
and manage the tmpfs files.  This results in a node which has a
significantly reduced number of free pages.

With this patch, the administrator can specify the policy and nodes for
that policy where they would prefer allocations.

This patch was originally written by Brent Casavant and Hugh Dickins.  I
added support for the bind and preferred policies and the mpol_nodelist
mount option.

Signed-off-by: Brent Casavant <bcasavan@sgi.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 852cf918
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -78,6 +78,18 @@ use up all the memory on the machine; but enhances the scalability of
that instance in a system with many cpus making intensive use of it.


tmpfs has a mount option to set the NUMA memory allocation policy for
all files in that instance:
mpol=interleave		prefers to allocate memory from each node in turn
mpol=default		prefers to allocate memory from the local node
mpol=bind		allocates only from the nodes in mpol_nodelist
mpol=preferred		prefers to allocate from first node in mpol_nodelist

The following mount option is used in conjunction with mpol=interleave,
mpol=bind or mpol=preferred:
mpol_nodelist:	nodelist suitable for parsing with nodelist_parse.


To specify the initial root directory you can use the following mount
options:

+1 −1
Original line number Diff line number Diff line
@@ -402,7 +402,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
		inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		info = HUGETLBFS_I(inode);
		mpol_shared_policy_init(&info->policy);
		mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL);
		switch (mode & S_IFMT) {
		default:
			init_special_inode(inode, mode, dev);
+4 −7
Original line number Diff line number Diff line
@@ -132,12 +132,8 @@ struct shared_policy {
	spinlock_t lock;
};

/*
 * Put a shared policy into its initial state: info->root is reset to
 * RB_ROOT and info->lock is initialised.  No policy is installed.
 */
static inline void mpol_shared_policy_init(struct shared_policy *info)
{
	info->root = RB_ROOT;
	spin_lock_init(&info->lock);
}

/*
 * Initialise @info.  When @policy is not MPOL_DEFAULT, a mempolicy is
 * built from @policy and @nodes and installed on @info; if it cannot be
 * built, @info is left with no policy installed.
 */
void mpol_shared_policy_init(struct shared_policy *info, int policy,
				nodemask_t *nodes);
int mpol_set_shared_policy(struct shared_policy *info,
				struct vm_area_struct *vma,
				struct mempolicy *new);
@@ -211,7 +207,8 @@ static inline int mpol_set_shared_policy(struct shared_policy *info,
	return -EINVAL;
}

static inline void mpol_shared_policy_init(struct shared_policy *info)
static inline void mpol_shared_policy_init(struct shared_policy *info,
					int policy, nodemask_t *nodes)
{
}

+2 −0
Original line number Diff line number Diff line
@@ -26,6 +26,8 @@ struct shmem_sb_info {
	unsigned long free_blocks;  /* How many are left for allocation */
	unsigned long max_inodes;   /* How many inodes are allowed */
	unsigned long free_inodes;  /* How many are left for allocation */
	int policy;		    /* Default NUMA memory alloc policy */
	nodemask_t policy_nodes;    /* nodemask for preferred and bind */
	spinlock_t    stat_lock;
};

+24 −0
Original line number Diff line number Diff line
@@ -1359,6 +1359,30 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
	return 0;
}

/*
 * mpol_shared_policy_init - set up a shared policy, optionally pre-populated
 * @info:         shared policy to initialise
 * @policy:       policy mode; MPOL_DEFAULT leaves @info empty
 * @policy_nodes: node mask handed to mpol_new() together with @policy
 *
 * The tree root and lock of @info are always initialised.  For any mode
 * other than MPOL_DEFAULT a mempolicy is built and installed through a
 * throw-away vma.  If mpol_new() reports an error the function returns
 * silently, so @info simply behaves as MPOL_DEFAULT.
 */
void mpol_shared_policy_init(struct shared_policy *info, int policy,
				nodemask_t *policy_nodes)
{
	struct mempolicy *newpol;
	struct vm_area_struct pvma;	/* pseudo-vma, carries only the range */

	info->root = RB_ROOT;
	spin_lock_init(&info->lock);

	/* Nothing to install for the default policy. */
	if (policy == MPOL_DEFAULT)
		return;

	newpol = mpol_new(policy, policy_nodes);
	if (IS_ERR(newpol))
		return;		/* fall back to MPOL_DEFAULT */

	/*
	 * Zero everything, then stretch the range from 0 to TASK_SIZE so
	 * that the policy covers the entire file.
	 */
	memset(&pvma, 0, sizeof(pvma));
	pvma.vm_end = TASK_SIZE;

	/* The result of installing the policy is deliberately not checked. */
	mpol_set_shared_policy(info, &pvma, newpol);

	/*
	 * Release our handle on the policy.
	 * NOTE(review): presumably mpol_set_shared_policy() keeps its own
	 * copy or reference - confirm against its implementation.
	 */
	mpol_free(newpol);
}

int mpol_set_shared_policy(struct shared_policy *info,
			struct vm_area_struct *vma, struct mempolicy *npol)
{
Loading