Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 97ac7350 authored by Amit Arora's avatar Amit Arora Committed by Theodore Ts'o
Browse files

sys_fallocate() implementation on i386, x86_64 and powerpc



fallocate() is a new system call being proposed here which will allow
applications to preallocate space to any file(s) in a file system.
Each file system implementation that wants to use this feature will need
to support an inode operation called ->fallocate().
Applications can use this feature to avoid fragmentation to certain
level and thus get faster access speed. With preallocation, applications
also get a guarantee of space for particular file(s) - even if later the
the system becomes full.

Currently, glibc provides an interface called posix_fallocate() which
can be used for similar cause. Though this has the advantage of working
on all file systems, but it is quite slow (since it writes zeroes to
each block that has to be preallocated). Without a doubt, file systems
can do this more efficiently within the kernel, by implementing
the proposed fallocate() system call. It is expected that
posix_fallocate() will be modified to call this new system call first
and incase the kernel/filesystem does not implement it, it should fall
back to the current implementation of writing zeroes to the new blocks.
ToDos:
1. Implementation on other architectures (other than i386, x86_64,
   and ppc). Patches for s390(x) and ia64 are already available from
   previous posts, but it was decided that they should be added later
   once fallocate is in the mainline. Hence not including those patches
   in this take.
2. Changes to glibc,
   a) to support fallocate() system call
   b) to make posix_fallocate() and posix_fallocate64() call fallocate()

Signed-off-by: default avatarAmit Arora <aarora@in.ibm.com>
parent cb32da04
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -323,3 +323,4 @@ ENTRY(sys_call_table)
	.long sys_signalfd
	.long sys_timerfd
	.long sys_eventfd
	.long sys_fallocate
+7 −0
Original line number Diff line number Diff line
@@ -773,6 +773,13 @@ asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
	return sys_truncate(path, (high << 32) | low);
}

asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
				     u32 lenhi, u32 lenlo)
{
	return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
			     ((loff_t)lenhi << 32) | lenlo);
}

asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high,
				 unsigned long low)
{
+1 −0
Original line number Diff line number Diff line
@@ -719,4 +719,5 @@ ia32_sys_call_table:
	.quad compat_sys_signalfd
	.quad compat_sys_timerfd
	.quad sys_eventfd
	.quad sys32_fallocate
ia32_syscall_end:
+8 −0
Original line number Diff line number Diff line
@@ -879,3 +879,11 @@ asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
	return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
				len, advice);
}

asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
				unsigned offset_hi, unsigned len_lo,
				unsigned len_hi)
{
	return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
			     ((u64)len_hi << 32) | len_lo);
}
+59 −0
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@
#include <linux/syscalls.h>
#include <linux/rcupdate.h>
#include <linux/audit.h>
#include <linux/falloc.h>

int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
@@ -352,6 +353,64 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
}
#endif

asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len)
{
	struct file *file;
	struct inode *inode;
	long ret = -EINVAL;

	if (offset < 0 || len <= 0)
		goto out;

	/* Return error if mode is not supported */
	ret = -EOPNOTSUPP;
	if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
		goto out;

	ret = -EBADF;
	file = fget(fd);
	if (!file)
		goto out;
	if (!(file->f_mode & FMODE_WRITE))
		goto out_fput;
	/*
	 * Revalidate the write permissions, in case security policy has
	 * changed since the files were opened.
	 */
	ret = security_file_permission(file, MAY_WRITE);
	if (ret)
		goto out_fput;

	inode = file->f_path.dentry->d_inode;

	ret = -ESPIPE;
	if (S_ISFIFO(inode->i_mode))
		goto out_fput;

	ret = -ENODEV;
	/*
	 * Let individual file system decide if it supports preallocation
	 * for directories or not.
	 */
	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
		goto out_fput;

	ret = -EFBIG;
	/* Check for wrap through zero too */
	if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
		goto out_fput;

	if (inode->i_op && inode->i_op->fallocate)
		ret = inode->i_op->fallocate(inode, mode, offset, len);
	else
		ret = -ENOSYS;

out_fput:
	fput(file);
out:
	return ret;
}

/*
 * access() needs to use the real uid/gid, not the effective uid/gid.
 * We do this by temporarily clearing all FS-related capabilities and
Loading