Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 393d1d1d authored by Dr. Tilmann Bubeck's avatar Dr. Tilmann Bubeck Committed by Theodore Ts'o
Browse files

ext4: implementation of a new ioctl called EXT4_IOC_SWAP_BOOT



Add a new ioctl, EXT4_IOC_SWAP_BOOT which swaps i_blocks and
associated attributes (like i_blocks, i_size, i_flags, ...) from the
specified inode with inode EXT4_BOOT_LOADER_INO (#5). This is
typically used to store a boot loader in a secure part of the
filesystem, where it can't be changed by a normal user by accident.
The data blocks of the previous boot loader will be associated with
the given inode.

This usercode program is a simple example of the usage:

int main(int argc, char *argv[])
{
  int fd;
  int err;

  if ( argc != 2 ) {
    printf("usage: ext4-swap-boot-inode FILE-TO-SWAP\n");
    exit(1);
  }

  fd = open(argv[1], O_WRONLY);
  if ( fd < 0 ) {
    perror("open");
    exit(1);
  }

  err = ioctl(fd, EXT4_IOC_SWAP_BOOT);
  if ( err < 0 ) {
    perror("ioctl");
    exit(1);
  }

  close(fd);
  exit(0);
}

[ Modified by Theodore Ts'o to fix a number of bugs in the original code.]

Signed-off-by: default avatarDr. Tilmann Bubeck <t.bubeck@reinform.de>
Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
parent f78ee70d
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -587,6 +587,16 @@ Table of Ext4 specific ioctls
			      bitmaps and inode table, the userspace tool thus
			      just passes the new number of blocks.

EXT4_IOC_SWAP_BOOT	      Swap i_blocks and associated attributes
			      (like i_blocks, i_size, i_flags, ...) from
			      the specified inode with inode
			      EXT4_BOOT_LOADER_INO (#5). This is typically
			      used to store a boot loader in a secure part of
			      the filesystem, where it can't be changed by a
			      normal user by accident.
			      The data blocks of the previous boot loader
			      will be associated with the given inode.

..............................................................................

References
+8 −0
Original line number Diff line number Diff line
@@ -616,6 +616,7 @@ enum {
#define EXT4_IOC_ALLOC_DA_BLKS		_IO('f', 12)
#define EXT4_IOC_MOVE_EXT		_IOWR('f', 15, struct move_extent)
#define EXT4_IOC_RESIZE_FS		_IOW('f', 16, __u64)
#define EXT4_IOC_SWAP_BOOT		_IO('f', 17)

#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
@@ -1341,6 +1342,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
	return ino == EXT4_ROOT_INO ||
		ino == EXT4_USR_QUOTA_INO ||
		ino == EXT4_GRP_QUOTA_INO ||
		ino == EXT4_BOOT_LOADER_INO ||
		ino == EXT4_JOURNAL_INO ||
		ino == EXT4_RESIZE_INO ||
		(ino >= EXT4_FIRST_INO(sb) &&
@@ -2624,6 +2626,12 @@ extern int ext4_ind_migrate(struct inode *inode);


/* move_extent.c */
extern void ext4_double_down_write_data_sem(struct inode *first,
					    struct inode *second);
extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
					  struct inode *donor_inode);
void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2);
void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2);
extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
			     __u64 start_orig, __u64 start_donor,
			     __u64 len, __u64 *moved_len);
+8 −3
Original line number Diff line number Diff line
@@ -4191,8 +4191,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
	 * NeilBrown 1999oct15
	 */
	if (inode->i_nlink == 0) {
		if (inode->i_mode == 0 ||
		    !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
		if ((inode->i_mode == 0 ||
		     !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
		    ino != EXT4_BOOT_LOADER_INO) {
			/* this inode is deleted */
			ret = -ESTALE;
			goto bad_inode;
@@ -4200,7 +4201,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
		/* The only unlinked inodes we let through here have
		 * valid i_mode and are being read by the orphan
		 * recovery code: that's fine, we're about to complete
		 * the process of deleting those. */
		 * the process of deleting those.
		 * OR it is the EXT4_BOOT_LOADER_INO which is
		 * not initialized on a new filesystem. */
	}
	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
	inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
@@ -4320,6 +4323,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
		else
			init_special_inode(inode, inode->i_mode,
			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
	} else if (ino == EXT4_BOOT_LOADER_INO) {
		make_bad_inode(inode);
	} else {
		ret = -EIO;
		EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
+197 −0
Original line number Diff line number Diff line
@@ -17,9 +17,201 @@
#include <asm/uaccess.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "ext4_extents.h"

#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)

/**
 * Swap memory between @a and @b for @len bytes.
 *
 * @a:          pointer to first memory area
 * @b:          pointer to second memory area
 * @len:        number of bytes to swap
 *
 */
static void memswap(void *a, void *b, size_t len)
{
	unsigned char *ap, *bp;
	unsigned char tmp;

	ap = (unsigned char *)a;
	bp = (unsigned char *)b;
	while (len-- > 0) {
		tmp = *ap;
		*ap = *bp;
		*bp = tmp;
		ap++;
		bp++;
	}
}

/**
 * Swap i_data and associated attributes between @inode1 and @inode2.
 * This function is used for the primary swap between inode1 and inode2
 * and also to revert this primary swap in case of errors.
 *
 * Therefore you have to make sure, that calling this method twice
 * will revert all changes.
 *
 * @inode1:     pointer to first inode
 * @inode2:     pointer to second inode
 */
static void swap_inode_data(struct inode *inode1, struct inode *inode2)
{
	loff_t isize;
	struct ext4_inode_info *ei1;
	struct ext4_inode_info *ei2;

	ei1 = EXT4_I(inode1);
	ei2 = EXT4_I(inode2);

	memswap(&inode1->i_flags, &inode2->i_flags, sizeof(inode1->i_flags));
	memswap(&inode1->i_version, &inode2->i_version,
		  sizeof(inode1->i_version));
	memswap(&inode1->i_blocks, &inode2->i_blocks,
		  sizeof(inode1->i_blocks));
	memswap(&inode1->i_bytes, &inode2->i_bytes, sizeof(inode1->i_bytes));
	memswap(&inode1->i_atime, &inode2->i_atime, sizeof(inode1->i_atime));
	memswap(&inode1->i_mtime, &inode2->i_mtime, sizeof(inode1->i_mtime));

	memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
	memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags));
	memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
	memswap(&ei1->i_es_tree, &ei2->i_es_tree, sizeof(ei1->i_es_tree));
	memswap(&ei1->i_es_lru_nr, &ei2->i_es_lru_nr, sizeof(ei1->i_es_lru_nr));

	isize = i_size_read(inode1);
	i_size_write(inode1, i_size_read(inode2));
	i_size_write(inode2, isize);
}

/**
 * Swap the information from the given @inode and the inode
 * EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other
 * important fields of the inodes.
 *
 * @sb:         the super block of the filesystem
 * @inode:      the inode to swap with EXT4_BOOT_LOADER_INO
 *
 */
static long swap_inode_boot_loader(struct super_block *sb,
				struct inode *inode)
{
	handle_t *handle;
	int err;
	struct inode *inode_bl;
	struct ext4_inode_info *ei;
	struct ext4_inode_info *ei_bl;
	struct ext4_sb_info *sbi;

	if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) {
		err = -EINVAL;
		goto swap_boot_out;
	}

	if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) {
		err = -EPERM;
		goto swap_boot_out;
	}

	sbi = EXT4_SB(sb);
	ei = EXT4_I(inode);

	inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO);
	if (IS_ERR(inode_bl)) {
		err = PTR_ERR(inode_bl);
		goto swap_boot_out;
	}
	ei_bl = EXT4_I(inode_bl);

	filemap_flush(inode->i_mapping);
	filemap_flush(inode_bl->i_mapping);

	/* Protect orig inodes against a truncate and make sure,
	 * that only 1 swap_inode_boot_loader is running. */
	ext4_inode_double_lock(inode, inode_bl);

	truncate_inode_pages(&inode->i_data, 0);
	truncate_inode_pages(&inode_bl->i_data, 0);

	/* Wait for all existing dio workers */
	ext4_inode_block_unlocked_dio(inode);
	ext4_inode_block_unlocked_dio(inode_bl);
	inode_dio_wait(inode);
	inode_dio_wait(inode_bl);

	handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2);
	if (IS_ERR(handle)) {
		err = -EINVAL;
		goto swap_boot_out;
	}

	/* Protect extent tree against block allocations via delalloc */
	ext4_double_down_write_data_sem(inode, inode_bl);

	if (inode_bl->i_nlink == 0) {
		/* this inode has never been used as a BOOT_LOADER */
		set_nlink(inode_bl, 1);
		i_uid_write(inode_bl, 0);
		i_gid_write(inode_bl, 0);
		inode_bl->i_flags = 0;
		ei_bl->i_flags = 0;
		inode_bl->i_version = 1;
		i_size_write(inode_bl, 0);
		inode_bl->i_mode = S_IFREG;
		if (EXT4_HAS_INCOMPAT_FEATURE(sb,
					      EXT4_FEATURE_INCOMPAT_EXTENTS)) {
			ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS);
			ext4_ext_tree_init(handle, inode_bl);
		} else
			memset(ei_bl->i_data, 0, sizeof(ei_bl->i_data));
	}

	swap_inode_data(inode, inode_bl);

	inode->i_ctime = inode_bl->i_ctime = ext4_current_time(inode);

	spin_lock(&sbi->s_next_gen_lock);
	inode->i_generation = sbi->s_next_generation++;
	inode_bl->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);

	ext4_discard_preallocations(inode);

	err = ext4_mark_inode_dirty(handle, inode);
	if (err < 0) {
		ext4_warning(inode->i_sb,
			"couldn't mark inode #%lu dirty (err %d)",
			inode->i_ino, err);
		/* Revert all changes: */
		swap_inode_data(inode, inode_bl);
	} else {
		err = ext4_mark_inode_dirty(handle, inode_bl);
		if (err < 0) {
			ext4_warning(inode_bl->i_sb,
				"couldn't mark inode #%lu dirty (err %d)",
				inode_bl->i_ino, err);
			/* Revert all changes: */
			swap_inode_data(inode, inode_bl);
			ext4_mark_inode_dirty(handle, inode);
		}
	}

	ext4_journal_stop(handle);

	ext4_double_up_write_data_sem(inode, inode_bl);

	ext4_inode_resume_unlocked_dio(inode);
	ext4_inode_resume_unlocked_dio(inode_bl);

	ext4_inode_double_unlock(inode, inode_bl);

	iput(inode_bl);

swap_boot_out:
	return err;
}

long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
@@ -353,6 +545,11 @@ group_add_out:
		return err;
	}

	case EXT4_IOC_SWAP_BOOT:
		if (!(filp->f_mode & FMODE_WRITE))
			return -EBADF;
		return swap_inode_boot_loader(sb, inode);

	case EXT4_IOC_RESIZE_FS: {
		ext4_fsblk_t n_blocks_count;
		struct super_block *sb = inode->i_sb;
+25 −23
Original line number Diff line number Diff line
@@ -144,12 +144,13 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
}

/**
 * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
 * ext4_double_down_write_data_sem - Acquire two inodes' write lock
 *                                   of i_data_sem
 *
 * Acquire write lock of i_data_sem of the two inodes
 */
static void
double_down_write_data_sem(struct inode *first, struct inode *second)
void
ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
{
	if (first < second) {
		down_write(&EXT4_I(first)->i_data_sem);
@@ -162,14 +163,15 @@ double_down_write_data_sem(struct inode *first, struct inode *second)
}

/**
 * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
 * ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem
 *
 * @orig_inode:		original inode structure to be released its lock first
 * @donor_inode:	donor inode structure to be released its lock second
 * Release write lock of i_data_sem of two inodes (orig and donor).
 */
static void
double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
void
ext4_double_up_write_data_sem(struct inode *orig_inode,
			      struct inode *donor_inode)
{
	up_write(&EXT4_I(orig_inode)->i_data_sem);
	up_write(&EXT4_I(donor_inode)->i_data_sem);
@@ -976,7 +978,7 @@ again:
	 * necessary, just swap data blocks between orig and donor.
	 */
	if (uninit) {
		double_down_write_data_sem(orig_inode, donor_inode);
		ext4_double_down_write_data_sem(orig_inode, donor_inode);
		/* If any of extents in range became initialized we have to
		 * fallback to data copying */
		uninit = mext_check_coverage(orig_inode, orig_blk_offset,
@@ -990,7 +992,7 @@ again:
			goto drop_data_sem;

		if (!uninit) {
			double_up_write_data_sem(orig_inode, donor_inode);
			ext4_double_up_write_data_sem(orig_inode, donor_inode);
			goto data_copy;
		}
		if ((page_has_private(pagep[0]) &&
@@ -1004,7 +1006,7 @@ again:
						donor_inode, orig_blk_offset,
						block_len_in_page, err);
	drop_data_sem:
		double_up_write_data_sem(orig_inode, donor_inode);
		ext4_double_up_write_data_sem(orig_inode, donor_inode);
		goto unlock_pages;
	}
data_copy:
@@ -1065,11 +1067,11 @@ repair_branches:
	 * Extents are swapped already, but we are not able to copy data.
	 * Try to swap extents to it's original places
	 */
	double_down_write_data_sem(orig_inode, donor_inode);
	ext4_double_down_write_data_sem(orig_inode, donor_inode);
	replaced_count = mext_replace_branches(handle, donor_inode, orig_inode,
					       orig_blk_offset,
					       block_len_in_page, &err2);
	double_up_write_data_sem(orig_inode, donor_inode);
	ext4_double_up_write_data_sem(orig_inode, donor_inode);
	if (replaced_count != block_len_in_page) {
		EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
				       "Unable to copy data block,"
@@ -1209,15 +1211,15 @@ mext_check_arguments(struct inode *orig_inode,
}

/**
 * mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
 * ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
 *
 * @inode1:	the inode structure
 * @inode2:	the inode structure
 *
 * Lock two inodes' i_mutex
 */
static void
mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
void
ext4_inode_double_lock(struct inode *inode1, struct inode *inode2)
{
	BUG_ON(inode1 == inode2);
	if (inode1 < inode2) {
@@ -1230,15 +1232,15 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
}

/**
 * mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
 * ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
 *
 * @inode1:     the inode that is released first
 * @inode2:     the inode that is released second
 *
 */

static void
mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
void
ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2)
{
	mutex_unlock(&inode1->i_mutex);
	mutex_unlock(&inode2->i_mutex);
@@ -1333,7 +1335,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
		return -EINVAL;
	}
	/* Protect orig and donor inodes against a truncate */
	mext_inode_double_lock(orig_inode, donor_inode);
	ext4_inode_double_lock(orig_inode, donor_inode);

	/* Wait for all existing dio workers */
	ext4_inode_block_unlocked_dio(orig_inode);
@@ -1342,7 +1344,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
	inode_dio_wait(donor_inode);

	/* Protect extent tree against block allocations via delalloc */
	double_down_write_data_sem(orig_inode, donor_inode);
	ext4_double_down_write_data_sem(orig_inode, donor_inode);
	/* Check the filesystem environment whether move_extent can be done */
	ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
				    donor_start, &len);
@@ -1466,7 +1468,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
		 * b. racing with ->readpage, ->write_begin, and ext4_get_block
		 *    in move_extent_per_page
		 */
		double_up_write_data_sem(orig_inode, donor_inode);
		ext4_double_up_write_data_sem(orig_inode, donor_inode);

		while (orig_page_offset <= seq_end_page) {

@@ -1500,7 +1502,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
				block_len_in_page = rest_blocks;
		}

		double_down_write_data_sem(orig_inode, donor_inode);
		ext4_double_down_write_data_sem(orig_inode, donor_inode);
		if (ret < 0)
			break;

@@ -1538,10 +1540,10 @@ out:
		ext4_ext_drop_refs(holecheck_path);
		kfree(holecheck_path);
	}
	double_up_write_data_sem(orig_inode, donor_inode);
	ext4_double_up_write_data_sem(orig_inode, donor_inode);
	ext4_inode_resume_unlocked_dio(orig_inode);
	ext4_inode_resume_unlocked_dio(donor_inode);
	mext_inode_double_unlock(orig_inode, donor_inode);
	ext4_inode_double_unlock(orig_inode, donor_inode);

	return ret;
}