Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b517bea1, authored by Zach Brown, committed by Linus Torvalds
Browse files

[PATCH] 64-bit jbd2 core



Here is the patch to JBD to handle 64-bit block numbers, originally from Zach
Brown.  This patch is useful only after support for 64-bit block numbers has
been added to the filesystem.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent d0d856e8
Loading
Loading
Loading
Loading
+13 −4
Original line number Original line Diff line number Diff line
@@ -271,6 +271,14 @@ static void journal_submit_data_buffers(journal_t *journal,
	journal_do_submit_data(wbuf, bufs);
	journal_do_submit_data(wbuf, bufs);
}
}


/*
 * Record @block in the journal tag @tag, stored big-endian.
 *
 * The low 32 bits always go into t_blocknr.  t_blocknr_high is written
 * only when @tag_bytes is larger than JBD_TAG_SIZE32, i.e. when the tag
 * layout in use actually contains that field.
 */
static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
				   sector_t block)
{
	tag->t_blocknr = cpu_to_be32(block & (u32)~0);

	/* Short tag: there is no high word to fill in. */
	if (tag_bytes <= JBD_TAG_SIZE32)
		return;

	/*
	 * Shift in two steps (31, then 1) rather than by 32 at once, so the
	 * expression stays well defined if sector_t is only 32 bits wide.
	 */
	tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
}

/*
/*
 * jbd2_journal_commit_transaction
 * jbd2_journal_commit_transaction
 *
 *
@@ -293,6 +301,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
	int first_tag = 0;
	int first_tag = 0;
	int tag_flag;
	int tag_flag;
	int i;
	int i;
	int tag_bytes = journal_tag_bytes(journal);


	/*
	/*
	 * First job: lock down the current transaction and wait for
	 * First job: lock down the current transaction and wait for
@@ -597,10 +606,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
			tag_flag |= JBD2_FLAG_SAME_UUID;
			tag_flag |= JBD2_FLAG_SAME_UUID;


		tag = (journal_block_tag_t *) tagp;
		tag = (journal_block_tag_t *) tagp;
		tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
		write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
		tag->t_flags = cpu_to_be32(tag_flag);
		tag->t_flags = cpu_to_be32(tag_flag);
		tagp += sizeof(journal_block_tag_t);
		tagp += tag_bytes;
		space_left -= sizeof(journal_block_tag_t);
		space_left -= tag_bytes;


		if (first_tag) {
		if (first_tag) {
			memcpy (tagp, journal->j_uuid, 16);
			memcpy (tagp, journal->j_uuid, 16);
@@ -614,7 +623,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)


		if (bufs == journal->j_wbufsize ||
		if (bufs == journal->j_wbufsize ||
		    commit_transaction->t_buffers == NULL ||
		    commit_transaction->t_buffers == NULL ||
		    space_left < sizeof(journal_block_tag_t) + 16) {
		    space_left < tag_bytes + 16) {


			jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
			jbd_debug(4, "JBD: Submit %d IOs\n", bufs);


+11 −0
Original line number Original line Diff line number Diff line
@@ -1609,6 +1609,17 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
	return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
	return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
}
}


/*
 * Helper for dealing with 32 or 64bit block numbers: return the size in
 * bytes of one block tag for this journal.  That is JBD_TAG_SIZE64 when
 * the journal has the INCOMPAT_64BIT feature set, JBD_TAG_SIZE32 otherwise.
 */
size_t journal_tag_bytes(journal_t *journal)
{
	return JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT) ?
		JBD_TAG_SIZE64 : JBD_TAG_SIZE32;
}

/*
/*
 * Simple support for retrying memory allocations.  Introduced to help to
 * Simple support for retrying memory allocations.  Introduced to help to
 * debug different VM deadlock avoidance strategies.
 * debug different VM deadlock avoidance strategies.
+30 −13
Original line number Original line Diff line number Diff line
@@ -178,19 +178,20 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
 * Count the number of in-use tags in a journal descriptor block.
 * Count the number of in-use tags in a journal descriptor block.
 */
 */


static int count_tags(struct buffer_head *bh, int size)
static int count_tags(journal_t *journal, struct buffer_head *bh)
{
{
	char *			tagp;
	char *			tagp;
	journal_block_tag_t *	tag;
	journal_block_tag_t *	tag;
	int			nr = 0;
	int			nr = 0, size = journal->j_blocksize;
	int			tag_bytes = journal_tag_bytes(journal);


	tagp = &bh->b_data[sizeof(journal_header_t)];
	tagp = &bh->b_data[sizeof(journal_header_t)];


	while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) {
	while ((tagp - bh->b_data + tag_bytes) <= size) {
		tag = (journal_block_tag_t *) tagp;
		tag = (journal_block_tag_t *) tagp;


		nr++;
		nr++;
		tagp += sizeof(journal_block_tag_t);
		tagp += tag_bytes;
		if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
		if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
			tagp += 16;
			tagp += 16;


@@ -307,6 +308,14 @@ int jbd2_journal_skip_recovery(journal_t *journal)
	return err;
	return err;
}
}


/*
 * Decode the block number held in the journal tag @tag.
 *
 * t_blocknr supplies the low 32 bits.  When @tag_bytes is larger than
 * JBD_TAG_SIZE32 the tag also carries t_blocknr_high, which is merged in
 * as bits 32-63.  Both fields are converted from big-endian.
 */
static inline sector_t read_tag_block(int tag_bytes, journal_block_tag_t *tag)
{
	sector_t blocknr = be32_to_cpu(tag->t_blocknr);

	if (tag_bytes > JBD_TAG_SIZE32) {
		/* Widen to 64 bits before shifting so the high word is kept. */
		u64 upper = be32_to_cpu(tag->t_blocknr_high);

		blocknr |= upper << 32;
	}

	return blocknr;
}

static int do_one_pass(journal_t *journal,
static int do_one_pass(journal_t *journal,
			struct recovery_info *info, enum passtype pass)
			struct recovery_info *info, enum passtype pass)
{
{
@@ -318,11 +327,12 @@ static int do_one_pass(journal_t *journal,
	struct buffer_head *	bh;
	struct buffer_head *	bh;
	unsigned int		sequence;
	unsigned int		sequence;
	int			blocktype;
	int			blocktype;
	int			tag_bytes = journal_tag_bytes(journal);


	/* Precompute the maximum metadata descriptors in a descriptor block */
	/* Precompute the maximum metadata descriptors in a descriptor block */
	int			MAX_BLOCKS_PER_DESC;
	int			MAX_BLOCKS_PER_DESC;
	MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
	MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
			       / sizeof(journal_block_tag_t));
			       / tag_bytes);


	/*
	/*
	 * First thing is to establish what we expect to find in the log
	 * First thing is to establish what we expect to find in the log
@@ -412,8 +422,7 @@ static int do_one_pass(journal_t *journal,
			 * in pass REPLAY; otherwise, just skip over the
			 * in pass REPLAY; otherwise, just skip over the
			 * blocks it describes. */
			 * blocks it describes. */
			if (pass != PASS_REPLAY) {
			if (pass != PASS_REPLAY) {
				next_log_block +=
				next_log_block += count_tags(journal, bh);
					count_tags(bh, journal->j_blocksize);
				wrap(journal, next_log_block);
				wrap(journal, next_log_block);
				brelse(bh);
				brelse(bh);
				continue;
				continue;
@@ -424,7 +433,7 @@ static int do_one_pass(journal_t *journal,
			 * getting done here! */
			 * getting done here! */


			tagp = &bh->b_data[sizeof(journal_header_t)];
			tagp = &bh->b_data[sizeof(journal_header_t)];
			while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
			while ((tagp - bh->b_data + tag_bytes)
			       <= journal->j_blocksize) {
			       <= journal->j_blocksize) {
				unsigned long io_block;
				unsigned long io_block;


@@ -446,7 +455,8 @@ static int do_one_pass(journal_t *journal,
					unsigned long blocknr;
					unsigned long blocknr;


					J_ASSERT(obh != NULL);
					J_ASSERT(obh != NULL);
					blocknr = be32_to_cpu(tag->t_blocknr);
					blocknr = read_tag_block(tag_bytes,
								 tag);


					/* If the block has been
					/* If the block has been
					 * revoked, then we're all done
					 * revoked, then we're all done
@@ -494,7 +504,7 @@ static int do_one_pass(journal_t *journal,
				}
				}


			skip_write:
			skip_write:
				tagp += sizeof(journal_block_tag_t);
				tagp += tag_bytes;
				if (!(flags & JBD2_FLAG_SAME_UUID))
				if (!(flags & JBD2_FLAG_SAME_UUID))
					tagp += 16;
					tagp += 16;


@@ -572,17 +582,24 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
{
{
	jbd2_journal_revoke_header_t *header;
	jbd2_journal_revoke_header_t *header;
	int offset, max;
	int offset, max;
	int record_len = 4;


	header = (jbd2_journal_revoke_header_t *) bh->b_data;
	header = (jbd2_journal_revoke_header_t *) bh->b_data;
	offset = sizeof(jbd2_journal_revoke_header_t);
	offset = sizeof(jbd2_journal_revoke_header_t);
	max = be32_to_cpu(header->r_count);
	max = be32_to_cpu(header->r_count);


	while (offset < max) {
	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
		record_len = 8;

	while (offset + record_len <= max) {
		unsigned long blocknr;
		unsigned long blocknr;
		int err;
		int err;


		if (record_len == 4)
			blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
			blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
		offset += 4;
		else
			blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
		offset += record_len;
		err = jbd2_journal_set_revoke(journal, blocknr, sequence);
		err = jbd2_journal_set_revoke(journal, blocknr, sequence);
		if (err)
		if (err)
			return err;
			return err;
+11 −3
Original line number Original line Diff line number Diff line
@@ -584,9 +584,17 @@ static void write_one_revoke_record(journal_t *journal,
		*descriptorp = descriptor;
		*descriptorp = descriptor;
	}
	}


	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) {
		* ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) =
			cpu_to_be64(record->blocknr);
		offset += 8;

	} else {
		* ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
		* ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
			cpu_to_be32(record->blocknr);
			cpu_to_be32(record->blocknr);
		offset += 4;
		offset += 4;
	}

	*offsetp = offset;
	*offsetp = offset;
}
}


+12 −2
Original line number Original line Diff line number Diff line
@@ -150,14 +150,21 @@ typedef struct journal_header_s




/*
/*
 * The block tag: used to describe a single buffer in the journal
 * The block tag: used to describe a single buffer in the journal.
 * t_blocknr_high is only used if INCOMPAT_64BIT is set, so this
 * raw struct shouldn't be used for pointer math or sizeof() - use
 * journal_tag_bytes(journal) instead to compute this.
 */
 */
typedef struct journal_block_tag_s
typedef struct journal_block_tag_s
{
{
	__be32		t_blocknr;	/* The on-disk block number */
	__be32		t_blocknr;	/* The on-disk block number */
	__be32		t_flags;	/* See below */
	__be32		t_flags;	/* See below */
	__be32		t_blocknr_high; /* most-significant high 32bits. */
} journal_block_tag_t;
} journal_block_tag_t;


/*
 * Byte sizes of the two tag layouts: the short form stops just before
 * t_blocknr_high, the long form is the entire journal_block_tag_t.
 */
#define JBD_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high))
#define JBD_TAG_SIZE64 (sizeof(journal_block_tag_t))

/*
/*
 * The revoke descriptor: used on disk to describe a series of blocks to
 * The revoke descriptor: used on disk to describe a series of blocks to
 * be revoked from the log
 * be revoked from the log
@@ -235,11 +242,13 @@ typedef struct journal_superblock_s
	 ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
	 ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))


#define JBD2_FEATURE_INCOMPAT_REVOKE	0x00000001
#define JBD2_FEATURE_INCOMPAT_REVOKE	0x00000001
#define JBD2_FEATURE_INCOMPAT_64BIT	0x00000002


/* Features known to this kernel version: */
/* Features known to this kernel version: */
#define JBD2_KNOWN_COMPAT_FEATURES	0
#define JBD2_KNOWN_COMPAT_FEATURES	0
#define JBD2_KNOWN_ROCOMPAT_FEATURES	0
#define JBD2_KNOWN_ROCOMPAT_FEATURES	0
#define JBD2_KNOWN_INCOMPAT_FEATURES	JBD2_FEATURE_INCOMPAT_REVOKE
#define JBD2_KNOWN_INCOMPAT_FEATURES	(JBD2_FEATURE_INCOMPAT_REVOKE | \
					 JBD2_FEATURE_INCOMPAT_64BIT)


#ifdef __KERNEL__
#ifdef __KERNEL__


@@ -1052,6 +1061,7 @@ static inline int tid_geq(tid_t x, tid_t y)
}
}


extern int jbd2_journal_blocks_per_page(struct inode *inode);
extern int jbd2_journal_blocks_per_page(struct inode *inode);
extern size_t journal_tag_bytes(journal_t *journal);


/*
/*
 * Return the minimum number of blocks which must be free in the journal
 * Return the minimum number of blocks which must be free in the journal