Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9d7386aa authored by Paul Lawrence's avatar Paul Lawrence
Browse files

ANDROID: Incremental fs: Use simple compression in log buffer



Bug: 154342202
Test: incfs_test passes
Signed-off-by: default avatarPaul Lawrence <paullawrence@google.com>
Change-Id: Ibcc641dd92596018c9f10b5bc7bd0db2642a80c7
(cherry picked from commit b6b4a3a404ccd9c62347e27c4fc7883d776c2cbb)
parent a10905b7
Loading
Loading
Loading
Loading
+170 −140
Original line number Diff line number Diff line
@@ -34,8 +34,7 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb,
	mutex_init(&mi->mi_pending_reads_mutex);
	init_waitqueue_head(&mi->mi_pending_reads_notif_wq);
	init_waitqueue_head(&mi->mi_log.ml_notif_wq);
	rwlock_init(&mi->mi_log.rl_access_lock);
	spin_lock_init(&mi->mi_log.rl_logging_lock);
	spin_lock_init(&mi->mi_log.rl_lock);
	INIT_LIST_HEAD(&mi->mi_reads_list_head);

	error = incfs_realloc_mount_info(mi, options);
@@ -53,6 +52,7 @@ int incfs_realloc_mount_info(struct mount_info *mi,
			     struct mount_options *options)
{
	void *new_buffer = NULL;
	void *old_buffer;
	size_t new_buffer_size = 0;

	if (options->read_log_pages != mi->mi_options.read_log_pages) {
@@ -71,16 +71,18 @@ int incfs_realloc_mount_info(struct mount_info *mi,
				return -ENOMEM;
		}

		write_lock(&mi->mi_log.rl_access_lock);
		kfree(mi->mi_log.rl_ring_buf);
		WRITE_ONCE(mi->mi_log.rl_ring_buf, new_buffer);
		WRITE_ONCE(mi->mi_log.rl_size,
			   new_buffer_size / sizeof(*mi->mi_log.rl_ring_buf));
		log_state = READ_ONCE(mi->mi_log.rl_state);
		log_state.generation_id++;
		log_state.next_index = log_state.current_pass_no = 0;
		WRITE_ONCE(mi->mi_log.rl_state, log_state);
		write_unlock(&mi->mi_log.rl_access_lock);
		spin_lock(&mi->mi_log.rl_lock);
		old_buffer = mi->mi_log.rl_ring_buf;
		mi->mi_log.rl_ring_buf = new_buffer;
		mi->mi_log.rl_size = new_buffer_size;
		log_state = (struct read_log_state){
			.generation_id = mi->mi_log.rl_head.generation_id + 1,
		};
		mi->mi_log.rl_head = log_state;
		mi->mi_log.rl_tail = log_state;
		spin_unlock(&mi->mi_log.rl_lock);

		kfree(old_buffer);
	}

	mi->mi_options = *options;
@@ -246,36 +248,120 @@ static ssize_t decompress(struct mem_range src, struct mem_range dst)
	return result;
}

static void log_read_one_record(struct read_log *rl, struct read_log_state *rs)
{
	union log_record *record =
		(union log_record *)((u8 *)rl->rl_ring_buf + rs->next_offset);
	size_t record_size;

	switch (record->full_record.type) {
	case FULL:
		rs->base_record = record->full_record;
		record_size = sizeof(record->full_record);
		break;

	case SAME_FILE:
		rs->base_record.block_index =
			record->same_file_record.block_index;
		rs->base_record.absolute_ts_us +=
			record->same_file_record.relative_ts_us;
		record_size = sizeof(record->same_file_record);
		break;

	case SAME_FILE_NEXT_BLOCK:
		++rs->base_record.block_index;
		rs->base_record.absolute_ts_us +=
			record->same_file_next_block.relative_ts_us;
		record_size = sizeof(record->same_file_next_block);
		break;

	case SAME_FILE_NEXT_BLOCK_SHORT:
		++rs->base_record.block_index;
		rs->base_record.absolute_ts_us +=
			record->same_file_next_block_short.relative_ts_us;
		record_size = sizeof(record->same_file_next_block_short);
		break;
	}

	rs->next_offset += record_size;
	if (rs->next_offset > rl->rl_size - sizeof(*record)) {
		rs->next_offset = 0;
		++rs->current_pass_no;
	}
	++rs->current_record_no;
}

static void log_block_read(struct mount_info *mi, incfs_uuid_t *id,
			int block_index, bool timed_out)
			   int block_index)
{
	struct read_log *log = &mi->mi_log;
	struct read_log_state state;
	struct read_log_state *head, *tail;
	s64 now_us = ktime_to_us(ktime_get());
	int rl_size;
	struct read_log_record record = {
	s64 relative_us;
	union log_record record;
	size_t record_size;

	spin_lock(&log->rl_lock);
	if (log->rl_size == 0) {
		spin_unlock(&log->rl_lock);
		return;
	}

	head = &log->rl_head;
	tail = &log->rl_tail;
	relative_us = now_us - head->base_record.absolute_ts_us;

	if (memcmp(id, &head->base_record.file_id, sizeof(incfs_uuid_t)) ||
	    relative_us >= 1ll << 32) {
		record.full_record = (struct full_record){
			.type = FULL,
			.block_index = block_index,
			.file_id = *id,
			.absolute_ts_us = now_us,
		};
		record_size = sizeof(struct full_record);
	} else if (block_index != head->base_record.block_index + 1 ||
		   relative_us >= 1 << 30) {
		record.same_file_record = (struct same_file_record){
			.type = SAME_FILE,
			.block_index = block_index,
		.timed_out = timed_out,
		.timestamp_us = now_us
			.relative_ts_us = relative_us,
		};

	read_lock(&log->rl_access_lock);
	rl_size = READ_ONCE(log->rl_size);
	if (rl_size != 0) {
		spin_lock(&log->rl_logging_lock);
		state = READ_ONCE(log->rl_state);
		log->rl_ring_buf[state.next_index] = record;
		if (++state.next_index == rl_size) {
			state.next_index = 0;
			++state.current_pass_no;
		record_size = sizeof(struct same_file_record);
	} else if (relative_us >= 1 << 14) {
		record.same_file_next_block = (struct same_file_next_block){
			.type = SAME_FILE_NEXT_BLOCK,
			.relative_ts_us = relative_us,
		};
		record_size = sizeof(struct same_file_next_block);
	} else {
		record.same_file_next_block_short =
			(struct same_file_next_block_short){
				.type = SAME_FILE_NEXT_BLOCK_SHORT,
				.relative_ts_us = relative_us,
			};
		record_size = sizeof(struct same_file_next_block_short);
	}
		WRITE_ONCE(log->rl_state, state);
		spin_unlock(&log->rl_logging_lock);

	head->base_record.file_id = *id;
	head->base_record.block_index = block_index;
	head->base_record.absolute_ts_us = now_us;

	/* Advance tail beyond area we are going to overwrite */
	while (tail->current_pass_no < head->current_pass_no &&
	       tail->next_offset < head->next_offset + record_size)
		log_read_one_record(log, tail);

	memcpy(((u8 *)log->rl_ring_buf) + head->next_offset, &record,
	       record_size);
	head->next_offset += record_size;
	if (head->next_offset > log->rl_size - sizeof(record)) {
		head->next_offset = 0;
		++head->current_pass_no;
	}
	read_unlock(&log->rl_access_lock);
	++head->current_record_no;

	if (rl_size != 0)
	spin_unlock(&log->rl_lock);
	wake_up_all(&log->ml_notif_wq);
}

@@ -708,8 +794,7 @@ static int wait_for_data_block(struct data_file *df, int block_index,
	mi = df->df_mount_info;

	if (timeout_ms == 0) {
		log_block_read(mi, &df->df_id, block_index,
			       true /*timed out*/);
		log_block_read(mi, &df->df_id, block_index);
		return -ETIME;
	}

@@ -728,8 +813,7 @@ static int wait_for_data_block(struct data_file *df, int block_index,

	if (wait_res == 0) {
		/* Wait has timed out */
		log_block_read(mi, &df->df_id, block_index,
			       true /*timed out*/);
		log_block_read(mi, &df->df_id, block_index);
		return -ETIME;
	}
	if (wait_res < 0) {
@@ -825,7 +909,7 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df,
	}

	if (result >= 0)
		log_block_read(mi, &df->df_id, index, false /*timed out*/);
		log_block_read(mi, &df->df_id, index);

out:
	return result;
@@ -1195,49 +1279,29 @@ struct read_log_state incfs_get_log_state(struct mount_info *mi)
	struct read_log *log = &mi->mi_log;
	struct read_log_state result;

	read_lock(&log->rl_access_lock);
	spin_lock(&log->rl_logging_lock);
	result = READ_ONCE(log->rl_state);
	spin_unlock(&log->rl_logging_lock);
	read_unlock(&log->rl_access_lock);
	spin_lock(&log->rl_lock);
	result = log->rl_head;
	spin_unlock(&log->rl_lock);
	return result;
}

static u64 calc_record_count(const struct read_log_state *state, int rl_size)
{
	return state->current_pass_no * (u64)rl_size + state->next_index;
}

int incfs_get_uncollected_logs_count(struct mount_info *mi,
				     struct read_log_state state)
				     const struct read_log_state *state)
{
	struct read_log *log = &mi->mi_log;
	struct read_log_state rl_state;
	int rl_size;
	u64 count;

	read_lock(&log->rl_access_lock);
	rl_size = READ_ONCE(log->rl_size);
	spin_lock(&log->rl_logging_lock);
	rl_state = READ_ONCE(log->rl_state);
	spin_unlock(&log->rl_logging_lock);
	read_unlock(&log->rl_access_lock);

	count = calc_record_count(&rl_state, rl_size);
	if (rl_state.generation_id == state.generation_id)
		count -= calc_record_count(&state, rl_size);
	return min_t(int, count, rl_size);
}

static void fill_pending_read_from_log_record(
	struct incfs_pending_read_info *dest, const struct read_log_record *src,
	struct read_log_state *state, u64 log_size)
{
	dest->file_id = src->file_id;
	dest->block_index = src->block_index;
	dest->serial_number =
		state->current_pass_no * log_size + state->next_index;
	dest->timestamp_us = src->timestamp_us;
	u32 generation;
	u64 head_no, tail_no;

	spin_lock(&log->rl_lock);
	tail_no = log->rl_tail.current_record_no;
	head_no = log->rl_head.current_record_no;
	generation = log->rl_head.generation_id;
	spin_unlock(&log->rl_lock);

	if (generation != state->generation_id)
		return head_no - tail_no;
	else
		return head_no - max_t(u64, tail_no, state->current_record_no);
}

int incfs_collect_logged_reads(struct mount_info *mi,
@@ -1245,82 +1309,48 @@ int incfs_collect_logged_reads(struct mount_info *mi,
			       struct incfs_pending_read_info *reads,
			       int reads_size)
{
	struct read_log *log = &mi->mi_log;
	struct read_log_state live_state;
	int dst_idx;
	int rl_size;
	int result = 0;
	u64 read_count;
	u64 written_count;

	read_lock(&log->rl_access_lock);
	struct read_log *log = &mi->mi_log;
	struct read_log_state *head, *tail;

	rl_size = READ_ONCE(log->rl_size);
	spin_lock(&log->rl_logging_lock);
	live_state = READ_ONCE(log->rl_state);
	spin_unlock(&log->rl_logging_lock);
	spin_lock(&log->rl_lock);
	head = &log->rl_head;
	tail = &log->rl_tail;

	if (reader_state->generation_id != live_state.generation_id) {
		reader_state->generation_id = live_state.generation_id;
		reader_state->current_pass_no = reader_state->next_index = 0;
	}
	if (reader_state->generation_id != head->generation_id) {
		pr_debug("read ptr is wrong generation: %u/%u",
			 reader_state->generation_id, head->generation_id);

	read_count = calc_record_count(reader_state, rl_size);
	written_count = calc_record_count(&live_state, rl_size);
	if (read_count == written_count) {
		result = 0;
		goto out;
	}
	if (reader_state->next_index >= rl_size) {
		result = -ERANGE;
		goto out;
	}

	if (read_count > written_count) {
		/* This reader is somehow ahead of the writer. */
		pr_debug("incfs: Log reader is ahead of writer\n");
		*reader_state = live_state;
		*reader_state = (struct read_log_state){
			.generation_id = head->generation_id,
		};
	}

	if (written_count - read_count > rl_size) {
		/*
		 * Reading pointer is too far behind,
		 * start from the record following the write pointer.
		 */
		pr_debug(
			"incfs: read pointer is behind, moving: %u/%u -> %u/%u / %u\n",
			(u32)reader_state->next_index,
	if (reader_state->current_record_no < tail->current_record_no) {
		pr_debug("read ptr is behind, moving: %u/%u -> %u/%u\n",
			 (u32)reader_state->next_offset,
			 (u32)reader_state->current_pass_no,
			(u32)live_state.next_index,
			(u32)live_state.current_pass_no - 1, (u32)rl_size);
			 (u32)tail->next_offset, (u32)tail->current_pass_no);

		*reader_state = (struct read_log_state){
			.next_index = live_state.next_index,
			.current_pass_no = live_state.current_pass_no - 1,
		};
		*reader_state = *tail;
	}

	for (dst_idx = 0; dst_idx < reads_size; dst_idx++) {
		if (reader_state->next_index == live_state.next_index &&
		    reader_state->current_pass_no == live_state.current_pass_no)
		if (reader_state->current_record_no == head->current_record_no)
			break;

		fill_pending_read_from_log_record(
			&reads[dst_idx],
			&log->rl_ring_buf[reader_state->next_index],
			reader_state, rl_size);
		log_read_one_record(log, reader_state);

		reader_state->next_index++;
		if (reader_state->next_index == rl_size) {
			reader_state->next_index = 0;
			reader_state->current_pass_no++;
		}
		reads[dst_idx] = (struct incfs_pending_read_info){
			.file_id = reader_state->base_record.file_id,
			.block_index = reader_state->base_record.block_index,
			.serial_number = reader_state->current_record_no,
			.timestamp_us = reader_state->base_record.absolute_ts_us
		};
	}
	result = dst_idx;

out:
	read_unlock(&log->rl_access_lock);
	return result;
	spin_unlock(&log->rl_lock);
	return dst_idx;
}

bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs)
+50 −26
Original line number Diff line number Diff line
@@ -20,50 +20,74 @@

#define SEGMENTS_PER_FILE 3

struct read_log_record {
	u32 block_index : 31;

	u32 timed_out : 1;

	u64 timestamp_us;
enum LOG_RECORD_TYPE {
	FULL,
	SAME_FILE,
	SAME_FILE_NEXT_BLOCK,
	SAME_FILE_NEXT_BLOCK_SHORT,
};

struct full_record {
	enum LOG_RECORD_TYPE type : 2; /* FULL */
	u32 block_index : 30;
	incfs_uuid_t file_id;
} __packed;
	u64 absolute_ts_us;
} __packed; /* 28 bytes */

struct same_file_record {
	enum LOG_RECORD_TYPE type : 2; /* SAME_FILE */
	u32 block_index : 30;
	u32 relative_ts_us; /* max 2^32 us ~= 1 hour (1:11:30) */
} __packed; /* 12 bytes */

struct same_file_next_block {
	enum LOG_RECORD_TYPE type : 2; /* SAME_FILE_NEXT_BLOCK */
	u32 relative_ts_us : 30; /* max 2^30 us ~= 15 min (17:50) */
} __packed; /* 4 bytes */

struct same_file_next_block_short {
	enum LOG_RECORD_TYPE type : 2; /* SAME_FILE_NEXT_BLOCK_SHORT */
	u16 relative_ts_us : 14; /* max 2^14 us ~= 16 ms */
} __packed; /* 2 bytes */

union log_record {
	struct full_record full_record;
	struct same_file_record same_file_record;
	struct same_file_next_block same_file_next_block;
	struct same_file_next_block_short same_file_next_block_short;
};

struct read_log_state {
	/* Log buffer generation id, incremented on configuration changes */
	u32 generation_id : 8;
	u32 generation_id;

	/* Next slot in rl_ring_buf to write into. */
	u32 next_index : 24;
	/* Offset in rl_ring_buf to write into. */
	u32 next_offset;

	/* Current number of writer passes over rl_ring_buf */
	u32 current_pass_no;

	/* Current full_record to diff against */
	struct full_record base_record;

	/* Current record number counting from configuration change */
	u64 current_record_no;
};

/* A ring buffer to save records about data blocks which were recently read. */
struct read_log {
	struct read_log_record *rl_ring_buf;
	void *rl_ring_buf;

	int rl_size;

	struct read_log_state rl_state;
	struct read_log_state rl_head;

	/*
	 * A lock for _all_ accesses to the struct, to protect against remounts.
	 * Taken for writing when resizing the buffer.
	 */
	rwlock_t rl_access_lock;
	struct read_log_state rl_tail;

	/*
	 * A lock to protect the actual logging - adding a new record.
	 * Note: ALWAYS taken after and under the |rl_access_lock|.
	 */
	spinlock_t rl_logging_lock;
	/* A lock to protect the above fields */
	spinlock_t rl_lock;

	/*
	 * A queue of waiters who want to be notified about reads.
	 */
	/* A queue of waiters who want to be notified about reads */
	wait_queue_head_t ml_notif_wq;
};

@@ -281,7 +305,7 @@ int incfs_collect_logged_reads(struct mount_info *mi,
			       int reads_size);
struct read_log_state incfs_get_log_state(struct mount_info *mi);
int incfs_get_uncollected_logs_count(struct mount_info *mi,
				     struct read_log_state state);
				     const struct read_log_state *state);

static inline struct inode_info *get_incfs_node(struct inode *inode)
{
+1 −1
Original line number Diff line number Diff line
@@ -640,7 +640,7 @@ static __poll_t log_poll(struct file *file, poll_table *wait)
	__poll_t ret = 0;

	poll_wait(file, &mi->mi_log.ml_notif_wq, wait);
	count = incfs_get_uncollected_logs_count(mi, log_state->state);
	count = incfs_get_uncollected_logs_count(mi, &log_state->state);
	if (count >= mi->mi_options.read_log_wakeup_count)
		ret = EPOLLIN | EPOLLRDNORM;

+70 −19
Original line number Diff line number Diff line
@@ -1932,16 +1932,19 @@ static int hash_tree_test(char *mount_dir)
	return TEST_FAILURE;
}

enum expected_log { FULL_LOG, NO_LOG, PARTIAL_LOG };

static int validate_logs(char *mount_dir, int log_fd, struct test_file *file,
			 bool no_rlog)
			 enum expected_log expected_log)
{
	uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE];
	struct incfs_pending_read_info prs[100] = {};
	struct incfs_pending_read_info prs[2048] = {};
	int prs_size = ARRAY_SIZE(prs);
	int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE;
	int expected_read_block_cnt;
	int res;
	int read_count;
	int i;
	int i, j;
	char *filename = concat_file_name(mount_dir, file->name);
	int fd;

@@ -1952,17 +1955,29 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file,

	if (block_cnt > prs_size)
		block_cnt = prs_size;
	expected_read_block_cnt = block_cnt;

	for (i = 0; i < block_cnt; i++) {
		res = pread(fd, data, sizeof(data),
			    INCFS_DATA_FILE_BLOCK_SIZE * i);

		/* Make some read logs of type SAME_FILE_NEXT_BLOCK */
		if (i % 10 == 0)
			usleep(20000);

		/* Skip some blocks to make logs of type SAME_FILE */
		if (i % 10 == 5) {
			++i;
			--expected_read_block_cnt;
		}

		if (res <= 0)
			goto failure;
	}

	read_count =
		wait_for_pending_reads(log_fd, no_rlog ? 10 : 0, prs, prs_size);
	if (no_rlog) {
	read_count = wait_for_pending_reads(
		log_fd, expected_log == NO_LOG ? 10 : 0, prs, prs_size);
	if (expected_log == NO_LOG) {
		if (read_count == 0)
			goto success;
		if (read_count < 0)
@@ -1979,14 +1994,26 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file,
		goto failure;
	}

	if (read_count != block_cnt) {
	i = 0;
	if (expected_log == PARTIAL_LOG) {
		if (read_count == 0) {
			ksft_print_msg("No logs %s.\n", file->name);
			goto failure;
		}

		for (i = 0, j = 0; j < expected_read_block_cnt - read_count;
		     i++, j++)
			if (i % 10 == 5)
				++i;

	} else if (read_count != expected_read_block_cnt) {
		ksft_print_msg("Bad log read count %s %d %d.\n", file->name,
			       read_count, block_cnt);
			       read_count, expected_read_block_cnt);
		goto failure;
	}

	for (i = 0; i < read_count; i++) {
		struct incfs_pending_read_info *read = &prs[i];
	for (j = 0; j < read_count; i++, j++) {
		struct incfs_pending_read_info *read = &prs[j];

		if (!same_id(&read->file_id, &file->id)) {
			ksft_print_msg("Bad log read ino %s\n", file->name);
@@ -1999,8 +2026,8 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file,
			goto failure;
		}

		if (i != 0) {
			unsigned long psn = prs[i - 1].serial_number;
		if (j != 0) {
			unsigned long psn = prs[j - 1].serial_number;

			if (read->serial_number != psn + 1) {
				ksft_print_msg("Bad log read sn %s %d %d.\n",
@@ -2015,6 +2042,9 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file,
				       file->name);
			goto failure;
		}

		if (i % 10 == 5)
			++i;
	}

success:
@@ -2065,7 +2095,7 @@ static int read_log_test(char *mount_dir)
	for (i = 0; i < file_num; i++) {
		struct test_file *file = &test.files[i];

		if (validate_logs(mount_dir, log_fd, file, false))
		if (validate_logs(mount_dir, log_fd, file, FULL_LOG))
			goto failure;
	}

@@ -2093,7 +2123,7 @@ static int read_log_test(char *mount_dir)
	for (i = 0; i < file_num; i++) {
		struct test_file *file = &test.files[i];

		if (validate_logs(mount_dir, log_fd, file, false))
		if (validate_logs(mount_dir, log_fd, file, FULL_LOG))
			goto failure;
	}

@@ -2120,7 +2150,30 @@ static int read_log_test(char *mount_dir)
	for (i = 0; i < file_num; i++) {
		struct test_file *file = &test.files[i];

		if (validate_logs(mount_dir, log_fd, file, true))
		if (validate_logs(mount_dir, log_fd, file, NO_LOG))
			goto failure;
	}

	/*
	 * Remount and check that logs start working again
	 */
	drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY | O_CLOEXEC);
	if (drop_caches == -1)
		goto failure;
	i = write(drop_caches, "3", 1);
	close(drop_caches);
	if (i != 1)
		goto failure;

	if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=1",
			 true) != 0)
		goto failure;

	/* Validate data again */
	for (i = 0; i < file_num; i++) {
		struct test_file *file = &test.files[i];

		if (validate_logs(mount_dir, log_fd, file, PARTIAL_LOG))
			goto failure;
	}

@@ -2143,7 +2196,7 @@ static int read_log_test(char *mount_dir)
	for (i = 0; i < file_num; i++) {
		struct test_file *file = &test.files[i];

		if (validate_logs(mount_dir, log_fd, file, false))
		if (validate_logs(mount_dir, log_fd, file, FULL_LOG))
			goto failure;
	}

@@ -2294,8 +2347,6 @@ static int validate_ranges(const char *mount_dir, struct test_file *file)

		if (fba.start_index >= block_cnt) {
			if (fba.index_out != fba.start_index) {
				printf("Paul: %d, %d\n", (int)fba.index_out,
				       (int)fba.start_index);
				error = -EINVAL;
				goto out;
			}
@@ -2647,7 +2698,7 @@ int main(int argc, char *argv[])
	rmdir(mount_dir);

	if (fails > 0)
		ksft_exit_pass();
		ksft_exit_fail();
	else
		ksft_exit_pass();
	return 0;