Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b6844e7c authored by Jaegeuk Kim
Browse files

Merge remote-tracking branch 'origin/upstream-f2fs-stable-linux-4.4.y' into android-4.4



* origin/upstream-f2fs-stable-linux-4.4.y:
  f2fs: use EINVAL for superblock with invalid magic
  f2fs: fix to read source block before invalidating it
  f2fs: remove redundant check from f2fs_setflags_common()
  f2fs: use generic checking and prep function for FS_IOC_SETFLAGS
  f2fs: improve print log in f2fs_sanity_check_ckpt()
  f2fs: avoid out-of-range memory access
  f2fs: fix to avoid long latency during umount
  f2fs: allow all the users to pin a file
  f2fs: support swap file w/ DIO
  f2fs: allocate blocks for pinned file
  f2fs: fix is_idle() check for discard type
  f2fs: add a rw_sem to cover quota flag changes
  f2fs: set SBI_NEED_FSCK for xattr corruption case
  f2fs: use generic EFSBADCRC/EFSCORRUPTED
  f2fs: Use DIV_ROUND_UP() instead of open-coding
  f2fs: print kernel message if filesystem is inconsistent
  f2fs: introduce f2fs_<level> macros to wrap f2fs_printk()
  f2fs: avoid get_valid_blocks() for cleanup
  f2fs: ioctl for removing a range from F2FS
  f2fs: only set project inherit bit for directory
  f2fs: separate f2fs i_flags from fs_flags and ext4 i_flags
  f2fs: Add option to limit required GC for checkpoint=disable
  f2fs: Fix accounting for unusable blocks
  f2fs: Fix root reserved on remount
  f2fs: Lower threshold for disable_cp_again
  f2fs: fix sparse warning
  f2fs: fix f2fs_show_options to show nodiscard mount option
  f2fs: add error prints for debugging mount failure
  f2fs: fix to do sanity check on segment bitmap of LFS curseg
  f2fs: add missing sysfs entries in documentation
  f2fs: fix to avoid deadloop if data_flush is on
  f2fs: always assume that the device is idle under gc_urgent
  f2fs: add bio cache for IPU
  f2fs: allow ssr block allocation during checkpoint=disable period
  f2fs: fix to check layout on last valid checkpoint park

Change-Id: I12033b777edf57fb337c13df82caec464f1426d6
Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
parents 35a6a604 575d3de9
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -243,3 +243,11 @@ Description:
		 - Del: echo '[h/c]!extension' > /sys/fs/f2fs/<disk>/extension_list
		 - [h] means add/del hot file extension
		 - [c] means add/del cold file extension

What:		/sys/fs/f2fs/<disk>/unusable
Date:		April 2019
Contact:	"Daniel Rosenberg" <drosen@google.com>
Description:
		If checkpoint=disable, it displays the number of blocks that are unusable.
                If checkpoint=enable, it displays the number of blocks that would be unusable
                if checkpoint=disable were to be set.
+123 −10
Original line number Diff line number Diff line
@@ -214,11 +214,22 @@ fsync_mode=%s Control the policy of fsync. Currently supports "posix",
                       non-atomic files likewise "nobarrier" mount option.
test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
                       context. The fake fscrypt context is used by xfstests.
checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
checkpoint=%s[:%u[%]]     Set to "disable" to turn off checkpointing. Set to "enable"
                       to reenable checkpointing. Is enabled by default. While
                       disabled, any unmounting or unexpected shutdowns will cause
                       the filesystem contents to appear as they did when the
                       filesystem was mounted with that option.
                       While mounting with checkpoint=disable, the filesystem must
                       run garbage collection to ensure that all available space can
                       be used. If this takes too much time, the mount may return
                       EAGAIN. You may optionally add a value to indicate how much
                       of the disk you would be willing to temporarily give up to
                       avoid additional garbage collection. This can be given as a
                       number of blocks, or as a percent. For instance, mounting
                       with checkpoint=disable:100% would always succeed, but it may
                       hide up to all remaining free space. The actual space that
                       would be unusable can be viewed at /sys/fs/f2fs/<disk>/unusable.
                       This space is reclaimed once checkpoint=enable.

================================================================================
DEBUGFS ENTRIES
@@ -246,11 +257,14 @@ Files in /sys/fs/f2fs/<devname>
..............................................................................
 File                         Content

 gc_max_sleep_time            This tuning parameter controls the maximum sleep
 gc_urgent_sleep_time         This parameter controls sleep time for gc_urgent.
                              500 ms is set by default. See above gc_urgent.

 gc_min_sleep_time            This tuning parameter controls the minimum sleep
                              time for the garbage collection thread. Time is
                              in milliseconds.

 gc_min_sleep_time            This tuning parameter controls the minimum sleep
 gc_max_sleep_time            This tuning parameter controls the maximum sleep
                              time for the garbage collection thread. Time is
                              in milliseconds.

@@ -270,9 +284,6 @@ Files in /sys/fs/f2fs/<devname>
                              to 1, background thread starts to do GC by given
                              gc_urgent_sleep_time interval.

 gc_urgent_sleep_time         This parameter controls sleep time for gc_urgent.
                              500 ms is set by default. See above gc_urgent.

 reclaim_segments             This parameter controls the number of prefree
                              segments to be reclaimed. If the number of prefree
			      segments is larger than the number of segments
@@ -287,7 +298,16 @@ Files in /sys/fs/f2fs/<devname>
			      checkpoint is triggered, and issued during the
			      checkpoint. By default, it is disabled with 0.

 trim_sections                This parameter controls the number of sections
 discard_granularity	      This parameter controls the granularity of discard
			      command size. It will issue discard commands only if
			      the size is larger than given granularity. Its
			      unit size is 4KB, and 4 (=16KB) is set by default.
			      The maximum value is 128 (=512KB).

 reserved_blocks	      This parameter indicates the number of blocks that
			      f2fs reserves internally for root.

 batched_trim_sections	      This parameter controls the number of sections
                              to be trimmed out in batch mode when FITRIM
                              conducts. 32 sections is set by default.

@@ -309,11 +329,35 @@ Files in /sys/fs/f2fs/<devname>
			      the number is less than this value, it triggers
			      in-place-updates.

 min_seq_blocks		      This parameter controls the threshold to serialize
			      write IOs issued by multiple threads in parallel.

 min_hot_blocks		      This parameter controls the threshold to allocate
			      a hot data log for pending data blocks to write.

 min_ssr_sections	      This parameter adds the threshold when deciding
			      SSR block allocation. If this is large, SSR mode
			      will be enabled early.

 ram_thresh                   This parameter controls the memory footprint used
			      by free nids and cached nat entries. By default,
			      10 is set, which indicates 10 MB / 1 GB RAM.

 ra_nid_pages		      When building free nids, F2FS reads NAT blocks
			      ahead for speed up. Default is 0.

 dirty_nats_ratio	      Given dirty ratio of cached nat entries, F2FS
			      determines flushing them in background.

 max_victim_search	      This parameter controls the number of trials to
			      find a victim segment when conducting SSR and
			      cleaning operations. The default value is 4096
			      which covers 8GB block address range.

 migration_granularity	      For large-sized sections, F2FS can stop GC given
			      this granularity instead of reclaiming entire
			      section.

 dir_level                    This parameter controls the directory level to
			      support large directory. If a directory has a
			      number of files, it can reduce the file lookup
@@ -321,9 +365,53 @@ Files in /sys/fs/f2fs/<devname>
			      Otherwise, it needs to decrease this value to
			      reduce the space overhead. The default value is 0.

 ram_thresh                   This parameter controls the memory footprint used
			      by free nids and cached nat entries. By default,
			      10 is set, which indicates 10 MB / 1 GB RAM.
 cp_interval		      F2FS tries to do checkpoint periodically, 60 secs
			      by default.

 idle_interval		      F2FS detects system is idle, if there's no F2FS
			      operations during given interval, 5 secs by
			      default.

 discard_idle_interval	      F2FS detects the discard thread is idle, given
			      time interval. Default is 5 secs.

 gc_idle_interval	      F2FS detects the GC thread is idle, given time
			      interval. Default is 5 secs.

 umount_discard_timeout       When unmounting the disk, F2FS waits for finishing
			      queued discard commands which can take huge time.
			      This gives time out for it, 5 secs by default.

 iostat_enable		      This controls to enable/disable iostat in F2FS.

 readdir_ra		      This enables/disables readahead of inode blocks
			      in readdir, and default is enabled.

 gc_pin_file_thresh	      This indicates how many times GC can fail for the
			      pinned file. If it exceeds this, F2FS doesn't
			      guarantee its pinning state. 2048 trials is set
			      by default.

 extension_list		      This enables to change extension_list for hot/cold
			      files in runtime.

 inject_rate		      This controls injection rate of arbitrary faults.

 inject_type		      This controls injection type of arbitrary faults.

 dirty_segments 	      This shows # of dirty segments.

 lifetime_write_kbytes	      This shows # of data written to the disk.

 features		      This shows current features enabled on F2FS.

 current_reserved_blocks      This shows # of blocks currently reserved.

 unusable                     If checkpoint=disable, this shows the number of
                              blocks that are unusable.
                              If checkpoint=enable it shows the number of blocks
                              that would be unusable if checkpoint=disable were
                              to be set.

================================================================================
USAGE
@@ -656,3 +744,28 @@ algorithm.
In order to identify whether the data in the victim segment are valid or not,
F2FS manages a bitmap. Each bit represents the validity of a block, and the
bitmap is composed of a bit stream covering whole blocks in main area.

Fallocate(2) Policy
-------------------

The default policy follows the POSIX rule below.

Allocating disk space
    The default operation (i.e., mode is zero) of fallocate() allocates
    the disk space within the range specified by offset and len.  The
    file size (as reported by stat(2)) will be changed if offset+len is
    greater than the file size.  Any subregion within the range specified
    by offset and len that did not contain data before the call will be
    initialized to zero.  This default behavior closely resembles the
    behavior of the posix_fallocate(3) library function, and is intended
    as a method of optimally implementing that function.

However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) prior to
fallocate(fd, DEFAULT_MODE), it allocates on-disk block addresses having
zero or random data, which is useful for the scenario below:
 1. create(fd)
 2. ioctl(fd, F2FS_IOC_SET_PIN_FILE)
 3. fallocate(fd, 0, 0, size)
 4. address = fibmap(fd, offset)
 5. open(blkdev)
 6. write(blkdev, address)
+44 −57
Original line number Diff line number Diff line
@@ -146,8 +146,8 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,

	exist = f2fs_test_bit(offset, se->cur_valid_map);
	if (!exist && type == DATA_GENERIC_ENHANCE) {
		f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
			"blkaddr:%u, sit bitmap:%d", blkaddr, exist);
		f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
			 blkaddr, exist);
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		WARN_ON(1);
	}
@@ -184,8 +184,8 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
	case DATA_GENERIC_ENHANCE_READ:
		if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
				blkaddr < MAIN_BLKADDR(sbi))) {
			f2fs_msg(sbi->sb, KERN_WARNING,
				"access invalid blkaddr:%u", blkaddr);
			f2fs_warn(sbi, "access invalid blkaddr:%u",
				  blkaddr);
			set_sbi_flag(sbi, SBI_NEED_FSCK);
			WARN_ON(1);
			return false;
@@ -658,8 +658,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)

err_out:
	set_sbi_flag(sbi, SBI_NEED_FSCK);
	f2fs_msg(sbi->sb, KERN_WARNING,
			"%s: orphan failed (ino=%x), run fsck to fix.",
	f2fs_warn(sbi, "%s: orphan failed (ino=%x), run fsck to fix.",
		  __func__, ino);
	return err;
}
@@ -677,13 +676,12 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
		return 0;

	if (bdev_read_only(sbi->sb->s_bdev)) {
		f2fs_msg(sbi->sb, KERN_INFO, "write access "
			"unavailable, skipping orphan cleanup");
		f2fs_info(sbi, "write access unavailable, skipping orphan cleanup");
		return 0;
	}

	if (s_flags & MS_RDONLY) {
		f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
		f2fs_info(sbi, "orphan cleanup on readonly fs");
		sbi->sb->s_flags &= ~MS_RDONLY;
	}

@@ -828,26 +826,14 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
	if (crc_offset < CP_MIN_CHKSUM_OFFSET ||
			crc_offset > CP_CHKSUM_OFFSET) {
		f2fs_put_page(*cp_page, 1);
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid crc_offset: %zu", crc_offset);
		f2fs_warn(sbi, "invalid crc_offset: %zu", crc_offset);
		return -EINVAL;
	}

	if (__is_set_ckpt_flags(*cp_block, CP_LARGE_NAT_BITMAP_FLAG)) {
		if (crc_offset != CP_MIN_CHKSUM_OFFSET) {
			f2fs_put_page(*cp_page, 1);
			f2fs_msg(sbi->sb, KERN_WARNING,
				"layout of large_nat_bitmap is deprecated, "
				"run fsck to repair, chksum_offset: %zu",
				crc_offset);
			return -EINVAL;
		}
	}

	crc = f2fs_checkpoint_chksum(sbi, *cp_block);
	if (crc != cur_cp_crc(*cp_block)) {
		f2fs_put_page(*cp_page, 1);
		f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
		f2fs_warn(sbi, "invalid crc value");
		return -EINVAL;
	}

@@ -870,8 +856,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,

	if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
					sbi->blocks_per_seg) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid cp_pack_total_block_count:%u",
		f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u",
			  le32_to_cpu(cp_block->cp_pack_total_block_count));
		goto invalid_cp;
	}
@@ -906,6 +891,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
	unsigned int cp_blks = 1 + __cp_payload(sbi);
	block_t cp_blk_no;
	int i;
	int err;

	sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks),
				 GFP_KERNEL);
@@ -933,6 +919,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
	} else if (cp2) {
		cur_page = cp2;
	} else {
		err = -EFSCORRUPTED;
		goto fail_no_cp;
	}

@@ -945,8 +932,10 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
		sbi->cur_cp_pack = 2;

	/* Sanity checking of checkpoint */
	if (f2fs_sanity_check_ckpt(sbi))
	if (f2fs_sanity_check_ckpt(sbi)) {
		err = -EFSCORRUPTED;
		goto free_fail_no_cp;
	}

	if (cp_blks <= 1)
		goto done;
@@ -960,8 +949,10 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
		unsigned char *ckpt = (unsigned char *)sbi->ckpt;

		cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i);
		if (IS_ERR(cur_page))
		if (IS_ERR(cur_page)) {
			err = PTR_ERR(cur_page);
			goto free_fail_no_cp;
		}
		sit_bitmap_ptr = page_address(cur_page);
		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
		f2fs_put_page(cur_page, 1);
@@ -976,7 +967,7 @@ free_fail_no_cp:
	f2fs_put_page(cp2, 1);
fail_no_cp:
	kvfree(sbi->ckpt);
	return -EINVAL;
	return err;
}

static void __add_dirty_inode(struct inode *inode, enum inode_type type)
@@ -1143,17 +1134,24 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)

static bool __need_flush_quota(struct f2fs_sb_info *sbi)
{
	bool ret = false;

	if (!is_journalled_quota(sbi))
		return false;
	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
		return false;
	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
		return false;
	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
		return true;
	if (get_pages(sbi, F2FS_DIRTY_QDATA))
		return true;
	return false;

	down_write(&sbi->quota_sem);
	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
		ret = false;
	} else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
		ret = false;
	} else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) {
		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
		ret = true;
	} else if (get_pages(sbi, F2FS_DIRTY_QDATA)) {
		ret = true;
	}
	up_write(&sbi->quota_sem);
	return ret;
}

/*
@@ -1172,26 +1170,22 @@ static int block_operations(struct f2fs_sb_info *sbi)
	blk_start_plug(&plug);

retry_flush_quotas:
	f2fs_lock_all(sbi);
	if (__need_flush_quota(sbi)) {
		int locked;

		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
			f2fs_lock_all(sbi);
			set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
			goto retry_flush_dents;
		}
		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
		f2fs_unlock_all(sbi);

		/* only failed during mount/umount/freeze/quotactl */
		locked = down_read_trylock(&sbi->sb->s_umount);
		f2fs_quota_sync(sbi->sb, -1);
		if (locked)
			up_read(&sbi->sb->s_umount);
	}

	f2fs_lock_all(sbi);
	if (__need_flush_quota(sbi)) {
		f2fs_unlock_all(sbi);
		cond_resched();
		goto retry_flush_quotas;
	}
@@ -1213,12 +1207,6 @@ retry_flush_dents:
	 */
	down_write(&sbi->node_change);

	if (__need_flush_quota(sbi)) {
		up_write(&sbi->node_change);
		f2fs_unlock_all(sbi);
		goto retry_flush_quotas;
	}

	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
		up_write(&sbi->node_change);
		f2fs_unlock_all(sbi);
@@ -1314,7 +1302,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	else
		__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
		is_sbi_flag_set(sbi, SBI_IS_RESIZEFS))
		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);

	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
@@ -1570,8 +1559,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
		if (cpc->reason != CP_PAUSE)
			return 0;
		f2fs_msg(sbi->sb, KERN_WARNING,
				"Start checkpoint disabled!");
		f2fs_warn(sbi, "Start checkpoint disabled!");
	}
	mutex_lock(&sbi->cp_mutex);

@@ -1637,8 +1625,7 @@ stop:
	stat_inc_cp_count(sbi->stat_info);

	if (cpc->reason & CP_RECOVERY)
		f2fs_msg(sbi->sb, KERN_NOTICE,
			"checkpoint: version = %llx", ckpt_ver);
		f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver);

	/* do checkpoint periodically */
	f2fs_update_time(sbi, CP_TIME);
Loading