Loading fs/btrfs/ctree.h +5 −2 Original line number Diff line number Diff line Loading @@ -1825,6 +1825,9 @@ struct btrfs_fs_info { spinlock_t reada_lock; struct radix_tree_root reada_tree; /* readahead works cnt */ atomic_t reada_works_cnt; /* Extent buffer radix tree */ spinlock_t buffer_lock; struct radix_tree_root buffer_radix; Loading Loading @@ -4563,8 +4566,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, struct btrfs_key *start, struct btrfs_key *end); int btrfs_reada_wait(void *handle); void btrfs_reada_detach(void *handle); int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, u64 start, int err); int btree_readahead_hook(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, u64 start, int err); static inline int is_fstree(u64 rootid) { Loading fs/btrfs/disk-io.c +12 −11 Original line number Diff line number Diff line Loading @@ -612,6 +612,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, int found_level; struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; int reads_done; Loading @@ -637,20 +638,20 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu", btrfs_err_rl(fs_info, "bad tree block start %llu %llu", found_start, eb->start); ret = -EIO; goto err; } if (check_tree_block_fsid(root->fs_info, eb)) { btrfs_err_rl(eb->fs_info, "bad fsid on block %llu", if (check_tree_block_fsid(fs_info, eb)) { btrfs_err_rl(fs_info, "bad fsid on block %llu", eb->start); ret = -EIO; goto err; } found_level = btrfs_header_level(eb); if (found_level >= BTRFS_MAX_LEVEL) { btrfs_err(root->fs_info, "bad tree block level %d", btrfs_err(fs_info, "bad tree block level %d", (int)btrfs_header_level(eb)); ret = -EIO; goto err; Loading @@ -659,7 +660,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb, found_level); ret = csum_tree_block(root->fs_info, eb, 1); ret = csum_tree_block(fs_info, eb, 1); if (ret) { ret = -EIO; goto err; Loading @@ -680,7 +681,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, err: if (reads_done && test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) btree_readahead_hook(root, eb, eb->start, ret); btree_readahead_hook(fs_info, eb, eb->start, ret); if (ret) { /* Loading @@ -699,14 +700,13 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, static int btree_io_failed_hook(struct page *page, int failed_mirror) { struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; eb = (struct extent_buffer *)page->private; set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = failed_mirror; atomic_dec(&eb->io_pages); if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) btree_readahead_hook(root, eb, eb->start, -EIO); btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO); return -EIO; /* we fixed nothing */ } Loading Loading @@ -2604,6 +2604,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->defrag_running, 0); atomic_set(&fs_info->qgroup_op_seq, 0); atomic_set(&fs_info->reada_works_cnt, 0); atomic64_set(&fs_info->tree_mod_seq, 0); fs_info->sb = sb; fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; Loading fs/btrfs/reada.c +124 −124 Original line number Diff line number Diff line Loading @@ -72,7 +72,7 @@ struct reada_extent { spinlock_t lock; struct reada_zone *zones[BTRFS_MAX_MIRRORS]; int nzones; struct btrfs_device *scheduled_for; int scheduled; }; struct reada_zone { Loading Loading @@ -101,11 +101,12 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info); static void __reada_start_machine(struct btrfs_fs_info *fs_info); static int reada_add_block(struct reada_control *rc, u64 logical, struct btrfs_key *top, int level, u64 generation); struct btrfs_key *top, u64 generation); /* recurses */ /* in case of err, eb might be NULL */ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, static void __readahead_hook(struct btrfs_fs_info *fs_info, struct reada_extent *re, struct extent_buffer *eb, u64 start, int err) { int level = 0; Loading @@ -113,55 +114,40 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, int i; u64 bytenr; u64 generation; struct reada_extent *re; struct btrfs_fs_info *fs_info = root->fs_info; struct list_head list; unsigned long index = start >> PAGE_CACHE_SHIFT; struct btrfs_device *for_dev; if (eb) level = btrfs_header_level(eb); /* find extent */ spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); if (re) re->refcnt++; spin_unlock(&fs_info->reada_lock); if (!re) return -1; spin_lock(&re->lock); /* * just take the full list from the extent. afterwards we * don't need the lock anymore */ list_replace_init(&re->extctl, &list); for_dev = re->scheduled_for; re->scheduled_for = NULL; re->scheduled = 0; spin_unlock(&re->lock); if (err == 0) { nritems = level ? btrfs_header_nritems(eb) : 0; generation = btrfs_header_generation(eb); /* * FIXME: currently we just set nritems to 0 if this is a leaf, * effectively ignoring the content. In a next step we could * trigger more readahead depending from the content, e.g. * fetch the checksums for the extents in the leaf. */ } else { /* * this is the error case, the extent buffer has not been * read correctly. We won't access anything from it and * just cleanup our data structures. Effectively this will * cut the branch below this node from read ahead. */ nritems = 0; generation = 0; } if (err) goto cleanup; /* * FIXME: currently we just set nritems to 0 if this is a leaf, * effectively ignoring the content. In a next step we could * trigger more readahead depending from the content, e.g. * fetch the checksums for the extents in the leaf. */ if (!level) goto cleanup; nritems = btrfs_header_nritems(eb); generation = btrfs_header_generation(eb); for (i = 0; i < nritems; i++) { struct reada_extctl *rec; u64 n_gen; Loading @@ -188,7 +174,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, */ #ifdef DEBUG if (rec->generation != generation) { btrfs_debug(root->fs_info, btrfs_debug(fs_info, "generation mismatch for (%llu,%d,%llu) %llu != %llu", key.objectid, key.type, key.offset, rec->generation, generation); Loading @@ -197,10 +183,11 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, if (rec->generation == generation && btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) reada_add_block(rc, bytenr, &next_key, level - 1, n_gen); reada_add_block(rc, bytenr, &next_key, n_gen); } } cleanup: /* * free extctl records */ Loading @@ -222,26 +209,37 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, reada_extent_put(fs_info, re); /* one ref for each entry */ } reada_extent_put(fs_info, re); /* our ref */ if (for_dev) atomic_dec(&for_dev->reada_in_flight); return 0; return; } /* * start is passed separately in case eb in NULL, which may be the case with * failed I/O */ int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, u64 start, int err) int btree_readahead_hook(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, u64 start, int err) { int ret; int ret = 0; struct reada_extent *re; ret = __readahead_hook(root, eb, start, err); /* find extent */ spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, start >> PAGE_CACHE_SHIFT); if (re) re->refcnt++; spin_unlock(&fs_info->reada_lock); if (!re) { ret = -1; goto start_machine; } reada_start_machine(root->fs_info); __readahead_hook(fs_info, re, eb, start, err); reada_extent_put(fs_info, re); /* our ref */ start_machine: reada_start_machine(fs_info); return ret; } Loading @@ -260,18 +258,14 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, spin_lock(&fs_info->reada_lock); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, logical >> PAGE_CACHE_SHIFT, 1); if (ret == 1) if (ret == 1 && logical >= zone->start && logical <= zone->end) { kref_get(&zone->refcnt); spin_unlock(&fs_info->reada_lock); if (ret == 1) { if (logical >= zone->start && logical < zone->end) return zone; spin_lock(&fs_info->reada_lock); kref_put(&zone->refcnt, reada_zone_release); spin_unlock(&fs_info->reada_lock); } spin_unlock(&fs_info->reada_lock); cache = btrfs_lookup_block_group(fs_info, logical); if (!cache) return NULL; Loading Loading @@ -307,8 +301,10 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, kfree(zone); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, logical >> PAGE_CACHE_SHIFT, 1); if (ret == 1) if (ret == 1 && logical >= zone->start && logical <= zone->end) kref_get(&zone->refcnt); else zone = NULL; } spin_unlock(&fs_info->reada_lock); Loading @@ -317,7 +313,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, static struct reada_extent *reada_find_extent(struct btrfs_root *root, u64 logical, struct btrfs_key *top, int level) struct btrfs_key *top) { int ret; struct reada_extent *re = NULL; Loading @@ -330,9 +326,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, u64 length; int real_stripes; int nzones = 0; int i; unsigned long index = logical >> PAGE_CACHE_SHIFT; int dev_replace_is_ongoing; int have_zone = 0; spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); Loading Loading @@ -375,11 +371,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, struct reada_zone *zone; dev = bbio->stripes[nzones].dev; /* cannot read ahead on missing device. */ if (!dev->bdev) continue; zone = reada_find_zone(fs_info, dev, logical, bbio); if (!zone) break; continue; re->zones[nzones] = zone; re->zones[re->nzones++] = zone; spin_lock(&zone->lock); if (!zone->elems) kref_get(&zone->refcnt); Loading @@ -389,8 +390,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, kref_put(&zone->refcnt, reada_zone_release); spin_unlock(&fs_info->reada_lock); } re->nzones = nzones; if (nzones == 0) { if (re->nzones == 0) { /* not a single zone found, error and out */ goto error; } Loading @@ -415,8 +415,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, prev_dev = NULL; dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( &fs_info->dev_replace); for (i = 0; i < nzones; ++i) { dev = bbio->stripes[i].dev; for (nzones = 0; nzones < re->nzones; ++nzones) { dev = re->zones[nzones]->device; if (dev == prev_dev) { /* * in case of DUP, just add the first zone. As both Loading @@ -427,15 +428,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, */ continue; } if (!dev->bdev) { /* * cannot read ahead on missing device, but for RAID5/6, * REQ_GET_READ_MIRRORS return 1. So don't skip missing * device for such case. */ if (nzones > 1) if (!dev->bdev) continue; } if (dev_replace_is_ongoing && dev == fs_info->dev_replace.tgtdev) { /* Loading @@ -447,8 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, prev_dev = dev; ret = radix_tree_insert(&dev->reada_extents, index, re); if (ret) { while (--i >= 0) { dev = bbio->stripes[i].dev; while (--nzones >= 0) { dev = re->zones[nzones]->device; BUG_ON(dev == NULL); /* ignore whether the entry was inserted */ radix_tree_delete(&dev->reada_extents, index); Loading @@ -459,18 +454,21 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, btrfs_dev_replace_unlock(&fs_info->dev_replace); goto error; } have_zone = 1; } spin_unlock(&fs_info->reada_lock); btrfs_dev_replace_unlock(&fs_info->dev_replace); if (!have_zone) goto error; btrfs_put_bbio(bbio); return re; error: while (nzones) { for (nzones = 0; nzones < re->nzones; ++nzones) { struct reada_zone *zone; --nzones; zone = re->zones[nzones]; kref_get(&zone->refcnt); spin_lock(&zone->lock); Loading Loading @@ -531,8 +529,6 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, kref_put(&zone->refcnt, reada_zone_release); spin_unlock(&fs_info->reada_lock); } if (re->scheduled_for) atomic_dec(&re->scheduled_for->reada_in_flight); kfree(re); } Loading @@ -556,13 +552,13 @@ static void reada_control_release(struct kref *kref) } static int reada_add_block(struct reada_control *rc, u64 logical, struct btrfs_key *top, int level, u64 generation) struct btrfs_key *top, u64 generation) { struct btrfs_root *root = rc->root; struct reada_extent *re; struct reada_extctl *rec; re = reada_find_extent(root, logical, top, level); /* takes one ref */ re = reada_find_extent(root, logical, top); /* takes one ref */ if (!re) return -1; Loading Loading @@ -662,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, u64 logical; int ret; int i; int need_kick = 0; spin_lock(&fs_info->reada_lock); if (dev->reada_curr_zone == NULL) { Loading @@ -679,7 +674,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, */ ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, dev->reada_next >> PAGE_CACHE_SHIFT, 1); if (ret == 0 || re->logical >= dev->reada_curr_zone->end) { if (ret == 0 || re->logical > dev->reada_curr_zone->end) { ret = reada_pick_zone(dev); if (!ret) { spin_unlock(&fs_info->reada_lock); Loading @@ -698,6 +693,15 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, spin_unlock(&fs_info->reada_lock); spin_lock(&re->lock); if (re->scheduled || list_empty(&re->extctl)) { spin_unlock(&re->lock); reada_extent_put(fs_info, re); return 0; } re->scheduled = 1; spin_unlock(&re->lock); /* * find mirror num */ Loading @@ -709,29 +713,20 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, } logical = re->logical; spin_lock(&re->lock); if (re->scheduled_for == NULL) { re->scheduled_for = dev; need_kick = 1; } spin_unlock(&re->lock); reada_extent_put(fs_info, re); if (!need_kick) return 0; atomic_inc(&dev->reada_in_flight); ret = reada_tree_block_flagged(fs_info->extent_root, logical, mirror_num, &eb); if (ret) __readahead_hook(fs_info->extent_root, NULL, logical, ret); __readahead_hook(fs_info, re, NULL, logical, ret); else if (eb) __readahead_hook(fs_info->extent_root, eb, eb->start, ret); __readahead_hook(fs_info, re, eb, eb->start, ret); if (eb) free_extent_buffer(eb); atomic_dec(&dev->reada_in_flight); reada_extent_put(fs_info, re); return 1; } Loading @@ -752,6 +747,8 @@ static void reada_start_machine_worker(struct btrfs_work *work) set_task_ioprio(current, BTRFS_IOPRIO_READA); __reada_start_machine(fs_info); set_task_ioprio(current, old_ioprio); atomic_dec(&fs_info->reada_works_cnt); } static void __reada_start_machine(struct btrfs_fs_info *fs_info) Loading Loading @@ -783,8 +780,12 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) * enqueue to workers to finish it. This will distribute the load to * the cores. */ for (i = 0; i < 2; ++i) for (i = 0; i < 2; ++i) { reada_start_machine(fs_info); if (atomic_read(&fs_info->reada_works_cnt) > BTRFS_MAX_MIRRORS * 2) break; } } static void reada_start_machine(struct btrfs_fs_info *fs_info) Loading @@ -801,6 +802,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) rmw->fs_info = fs_info; btrfs_queue_work(fs_info->readahead_workers, &rmw->work); atomic_inc(&fs_info->reada_works_cnt); } #ifdef DEBUG Loading Loading @@ -848,10 +850,9 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) if (ret == 0) break; printk(KERN_DEBUG " re: logical %llu size %u empty %d for %lld", " re: logical %llu size %u empty %d scheduled %d", re->logical, fs_info->tree_root->nodesize, list_empty(&re->extctl), re->scheduled_for ? re->scheduled_for->devid : -1); list_empty(&re->extctl), re->scheduled); for (i = 0; i < re->nzones; ++i) { printk(KERN_CONT " zone %llu-%llu devs", Loading @@ -878,19 +879,14 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) index, 1); if (ret == 0) break; if (!re->scheduled_for) { if (!re->scheduled) { index = (re->logical >> PAGE_CACHE_SHIFT) + 1; continue; } printk(KERN_DEBUG "re: logical %llu size %u list empty %d for %lld", "re: logical %llu size %u list empty %d scheduled %d", re->logical, fs_info->tree_root->nodesize, list_empty(&re->extctl), re->scheduled_for ? re->scheduled_for->devid : -1); for (i = 0; i < re->nzones; ++i) { printk(KERN_CONT " zone %llu-%llu devs", re->zones[i]->start, re->zones[i]->end); list_empty(&re->extctl), re->scheduled); for (i = 0; i < re->nzones; ++i) { printk(KERN_CONT " zone %llu-%llu devs", re->zones[i]->start, Loading @@ -900,7 +896,6 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) re->zones[i]->devs[j]->devid); } } } printk(KERN_CONT "\n"); index = (re->logical >> PAGE_CACHE_SHIFT) + 1; } Loading @@ -917,7 +912,6 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, struct reada_control *rc; u64 start; u64 generation; int level; int ret; struct extent_buffer *node; static struct btrfs_key max_key = { Loading @@ -940,11 +934,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, node = btrfs_root_node(root); start = node->start; level = btrfs_header_level(node); generation = btrfs_header_generation(node); free_extent_buffer(node); ret = reada_add_block(rc, start, &max_key, level, generation); ret = reada_add_block(rc, start, &max_key, generation); if (ret) { kfree(rc); return ERR_PTR(ret); Loading @@ -959,8 +952,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, int btrfs_reada_wait(void *handle) { struct reada_control *rc = handle; struct btrfs_fs_info *fs_info = rc->root->fs_info; while (atomic_read(&rc->elems)) { if (!atomic_read(&fs_info->reada_works_cnt)) reada_start_machine(fs_info); wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, 5 * HZ); dump_devs(rc->root->fs_info, Loading @@ -977,9 +973,13 @@ int btrfs_reada_wait(void *handle) int btrfs_reada_wait(void *handle) { struct reada_control *rc = handle; struct btrfs_fs_info *fs_info = rc->root->fs_info; while (atomic_read(&rc->elems)) { wait_event(rc->wait, atomic_read(&rc->elems) == 0); if (!atomic_read(&fs_info->reada_works_cnt)) reada_start_machine(fs_info); wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, (HZ + 9) / 10); } kref_put(&rc->refcnt, reada_control_release); Loading Loading
fs/btrfs/ctree.h +5 −2 Original line number Diff line number Diff line Loading @@ -1825,6 +1825,9 @@ struct btrfs_fs_info { spinlock_t reada_lock; struct radix_tree_root reada_tree; /* readahead works cnt */ atomic_t reada_works_cnt; /* Extent buffer radix tree */ spinlock_t buffer_lock; struct radix_tree_root buffer_radix; Loading Loading @@ -4563,8 +4566,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, struct btrfs_key *start, struct btrfs_key *end); int btrfs_reada_wait(void *handle); void btrfs_reada_detach(void *handle); int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, u64 start, int err); int btree_readahead_hook(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, u64 start, int err); static inline int is_fstree(u64 rootid) { Loading
fs/btrfs/disk-io.c +12 −11 Original line number Diff line number Diff line Loading @@ -612,6 +612,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, int found_level; struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; int reads_done; Loading @@ -637,20 +638,20 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu", btrfs_err_rl(fs_info, "bad tree block start %llu %llu", found_start, eb->start); ret = -EIO; goto err; } if (check_tree_block_fsid(root->fs_info, eb)) { btrfs_err_rl(eb->fs_info, "bad fsid on block %llu", if (check_tree_block_fsid(fs_info, eb)) { btrfs_err_rl(fs_info, "bad fsid on block %llu", eb->start); ret = -EIO; goto err; } found_level = btrfs_header_level(eb); if (found_level >= BTRFS_MAX_LEVEL) { btrfs_err(root->fs_info, "bad tree block level %d", btrfs_err(fs_info, "bad tree block level %d", (int)btrfs_header_level(eb)); ret = -EIO; goto err; Loading @@ -659,7 +660,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb, found_level); ret = csum_tree_block(root->fs_info, eb, 1); ret = csum_tree_block(fs_info, eb, 1); if (ret) { ret = -EIO; goto err; Loading @@ -680,7 +681,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, err: if (reads_done && test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) btree_readahead_hook(root, eb, eb->start, ret); btree_readahead_hook(fs_info, eb, eb->start, ret); if (ret) { /* Loading @@ -699,14 +700,13 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, static int btree_io_failed_hook(struct page *page, int failed_mirror) { struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; eb = (struct extent_buffer *)page->private; set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = failed_mirror; atomic_dec(&eb->io_pages); if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) btree_readahead_hook(root, eb, eb->start, -EIO); btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO); return -EIO; /* we fixed nothing */ } Loading Loading @@ -2604,6 +2604,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->defrag_running, 0); atomic_set(&fs_info->qgroup_op_seq, 0); atomic_set(&fs_info->reada_works_cnt, 0); atomic64_set(&fs_info->tree_mod_seq, 0); fs_info->sb = sb; fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; Loading
fs/btrfs/reada.c +124 −124 Original line number Diff line number Diff line Loading @@ -72,7 +72,7 @@ struct reada_extent { spinlock_t lock; struct reada_zone *zones[BTRFS_MAX_MIRRORS]; int nzones; struct btrfs_device *scheduled_for; int scheduled; }; struct reada_zone { Loading Loading @@ -101,11 +101,12 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info); static void __reada_start_machine(struct btrfs_fs_info *fs_info); static int reada_add_block(struct reada_control *rc, u64 logical, struct btrfs_key *top, int level, u64 generation); struct btrfs_key *top, u64 generation); /* recurses */ /* in case of err, eb might be NULL */ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, static void __readahead_hook(struct btrfs_fs_info *fs_info, struct reada_extent *re, struct extent_buffer *eb, u64 start, int err) { int level = 0; Loading @@ -113,55 +114,40 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, int i; u64 bytenr; u64 generation; struct reada_extent *re; struct btrfs_fs_info *fs_info = root->fs_info; struct list_head list; unsigned long index = start >> PAGE_CACHE_SHIFT; struct btrfs_device *for_dev; if (eb) level = btrfs_header_level(eb); /* find extent */ spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); if (re) re->refcnt++; spin_unlock(&fs_info->reada_lock); if (!re) return -1; spin_lock(&re->lock); /* * just take the full list from the extent. afterwards we * don't need the lock anymore */ list_replace_init(&re->extctl, &list); for_dev = re->scheduled_for; re->scheduled_for = NULL; re->scheduled = 0; spin_unlock(&re->lock); if (err == 0) { nritems = level ? btrfs_header_nritems(eb) : 0; generation = btrfs_header_generation(eb); /* * FIXME: currently we just set nritems to 0 if this is a leaf, * effectively ignoring the content. In a next step we could * trigger more readahead depending from the content, e.g. * fetch the checksums for the extents in the leaf. */ } else { /* * this is the error case, the extent buffer has not been * read correctly. We won't access anything from it and * just cleanup our data structures. Effectively this will * cut the branch below this node from read ahead. */ nritems = 0; generation = 0; } if (err) goto cleanup; /* * FIXME: currently we just set nritems to 0 if this is a leaf, * effectively ignoring the content. In a next step we could * trigger more readahead depending from the content, e.g. * fetch the checksums for the extents in the leaf. */ if (!level) goto cleanup; nritems = btrfs_header_nritems(eb); generation = btrfs_header_generation(eb); for (i = 0; i < nritems; i++) { struct reada_extctl *rec; u64 n_gen; Loading @@ -188,7 +174,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, */ #ifdef DEBUG if (rec->generation != generation) { btrfs_debug(root->fs_info, btrfs_debug(fs_info, "generation mismatch for (%llu,%d,%llu) %llu != %llu", key.objectid, key.type, key.offset, rec->generation, generation); Loading @@ -197,10 +183,11 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, if (rec->generation == generation && btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) reada_add_block(rc, bytenr, &next_key, level - 1, n_gen); reada_add_block(rc, bytenr, &next_key, n_gen); } } cleanup: /* * free extctl records */ Loading @@ -222,26 +209,37 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, reada_extent_put(fs_info, re); /* one ref for each entry */ } reada_extent_put(fs_info, re); /* our ref */ if (for_dev) atomic_dec(&for_dev->reada_in_flight); return 0; return; } /* * start is passed separately in case eb in NULL, which may be the case with * failed I/O */ int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, u64 start, int err) int btree_readahead_hook(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, u64 start, int err) { int ret; int ret = 0; struct reada_extent *re; ret = __readahead_hook(root, eb, start, err); /* find extent */ spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, start >> PAGE_CACHE_SHIFT); if (re) re->refcnt++; spin_unlock(&fs_info->reada_lock); if (!re) { ret = -1; goto start_machine; } reada_start_machine(root->fs_info); __readahead_hook(fs_info, re, eb, start, err); reada_extent_put(fs_info, re); /* our ref */ start_machine: reada_start_machine(fs_info); return ret; } Loading @@ -260,18 +258,14 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, spin_lock(&fs_info->reada_lock); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, logical >> PAGE_CACHE_SHIFT, 1); if (ret == 1) if (ret == 1 && logical >= zone->start && logical <= zone->end) { kref_get(&zone->refcnt); spin_unlock(&fs_info->reada_lock); if (ret == 1) { if (logical >= zone->start && logical < zone->end) return zone; spin_lock(&fs_info->reada_lock); kref_put(&zone->refcnt, reada_zone_release); spin_unlock(&fs_info->reada_lock); } spin_unlock(&fs_info->reada_lock); cache = btrfs_lookup_block_group(fs_info, logical); if (!cache) return NULL; Loading Loading @@ -307,8 +301,10 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, kfree(zone); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, logical >> PAGE_CACHE_SHIFT, 1); if (ret == 1) if (ret == 1 && logical >= zone->start && logical <= zone->end) kref_get(&zone->refcnt); else zone = NULL; } spin_unlock(&fs_info->reada_lock); Loading @@ -317,7 +313,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, static struct reada_extent *reada_find_extent(struct btrfs_root *root, u64 logical, struct btrfs_key *top, int level) struct btrfs_key *top) { int ret; struct reada_extent *re = NULL; Loading @@ -330,9 +326,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, u64 length; int real_stripes; int nzones = 0; int i; unsigned long index = logical >> PAGE_CACHE_SHIFT; int dev_replace_is_ongoing; int have_zone = 0; spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); Loading Loading @@ -375,11 +371,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, struct reada_zone *zone; dev = bbio->stripes[nzones].dev; /* cannot read ahead on missing device. */ if (!dev->bdev) continue; zone = reada_find_zone(fs_info, dev, logical, bbio); if (!zone) break; continue; re->zones[nzones] = zone; re->zones[re->nzones++] = zone; spin_lock(&zone->lock); if (!zone->elems) kref_get(&zone->refcnt); Loading @@ -389,8 +390,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, kref_put(&zone->refcnt, reada_zone_release); spin_unlock(&fs_info->reada_lock); } re->nzones = nzones; if (nzones == 0) { if (re->nzones == 0) { /* not a single zone found, error and out */ goto error; } Loading @@ -415,8 +415,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, prev_dev = NULL; dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( &fs_info->dev_replace); for (i = 0; i < nzones; ++i) { dev = bbio->stripes[i].dev; for (nzones = 0; nzones < re->nzones; ++nzones) { dev = re->zones[nzones]->device; if (dev == prev_dev) { /* * in case of DUP, just add the first zone. As both Loading @@ -427,15 +428,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, */ continue; } if (!dev->bdev) { /* * cannot read ahead on missing device, but for RAID5/6, * REQ_GET_READ_MIRRORS return 1. So don't skip missing * device for such case. */ if (nzones > 1) if (!dev->bdev) continue; } if (dev_replace_is_ongoing && dev == fs_info->dev_replace.tgtdev) { /* Loading @@ -447,8 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, prev_dev = dev; ret = radix_tree_insert(&dev->reada_extents, index, re); if (ret) { while (--i >= 0) { dev = bbio->stripes[i].dev; while (--nzones >= 0) { dev = re->zones[nzones]->device; BUG_ON(dev == NULL); /* ignore whether the entry was inserted */ radix_tree_delete(&dev->reada_extents, index); Loading @@ -459,18 +454,21 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, btrfs_dev_replace_unlock(&fs_info->dev_replace); goto error; } have_zone = 1; } spin_unlock(&fs_info->reada_lock); btrfs_dev_replace_unlock(&fs_info->dev_replace); if (!have_zone) goto error; btrfs_put_bbio(bbio); return re; error: while (nzones) { for (nzones = 0; nzones < re->nzones; ++nzones) { struct reada_zone *zone; --nzones; zone = re->zones[nzones]; kref_get(&zone->refcnt); spin_lock(&zone->lock); Loading Loading @@ -531,8 +529,6 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, kref_put(&zone->refcnt, reada_zone_release); spin_unlock(&fs_info->reada_lock); } if (re->scheduled_for) atomic_dec(&re->scheduled_for->reada_in_flight); kfree(re); } Loading @@ -556,13 +552,13 @@ static void reada_control_release(struct kref *kref) } static int reada_add_block(struct reada_control *rc, u64 logical, struct btrfs_key *top, int level, u64 generation) struct btrfs_key *top, u64 generation) { struct btrfs_root *root = rc->root; struct reada_extent *re; struct reada_extctl *rec; re = reada_find_extent(root, logical, top, level); /* takes one ref */ re = reada_find_extent(root, logical, top); /* takes one ref */ if (!re) return -1; Loading Loading @@ -662,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, u64 logical; int ret; int i; int need_kick = 0; spin_lock(&fs_info->reada_lock); if (dev->reada_curr_zone == NULL) { Loading @@ -679,7 +674,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, */ ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, dev->reada_next >> PAGE_CACHE_SHIFT, 1); if (ret == 0 || re->logical >= dev->reada_curr_zone->end) { if (ret == 0 || re->logical > dev->reada_curr_zone->end) { ret = reada_pick_zone(dev); if (!ret) { spin_unlock(&fs_info->reada_lock); Loading @@ -698,6 +693,15 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, spin_unlock(&fs_info->reada_lock); spin_lock(&re->lock); if (re->scheduled || list_empty(&re->extctl)) { spin_unlock(&re->lock); reada_extent_put(fs_info, re); return 0; } re->scheduled = 1; spin_unlock(&re->lock); /* * find mirror num */ Loading @@ -709,29 +713,20 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, } logical = re->logical; spin_lock(&re->lock); if (re->scheduled_for == NULL) { re->scheduled_for = dev; need_kick = 1; } spin_unlock(&re->lock); reada_extent_put(fs_info, re); if (!need_kick) return 0; atomic_inc(&dev->reada_in_flight); ret = reada_tree_block_flagged(fs_info->extent_root, logical, mirror_num, &eb); if (ret) __readahead_hook(fs_info->extent_root, NULL, logical, ret); __readahead_hook(fs_info, re, NULL, logical, ret); else if (eb) __readahead_hook(fs_info->extent_root, eb, eb->start, ret); __readahead_hook(fs_info, re, eb, eb->start, ret); if (eb) free_extent_buffer(eb); atomic_dec(&dev->reada_in_flight); reada_extent_put(fs_info, re); return 1; } Loading @@ -752,6 +747,8 @@ static void reada_start_machine_worker(struct btrfs_work *work) set_task_ioprio(current, BTRFS_IOPRIO_READA); __reada_start_machine(fs_info); set_task_ioprio(current, old_ioprio); atomic_dec(&fs_info->reada_works_cnt); } static void __reada_start_machine(struct btrfs_fs_info *fs_info) Loading Loading @@ -783,8 +780,12 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) * enqueue to workers to finish it. This will distribute the load to * the cores. */ for (i = 0; i < 2; ++i) for (i = 0; i < 2; ++i) { reada_start_machine(fs_info); if (atomic_read(&fs_info->reada_works_cnt) > BTRFS_MAX_MIRRORS * 2) break; } } static void reada_start_machine(struct btrfs_fs_info *fs_info) Loading @@ -801,6 +802,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) rmw->fs_info = fs_info; btrfs_queue_work(fs_info->readahead_workers, &rmw->work); atomic_inc(&fs_info->reada_works_cnt); } #ifdef DEBUG Loading Loading @@ -848,10 +850,9 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) if (ret == 0) break; printk(KERN_DEBUG " re: logical %llu size %u empty %d for %lld", " re: logical %llu size %u empty %d scheduled %d", re->logical, fs_info->tree_root->nodesize, list_empty(&re->extctl), re->scheduled_for ? re->scheduled_for->devid : -1); list_empty(&re->extctl), re->scheduled); for (i = 0; i < re->nzones; ++i) { printk(KERN_CONT " zone %llu-%llu devs", Loading @@ -878,19 +879,14 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) index, 1); if (ret == 0) break; if (!re->scheduled_for) { if (!re->scheduled) { index = (re->logical >> PAGE_CACHE_SHIFT) + 1; continue; } printk(KERN_DEBUG "re: logical %llu size %u list empty %d for %lld", "re: logical %llu size %u list empty %d scheduled %d", re->logical, fs_info->tree_root->nodesize, list_empty(&re->extctl), re->scheduled_for ? re->scheduled_for->devid : -1); for (i = 0; i < re->nzones; ++i) { printk(KERN_CONT " zone %llu-%llu devs", re->zones[i]->start, re->zones[i]->end); list_empty(&re->extctl), re->scheduled); for (i = 0; i < re->nzones; ++i) { printk(KERN_CONT " zone %llu-%llu devs", re->zones[i]->start, Loading @@ -900,7 +896,6 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) re->zones[i]->devs[j]->devid); } } } printk(KERN_CONT "\n"); index = (re->logical >> PAGE_CACHE_SHIFT) + 1; } Loading @@ -917,7 +912,6 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, struct reada_control *rc; u64 start; u64 generation; int level; int ret; struct extent_buffer *node; static struct btrfs_key max_key = { Loading @@ -940,11 +934,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, node = btrfs_root_node(root); start = node->start; level = btrfs_header_level(node); generation = btrfs_header_generation(node); free_extent_buffer(node); ret = reada_add_block(rc, start, &max_key, level, generation); ret = reada_add_block(rc, start, &max_key, generation); if (ret) { kfree(rc); return ERR_PTR(ret); Loading @@ -959,8 +952,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, int btrfs_reada_wait(void *handle) { struct reada_control *rc = handle; struct btrfs_fs_info *fs_info = rc->root->fs_info; while (atomic_read(&rc->elems)) { if (!atomic_read(&fs_info->reada_works_cnt)) reada_start_machine(fs_info); wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, 5 * HZ); dump_devs(rc->root->fs_info, Loading @@ -977,9 +973,13 @@ int btrfs_reada_wait(void *handle) int btrfs_reada_wait(void *handle) { struct reada_control *rc = handle; struct btrfs_fs_info *fs_info = rc->root->fs_info; while (atomic_read(&rc->elems)) { wait_event(rc->wait, atomic_read(&rc->elems) == 0); if (!atomic_read(&fs_info->reada_works_cnt)) reada_start_machine(fs_info); wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, (HZ + 9) / 10); } kref_put(&rc->refcnt, reada_control_release); Loading