drivers/md/md.c  (+54 −20)

```diff
@@ -2482,8 +2482,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
 	if (add_journal)
 		mddev_resume(mddev);
 	if (err) {
-		unbind_rdev_from_array(rdev);
-		export_rdev(rdev);
+		md_kick_rdev_from_array(rdev);
 		return err;
 	}
 }
@@ -2600,6 +2599,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 		else
 			err = -EBUSY;
 	} else if (cmd_match(buf, "remove")) {
+		if (rdev->mddev->pers) {
+			clear_bit(Blocked, &rdev->flags);
+			remove_and_add_spares(rdev->mddev, rdev);
+		}
 		if (rdev->raid_disk >= 0)
 			err = -EBUSY;
 		else {
@@ -3176,8 +3179,7 @@ int md_rdev_init(struct md_rdev *rdev)
 	rdev->data_offset = 0;
 	rdev->new_data_offset = 0;
 	rdev->sb_events = 0;
-	rdev->last_read_error.tv_sec  = 0;
-	rdev->last_read_error.tv_nsec = 0;
+	rdev->last_read_error = 0;
 	rdev->sb_loaded = 0;
 	rdev->bb_page = NULL;
 	atomic_set(&rdev->nr_pending, 0);
@@ -3583,6 +3585,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev->to_remove = &md_redundancy_group;
 	}
 
+	module_put(oldpers->owner);
+
 	rdev_for_each(rdev, mddev) {
 		if (rdev->raid_disk < 0)
 			continue;
@@ -3940,6 +3944,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 		} else
 			err = -EBUSY;
 	}
+	if (!err)
+		sysfs_notify_dirent_safe(mddev->sysfs_state);
 	spin_unlock(&mddev->lock);
 	return err ?: len;
 }
@@ -4191,6 +4197,7 @@ size_store(struct mddev *mddev, const char *buf, size_t len)
 		return err;
 	if (mddev->pers) {
 		err = update_size(mddev, sectors);
+		if (err == 0)
 			md_update_sb(mddev, 1);
 	} else {
 		if (mddev->dev_sectors == 0 ||
@@ -7813,6 +7820,7 @@ void md_do_sync(struct md_thread *thread)
 		if (ret)
 			goto skip;
 
+		set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
 		if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
 		      test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
 		      test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
@@ -8151,18 +8159,11 @@ void md_do_sync(struct md_thread *thread)
 		}
 	}
  skip:
-	if (mddev_is_clustered(mddev) && ret == 0) {
-		/* set CHANGE_PENDING here since maybe another
-		 * update is needed, so other nodes are informed */
-		set_mask_bits(&mddev->flags, 0,
-			      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
-		md_wakeup_thread(mddev->thread);
-		wait_event(mddev->sb_wait,
-			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
-		md_cluster_ops->resync_finish(mddev);
-	} else
-		set_bit(MD_CHANGE_DEVS, &mddev->flags);
+	/* set CHANGE_PENDING here since maybe another update is needed,
+	 * so other nodes are informed. It should be harmless for normal
+	 * raid */
+	set_mask_bits(&mddev->flags, 0,
+		      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
 
 	spin_lock(&mddev->lock);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
```
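The md_rdev_init hunk above replaces the two-field struct timespec with a single time64_t for last_read_error. Below is a minimal userspace sketch of why that helps, emulating time64_t with int64_t; the typedef and the 1800-second window are illustrative, not from the patch:

```c
/* Userspace sketch, not kernel code: coarse "time since last error"
 * bookkeeping only needs one 64-bit seconds field, not a tv_sec/tv_nsec
 * pair that must be kept consistent. */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

typedef int64_t time64_t;	/* stand-in for the kernel's time64_t */

int main(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);

	/* One field, one comparison, no 32-bit tv_sec overflow in 2038. */
	time64_t last_read_error = ts.tv_sec;
	time64_t now = ts.tv_sec + 3600;	/* pretend an hour passed */

	if (now - last_read_error > 1800)
		printf("error window expired (%lld s elapsed)\n",
		       (long long)(now - last_read_error));
	return 0;
}
```

Seconds granularity is plenty for rate-limiting read-error handling, and on 32-bit builds it avoids the 2038 overflow of a 32-bit time_t.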
drivers/md/md.c  (continued)

```diff
@@ -8188,15 +8189,34 @@ static int remove_and_add_spares(struct mddev *mddev,
 	struct md_rdev *rdev;
 	int spares = 0;
 	int removed = 0;
+	bool remove_some = false;
 
-	rdev_for_each(rdev, mddev)
+	rdev_for_each(rdev, mddev) {
+		if ((this == NULL || rdev == this) &&
+		    rdev->raid_disk >= 0 &&
+		    !test_bit(Blocked, &rdev->flags) &&
+		    test_bit(Faulty, &rdev->flags) &&
+		    atomic_read(&rdev->nr_pending)==0) {
+			/* Faulty non-Blocked devices with nr_pending == 0
+			 * never get nr_pending incremented,
+			 * never get Faulty cleared, and never get Blocked set.
+			 * So we can synchronize_rcu now rather than once
+			 * per device */
+			remove_some = true;
+			set_bit(RemoveSynchronized, &rdev->flags);
+		}
+	}
+
+	if (remove_some)
+		synchronize_rcu();
+	rdev_for_each(rdev, mddev) {
 		if ((this == NULL || rdev == this) &&
 		    rdev->raid_disk >= 0 &&
 		    !test_bit(Blocked, &rdev->flags) &&
-		    (test_bit(Faulty, &rdev->flags) ||
+		    ((test_bit(RemoveSynchronized, &rdev->flags) ||
 		     (!test_bit(In_sync, &rdev->flags) &&
 		      !test_bit(Journal, &rdev->flags))) &&
-		    atomic_read(&rdev->nr_pending)==0) {
+		    atomic_read(&rdev->nr_pending)==0)) {
 			if (mddev->pers->hot_remove_disk(
 				    mddev, rdev) == 0) {
 				sysfs_unlink_rdev(mddev, rdev);
@@ -8204,6 +8224,10 @@ static int remove_and_add_spares(struct mddev *mddev,
 				removed++;
 			}
 		}
+		if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
+			clear_bit(RemoveSynchronized, &rdev->flags);
+	}
+
 	if (removed && mddev->kobj.sd)
 		sysfs_notify(&mddev->kobj, NULL, "degraded");
@@ -8506,6 +8530,11 @@ void md_reap_sync_thread(struct mddev *mddev)
 			rdev->saved_raid_disk = -1;
 
 	md_update_sb(mddev, 1);
+	/* MD_CHANGE_PENDING should be cleared by md_update_sb, so we can
+	 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
+	 * clustered raid */
+	if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
+		md_cluster_ops->resync_finish(mddev);
 	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@@ -8803,6 +8832,7 @@ EXPORT_SYMBOL(md_reload_sb);
  * at boot time.
  */
 
+static DEFINE_MUTEX(detected_devices_mutex);
 static LIST_HEAD(all_detected_devices);
 struct detected_devices_node {
 	struct list_head list;
@@ -8816,7 +8846,9 @@ void md_autodetect_dev(dev_t dev)
 	node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
 	if (node_detected_dev) {
 		node_detected_dev->dev = dev;
+		mutex_lock(&detected_devices_mutex);
 		list_add_tail(&node_detected_dev->list, &all_detected_devices);
+		mutex_unlock(&detected_devices_mutex);
 	} else {
 		printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
 			", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
@@ -8835,6 +8867,7 @@ static void autostart_arrays(int part)
 
 	printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
 
+	mutex_lock(&detected_devices_mutex);
 	while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
 		i_scanned++;
 		node_detected_dev = list_entry(all_detected_devices.next,
@@ -8853,6 +8886,7 @@ static void autostart_arrays(int part)
 		list_add(&rdev->same_set, &pending_raid_disks);
 		i_passed++;
 	}
+	mutex_unlock(&detected_devices_mutex);
 
 	printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
 		i_scanned, i_passed);
```
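md_autodetect_dev() and autostart_arrays() above now serialize access to all_detected_devices with detected_devices_mutex. Here is a userspace sketch of the same pattern with POSIX threads; the list layout and the names autodetect_add/autostart_drain are illustrative, not kernel APIs:

```c
/* A global list fed by one context and drained by another must hold
 * the same mutex on both sides. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct detected_dev {
	int dev;
	struct detected_dev *next;
};

static pthread_mutex_t detected_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct detected_dev *detected_head;

static void autodetect_add(int dev)	/* cf. md_autodetect_dev() */
{
	struct detected_dev *node = malloc(sizeof(*node));
	if (!node)
		return;
	node->dev = dev;
	pthread_mutex_lock(&detected_mutex);
	node->next = detected_head;
	detected_head = node;
	pthread_mutex_unlock(&detected_mutex);
}

static void autostart_drain(void)	/* cf. autostart_arrays() */
{
	pthread_mutex_lock(&detected_mutex);
	while (detected_head) {
		struct detected_dev *node = detected_head;
		detected_head = node->next;
		printf("scanning dev %d\n", node->dev);
		free(node);
	}
	pthread_mutex_unlock(&detected_mutex);
}

int main(void)
{
	autodetect_add(1);
	autodetect_add(2);
	autostart_drain();
	return 0;
}
```

The point is simply that the producer's insertion can never interleave with the consumer's drain loop, which is exactly the race the mutex hunks close.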
drivers/md/md.h  (+9 −1)

```diff
@@ -99,7 +99,7 @@ struct md_rdev {
 	atomic_t	read_errors;	/* number of consecutive read errors that
 					 * we have tried to ignore.
 					 */
-	struct timespec last_read_error;	/* monotonic time since our
+	time64_t	last_read_error;	/* monotonic time since our
 						 * last read error
 						 */
 	atomic_t	corrected_errors; /* number of corrected read errors,
@@ -163,6 +163,11 @@ enum flag_bits {
 				 * than other devices in the array
 				 */
 	ClusterRemove,
+	RemoveSynchronized,	/* synchronize_rcu() was called after
+				 * this device was known to be faulty,
+				 * so it is safe to remove without
+				 * another synchronize_rcu() call.
+				 */
 };
 
 static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
@@ -204,6 +209,9 @@ struct mddev {
 #define MD_RELOAD_SB	7	/* Reload the superblock because another node
 				 * updated it.
 				 */
+#define MD_CLUSTER_RESYNC_LOCKED 8 /* cluster raid only, which means node
+				    * already took resync lock, need to
+				    * release the lock */
 
 	int				suspended;
 	atomic_t			active_io;
```
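RemoveSynchronized exists so that remove_and_add_spares() can pay for one grace period instead of one per failed device. A hedged userspace sketch of that two-pass batching, with expensive_barrier() standing in for synchronize_rcu() and a toy flags array in place of rdev->flags:

```c
/* Mark every candidate first, issue the expensive barrier once, then
 * reap.  Device count and flag layout are illustrative only. */
#include <stdbool.h>
#include <stdio.h>

#define NDEV		4
#define FAULTY		(1 << 0)
#define REMOVE_SYNCED	(1 << 1)

static unsigned int flags[NDEV] = { FAULTY, 0, FAULTY, FAULTY };

static void expensive_barrier(void)
{
	puts("barrier (paid once, not per device)");
}

int main(void)
{
	bool remove_some = false;
	int i;

	/* Pass 1: mark every device that is safe to reap. */
	for (i = 0; i < NDEV; i++)
		if (flags[i] & FAULTY) {
			flags[i] |= REMOVE_SYNCED;
			remove_some = true;
		}

	/* One barrier covers all marked devices. */
	if (remove_some)
		expensive_barrier();

	/* Pass 2: reap marked devices without further barriers. */
	for (i = 0; i < NDEV; i++)
		if (flags[i] & REMOVE_SYNCED) {
			printf("removing dev %d\n", i);
			flags[i] &= ~REMOVE_SYNCED;
		}
	return 0;
}
```

The batching is only safe because of the invariant stated in the remove_and_add_spares() comment: a Faulty, non-Blocked device with nr_pending == 0 can never regain a reference, so one grace period covers all of them.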
drivers/md/multipath.c  (+17 −12)

```diff
@@ -43,7 +43,8 @@ static int multipath_map (struct mpconf *conf)
 	rcu_read_lock();
 	for (i = 0; i < disks; i++) {
 		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
-		if (rdev && test_bit(In_sync, &rdev->flags)) {
+		if (rdev && test_bit(In_sync, &rdev->flags) &&
+		    !test_bit(Faulty, &rdev->flags)) {
 			atomic_inc(&rdev->nr_pending);
 			rcu_read_unlock();
 			return i;
@@ -148,10 +149,12 @@ static void multipath_status (struct seq_file *seq, struct mddev *mddev)
 	seq_printf (seq, " [%d/%d] [", conf->raid_disks,
 		    conf->raid_disks - mddev->degraded);
-	for (i = 0; i < conf->raid_disks; i++)
-		seq_printf (seq, "%s",
-			conf->multipaths[i].rdev &&
-			test_bit(In_sync, &conf->multipaths[i].rdev->flags) ? "U" : "_");
+	rcu_read_lock();
+	for (i = 0; i < conf->raid_disks; i++) {
+		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
+		seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
+	}
+	rcu_read_unlock();
 	seq_printf (seq, "]");
 }
@@ -295,6 +298,7 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 			goto abort;
 		}
 		p->rdev = NULL;
+		if (!test_bit(RemoveSynchronized, &rdev->flags)) {
 			synchronize_rcu();
 			if (atomic_read(&rdev->nr_pending)) {
 				/* lost the race, try later */
@@ -302,6 +306,7 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 				p->rdev = rdev;
 				goto abort;
 			}
+		}
 		err = md_integrity_register(mddev);
 	}
 abort:
```

drivers/md/raid1.c  (+67 −63)

```diff
@@ -319,14 +319,13 @@ static void raid1_end_read_request(struct bio *bio)
 {
 	int uptodate = !bio->bi_error;
 	struct r1bio *r1_bio = bio->bi_private;
-	int mirror;
 	struct r1conf *conf = r1_bio->mddev->private;
+	struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
 
-	mirror = r1_bio->read_disk;
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	update_head_pos(mirror, r1_bio);
+	update_head_pos(r1_bio->read_disk, r1_bio);
 
 	if (uptodate)
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
@@ -339,14 +338,14 @@ static void raid1_end_read_request(struct bio *bio)
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (r1_bio->mddev->degraded == conf->raid_disks ||
 		    (r1_bio->mddev->degraded == conf->raid_disks-1 &&
-		     test_bit(In_sync, &conf->mirrors[mirror].rdev->flags)))
+		     test_bit(In_sync, &rdev->flags)))
 			uptodate = 1;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 
 	if (uptodate) {
 		raid_end_bio_io(r1_bio);
-		rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
+		rdev_dec_pending(rdev, conf->mddev);
 	} else {
 		/*
 		 * oops, read error:
@@ -356,7 +355,7 @@ static void raid1_end_read_request(struct bio *bio)
 			KERN_ERR "md/raid1:%s: %s: "
 				 "rescheduling sector %llu\n",
 			mdname(conf->mddev),
-			bdevname(conf->mirrors[mirror].rdev->bdev, b),
+			bdevname(rdev->bdev, b),
 			(unsigned long long)r1_bio->sector);
 		set_bit(R1BIO_ReadError, &r1_bio->state);
@@ -403,20 +402,18 @@ static void r1_bio_write_done(struct r1bio *r1_bio)
 static void raid1_end_write_request(struct bio *bio)
 {
 	struct r1bio *r1_bio = bio->bi_private;
-	int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
+	int behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
 	struct r1conf *conf = r1_bio->mddev->private;
 	struct bio *to_put = NULL;
-
-	mirror = find_bio_disk(r1_bio, bio);
+	int mirror = find_bio_disk(r1_bio, bio);
+	struct md_rdev *rdev = conf->mirrors[mirror].rdev;
 
 	/*
 	 * 'one mirror IO has finished' event handler:
 	 */
 	if (bio->bi_error) {
-		set_bit(WriteErrorSeen,	&conf->mirrors[mirror].rdev->flags);
-		if (!test_and_set_bit(WantReplacement,
-				      &conf->mirrors[mirror].rdev->flags))
+		set_bit(WriteErrorSeen,	&rdev->flags);
+		if (!test_and_set_bit(WantReplacement, &rdev->flags))
 			set_bit(MD_RECOVERY_NEEDED, &
 				conf->mddev->recovery);
@@ -445,13 +442,12 @@ static void raid1_end_write_request(struct bio *bio)
 		 * before rdev->recovery_offset, but for simplicity we don't
 		 * check this here.
 		 */
-		if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) &&
-		    !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))
+		if (test_bit(In_sync, &rdev->flags) &&
+		    !test_bit(Faulty, &rdev->flags))
 			set_bit(R1BIO_Uptodate, &r1_bio->state);
 
 		/* Maybe we can clear some bad blocks. */
-		if (is_badblock(conf->mirrors[mirror].rdev,
-				r1_bio->sector, r1_bio->sectors,
+		if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors,
 				&first_bad, &bad_sectors)) {
 			r1_bio->bios[mirror] = IO_MADE_GOOD;
 			set_bit(R1BIO_MadeGood, &r1_bio->state);
@@ -459,7 +455,7 @@ static void raid1_end_write_request(struct bio *bio)
 	}
 
 	if (behind) {
-		if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
+		if (test_bit(WriteMostly, &rdev->flags))
 			atomic_dec(&r1_bio->behind_remaining);
@@ -483,8 +479,7 @@ static void raid1_end_write_request(struct bio *bio)
 		}
 	}
 	if (r1_bio->bios[mirror] == NULL)
-		rdev_dec_pending(conf->mirrors[mirror].rdev,
-				 conf->mddev);
+		rdev_dec_pending(rdev, conf->mddev);
 
 	/*
 	 * Let's see if all mirrored write operations have finished
@@ -689,13 +684,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors)
 		if (!rdev)
 			goto retry;
 		atomic_inc(&rdev->nr_pending);
-		if (test_bit(Faulty, &rdev->flags)) {
-			/* cannot risk returning a device that failed
-			 * before we inc'ed nr_pending
-			 */
-			rdev_dec_pending(rdev, conf->mddev);
-			goto retry;
-		}
 		sectors = best_good_sectors;
 
 		if (conf->mirrors[best_disk].next_seq_sect != this_sector)
```
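The multipath_map() and read_balance() hunks above move the Faulty test in front of the nr_pending increment, so a Faulty device with nr_pending == 0 can never gain a new reference. A userspace sketch of that ordering, using C11 atomics as stand-ins for kernel atomics; struct dev, try_get(), and put() are illustrative names:

```c
/* Check the flag before taking the reference, never after. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct dev {
	atomic_bool faulty;	/* cf. test_bit(Faulty, &rdev->flags) */
	atomic_int nr_pending;	/* cf. rdev->nr_pending */
};

static bool try_get(struct dev *d)
{
	/* In the kernel this runs under rcu_read_lock(). */
	if (atomic_load(&d->faulty))
		return false;	/* never reference a Faulty device */
	atomic_fetch_add(&d->nr_pending, 1);
	return true;
}

static void put(struct dev *d)
{
	atomic_fetch_sub(&d->nr_pending, 1);
}

int main(void)
{
	struct dev d = { .faulty = false, .nr_pending = 0 };

	if (try_get(&d)) {
		puts("I/O issued");
		put(&d);
	}
	atomic_store(&d.faulty, true);
	if (!try_get(&d))
		puts("faulty: no new reference taken");
	return 0;
}
```

A narrow window remains between the flag test and the increment; in the kernel it is closed by the grace period in remove_and_add_spares() together with the final nr_pending == 0 check before removal.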
drivers/md/raid1.c  (continued)

```diff
@@ -1666,13 +1654,16 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 			goto abort;
 		}
 		p->rdev = NULL;
+		if (!test_bit(RemoveSynchronized, &rdev->flags)) {
 			synchronize_rcu();
 			if (atomic_read(&rdev->nr_pending)) {
 				/* lost the race, try later */
 				err = -EBUSY;
 				p->rdev = rdev;
 				goto abort;
-		} else if (conf->mirrors[conf->raid_disks + number].rdev) {
+			}
+		}
+		if (conf->mirrors[conf->raid_disks + number].rdev) {
 			/* We just removed a device that is being replaced.
 			 * Move down the replacement.  We drain all IO before
 			 * doing this to avoid confusion.
@@ -1719,11 +1710,9 @@ static void end_sync_write(struct bio *bio)
 	struct r1bio *r1_bio = bio->bi_private;
 	struct mddev *mddev = r1_bio->mddev;
 	struct r1conf *conf = mddev->private;
-	int mirror=0;
 	sector_t first_bad;
 	int bad_sectors;
-
-	mirror = find_bio_disk(r1_bio, bio);
+	struct md_rdev *rdev = conf->mirrors[find_bio_disk(r1_bio, bio)].rdev;
 
 	if (!uptodate) {
 		sector_t sync_blocks = 0;
@@ -1736,16 +1725,12 @@ static void end_sync_write(struct bio *bio)
 			s += sync_blocks;
 			sectors_to_go -= sync_blocks;
 		} while (sectors_to_go > 0);
-		set_bit(WriteErrorSeen,
-			&conf->mirrors[mirror].rdev->flags);
-		if (!test_and_set_bit(WantReplacement,
-				      &conf->mirrors[mirror].rdev->flags))
+		set_bit(WriteErrorSeen, &rdev->flags);
+		if (!test_and_set_bit(WantReplacement, &rdev->flags))
 			set_bit(MD_RECOVERY_NEEDED, &
 				mddev->recovery);
 		set_bit(R1BIO_WriteError, &r1_bio->state);
-	} else if (is_badblock(conf->mirrors[mirror].rdev,
-			       r1_bio->sector, r1_bio->sectors,
+	} else if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors,
 			       &first_bad, &bad_sectors) &&
 		   !is_badblock(conf->mirrors[r1_bio->read_disk].rdev,
 				r1_bio->sector,
@@ -2072,29 +2057,30 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			s = PAGE_SIZE >> 9;
 
 		do {
-			/* Note: no rcu protection needed here
-			 * as this is synchronous in the raid1d thread
-			 * which is the thread that might remove
-			 * a device.  If raid1d ever becomes multi-threaded....
-			 */
 			sector_t first_bad;
 			int bad_sectors;
 
-			rdev = conf->mirrors[d].rdev;
+			rcu_read_lock();
+			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev &&
 			    (test_bit(In_sync, &rdev->flags) ||
 			     (!test_bit(Faulty, &rdev->flags) &&
 			      rdev->recovery_offset >= sect + s)) &&
 			    is_badblock(rdev, sect, s,
-					&first_bad, &bad_sectors) == 0 &&
-			    sync_page_io(rdev, sect, s<<9,
-					 conf->tmppage, REQ_OP_READ, 0, false))
-				success = 1;
-			else {
-				d++;
-				if (d == conf->raid_disks * 2)
-					d = 0;
-			}
+					&first_bad, &bad_sectors) == 0) {
+				atomic_inc(&rdev->nr_pending);
+				rcu_read_unlock();
+				if (sync_page_io(rdev, sect, s<<9,
+						 conf->tmppage, REQ_OP_READ, 0, false))
+					success = 1;
+				rdev_dec_pending(rdev, mddev);
+				if (success)
+					break;
+			} else
+				rcu_read_unlock();
+			d++;
+			if (d == conf->raid_disks * 2)
+				d = 0;
 		} while (!success && d != read_disk);
 
 		if (!success) {
@@ -2110,11 +2096,17 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			if (d==0)
 				d = conf->raid_disks * 2;
 			d--;
-			rdev = conf->mirrors[d].rdev;
+			rcu_read_lock();
+			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev &&
-			    !test_bit(Faulty, &rdev->flags))
+			    !test_bit(Faulty, &rdev->flags)) {
+				atomic_inc(&rdev->nr_pending);
+				rcu_read_unlock();
 				r1_sync_page_io(rdev, sect, s,
 						conf->tmppage, WRITE);
+				rdev_dec_pending(rdev, mddev);
+			} else
+				rcu_read_unlock();
 		}
 		d = start;
 		while (d != read_disk) {
@@ -2122,9 +2114,12 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			if (d==0)
 				d = conf->raid_disks * 2;
 			d--;
-			rdev = conf->mirrors[d].rdev;
+			rcu_read_lock();
+			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev &&
 			    !test_bit(Faulty, &rdev->flags)) {
+				atomic_inc(&rdev->nr_pending);
+				rcu_read_unlock();
 				if (r1_sync_page_io(rdev, sect, s,
 						    conf->tmppage, READ)) {
 					atomic_add(s, &rdev->corrected_errors);
@@ -2136,7 +2131,9 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 						       rdev->data_offset),
 					       bdevname(rdev->bdev, b));
 				}
+				rdev_dec_pending(rdev, mddev);
+			} else
+				rcu_read_unlock();
 		}
 		sectors -= s;
 		sect += s;
```
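fix_read_error() above switches from bare pointer reads to rcu_read_lock()/rcu_dereference() and pins the device with nr_pending before dropping the read lock for the slow I/O. A sketch of the same shape using userspace RCU, assuming liburcu is installed (build with -lurcu; the kernel API is analogous but not identical, and struct dev plus the slot variable are illustrative):

```c
#include <urcu.h>		/* liburcu: rcu_read_lock() and friends */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct dev {
	atomic_int nr_pending;
	int id;
};

static struct dev *slot;	/* RCU-protected pointer, cf. mirrors[d].rdev */

static void reader(void)
{
	struct dev *d;

	rcu_read_lock();
	d = rcu_dereference(slot);
	if (d) {
		/* Pin the device, then drop the read lock for slow work. */
		atomic_fetch_add(&d->nr_pending, 1);
		rcu_read_unlock();
		printf("slow I/O to dev %d outside the read section\n", d->id);
		atomic_fetch_sub(&d->nr_pending, 1);
	} else
		rcu_read_unlock();
}

int main(void)
{
	struct dev *d = calloc(1, sizeof(*d));

	rcu_register_thread();
	rcu_assign_pointer(slot, d);
	reader();
	rcu_assign_pointer(slot, NULL);
	synchronize_rcu();	/* no reader can still see 'd' after this */
	free(d);
	rcu_unregister_thread();
	return 0;
}
```

The design point the hunks make explicit: holding an RCU read section across disk I/O is not acceptable, so the reference count bridges the gap between the lookup and the I/O completion.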
drivers/md/raid1.c  (continued)

```diff
@@ -2534,6 +2531,13 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 		return sync_blocks;
 	}
 
+	/*
+	 * If there is non-resync activity waiting for a turn, then let it
+	 * through before starting on this new sync request.
+	 */
+	if (conf->nr_waiting)
+		schedule_timeout_uninterruptible(1);
+
 	/* we are incrementing sector_nr below. To be safe, we check against
 	 * sector_nr + two times RESYNC_SECTORS
 	 */
```
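The raid1_sync_request() hunk above lets queued regular I/O overtake the resync by napping one tick whenever nr_waiting is non-zero. A userspace sketch of that fairness nudge, with nanosleep() standing in for schedule_timeout_uninterruptible(1) and an illustrative nr_waiting counter:

```c
/* A background loop that voluntarily yields when foreground work is
 * queued behind it. */
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

static atomic_int nr_waiting;	/* cf. conf->nr_waiting */

static void resync_one_chunk(int chunk)
{
	if (atomic_load(&nr_waiting)) {
		/* let waiting regular I/O through first */
		struct timespec tick = { 0, 1000000 };	/* ~1 ms "jiffy" */
		nanosleep(&tick, NULL);
	}
	printf("resyncing chunk %d\n", chunk);
}

int main(void)
{
	int i;

	atomic_store(&nr_waiting, 1);	/* pretend I/O is queued */
	for (i = 0; i < 3; i++)
		resync_one_chunk(i);
	return 0;
}
```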