www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'for-linus' of git://neil.brown.name/md
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 8 Jan 2009 22:03:34 +0000 (14:03 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 8 Jan 2009 22:03:34 +0000 (14:03 -0800)
* 'for-linus' of git://neil.brown.name/md:
  md: don't retry recovery of raid1 that fails due to error on source drive.
  md: Allow md devices to be created by name.
  md: make devices disappear when they are no longer needed.
  md: centralise all freeing of an 'mddev' in 'md_free'
  md: move allocation of ->queue from mddev_find to md_probe
  md: need another print_sb for mdp_superblock_1
  md: use list_for_each_entry macro directly
  md: raid0: make hash_spacing and preshift sector-based.
  md: raid0: Represent the size of strip zones in sectors.
  md: raid0 create_strip_zones(): Add KERN_INFO/KERN_ERR to printk's.
  md: raid0 create_strip_zones(): Make two local variables sector-based.
  md: raid0: Represent zone->zone_offset in sectors.
  md: raid0: Represent device offset in sectors.
  md: raid0_make_request(): Replace local variable block by sector.
  md: raid0_make_request(): Remove local variable chunk_size.
  md: raid0_make_request(): Replace chunksize_bits by chunksect_bits.
  md: use sysfs_notify_dirent to notify changes to md/sync_action.
  md: fix bitmap-on-external-file bug.

13 files changed:
drivers/md/bitmap.c
drivers/md/faulty.c
drivers/md/linear.c
drivers/md/md.c
drivers/md/multipath.c
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
fs/block_dev.c
include/linux/raid/md_k.h
include/linux/raid/md_p.h
include/linux/raid/raid0.h

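diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index ab7c8e4a61f943c516ae5a3534e8775c33180f6b..719943763391263c7a4ab98ad0201244ce1d3d62 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c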
@@ -215,7 +215,6 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
        /* choose a good rdev and read the page from there */
 
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
        sector_t target;
 
        if (!page)
@@ -223,7 +222,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
        if (!page)
                return ERR_PTR(-ENOMEM);
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                if (! test_bit(In_sync, &rdev->flags)
                    || test_bit(Faulty, &rdev->flags))
                        continue;
@@ -964,9 +963,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
                                 */
                                page = bitmap->sb_page;
                                offset = sizeof(bitmap_super_t);
-                               read_sb_page(bitmap->mddev, bitmap->offset,
-                                            page,
-                                            index, count);
+                               if (!file)
+                                       read_sb_page(bitmap->mddev,
+                                                    bitmap->offset,
+                                                    page,
+                                                    index, count);
                        } else if (file) {
                                page = read_page(file, index, bitmap, count);
                                offset = 0;
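diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index f26c1f9a475b8d9cbad5f885016e50723b580cd5..86d9adf90e791857efdf674117168398b40735a1 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c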
@@ -283,7 +283,6 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size)
 static int run(mddev_t *mddev)
 {
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
        int i;
 
        conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL);
@@ -296,7 +295,7 @@ static int run(mddev_t *mddev)
        }
        conf->nfaults = 0;
 
-       rdev_for_each(rdev, tmp, mddev)
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                conf->rdev = rdev;
 
        mddev->array_sectors = mddev->size * 2;
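diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 3b90c5c924ecc25a33ba4268d22d65b74142fece..1e3aea9eecf18009f4ff0bc2e4c474914e1bcf74 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c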
@@ -105,7 +105,6 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
        int i, nb_zone, cnt;
        sector_t min_sectors;
        sector_t curr_sector;
-       struct list_head *tmp;
 
        conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t),
                        GFP_KERNEL);
@@ -115,7 +114,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
        cnt = 0;
        conf->array_sectors = 0;
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                int j = rdev->raid_disk;
                dev_info_t *disk = conf->disks + j;
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1b1d32694f6fc8eddf7a8c4cccc6d836b5e88054..41e2509bf896c9356f9f9bd55f93f811120d7639 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -214,20 +214,33 @@ static inline mddev_t *mddev_get(mddev_t *mddev)
        return mddev;
 }
 
+static void mddev_delayed_delete(struct work_struct *ws)
+{
+       mddev_t *mddev = container_of(ws, mddev_t, del_work);
+       kobject_del(&mddev->kobj);
+       kobject_put(&mddev->kobj);
+}
+
 static void mddev_put(mddev_t *mddev)
 {
        if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
                return;
-       if (!mddev->raid_disks && list_empty(&mddev->disks)) {
+       if (!mddev->raid_disks && list_empty(&mddev->disks) &&
+           !mddev->hold_active) {
                list_del(&mddev->all_mddevs);
-               spin_unlock(&all_mddevs_lock);
-               blk_cleanup_queue(mddev->queue);
-               if (mddev->sysfs_state)
-                       sysfs_put(mddev->sysfs_state);
-               mddev->sysfs_state = NULL;
-               kobject_put(&mddev->kobj);
-       } else
-               spin_unlock(&all_mddevs_lock);
+               if (mddev->gendisk) {
+                       /* we did a probe so need to clean up.
+                        * Call schedule_work inside the spinlock
+                        * so that flush_scheduled_work() after
+                        * mddev_find will succeed in waiting for the
+                        * work to be done.
+                        */
+                       INIT_WORK(&mddev->del_work, mddev_delayed_delete);
+                       schedule_work(&mddev->del_work);
+               } else
+                       kfree(mddev);
+       }
+       spin_unlock(&all_mddevs_lock);
 }
 
 static mddev_t * mddev_find(dev_t unit)
@@ -236,15 +249,50 @@ static mddev_t * mddev_find(dev_t unit)
 
  retry:
        spin_lock(&all_mddevs_lock);
-       list_for_each_entry(mddev, &all_mddevs, all_mddevs)
-               if (mddev->unit == unit) {
-                       mddev_get(mddev);
+
+       if (unit) {
+               list_for_each_entry(mddev, &all_mddevs, all_mddevs)
+                       if (mddev->unit == unit) {
+                               mddev_get(mddev);
+                               spin_unlock(&all_mddevs_lock);
+                               kfree(new);
+                               return mddev;
+                       }
+
+               if (new) {
+                       list_add(&new->all_mddevs, &all_mddevs);
                        spin_unlock(&all_mddevs_lock);
-                       kfree(new);
-                       return mddev;
+                       new->hold_active = UNTIL_IOCTL;
+                       return new;
                }
-
-       if (new) {
+       } else if (new) {
+               /* find an unused unit number */
+               static int next_minor = 512;
+               int start = next_minor;
+               int is_free = 0;
+               int dev = 0;
+               while (!is_free) {
+                       dev = MKDEV(MD_MAJOR, next_minor);
+                       next_minor++;
+                       if (next_minor > MINORMASK)
+                               next_minor = 0;
+                       if (next_minor == start) {
+                               /* Oh dear, all in use. */
+                               spin_unlock(&all_mddevs_lock);
+                               kfree(new);
+                               return NULL;
+                       }
+                               
+                       is_free = 1;
+                       list_for_each_entry(mddev, &all_mddevs, all_mddevs)
+                               if (mddev->unit == dev) {
+                                       is_free = 0;
+                                       break;
+                               }
+               }
+               new->unit = dev;
+               new->md_minor = MINOR(dev);
+               new->hold_active = UNTIL_STOP;
                list_add(&new->all_mddevs, &all_mddevs);
                spin_unlock(&all_mddevs_lock);
                return new;
@@ -275,16 +323,6 @@ static mddev_t * mddev_find(dev_t unit)
        new->resync_max = MaxSector;
        new->level = LEVEL_NONE;
 
-       new->queue = blk_alloc_queue(GFP_KERNEL);
-       if (!new->queue) {
-               kfree(new);
-               return NULL;
-       }
-       /* Can be unlocked because the queue is new: no concurrency */
-       queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
-
-       blk_queue_make_request(new->queue, md_fail_request);
-
        goto retry;
 }
 
@@ -307,25 +345,23 @@ static inline void mddev_unlock(mddev_t * mddev)
 
 static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
 {
-       mdk_rdev_t * rdev;
-       struct list_head *tmp;
+       mdk_rdev_t *rdev;
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                if (rdev->desc_nr == nr)
                        return rdev;
-       }
+
        return NULL;
 }
 
 static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
 {
-       struct list_head *tmp;
        mdk_rdev_t *rdev;
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                if (rdev->bdev->bd_dev == dev)
                        return rdev;
-       }
+
        return NULL;
 }
 
@@ -861,7 +897,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 {
        mdp_super_t *sb;
-       struct list_head *tmp;
        mdk_rdev_t *rdev2;
        int next_spare = mddev->raid_disks;
 
@@ -933,7 +968,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
                sb->state |= (1<<MD_SB_BITMAP_PRESENT);
 
        sb->disks[0].state = (1<<MD_DISK_REMOVED);
-       rdev_for_each(rdev2, tmp, mddev) {
+       list_for_each_entry(rdev2, &mddev->disks, same_set) {
                mdp_disk_t *d;
                int desc_nr;
                if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
@@ -1259,7 +1294,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 {
        struct mdp_superblock_1 *sb;
-       struct list_head *tmp;
        mdk_rdev_t *rdev2;
        int max_dev, i;
        /* make rdev->sb match mddev and rdev data. */
@@ -1307,7 +1341,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        }
 
        max_dev = 0;
-       rdev_for_each(rdev2, tmp, mddev)
+       list_for_each_entry(rdev2, &mddev->disks, same_set)
                if (rdev2->desc_nr+1 > max_dev)
                        max_dev = rdev2->desc_nr+1;
 
@@ -1316,7 +1350,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        for (i=0; i<max_dev;i++)
                sb->dev_roles[i] = cpu_to_le16(0xfffe);
        
-       rdev_for_each(rdev2, tmp, mddev) {
+       list_for_each_entry(rdev2, &mddev->disks, same_set) {
                i = rdev2->desc_nr;
                if (test_bit(Faulty, &rdev2->flags))
                        sb->dev_roles[i] = cpu_to_le16(0xfffe);
@@ -1466,6 +1500,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 
        list_add_rcu(&rdev->same_set, &mddev->disks);
        bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
+
+       /* May as well allow recovery to be retried once */
+       mddev->recovery_disabled = 0;
        return 0;
 
  fail:
@@ -1571,8 +1608,7 @@ static void kick_rdev_from_array(mdk_rdev_t * rdev)
 
 static void export_array(mddev_t *mddev)
 {
-       struct list_head *tmp;
-       mdk_rdev_t *rdev;
+       mdk_rdev_t *rdev, *tmp;
 
        rdev_for_each(rdev, tmp, mddev) {
                if (!rdev->mddev) {
@@ -1593,7 +1629,7 @@ static void print_desc(mdp_disk_t *desc)
                desc->major,desc->minor,desc->raid_disk,desc->state);
 }
 
-static void print_sb(mdp_super_t *sb)
+static void print_sb_90(mdp_super_t *sb)
 {
        int i;
 
@@ -1624,10 +1660,57 @@ static void print_sb(mdp_super_t *sb)
        }
        printk(KERN_INFO "md:     THIS: ");
        print_desc(&sb->this_disk);
-
 }
 
-static void print_rdev(mdk_rdev_t *rdev)
+static void print_sb_1(struct mdp_superblock_1 *sb)
+{
+       __u8 *uuid;
+
+       uuid = sb->set_uuid;
+       printk(KERN_INFO "md:  SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
+                       ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
+              KERN_INFO "md:    Name: \"%s\" CT:%llu\n",
+               le32_to_cpu(sb->major_version),
+               le32_to_cpu(sb->feature_map),
+               uuid[0], uuid[1], uuid[2], uuid[3],
+               uuid[4], uuid[5], uuid[6], uuid[7],
+               uuid[8], uuid[9], uuid[10], uuid[11],
+               uuid[12], uuid[13], uuid[14], uuid[15],
+               sb->set_name,
+               (unsigned long long)le64_to_cpu(sb->ctime)
+                      & MD_SUPERBLOCK_1_TIME_SEC_MASK);
+
+       uuid = sb->device_uuid;
+       printk(KERN_INFO "md:       L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
+                       " RO:%llu\n"
+              KERN_INFO "md:     Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
+                       ":%02x%02x%02x%02x%02x%02x\n"
+              KERN_INFO "md:       (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
+              KERN_INFO "md:         (MaxDev:%u) \n",
+               le32_to_cpu(sb->level),
+               (unsigned long long)le64_to_cpu(sb->size),
+               le32_to_cpu(sb->raid_disks),
+               le32_to_cpu(sb->layout),
+               le32_to_cpu(sb->chunksize),
+               (unsigned long long)le64_to_cpu(sb->data_offset),
+               (unsigned long long)le64_to_cpu(sb->data_size),
+               (unsigned long long)le64_to_cpu(sb->super_offset),
+               (unsigned long long)le64_to_cpu(sb->recovery_offset),
+               le32_to_cpu(sb->dev_number),
+               uuid[0], uuid[1], uuid[2], uuid[3],
+               uuid[4], uuid[5], uuid[6], uuid[7],
+               uuid[8], uuid[9], uuid[10], uuid[11],
+               uuid[12], uuid[13], uuid[14], uuid[15],
+               sb->devflags,
+               (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
+               (unsigned long long)le64_to_cpu(sb->events),
+               (unsigned long long)le64_to_cpu(sb->resync_offset),
+               le32_to_cpu(sb->sb_csum),
+               le32_to_cpu(sb->max_dev)
+               );
+}
+
+static void print_rdev(mdk_rdev_t *rdev, int major_version)
 {
        char b[BDEVNAME_SIZE];
        printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n",
@@ -1635,15 +1718,22 @@ static void print_rdev(mdk_rdev_t *rdev)
                test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
                rdev->desc_nr);
        if (rdev->sb_loaded) {
-               printk(KERN_INFO "md: rdev superblock:\n");
-               print_sb((mdp_super_t*)page_address(rdev->sb_page));
+               printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
+               switch (major_version) {
+               case 0:
+                       print_sb_90((mdp_super_t*)page_address(rdev->sb_page));
+                       break;
+               case 1:
+                       print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page));
+                       break;
+               }
        } else
                printk(KERN_INFO "md: no rdev superblock!\n");
 }
 
 static void md_print_devices(void)
 {
-       struct list_head *tmp, *tmp2;
+       struct list_head *tmp;
        mdk_rdev_t *rdev;
        mddev_t *mddev;
        char b[BDEVNAME_SIZE];
@@ -1658,12 +1748,12 @@ static void md_print_devices(void)
                        bitmap_print_sb(mddev->bitmap);
                else
                        printk("%s: ", mdname(mddev));
-               rdev_for_each(rdev, tmp2, mddev)
+               list_for_each_entry(rdev, &mddev->disks, same_set)
                        printk("<%s>", bdevname(rdev->bdev,b));
                printk("\n");
 
-               rdev_for_each(rdev, tmp2, mddev)
-                       print_rdev(rdev);
+               list_for_each_entry(rdev, &mddev->disks, same_set)
+                       print_rdev(rdev, mddev->major_version);
        }
        printk("md:     **********************************\n");
        printk("\n");
@@ -1679,9 +1769,8 @@ static void sync_sbs(mddev_t * mddev, int nospares)
         * with the rest of the array)
         */
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                if (rdev->sb_events == mddev->events ||
                    (nospares &&
                     rdev->raid_disk < 0 &&
@@ -1699,7 +1788,6 @@ static void sync_sbs(mddev_t * mddev, int nospares)
 
 static void md_update_sb(mddev_t * mddev, int force_change)
 {
-       struct list_head *tmp;
        mdk_rdev_t *rdev;
        int sync_req;
        int nospares = 0;
@@ -1790,7 +1878,7 @@ repeat:
                mdname(mddev),mddev->in_sync);
 
        bitmap_update_sb(mddev->bitmap);
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                char b[BDEVNAME_SIZE];
                dprintk(KERN_INFO "md: ");
                if (rdev->sb_loaded != 1)
@@ -1999,7 +2087,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
                md_wakeup_thread(rdev->mddev->thread);
        } else if (rdev->mddev->pers) {
                mdk_rdev_t *rdev2;
-               struct list_head *tmp;
                /* Activating a spare .. or possibly reactivating
                 * if we every get bitmaps working here.
                 */
@@ -2010,7 +2097,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
                if (rdev->mddev->pers->hot_add_disk == NULL)
                        return -EINVAL;
 
-               rdev_for_each(rdev2, tmp, rdev->mddev)
+               list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
                        if (rdev2->raid_disk == slot)
                                return -EEXIST;
 
@@ -2125,14 +2212,14 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
                 */
                mddev_t *mddev;
                int overlap = 0;
-               struct list_head *tmp, *tmp2;
+               struct list_head *tmp;
 
                mddev_unlock(my_mddev);
                for_each_mddev(mddev, tmp) {
                        mdk_rdev_t *rdev2;
 
                        mddev_lock(mddev);
-                       rdev_for_each(rdev2, tmp2, mddev)
+                       list_for_each_entry(rdev2, &mddev->disks, same_set)
                                if (test_bit(AllReserved, &rdev2->flags) ||
                                    (rdev->bdev == rdev2->bdev &&
                                     rdev != rdev2 &&
@@ -2328,8 +2415,7 @@ abort_free:
 static void analyze_sbs(mddev_t * mddev)
 {
        int i;
-       struct list_head *tmp;
-       mdk_rdev_t *rdev, *freshest;
+       mdk_rdev_t *rdev, *freshest, *tmp;
        char b[BDEVNAME_SIZE];
 
        freshest = NULL;
@@ -3046,7 +3132,7 @@ action_store(mddev_t *mddev, const char *page, size_t len)
        }
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
-       sysfs_notify(&mddev->kobj, NULL, "sync_action");
+       sysfs_notify_dirent(mddev->sysfs_action);
        return len;
 }
 
@@ -3404,6 +3490,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
        rv = mddev_lock(mddev);
+       if (mddev->hold_active == UNTIL_IOCTL)
+               mddev->hold_active = 0;
        if (!rv) {
                rv = entry->store(mddev, page, length);
                mddev_unlock(mddev);
@@ -3414,6 +3502,17 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
 static void md_free(struct kobject *ko)
 {
        mddev_t *mddev = container_of(ko, mddev_t, kobj);
+
+       if (mddev->sysfs_state)
+               sysfs_put(mddev->sysfs_state);
+
+       if (mddev->gendisk) {
+               del_gendisk(mddev->gendisk);
+               put_disk(mddev->gendisk);
+       }
+       if (mddev->queue)
+               blk_cleanup_queue(mddev->queue);
+
        kfree(mddev);
 }
 
@@ -3429,34 +3528,74 @@ static struct kobj_type md_ktype = {
 
 int mdp_major = 0;
 
-static struct kobject *md_probe(dev_t dev, int *part, void *data)
+static int md_alloc(dev_t dev, char *name)
 {
        static DEFINE_MUTEX(disks_mutex);
        mddev_t *mddev = mddev_find(dev);
        struct gendisk *disk;
-       int partitioned = (MAJOR(dev) != MD_MAJOR);
-       int shift = partitioned ? MdpMinorShift : 0;
-       int unit = MINOR(dev) >> shift;
+       int partitioned;
+       int shift;
+       int unit;
        int error;
 
        if (!mddev)
-               return NULL;
+               return -ENODEV;
+
+       partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
+       shift = partitioned ? MdpMinorShift : 0;
+       unit = MINOR(mddev->unit) >> shift;
+
+       /* wait for any previous instance of this device
+        * to be completely removed (mddev_delayed_delete).
+        */
+       flush_scheduled_work();
 
        mutex_lock(&disks_mutex);
        if (mddev->gendisk) {
                mutex_unlock(&disks_mutex);
                mddev_put(mddev);
-               return NULL;
+               return -EEXIST;
+       }
+
+       if (name) {
+               /* Need to ensure that 'name' is not a duplicate.
+                */
+               mddev_t *mddev2;
+               spin_lock(&all_mddevs_lock);
+
+               list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
+                       if (mddev2->gendisk &&
+                           strcmp(mddev2->gendisk->disk_name, name) == 0) {
+                               spin_unlock(&all_mddevs_lock);
+                               return -EEXIST;
+                       }
+               spin_unlock(&all_mddevs_lock);
+       }
+
+       mddev->queue = blk_alloc_queue(GFP_KERNEL);
+       if (!mddev->queue) {
+               mutex_unlock(&disks_mutex);
+               mddev_put(mddev);
+               return -ENOMEM;
        }
+       /* Can be unlocked because the queue is new: no concurrency */
+       queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
+
+       blk_queue_make_request(mddev->queue, md_fail_request);
+
        disk = alloc_disk(1 << shift);
        if (!disk) {
                mutex_unlock(&disks_mutex);
+               blk_cleanup_queue(mddev->queue);
+               mddev->queue = NULL;
                mddev_put(mddev);
-               return NULL;
+               return -ENOMEM;
        }
-       disk->major = MAJOR(dev);
+       disk->major = MAJOR(mddev->unit);
        disk->first_minor = unit << shift;
-       if (partitioned)
+       if (name)
+               strcpy(disk->disk_name, name);
+       else if (partitioned)
                sprintf(disk->disk_name, "md_d%d", unit);
        else
                sprintf(disk->disk_name, "md%d", unit);
@@ -3464,7 +3603,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
        disk->private_data = mddev;
        disk->queue = mddev->queue;
        /* Allow extended partitions.  This makes the
-        * 'mdp' device redundant, but we can really
+        * 'mdp' device redundant, but we can't really
         * remove it now.
         */
        disk->flags |= GENHD_FL_EXT_DEVT;
@@ -3480,9 +3619,35 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
                kobject_uevent(&mddev->kobj, KOBJ_ADD);
                mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
        }
+       mddev_put(mddev);
+       return 0;
+}
+
+static struct kobject *md_probe(dev_t dev, int *part, void *data)
+{
+       md_alloc(dev, NULL);
        return NULL;
 }
 
+static int add_named_array(const char *val, struct kernel_param *kp)
+{
+       /* val must be "md_*" where * is not all digits.
+        * We allocate an array with a large free minor number, and
+        * set the name to val.  val must not already be an active name.
+        */
+       int len = strlen(val);
+       char buf[DISK_NAME_LEN];
+
+       while (len && val[len-1] == '\n')
+               len--;
+       if (len >= DISK_NAME_LEN)
+               return -E2BIG;
+       strlcpy(buf, val, len+1);
+       if (strncmp(buf, "md_", 3) != 0)
+               return -EINVAL;
+       return md_alloc(0, buf);
+}
+
 static void md_safemode_timeout(unsigned long data)
 {
        mddev_t *mddev = (mddev_t *) data;
@@ -3501,7 +3666,6 @@ static int do_md_run(mddev_t * mddev)
 {
        int err;
        int chunk_size;
-       struct list_head *tmp;
        mdk_rdev_t *rdev;
        struct gendisk *disk;
        struct mdk_personality *pers;
@@ -3540,7 +3704,7 @@ static int do_md_run(mddev_t * mddev)
                }
 
                /* devices must have minimum size of one chunk */
-               rdev_for_each(rdev, tmp, mddev) {
+               list_for_each_entry(rdev, &mddev->disks, same_set) {
                        if (test_bit(Faulty, &rdev->flags))
                                continue;
                        if (rdev->size < chunk_size / 1024) {
@@ -3565,7 +3729,7 @@ static int do_md_run(mddev_t * mddev)
         * the only valid external interface is through the md
         * device.
         */
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                if (test_bit(Faulty, &rdev->flags))
                        continue;
                sync_blockdev(rdev->bdev);
@@ -3630,10 +3794,10 @@ static int do_md_run(mddev_t * mddev)
                 */
                char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
                mdk_rdev_t *rdev2;
-               struct list_head *tmp2;
                int warned = 0;
-               rdev_for_each(rdev, tmp, mddev) {
-                       rdev_for_each(rdev2, tmp2, mddev) {
+
+               list_for_each_entry(rdev, &mddev->disks, same_set)
+                       list_for_each_entry(rdev2, &mddev->disks, same_set) {
                                if (rdev < rdev2 &&
                                    rdev->bdev->bd_contains ==
                                    rdev2->bdev->bd_contains) {
@@ -3647,7 +3811,7 @@ static int do_md_run(mddev_t * mddev)
                                        warned = 1;
                                }
                        }
-               }
+
                if (warned)
                        printk(KERN_WARNING
                               "True protection against single-disk"
@@ -3684,6 +3848,7 @@ static int do_md_run(mddev_t * mddev)
                        printk(KERN_WARNING
                               "md: cannot register extra attributes for %s\n",
                               mdname(mddev));
+               mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
        } else if (mddev->ro == 2) /* auto-readonly not meaningful */
                mddev->ro = 0;
 
@@ -3694,7 +3859,7 @@ static int do_md_run(mddev_t * mddev)
        mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
        mddev->in_sync = 1;
 
-       rdev_for_each(rdev, tmp, mddev)
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                if (rdev->raid_disk >= 0) {
                        char nm[20];
                        sprintf(nm, "rd%d", rdev->raid_disk);
@@ -3725,9 +3890,8 @@ static int do_md_run(mddev_t * mddev)
         * it will remove the drives and not do the right thing
         */
        if (mddev->degraded && !mddev->sync_thread) {
-               struct list_head *rtmp;
                int spares = 0;
-               rdev_for_each(rdev, rtmp, mddev)
+               list_for_each_entry(rdev, &mddev->disks, same_set)
                        if (rdev->raid_disk >= 0 &&
                            !test_bit(In_sync, &rdev->flags) &&
                            !test_bit(Faulty, &rdev->flags))
@@ -3754,7 +3918,8 @@ static int do_md_run(mddev_t * mddev)
        mddev->changed = 1;
        md_new_event(mddev);
        sysfs_notify_dirent(mddev->sysfs_state);
-       sysfs_notify(&mddev->kobj, NULL, "sync_action");
+       if (mddev->sysfs_action)
+               sysfs_notify_dirent(mddev->sysfs_action);
        sysfs_notify(&mddev->kobj, NULL, "degraded");
        kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
        return 0;
@@ -3854,9 +4019,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
                        mddev->queue->merge_bvec_fn = NULL;
                        mddev->queue->unplug_fn = NULL;
                        mddev->queue->backing_dev_info.congested_fn = NULL;
-                       if (mddev->pers->sync_request)
+                       if (mddev->pers->sync_request) {
                                sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
-
+                               if (mddev->sysfs_action)
+                                       sysfs_put(mddev->sysfs_action);
+                               mddev->sysfs_action = NULL;
+                       }
                        module_put(mddev->pers->owner);
                        mddev->pers = NULL;
                        /* tell userspace to handle 'inactive' */
@@ -3883,7 +4051,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
         */
        if (mode == 0) {
                mdk_rdev_t *rdev;
-               struct list_head *tmp;
 
                printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
 
@@ -3895,7 +4062,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
                }
                mddev->bitmap_offset = 0;
 
-               rdev_for_each(rdev, tmp, mddev)
+               list_for_each_entry(rdev, &mddev->disks, same_set)
                        if (rdev->raid_disk >= 0) {
                                char nm[20];
                                sprintf(nm, "rd%d", rdev->raid_disk);
@@ -3941,6 +4108,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
                mddev->barriers_work = 0;
                mddev->safemode = 0;
                kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
+               if (mddev->hold_active == UNTIL_STOP)
+                       mddev->hold_active = 0;
 
        } else if (mddev->pers)
                printk(KERN_INFO "md: %s switched to read-only mode.\n",
@@ -3956,7 +4125,6 @@ out:
 static void autorun_array(mddev_t *mddev)
 {
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
        int err;
 
        if (list_empty(&mddev->disks))
@@ -3964,7 +4132,7 @@ static void autorun_array(mddev_t *mddev)
 
        printk(KERN_INFO "md: running: ");
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                char b[BDEVNAME_SIZE];
                printk("<%s>", bdevname(rdev->bdev,b));
        }
@@ -3991,8 +4159,7 @@ static void autorun_array(mddev_t *mddev)
  */
 static void autorun_devices(int part)
 {
-       struct list_head *tmp;
-       mdk_rdev_t *rdev0, *rdev;
+       mdk_rdev_t *rdev0, *rdev, *tmp;
        mddev_t *mddev;
        char b[BDEVNAME_SIZE];
 
@@ -4007,7 +4174,7 @@ static void autorun_devices(int part)
                printk(KERN_INFO "md: considering %s ...\n",
                        bdevname(rdev0->bdev,b));
                INIT_LIST_HEAD(&candidates);
-               rdev_for_each_list(rdev, tmp, pending_raid_disks)
+               rdev_for_each_list(rdev, tmp, &pending_raid_disks)
                        if (super_90_load(rdev, rdev0, 0) >= 0) {
                                printk(KERN_INFO "md:  adding %s ...\n",
                                        bdevname(rdev->bdev,b));
@@ -4053,7 +4220,7 @@ static void autorun_devices(int part)
                } else {
                        printk(KERN_INFO "md: created %s\n", mdname(mddev));
                        mddev->persistent = 1;
-                       rdev_for_each_list(rdev, tmp, candidates) {
+                       rdev_for_each_list(rdev, tmp, &candidates) {
                                list_del_init(&rdev->same_set);
                                if (bind_rdev_to_array(rdev, mddev))
                                        export_rdev(rdev);
@@ -4064,7 +4231,7 @@ static void autorun_devices(int part)
                /* on success, candidates will be empty, on error
                 * it won't...
                 */
-               rdev_for_each_list(rdev, tmp, candidates) {
+               rdev_for_each_list(rdev, tmp, &candidates) {
                        list_del_init(&rdev->same_set);
                        export_rdev(rdev);
                }
@@ -4093,10 +4260,9 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
        mdu_array_info_t info;
        int nr,working,active,failed,spare;
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
 
        nr=working=active=failed=spare=0;
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                nr++;
                if (test_bit(Faulty, &rdev->flags))
                        failed++;
@@ -4614,9 +4780,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 
 static int update_size(mddev_t *mddev, sector_t num_sectors)
 {
-       mdk_rdev_t * rdev;
+       mdk_rdev_t *rdev;
        int rv;
-       struct list_head *tmp;
        int fit = (num_sectors == 0);
 
        if (mddev->pers->resize == NULL)
@@ -4638,7 +4803,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
                 * grow, and re-add.
                 */
                return -EBUSY;
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                sector_t avail;
                avail = rdev->size * 2;
 
@@ -5000,6 +5165,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 
 done_unlock:
 abort_unlock:
+       if (mddev->hold_active == UNTIL_IOCTL &&
+           err != -EINVAL)
+               mddev->hold_active = 0;
        mddev_unlock(mddev);
 
        return err;
@@ -5016,14 +5184,25 @@ static int md_open(struct block_device *bdev, fmode_t mode)
         * Succeed if we can lock the mddev, which confirms that
         * it isn't being stopped right now.
         */
-       mddev_t *mddev = bdev->bd_disk->private_data;
+       mddev_t *mddev = mddev_find(bdev->bd_dev);
        int err;
 
+       if (mddev->gendisk != bdev->bd_disk) {
+               /* we are racing with mddev_put which is discarding this
+                * bd_disk.
+                */
+               mddev_put(mddev);
+               /* Wait until bdev->bd_disk is definitely gone */
+               flush_scheduled_work();
+               /* Then retry the open from the top */
+               return -ERESTARTSYS;
+       }
+       BUG_ON(mddev != bdev->bd_disk->private_data);
+
        if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
                goto out;
 
        err = 0;
-       mddev_get(mddev);
        atomic_inc(&mddev->openers);
        mddev_unlock(mddev);
 
@@ -5187,11 +5366,10 @@ static void status_unused(struct seq_file *seq)
 {
        int i = 0;
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
 
        seq_printf(seq, "unused devices: ");
 
-       rdev_for_each_list(rdev, tmp, pending_raid_disks) {
+       list_for_each_entry(rdev, &pending_raid_disks, same_set) {
                char b[BDEVNAME_SIZE];
                i++;
                seq_printf(seq, "%s ",
@@ -5350,7 +5528,6 @@ static int md_seq_show(struct seq_file *seq, void *v)
 {
        mddev_t *mddev = v;
        sector_t size;
-       struct list_head *tmp2;
        mdk_rdev_t *rdev;
        struct mdstat_info *mi = seq->private;
        struct bitmap *bitmap;
@@ -5387,7 +5564,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
                }
 
                size = 0;
-               rdev_for_each(rdev, tmp2, mddev) {
+               list_for_each_entry(rdev, &mddev->disks, same_set) {
                        char b[BDEVNAME_SIZE];
                        seq_printf(seq, " %s[%d]",
                                bdevname(rdev->bdev,b), rdev->desc_nr);
@@ -5694,7 +5871,6 @@ void md_do_sync(mddev_t *mddev)
        struct list_head *tmp;
        sector_t last_check;
        int skipped = 0;
-       struct list_head *rtmp;
        mdk_rdev_t *rdev;
        char *desc;
 
@@ -5799,7 +5975,7 @@ void md_do_sync(mddev_t *mddev)
                /* recovery follows the physical size of devices */
                max_sectors = mddev->size << 1;
                j = MaxSector;
-               rdev_for_each(rdev, rtmp, mddev)
+               list_for_each_entry(rdev, &mddev->disks, same_set)
                        if (rdev->raid_disk >= 0 &&
                            !test_bit(Faulty, &rdev->flags) &&
                            !test_bit(In_sync, &rdev->flags) &&
@@ -5949,7 +6125,7 @@ void md_do_sync(mddev_t *mddev)
                } else {
                        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
                                mddev->curr_resync = MaxSector;
-                       rdev_for_each(rdev, rtmp, mddev)
+                       list_for_each_entry(rdev, &mddev->disks, same_set)
                                if (rdev->raid_disk >= 0 &&
                                    !test_bit(Faulty, &rdev->flags) &&
                                    !test_bit(In_sync, &rdev->flags) &&
@@ -5985,10 +6161,9 @@ EXPORT_SYMBOL_GPL(md_do_sync);
 static int remove_and_add_spares(mddev_t *mddev)
 {
        mdk_rdev_t *rdev;
-       struct list_head *rtmp;
        int spares = 0;
 
-       rdev_for_each(rdev, rtmp, mddev)
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                if (rdev->raid_disk >= 0 &&
                    !test_bit(Blocked, &rdev->flags) &&
                    (test_bit(Faulty, &rdev->flags) ||
@@ -6003,8 +6178,8 @@ static int remove_and_add_spares(mddev_t *mddev)
                        }
                }
 
-       if (mddev->degraded && ! mddev->ro) {
-               rdev_for_each(rdev, rtmp, mddev) {
+       if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) {
+               list_for_each_entry(rdev, &mddev->disks, same_set) {
                        if (rdev->raid_disk >= 0 &&
                            !test_bit(In_sync, &rdev->flags) &&
                            !test_bit(Blocked, &rdev->flags))
@@ -6056,7 +6231,6 @@ static int remove_and_add_spares(mddev_t *mddev)
 void md_check_recovery(mddev_t *mddev)
 {
        mdk_rdev_t *rdev;
-       struct list_head *rtmp;
 
 
        if (mddev->bitmap)
@@ -6120,7 +6294,7 @@ void md_check_recovery(mddev_t *mddev)
                if (mddev->flags)
                        md_update_sb(mddev, 0);
 
-               rdev_for_each(rdev, rtmp, mddev)
+               list_for_each_entry(rdev, &mddev->disks, same_set)
                        if (test_and_clear_bit(StateChanged, &rdev->flags))
                                sysfs_notify_dirent(rdev->sysfs_state);
 
@@ -6149,13 +6323,13 @@ void md_check_recovery(mddev_t *mddev)
                         * information must be scrapped
                         */
                        if (!mddev->degraded)
-                               rdev_for_each(rdev, rtmp, mddev)
+                               list_for_each_entry(rdev, &mddev->disks, same_set)
                                        rdev->saved_raid_disk = -1;
 
                        mddev->recovery = 0;
                        /* flag recovery needed just to double check */
                        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-                       sysfs_notify(&mddev->kobj, NULL, "sync_action");
+                       sysfs_notify_dirent(mddev->sysfs_action);
                        md_new_event(mddev);
                        goto unlock;
                }
@@ -6216,7 +6390,7 @@ void md_check_recovery(mddev_t *mddev)
                                mddev->recovery = 0;
                        } else
                                md_wakeup_thread(mddev->sync_thread);
-                       sysfs_notify(&mddev->kobj, NULL, "sync_action");
+                       sysfs_notify_dirent(mddev->sysfs_action);
                        md_new_event(mddev);
                }
        unlock:
@@ -6224,7 +6398,8 @@ void md_check_recovery(mddev_t *mddev)
                        clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
                        if (test_and_clear_bit(MD_RECOVERY_RECOVER,
                                               &mddev->recovery))
-                               sysfs_notify(&mddev->kobj, NULL, "sync_action");
+                               if (mddev->sysfs_action)
+                                       sysfs_notify_dirent(mddev->sysfs_action);
                }
                mddev_unlock(mddev);
        }
@@ -6386,14 +6561,8 @@ static __exit void md_exit(void)
        unregister_sysctl_table(raid_table_header);
        remove_proc_entry("mdstat", NULL);
        for_each_mddev(mddev, tmp) {
-               struct gendisk *disk = mddev->gendisk;
-               if (!disk)
-                       continue;
                export_array(mddev);
-               del_gendisk(disk);
-               put_disk(disk);
-               mddev->gendisk = NULL;
-               mddev_put(mddev);
+               mddev->hold_active = 0;
        }
 }
 
@@ -6418,6 +6587,7 @@ static int set_ro(const char *val, struct kernel_param *kp)
 module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
 module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
 
+module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
 
 EXPORT_SYMBOL(register_md_personality);
 EXPORT_SYMBOL(unregister_md_personality);
index d4ac47d112797f4f3343420175b4312397337de2..f6d08f2416716f7207fe49aba23d154a5eb41fae 100644 (file)
@@ -408,7 +408,6 @@ static int multipath_run (mddev_t *mddev)
        int disk_idx;
        struct multipath_info *disk;
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
 
        if (mddev->level != LEVEL_MULTIPATH) {
                printk("multipath: %s: raid level not set to multipath IO (%d)\n",
@@ -441,7 +440,7 @@ static int multipath_run (mddev_t *mddev)
        }
 
        conf->working_disks = 0;
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                disk_idx = rdev->raid_disk;
                if (disk_idx < 0 ||
                    disk_idx >= mddev->raid_disks)
index 8ac6488ad0dc6c1a86463c4e732c1205a6cbb9a3..c605ba8055863d2d0ede52e9fe7e9c4e374bd9d3 100644 (file)
@@ -53,11 +53,10 @@ static int raid0_congested(void *data, int bits)
 static int create_strip_zones (mddev_t *mddev)
 {
        int i, c, j;
-       sector_t current_offset, curr_zone_offset;
+       sector_t current_start, curr_zone_start;
        sector_t min_spacing;
        raid0_conf_t *conf = mddev_to_conf(mddev);
        mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev;
-       struct list_head *tmp1, *tmp2;
        struct strip_zone *zone;
        int cnt;
        char b[BDEVNAME_SIZE];
@@ -67,19 +66,19 @@ static int create_strip_zones (mddev_t *mddev)
         */
        conf->nr_strip_zones = 0;
  
-       rdev_for_each(rdev1, tmp1, mddev) {
-               printk("raid0: looking at %s\n",
+       list_for_each_entry(rdev1, &mddev->disks, same_set) {
+               printk(KERN_INFO "raid0: looking at %s\n",
                        bdevname(rdev1->bdev,b));
                c = 0;
-               rdev_for_each(rdev2, tmp2, mddev) {
-                       printk("raid0:   comparing %s(%llu)",
+               list_for_each_entry(rdev2, &mddev->disks, same_set) {
+                       printk(KERN_INFO "raid0:   comparing %s(%llu)",
                               bdevname(rdev1->bdev,b),
                               (unsigned long long)rdev1->size);
-                       printk(" with %s(%llu)\n",
+                       printk(KERN_INFO " with %s(%llu)\n",
                               bdevname(rdev2->bdev,b),
                               (unsigned long long)rdev2->size);
                        if (rdev2 == rdev1) {
-                               printk("raid0:   END\n");
+                               printk(KERN_INFO "raid0:   END\n");
                                break;
                        }
                        if (rdev2->size == rdev1->size)
@@ -88,19 +87,20 @@ static int create_strip_zones (mddev_t *mddev)
                                 * Not unique, don't count it as a new
                                 * group
                                 */
-                               printk("raid0:   EQUAL\n");
+                               printk(KERN_INFO "raid0:   EQUAL\n");
                                c = 1;
                                break;
                        }
-                       printk("raid0:   NOT EQUAL\n");
+                       printk(KERN_INFO "raid0:   NOT EQUAL\n");
                }
                if (!c) {
-                       printk("raid0:   ==> UNIQUE\n");
+                       printk(KERN_INFO "raid0:   ==> UNIQUE\n");
                        conf->nr_strip_zones++;
-                       printk("raid0: %d zones\n", conf->nr_strip_zones);
+                       printk(KERN_INFO "raid0: %d zones\n",
+                               conf->nr_strip_zones);
                }
        }
-       printk("raid0: FINAL %d zones\n", conf->nr_strip_zones);
+       printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones);
 
        conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
                                conf->nr_strip_zones, GFP_KERNEL);
@@ -119,16 +119,17 @@ static int create_strip_zones (mddev_t *mddev)
        cnt = 0;
        smallest = NULL;
        zone->dev = conf->devlist;
-       rdev_for_each(rdev1, tmp1, mddev) {
+       list_for_each_entry(rdev1, &mddev->disks, same_set) {
                int j = rdev1->raid_disk;
 
                if (j < 0 || j >= mddev->raid_disks) {
-                       printk("raid0: bad disk number %d - aborting!\n", j);
+                       printk(KERN_ERR "raid0: bad disk number %d - "
+                               "aborting!\n", j);
                        goto abort;
                }
                if (zone->dev[j]) {
-                       printk("raid0: multiple devices for %d - aborting!\n",
-                               j);
+                       printk(KERN_ERR "raid0: multiple devices for %d - "
+                               "aborting!\n", j);
                        goto abort;
                }
                zone->dev[j] = rdev1;
@@ -149,16 +150,16 @@ static int create_strip_zones (mddev_t *mddev)
                cnt++;
        }
        if (cnt != mddev->raid_disks) {
-               printk("raid0: too few disks (%d of %d) - aborting!\n",
-                       cnt, mddev->raid_disks);
+               printk(KERN_ERR "raid0: too few disks (%d of %d) - "
+                       "aborting!\n", cnt, mddev->raid_disks);
                goto abort;
        }
        zone->nb_dev = cnt;
-       zone->size = smallest->size * cnt;
-       zone->zone_offset = 0;
+       zone->sectors = smallest->size * cnt * 2;
+       zone->zone_start = 0;
 
-       current_offset = smallest->size;
-       curr_zone_offset = zone->size;
+       current_start = smallest->size * 2;
+       curr_zone_start = zone->sectors;
 
        /* now do the other zones */
        for (i = 1; i < conf->nr_strip_zones; i++)
@@ -166,40 +167,41 @@ static int create_strip_zones (mddev_t *mddev)
                zone = conf->strip_zone + i;
                zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks;
 
-               printk("raid0: zone %d\n", i);
-               zone->dev_offset = current_offset;
+               printk(KERN_INFO "raid0: zone %d\n", i);
+               zone->dev_start = current_start;
                smallest = NULL;
                c = 0;
 
                for (j=0; j<cnt; j++) {
                        char b[BDEVNAME_SIZE];
                        rdev = conf->strip_zone[0].dev[j];
-                       printk("raid0: checking %s ...", bdevname(rdev->bdev,b));
-                       if (rdev->size > current_offset)
-                       {
-                               printk(" contained as device %d\n", c);
+                       printk(KERN_INFO "raid0: checking %s ...",
+                               bdevname(rdev->bdev, b));
+                       if (rdev->size > current_start / 2) {
+                               printk(KERN_INFO " contained as device %d\n",
+                                       c);
                                zone->dev[c] = rdev;
                                c++;
                                if (!smallest || (rdev->size <smallest->size)) {
                                        smallest = rdev;
-                                       printk("  (%llu) is smallest!.\n", 
+                                       printk(KERN_INFO "  (%llu) is smallest!.\n",
                                                (unsigned long long)rdev->size);
                                }
                        } else
-                               printk(" nope.\n");
+                               printk(KERN_INFO " nope.\n");
                }
 
                zone->nb_dev = c;
-               zone->size = (smallest->size - current_offset) * c;
-               printk("raid0: zone->nb_dev: %d, size: %llu\n",
-                       zone->nb_dev, (unsigned long long)zone->size);
+               zone->sectors = (smallest->size * 2 - current_start) * c;
+               printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n",
+                       zone->nb_dev, (unsigned long long)zone->sectors);
 
-               zone->zone_offset = curr_zone_offset;
-               curr_zone_offset += zone->size;
+               zone->zone_start = curr_zone_start;
+               curr_zone_start += zone->sectors;
 
-               current_offset = smallest->size;
-               printk("raid0: current zone offset: %llu\n",
-                       (unsigned long long)current_offset);
+               current_start = smallest->size * 2;
+               printk(KERN_INFO "raid0: current zone start: %llu\n",
+                       (unsigned long long)current_start);
        }
 
        /* Now find appropriate hash spacing.
@@ -210,16 +212,16 @@ static int create_strip_zones (mddev_t *mddev)
         * strip though as it's size has no bearing on the efficacy of the hash
         * table.
         */
-       conf->hash_spacing = curr_zone_offset;
-       min_spacing = curr_zone_offset;
+       conf->spacing = curr_zone_start;
+       min_spacing = curr_zone_start;
        sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*));
        for (i=0; i < conf->nr_strip_zones-1; i++) {
-               sector_t sz = 0;
-               for (j=i; j<conf->nr_strip_zones-1 &&
-                            sz < min_spacing ; j++)
-                       sz += conf->strip_zone[j].size;
-               if (sz >= min_spacing && sz < conf->hash_spacing)
-                       conf->hash_spacing = sz;
+               sector_t s = 0;
+               for (j = i; j < conf->nr_strip_zones - 1 &&
+                               s < min_spacing; j++)
+                       s += conf->strip_zone[j].sectors;
+               if (s >= min_spacing && s < conf->spacing)
+                       conf->spacing = s;
        }
 
        mddev->queue->unplug_fn = raid0_unplug;
@@ -227,7 +229,7 @@ static int create_strip_zones (mddev_t *mddev)
        mddev->queue->backing_dev_info.congested_fn = raid0_congested;
        mddev->queue->backing_dev_info.congested_data = mddev;
 
-       printk("raid0: done.\n");
+       printk(KERN_INFO "raid0: done.\n");
        return 0;
  abort:
        return 1;
@@ -262,10 +264,9 @@ static int raid0_mergeable_bvec(struct request_queue *q,
 static int raid0_run (mddev_t *mddev)
 {
        unsigned  cur=0, i=0, nb_zone;
-       s64 size;
+       s64 sectors;
        raid0_conf_t *conf;
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
 
        if (mddev->chunk_size == 0) {
                printk(KERN_ERR "md/raid0: non-zero chunk size required.\n");
@@ -291,54 +292,54 @@ static int raid0_run (mddev_t *mddev)
 
        /* calculate array device size */
        mddev->array_sectors = 0;
-       rdev_for_each(rdev, tmp, mddev)
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                mddev->array_sectors += rdev->size * 2;
 
-       printk("raid0 : md_size is %llu blocks.\n", 
-               (unsigned long long)mddev->array_sectors / 2);
-       printk("raid0 : conf->hash_spacing is %llu blocks.\n",
-               (unsigned long long)conf->hash_spacing);
+       printk(KERN_INFO "raid0 : md_size is %llu sectors.\n",
+               (unsigned long long)mddev->array_sectors);
+       printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n",
+               (unsigned long long)conf->spacing);
        {
-               sector_t s = mddev->array_sectors / 2;
-               sector_t space = conf->hash_spacing;
+               sector_t s = mddev->array_sectors;
+               sector_t space = conf->spacing;
                int round;
-               conf->preshift = 0;
+               conf->sector_shift = 0;
                if (sizeof(sector_t) > sizeof(u32)) {
                        /*shift down space and s so that sector_div will work */
                        while (space > (sector_t) (~(u32)0)) {
                                s >>= 1;
                                space >>= 1;
                                s += 1; /* force round-up */
-                               conf->preshift++;
+                               conf->sector_shift++;
                        }
                }
                round = sector_div(s, (u32)space) ? 1 : 0;
                nb_zone = s + round;
        }
-       printk("raid0 : nb_zone is %d.\n", nb_zone);
+       printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone);
 
-       printk("raid0 : Allocating %Zd bytes for hash.\n",
+       printk(KERN_INFO "raid0 : Allocating %zu bytes for hash.\n",
                                nb_zone*sizeof(struct strip_zone*));
        conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL);
        if (!conf->hash_table)
                goto out_free_conf;
-       size = conf->strip_zone[cur].size;
+       sectors = conf->strip_zone[cur].sectors;
 
        conf->hash_table[0] = conf->strip_zone + cur;
        for (i=1; i< nb_zone; i++) {
-               while (size <= conf->hash_spacing) {
+               while (sectors <= conf->spacing) {
                        cur++;
-                       size += conf->strip_zone[cur].size;
+                       sectors += conf->strip_zone[cur].sectors;
                }
-               size -= conf->hash_spacing;
+               sectors -= conf->spacing;
                conf->hash_table[i] = conf->strip_zone + cur;
        }
-       if (conf->preshift) {
-               conf->hash_spacing >>= conf->preshift;
-               /* round hash_spacing up so when we divide by it, we
+       if (conf->sector_shift) {
+               conf->spacing >>= conf->sector_shift;
+               /* round spacing up so when we divide by it, we
                 * err on the side of too-low, which is safest
                 */
-               conf->hash_spacing++;
+               conf->spacing++;
        }
 
        /* calculate the max read-ahead size.
@@ -387,12 +388,12 @@ static int raid0_stop (mddev_t *mddev)
 static int raid0_make_request (struct request_queue *q, struct bio *bio)
 {
        mddev_t *mddev = q->queuedata;
-       unsigned int sect_in_chunk, chunksize_bits,  chunk_size, chunk_sects;
+       unsigned int sect_in_chunk, chunksect_bits, chunk_sects;
        raid0_conf_t *conf = mddev_to_conf(mddev);
        struct strip_zone *zone;
        mdk_rdev_t *tmp_dev;
        sector_t chunk;
-       sector_t block, rsect;
+       sector_t sector, rsect;
        const int rw = bio_data_dir(bio);
        int cpu;
 
@@ -407,11 +408,9 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
                      bio_sectors(bio));
        part_stat_unlock();
 
-       chunk_size = mddev->chunk_size >> 10;
        chunk_sects = mddev->chunk_size >> 9;
-       chunksize_bits = ffz(~chunk_size);
-       block = bio->bi_sector >> 1;
-       
+       chunksect_bits = ffz(~chunk_sects);
+       sector = bio->bi_sector;
 
        if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) {
                struct bio_pair *bp;
@@ -434,28 +433,27 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
  
 
        {
-               sector_t x = block >> conf->preshift;
-               sector_div(x, (u32)conf->hash_spacing);
+               sector_t x = sector >> conf->sector_shift;
+               sector_div(x, (u32)conf->spacing);
                zone = conf->hash_table[x];
        }
-       while (block >= (zone->zone_offset + zone->size)) 
+
+       while (sector >= zone->zone_start + zone->sectors)
                zone++;
-    
-       sect_in_chunk = bio->bi_sector & ((chunk_size<<1) -1);
+
+       sect_in_chunk = bio->bi_sector & (chunk_sects - 1);
 
 
        {
-               sector_t x =  (block - zone->zone_offset) >> chunksize_bits;
+               sector_t x = (sector - zone->zone_start) >> chunksect_bits;
 
                sector_div(x, zone->nb_dev);
                chunk = x;
 
-               x = block >> chunksize_bits;
+               x = sector >> chunksect_bits;
                tmp_dev = zone->dev[sector_div(x, zone->nb_dev)];
        }
-       rsect = (((chunk << chunksize_bits) + zone->dev_offset)<<1)
-               + sect_in_chunk;
+       rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk;
  
        bio->bi_bdev = tmp_dev->bdev;
        bio->bi_sector = rsect + tmp_dev->data_offset;
@@ -467,7 +465,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
 
 bad_map:
        printk("raid0_make_request bug: can't convert block across chunks"
-               " or bigger than %dk %llu %d\n", chunk_size, 
+               " or bigger than %dk %llu %d\n", chunk_sects / 2,
                (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
 
        bio_io_error(bio);
@@ -492,10 +490,10 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev)
                        seq_printf(seq, "%s/", bdevname(
                                conf->strip_zone[j].dev[k]->bdev,b));
 
-               seq_printf(seq, "] zo=%d do=%d s=%d\n",
-                               conf->strip_zone[j].zone_offset,
-                               conf->strip_zone[j].dev_offset,
-                               conf->strip_zone[j].size);
+               seq_printf(seq, "] zs=%d ds=%d s=%d\n",
+                               conf->strip_zone[j].zone_start,
+                               conf->strip_zone[j].dev_start,
+                               conf->strip_zone[j].sectors);
        }
 #endif
        seq_printf(seq, " %dk chunks", mddev->chunk_size/1024);
index 9c788e2489b18934eadb62cac4d4b33b3c121620..7b4f5f7155d8726705c1f42757f68dd1de869624 100644 (file)
@@ -1016,12 +1016,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
         * else mark the drive as failed
         */
        if (test_bit(In_sync, &rdev->flags)
-           && (conf->raid_disks - mddev->degraded) == 1)
+           && (conf->raid_disks - mddev->degraded) == 1) {
                /*
                 * Don't fail the drive, act as though we were just a
-                * normal single drive
+                * normal single drive.
+                * However don't try a recovery from this drive as
+                * it is very likely to fail.
                 */
+               mddev->recovery_disabled = 1;
                return;
+       }
        if (test_and_clear_bit(In_sync, &rdev->flags)) {
                unsigned long flags;
                spin_lock_irqsave(&conf->device_lock, flags);
@@ -1919,7 +1923,6 @@ static int run(mddev_t *mddev)
        int i, j, disk_idx;
        mirror_info_t *disk;
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
 
        if (mddev->level != 1) {
                printk("raid1: %s: raid level not set to mirroring (%d)\n",
@@ -1964,7 +1967,7 @@ static int run(mddev_t *mddev)
        spin_lock_init(&conf->device_lock);
        mddev->queue->queue_lock = &conf->device_lock;
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                disk_idx = rdev->raid_disk;
                if (disk_idx >= mddev->raid_disks
                    || disk_idx < 0)
index 970a96ef9b1841badb7b43657d46263e67a9e722..6736d6dff981c8a1a5c12e773c3d788410a0f191 100644 (file)
@@ -2025,7 +2025,6 @@ static int run(mddev_t *mddev)
        int i, disk_idx;
        mirror_info_t *disk;
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
        int nc, fc, fo;
        sector_t stride, size;
 
@@ -2108,7 +2107,7 @@ static int run(mddev_t *mddev)
        spin_lock_init(&conf->device_lock);
        mddev->queue->queue_lock = &conf->device_lock;
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                disk_idx = rdev->raid_disk;
                if (disk_idx >= mddev->raid_disks
                    || disk_idx < 0)
index a36a7435edf51bd29cc32681cb8b211b29a9f8cb..a5ba080d303b93bb3a4d764ea621f4bf9bc191d0 100644 (file)
@@ -3998,7 +3998,6 @@ static int run(mddev_t *mddev)
        int raid_disk, memory;
        mdk_rdev_t *rdev;
        struct disk_info *disk;
-       struct list_head *tmp;
        int working_disks = 0;
 
        if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) {
@@ -4108,7 +4107,7 @@ static int run(mddev_t *mddev)
 
        pr_debug("raid5: run(%s) called.\n", mdname(mddev));
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                raid_disk = rdev->raid_disk;
                if (raid_disk >= conf->raid_disks
                    || raid_disk < 0)
@@ -4533,7 +4532,6 @@ static int raid5_start_reshape(mddev_t *mddev)
 {
        raid5_conf_t *conf = mddev_to_conf(mddev);
        mdk_rdev_t *rdev;
-       struct list_head *rtmp;
        int spares = 0;
        int added_devices = 0;
        unsigned long flags;
@@ -4541,7 +4539,7 @@ static int raid5_start_reshape(mddev_t *mddev)
        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                return -EBUSY;
 
-       rdev_for_each(rdev, rtmp, mddev)
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                if (rdev->raid_disk < 0 &&
                    !test_bit(Faulty, &rdev->flags))
                        spares++;
@@ -4563,7 +4561,7 @@ static int raid5_start_reshape(mddev_t *mddev)
        /* Add some new drives, as many as will fit.
         * We know there are enough to make the newly sized array work.
         */
-       rdev_for_each(rdev, rtmp, mddev)
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                if (rdev->raid_disk < 0 &&
                    !test_bit(Faulty, &rdev->flags)) {
                        if (raid5_add_disk(mddev, rdev) == 0) {
index b957717e25abbbe2dc16aa655084d0f779f6c5c6..8ebbfdf708c24c9d70bc4cc88e8266202d56ec06 100644 (file)
@@ -1005,6 +1005,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
        }
 
        lock_kernel();
+ restart:
 
        ret = -ENXIO;
        disk = get_gendisk(bdev->bd_dev, &partno);
@@ -1025,6 +1026,19 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 
                        if (disk->fops->open) {
                                ret = disk->fops->open(bdev, mode);
+                               if (ret == -ERESTARTSYS) {
+                                       /* Lost a race with 'disk' being
+                                        * deleted, try again.
+                                        * See md.c
+                                        */
+                                       disk_put_part(bdev->bd_part);
+                                       bdev->bd_part = NULL;
+                                       module_put(disk->fops->owner);
+                                       put_disk(disk);
+                                       bdev->bd_disk = NULL;
+                                       mutex_unlock(&bdev->bd_mutex);
+                                       goto restart;
+                               }
                                if (ret)
                                        goto out_clear;
                        }
index 8fc909ef6787738b983b45243fc1786d4867fc2a..9743e4dbc9188031b0c180584df4c5e6ba898513 100644 (file)
@@ -137,6 +137,9 @@ struct mddev_s
        struct gendisk                  *gendisk;
 
        struct kobject                  kobj;
+       int                             hold_active;
+#define        UNTIL_IOCTL     1
+#define        UNTIL_STOP      2
 
        /* Superblock information */
        int                             major_version,
@@ -215,6 +218,9 @@ struct mddev_s
 #define        MD_RECOVERY_FROZEN      9
 
        unsigned long                   recovery;
+       int                             recovery_disabled; /* if we detect that recovery
+                                                           * will always fail, set this
+                                                           * so we don't loop trying */
 
        int                             in_sync;        /* know to not need resync */
        struct mutex                    reconfig_mutex;
@@ -244,6 +250,9 @@ struct mddev_s
        struct sysfs_dirent             *sysfs_state;   /* handle for 'array_state'
                                                         * file in sysfs.
                                                         */
+       struct sysfs_dirent             *sysfs_action;  /* handle for 'sync_action' */
+
+       struct work_struct del_work;    /* used for delayed sysfs removal */
 
        spinlock_t                      write_lock;
        wait_queue_head_t               sb_wait;        /* for waiting on superblock updates */
@@ -334,17 +343,14 @@ static inline char * mdname (mddev_t * mddev)
  * iterates through some rdev ringlist. It's safe to remove the
  * current 'rdev'. Dont touch 'tmp' though.
  */
-#define rdev_for_each_list(rdev, tmp, list)                            \
-                                                                       \
-       for ((tmp) = (list).next;                                       \
-               (rdev) = (list_entry((tmp), mdk_rdev_t, same_set)),     \
-                       (tmp) = (tmp)->next, (tmp)->prev != &(list)     \
-               ; )
+#define rdev_for_each_list(rdev, tmp, head)                            \
+       list_for_each_entry_safe(rdev, tmp, head, same_set)
+
 /*
  * iterates through the 'same array disks' ringlist
  */
 #define rdev_for_each(rdev, tmp, mddev)                                \
-       rdev_for_each_list(rdev, tmp, (mddev)->disks)
+       list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)
 
 #define rdev_for_each_rcu(rdev, mddev)                         \
        list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
index 8b4de4a41ff14c15f33550a2a9ad99955861c31e..9491026afe6610bc5f4897fbe2f30c5b75484f2c 100644 (file)
@@ -194,6 +194,8 @@ static inline __u64 md_event(mdp_super_t *sb) {
        return (ev<<32)| sb->events_lo;
 }
 
+#define MD_SUPERBLOCK_1_TIME_SEC_MASK ((1ULL<<40) - 1)
+
 /*
  * The version-1 superblock :
  * All numeric fields are little-endian.
index 1b2dda035f8efcfeb5e979ed463d735e9f3f70b1..fd42aa87c39186791d1a06481fecb6ddd7c03326 100644 (file)
@@ -5,9 +5,9 @@
 
 struct strip_zone
 {
-       sector_t zone_offset;   /* Zone offset in md_dev */
-       sector_t dev_offset;    /* Zone offset in real dev */
-       sector_t size;          /* Zone size */
+       sector_t zone_start;    /* Zone offset in md_dev (in sectors) */
+       sector_t dev_start;     /* Zone offset in real dev (in sectors) */
+       sector_t sectors;       /* Zone size in sectors */
        int nb_dev;             /* # of devices attached to the zone */
        mdk_rdev_t **dev;       /* Devices attached to the zone */
 };
@@ -19,8 +19,8 @@ struct raid0_private_data
        mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */
        int nr_strip_zones;
 
-       sector_t hash_spacing;
-       int preshift;                   /* shift this before divide by hash_spacing */
+       sector_t spacing;
+       int sector_shift; /* shift this before divide by spacing */
 };
 
 typedef struct raid0_private_data raid0_conf_t;