/*
md.c : Multiple Devices driver for Linux
Copyright (C) 1994-96 Marc ZYNGIER
<zyngier@ufr-info-p7.ibp.fr> or
<maz@gloups.fdn.fr>
A lot of inspiration came from hd.c ...
kerneld support by Boris Tobotras <boris@xtalk.msk.su>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
You should have received a copy of the GNU General Public License
(for example /usr/src/linux/COPYING); if not, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/malloc.h>
#include <linux/mm.h>
#include <linux/md.h>
#include <linux/hdreg.h>
#include <linux/stat.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#ifdef CONFIG_KERNELD
#include <linux/kerneld.h>
#endif
#include <linux/errno.h>
#define MAJOR_NR MD_MAJOR
#define MD_DRIVER
#include <linux/blk.h>
static struct hd_struct md_hd_struct[MAX_MD_DEV];
static int md_blocksizes[MAX_MD_DEV];
int md_size[MAX_MD_DEV]={0, };
static void md_geninit (struct gendisk *);
static struct gendisk md_gendisk=
{
MD_MAJOR,
"md",
0,
1,
MAX_MD_DEV,
md_geninit,
md_hd_struct,
md_size,
MAX_MD_DEV,
NULL,
NULL
};
static struct md_personality *pers[MAX_PERSONALITY]={NULL, };
struct md_dev md_dev[MAX_MD_DEV];
static struct gendisk *find_gendisk (kdev_t dev)
{
struct gendisk *tmp=gendisk_head;
while (tmp != NULL)
{
if (tmp->major==MAJOR(dev))
return (tmp);
tmp=tmp->next;
}
return (NULL);
}
char *partition_name (kdev_t dev)
{
static char name[40]; /* This should be long
enough for a device name ! */
struct gendisk *hd = find_gendisk (dev);
if (!hd)
{
printk ("No gendisk entry for dev %s\n", kdevname(dev));
sprintf (name, "dev %s", kdevname(dev));
return (name);
}
return disk_name (hd, MINOR(dev), name); /* routine in genhd.c */
}
static void set_ra (void)
{
int i, j, minra=INT_MAX;
for (i=0; i<MAX_MD_DEV; i++)
{
if (!md_dev[i].pers)
continue;
for (j=0; j<md_dev[i].nb_dev; j++)
if (read_ahead[MAJOR(md_dev[i].devices[j].dev)]<minra)
minra=read_ahead[MAJOR(md_dev[i].devices[j].dev)];
}
read_ahead[MD_MAJOR]=minra;
}
static int do_md_run (int minor, int repart)
{
int pnum, i, min, current_ra, err;
if (!md_dev[minor].nb_dev)
return -EINVAL;
if (md_dev[minor].pers)
return -EBUSY;
md_dev[minor].repartition=repart;
if ((pnum=PERSONALITY(md_dev+minor) >> (PERSONALITY_SHIFT))
>= MAX_PERSONALITY)
return -EINVAL;
if (!pers[pnum])
{
#ifdef CONFIG_KERNELD
char module_name[80];
sprintf (module_name, "md-personality-%d", pnum);
request_module (module_name);
if (!pers[pnum])
#endif
return -EINVAL;
}
min=1 << FACTOR_SHIFT(FACTOR((md_dev+minor)));
for (i=0; i<md_dev[minor].nb_dev; i++)
if (md_dev[minor].devices[i].size<min)
{
printk ("Dev %s smaller than %dk, cannot shrink\n",
partition_name (md_dev[minor].devices[i].dev), min);
return -EINVAL;
}
/* Resize devices according to the factor. It is used to align
partitions size on a given chunk size. */
md_size[minor]=0;
for (i=0; i<md_dev[minor].nb_dev; i++)
{
md_dev[minor].devices[i].size &= ~(min - 1);
md_size[minor] += md_dev[minor].devices[i].size;
md_dev[minor].devices[i].offset=i ? (md_dev[minor].devices[i-1].offset + md_dev[minor].devices[i-1].size) : 0;
}
md_dev[minor].pers=pers[pnum];
if ((err=md_dev[minor].pers->run (minor, md_dev+minor)))
{
md_dev[minor].pers=NULL;
return (err);
}
/* FIXME : We assume here we have blocks
that are twice as large as sectors.
THIS MAY NOT BE TRUE !!! */
md_hd_struct[minor].start_sect=0;
md_hd_struct[minor].nr_sects=md_size[minor]<<1;
/* It would be better to have a per-md-dev read_ahead. Currently,
we only use the smallest read_ahead among md-attached devices */
current_ra=read_ahead[MD_MAJOR];
for (i=0; i<md_dev[minor].nb_dev; i++)
if (current_ra>read_ahead[MAJOR(md_dev[minor].devices[i].dev)])
current_ra=read_ahead[MAJOR(md_dev[minor].devices[i].dev)];
read_ahead[MD_MAJOR]=current_ra;
printk ("START_DEV md%x %s\n", minor, md_dev[minor].pers->name);
return (0);
}
static int do_md_stop (int minor, struct inode *inode)
{
int i;
if (inode->i_count>1 || md_dev[minor].busy>1) /* ioctl : one open channel */
{
printk ("STOP_MD md%x failed : i_count=%d, busy=%d\n", minor, inode->i_count, md_dev[minor].busy);
return -EBUSY;
}
if (md_dev[minor].pers)
{
/* The device won't exist anymore -> flush it now */
fsync_dev (inode->i_rdev);
invalidate_buffers (inode->i_rdev);
md_dev[minor].pers->stop (minor, md_dev+minor);
}
/* Remove locks. */
for (i=0; i<md_dev[minor].nb_dev; i++)
clear_inode (md_dev[minor].devices[i].inode);
md_dev[minor].nb_dev=md_size[minor]=0;
md_hd_struct[minor].nr_sects=0;
md_dev[minor].pers=NULL;
set_ra (); /* calculate new read_ahead */
printk ("STOP_DEV md%x\n", minor);
return (0);
}
static int do_md_add (int minor, kdev_t dev)
{
struct gendisk *gen_real;
int i;
if (MAJOR(dev)==MD_MAJOR || md_dev[minor].nb_dev==MAX_REAL)
return -EINVAL;
if (!fs_may_mount (dev) || md_dev[minor].pers)
return -EBUSY;
if (!(gen_real=find_gendisk (dev)))
return -ENOENT;
i=md_dev[minor].nb_dev++;
md_dev[minor].devices[i].dev=dev;
/* Lock the device by inserting a dummy inode. This doesn't
smell very good, but I need to be consistent with the
mount stuff, specially with fs_may_mount. If someone have
a better idea, please help ! */
md_dev[minor].devices[i].inode=get_empty_inode ();
md_dev[minor].devices[i].inode->i_dev=dev; /* don't care about
other fields */
insert_inode_hash (md_dev[minor].devices[i].inode);
/* Sizes are now rounded at run time */
md_dev[minor].devices[i].size=gen_real->sizes[MINOR(dev)];
printk ("REGISTER_DEV %s to md%x done\n", partition_name(dev), minor);
return (0);
}
static int md_ioctl (struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
int minor, err;
struct hd_geometry *loc = (struct hd_geometry *) arg;
if (!suser())
return -EACCES;
if (((minor=MINOR(inode->i_rdev)) & 0x80) &&
(minor & 0x7f) < MAX_PERSONALITY &&
pers[minor & 0x7f] &&
pers[minor & 0x7f]->ioctl)
return (pers[minor & 0x7f]->ioctl (inode, file, cmd, arg));
if (minor >= MAX_MD_DEV)
return -EINVAL;
switch (cmd)
{
case REGISTER_DEV:
return do_md_add (minor, to_kdev_t ((dev_t) arg));
case START_MD:
return do_md_run (minor, (int) arg);
case STOP_MD:
return do_md_stop (minor, inode);
case BLKGETSIZE: /* Return device size */
if (!arg) return -EINVAL;
err=verify_area (VERIFY_WRITE, (long *) arg, sizeof(long));
if (err)
return err;
put_user (md_hd_struct[MINOR(inode->i_rdev)].nr_sects, (long *) arg);
break;
case BLKFLSBUF:
fsync_dev (inode->i_rdev);
invalidate_buffers (inode->i_rdev);
break;
case BLKRASET:
if (arg > 0xff)
return -EINVAL;
read_ahead[MAJOR(inode->i_rdev)] = arg;
return 0;
case BLKRAGET:
if (!arg) return -EINVAL;
err=verify_area (VERIFY_WRITE, (long *) arg, sizeof(long));
if (err)
return err;
put_user (read_ahead[MAJOR(inode->i_rdev)], (long *) arg);
break;
/* We have a problem here : there is no easy way to give a CHS
virtual geometry. We currently pretend that we have a 2 heads
4 sectors (with a BIG number of cylinders...). This drives dosfs
just mad... ;-) */
case HDIO_GETGEO:
if (!loc) return -EINVAL;
err = verify_area(VERIFY_WRITE, loc, sizeof(*loc));
if (err)
return err;
put_user (2, (char *) &loc->heads);
put_user (4, (char *) &loc->sectors);
put_user (md_hd_struct[minor].nr_sects/8, (short *) &loc->cylinders);
put_user (md_hd_struct[MINOR(inode->i_rdev)].start_sect,
(long *) &loc->start);
break;
RO_IOCTLS(inode->i_rdev,arg);
default:
printk ("Unknown md_ioctl %d\n", cmd);
return -EINVAL;
}
return (0);
}
static int md_open (struct inode *inode, struct file *file)
{
int minor=MINOR(inode->i_rdev);
md_dev[minor].busy++;
return (0); /* Always succeed */
}
static void md_release (struct inode *inode, struct file *file)
{
int minor=MINOR(inode->i_rdev);
sync_dev (inode->i_rdev);
md_dev[minor].busy--;
}
static int md_read (struct inode *inode, struct file *file,
char *buf, int count)
{
int minor=MINOR(inode->i_rdev);
if (!md_dev[minor].pers) /* Check if device is being run */
return -ENXIO;
return block_read (inode, file, buf, count);
}
static int md_write (struct inode *inode, struct file *file,
const char *buf, int count)
{
int minor=MINOR(inode->i_rdev);
if (!md_dev[minor].pers) /* Check if device is being run */
return -ENXIO;
return block_write (inode, file, buf, count);
}
static struct file_operations md_fops=
{
NULL,
md_read,
md_write,
NULL,
NULL,
md_ioctl,
NULL,
md_open,
md_release,
block_fsync
};
int md_map (int minor, kdev_t *rdev, unsigned long *rsector, unsigned long size)
{
if ((unsigned int) minor >= MAX_MD_DEV)
{
printk ("Bad md device %d\n", minor);
return (-1);
}
if (!md_dev[minor].pers)
{
printk ("Oops ! md%d not running, giving up !\n", minor);
return (-1);
}
return (md_dev[minor].pers->map(md_dev+minor, rdev, rsector, size));
}
static void do_md_request (void)
{
printk ("Got md request, not good...");
return;
}
static struct symbol_table md_symbol_table=
{
#include <linux/symtab_begin.h>
X(md_size),
X(register_md_personality),
X(unregister_md_personality),
X(partition_name),
#include <linux/symtab_end.h>
};
static void md_geninit (struct gendisk *gdisk)
{
int i;
for(i=0;i<MAX_MD_DEV;i++)
{
md_blocksizes[i] = 1024;
md_gendisk.part[i].start_sect=-1; /* avoid partition check */
md_gendisk.part[i].nr_sects=0;
md_dev[i].pers=NULL;
}
blksize_size[MAJOR_NR] = md_blocksizes;
register_symtab (&md_symbol_table);
proc_register(&proc_root,
&(struct proc_dir_entry)
{
PROC_MD, 6, "mdstat",
S_IFREG | S_IRUGO, 1, 0, 0,
});
}
int get_md_status (char *page)
{
int sz=0, i, j, size;
sz+=sprintf( page+sz, "Personalities : ");
for (i=0; i<MAX_PERSONALITY; i++)
if (pers[i])
sz+=sprintf (page+sz, "[%d %s] ", i, pers[i]->name);
page[sz-1]='\n';
sz+=sprintf (page+sz, "read_ahead ");
if (read_ahead[MD_MAJOR]==INT_MAX)
sz+=sprintf (page+sz, "not set\n");
else
sz+=sprintf (page+sz, "%d sectors\n", read_ahead[MD_MAJOR]);
for (i=0; i<MAX_MD_DEV; i++)
{
sz+=sprintf (page+sz, "md%d : %sactive", i, md_dev[i].pers ? "" : "in");
if (md_dev[i].pers)
sz+=sprintf (page+sz, " %s", md_dev[i].pers->name);
size=0;
for (j=0; j<md_dev[i].nb_dev; j++)
{
sz+=sprintf (page+sz, " %s",
partition_name(md_dev[i].devices[j].dev));
size+=md_dev[i].devices[j].size;
}
if (md_dev[i].nb_dev)
sz+=sprintf (page+sz, " %d blocks", size);
if (!md_dev[i].pers)
{
sz+=sprintf (page+sz, "\n");
continue;
}
if (md_dev[i].pers->max_invalid_dev)
sz+=sprintf (page+sz, " maxfault=%ld", MAX_FAULT(md_dev+i));
sz+=sprintf (page+sz, " %dk %s\n", 1<<FACTOR_SHIFT(FACTOR(md_dev+i)),
md_dev[i].pers == pers[LINEAR>>PERSONALITY_SHIFT] ?
"rounding" : "chunks");
sz+=md_dev[i].pers->status (page+sz, i, md_dev+i);
}
return (sz);
}
int register_md_personality (int p_num, struct md_personality *p)
{
int i=(p_num >> PERSONALITY_SHIFT);
if (i >= MAX_PERSONALITY)
return -EINVAL;
if (pers[i])
return -EBUSY;
pers[i]=p;
printk ("%s personality registered\n", p->name);
return 0;
}
int unregister_md_personality (int p_num)
{
int i=(p_num >> PERSONALITY_SHIFT);
if (i >= MAX_PERSONALITY)
return -EINVAL;
printk ("%s personality unregistered\n", pers[i]->name);
pers[i]=NULL;
return 0;
}
void linear_init (void);
void raid0_init (void);
void raid1_init (void);
void raid5_init (void);
int md_init (void)
{
printk ("md driver %s MAX_MD_DEV=%d, MAX_REAL=%d\n", MD_VERSION, MAX_MD_DEV, MAX_REAL);
if (register_blkdev (MD_MAJOR, "md", &md_fops))
{
printk ("Unable to get major %d for md\n", MD_MAJOR);
return (-1);
}
blk_dev[MD_MAJOR].request_fn=DEVICE_REQUEST;
blk_dev[MD_MAJOR].current_request=NULL;
read_ahead[MD_MAJOR]=INT_MAX;
md_gendisk.next=gendisk_head;
gendisk_head=&md_gendisk;
#ifdef CONFIG_MD_LINEAR
linear_init ();
#endif
#ifdef CONFIG_MD_STRIPED
raid0_init ();
#endif
return (0);
}