Filter driver updates:

- optionally vectorize I/O requests to work around hardware bugs
- extend default buffer size to cover MFS's default maximum request size
- use mmap directly, rather than alloc_contig
- add 'nil' checksum type for comparison with layout
- minor style corrections
This commit is contained in:
David van Moolenbroek 2009-12-21 23:30:01 +00:00
parent 492d663444
commit 92ae5c81ae
5 changed files with 151 additions and 64 deletions

View File

@ -793,48 +793,118 @@ static int paired_sendrec(message *m1, message *m2, int both)
return r; return r;
} }
/*===========================================================================*
* single_grant *
*===========================================================================*/
static int single_grant(endpoint_t endpt, vir_bytes buf, int access,
	cp_grant_id_t *gid, iovec_s_t vector[NR_IOREQS], size_t *sizep)
{
/* Set up the grants for one vectored request to the driver 'endpt'.
 * The *sizep bytes starting at 'buf' are divided over the elements of
 * 'vector', each element getting its own direct grant with the given
 * access rights. A further grant, covering the used part of the vector
 * itself, is stored in *gid. The return value is the number of vector
 * elements that were filled in. Any invalid grant results in a panic.
 * Note that *sizep is only read here, never written.
 */
	cp_grant_id_t id;
	size_t left, piece;
	int used;

	left = *sizep;

	/* By default the whole request goes into one element. A positive
	 * CHUNK_SIZE limits the size of each element instead. Functionally
	 * this changes nothing; it merely works around a weird performance
	 * bug with large DMA PRDs on some machines.
	 */
	piece = left;
	if (CHUNK_SIZE > 0)
		piece = CHUNK_SIZE;

	/* Hand out one grant per vector element until all data is covered. */
	used = 0;
	while (left > 0 && used < NR_IOREQS) {
		/* Never grant more than what is left, and make the very last
		 * slot of the vector absorb all of the remaining data.
		 */
		if (used == NR_IOREQS - 1 || piece > left)
			piece = left;

		id = cpf_grant_direct(endpt, buf, piece, access);
		if (!GRANT_VALID(id))
			panic(__FILE__, "invalid grant", id);

		vector[used].iov_grant = id;
		vector[used].iov_size = piece;

		left -= piece;
		buf += piece;
		used++;
	}

	/* The driver also needs access to the used part of the vector. */
	*gid = cpf_grant_direct(endpt, (vir_bytes) vector,
		sizeof(vector[0]) * used, CPF_READ | CPF_WRITE);
	if (!GRANT_VALID(*gid))
		panic(__FILE__, "invalid grant", *gid);

	return used;
}
/*===========================================================================* /*===========================================================================*
* paired_grant * * paired_grant *
*===========================================================================*/ *===========================================================================*/
static void paired_grant(char *buf1, char *buf2, size_t size, int request, static int paired_grant(char *buf1, char *buf2, int request,
cp_grant_id_t *gids, int both) cp_grant_id_t *gids, iovec_s_t vectors[2][NR_IOREQS], size_t *sizes,
int both)
{ {
/* Create memory grants. If USE_MIRROR, grant to both drivers, /* Create memory grants, either to one or to both drivers.
* otherwise only to the main one.
*/ */
cp_grant_id_t gid; cp_grant_id_t gid;
int access; int count, access;
count = 0;
access = (request == FLT_WRITE) ? CPF_READ : CPF_WRITE; access = (request == FLT_WRITE) ? CPF_READ : CPF_WRITE;
if(driver[DRIVER_MAIN].endpt > 0) { if(driver[DRIVER_MAIN].endpt > 0) {
gid = cpf_grant_direct(driver[DRIVER_MAIN].endpt, count = single_grant(driver[DRIVER_MAIN].endpt,
(vir_bytes) buf1, size, access); (vir_bytes) buf1, access, &gids[0], vectors[0],
if(!GRANT_VALID(gid)) &sizes[0]);
panic(__FILE__, "invalid grant", gid);
gids[0] = gid;
} }
if (both) { if (both) {
if(driver[DRIVER_BACKUP].endpt > 0) { if(driver[DRIVER_BACKUP].endpt > 0) {
gid = cpf_grant_direct(driver[DRIVER_BACKUP].endpt, count = single_grant(driver[DRIVER_BACKUP].endpt,
(vir_bytes) buf2, size, access); (vir_bytes) buf2, access, &gids[1],
if(!GRANT_VALID(gid)) vectors[1], &sizes[1]);
panic(__FILE__, "invalid grant", gid);
gids[1] = gid;
} }
} }
} }
/*===========================================================================*
* single_revoke *
*===========================================================================*/
void single_revoke(cp_grant_id_t gid, iovec_s_t vector[NR_IOREQS],
	size_t *sizep, int count)
{
/* Tear down all grants belonging to a request to a single driver: first
 * the grants of the first 'count' vector elements, then the grant 'gid'
 * for the vector itself. While doing so, the iov_size value found in each
 * element is subtracted from *sizep, so that *sizep ends up reflecting
 * the actual I/O performed (presumably the driver leaves the number of
 * untransferred bytes in iov_size - confirm against the driver protocol).
 * NOTE(review): unlike single_grant, this function is not declared
 * static - confirm whether external linkage is intended.
 */
	iovec_s_t *iov;
	int left;

	/* Walk the used elements front to back. */
	iov = vector;
	for (left = count; left > 0; left--) {
		cpf_revoke(iov->iov_grant);

		*sizep -= iov->iov_size;

		iov++;
	}

	/* Finally, withdraw access to the vector itself. */
	cpf_revoke(gid);
}
/*===========================================================================* /*===========================================================================*
* paired_revoke * * paired_revoke *
*===========================================================================*/ *===========================================================================*/
static void paired_revoke(cp_grant_id_t gid1, cp_grant_id_t gid2, int both) static void paired_revoke(cp_grant_id_t *gids, iovec_s_t vectors[2][NR_IOREQS],
size_t *sizes, int count, int both)
{ {
cpf_revoke(gid1); /* Revoke grants to drivers for a single request.
*/
single_revoke(gids[0], vectors[0], &sizes[0], count);
if (both) if (both)
cpf_revoke(gid2); single_revoke(gids[1], vectors[1], &sizes[1], count);
} }
/*===========================================================================* /*===========================================================================*
@ -842,30 +912,35 @@ static void paired_revoke(cp_grant_id_t gid1, cp_grant_id_t gid2, int both)
*===========================================================================*/ *===========================================================================*/
int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request) int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request)
{ {
iovec_s_t vectors[2][NR_IOREQS];
message m1, m2; message m1, m2;
cp_grant_id_t gids[2]; cp_grant_id_t gids[2];
int r, both; size_t sizes[2];
int r, both, count;
gids[0] = gids[1] = GRANT_INVALID; gids[0] = gids[1] = GRANT_INVALID;
sizes[0] = sizes[1] = *sizep;
/* Send two requests only if mirroring is enabled and the given request /* Send two requests only if mirroring is enabled and the given request
* is either FLT_READ2 or FLT_WRITE. * is either FLT_READ2 or FLT_WRITE.
*/ */
both = (USE_MIRROR && request != FLT_READ); both = (USE_MIRROR && request != FLT_READ);
m1.m_type = (request == FLT_WRITE) ? DEV_WRITE_S : DEV_READ_S; count = paired_grant(bufa, bufb, request, gids, vectors, sizes, both);
m1.COUNT = *sizep;
m1.m_type = (request == FLT_WRITE) ? DEV_SCATTER_S : DEV_GATHER_S;
m1.COUNT = count;
m1.POSITION = ex64lo(pos); m1.POSITION = ex64lo(pos);
m1.HIGHPOS = ex64hi(pos); m1.HIGHPOS = ex64hi(pos);
m2 = m1; m2 = m1;
paired_grant(bufa, bufb, *sizep, request, gids, both);
m1.IO_GRANT = (char *) gids[0]; m1.IO_GRANT = (char *) gids[0];
m2.IO_GRANT = (char *) gids[1]; m2.IO_GRANT = (char *) gids[1];
r = paired_sendrec(&m1, &m2, both); r = paired_sendrec(&m1, &m2, both);
paired_revoke(gids[0], gids[1], both); paired_revoke(gids, vectors, sizes, count, both);
if(r != OK) { if(r != OK) {
#if DEBUG #if DEBUG
@ -875,7 +950,7 @@ int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request)
return r; return r;
} }
if (m1.m_type != TASK_REPLY || m1.REP_STATUS < 0) { if (m1.m_type != TASK_REPLY || m1.REP_STATUS != OK) {
printf("Filter: unexpected/invalid reply from main driver: " printf("Filter: unexpected/invalid reply from main driver: "
"(%x, %d)\n", m1.m_type, m1.REP_STATUS); "(%x, %d)\n", m1.m_type, m1.REP_STATUS);
@ -883,26 +958,23 @@ int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request)
(m1.m_type == TASK_REPLY) ? m1.REP_STATUS : EFAULT); (m1.m_type == TASK_REPLY) ? m1.REP_STATUS : EFAULT);
} }
if (m1.REP_STATUS != *sizep) { if (sizes[0] != *sizep) {
printf("Filter: truncated reply %u to I/O request of size " printf("Filter: truncated reply from main driver\n");
"0x%x at 0x%s; size 0x%s\n",
m1.REP_STATUS, *sizep,
print64(pos), print64(disk_size));
/* If the driver returned a value *larger* than we requested, /* If the driver returned a value *larger* than we requested,
* OR if we did NOT exceed the disk size, then we should * OR if we did NOT exceed the disk size, then we should
* report the driver for acting strangely! * report the driver for acting strangely!
*/ */
if (m1.REP_STATUS > *sizep || if (sizes[0] < 0 || sizes[0] > *sizep ||
cmp64(add64u(pos, *sizep), disk_size) < 0) cmp64(add64u(pos, sizes[0]), disk_size) < 0)
return bad_driver(DRIVER_MAIN, BD_PROTO, EFAULT); return bad_driver(DRIVER_MAIN, BD_PROTO, EFAULT);
/* Return the actual size. */ /* Return the actual size. */
*sizep = m1.REP_STATUS; *sizep = sizes[0];
} }
if (both) { if (both) {
if (m2.m_type != TASK_REPLY || m2.REP_STATUS < 0) { if (m2.m_type != TASK_REPLY || m2.REP_STATUS != OK) {
printf("Filter: unexpected/invalid reply from " printf("Filter: unexpected/invalid reply from "
"backup driver (%x, %d)\n", "backup driver (%x, %d)\n",
m2.m_type, m2.REP_STATUS); m2.m_type, m2.REP_STATUS);
@ -911,18 +983,18 @@ int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request)
m2.m_type == TASK_REPLY ? m2.REP_STATUS : m2.m_type == TASK_REPLY ? m2.REP_STATUS :
EFAULT); EFAULT);
} }
if (m2.REP_STATUS != *sizep) { if (sizes[1] != *sizep) {
printf("Filter: truncated reply from backup driver\n"); printf("Filter: truncated reply from backup driver\n");
/* As above */ /* As above */
if (m2.REP_STATUS > *sizep || if (sizes[1] < 0 || sizes[1] > *sizep ||
cmp64(add64u(pos, *sizep), disk_size) < 0) cmp64(add64u(pos, sizes[1]), disk_size) < 0)
return bad_driver(DRIVER_BACKUP, BD_PROTO, return bad_driver(DRIVER_BACKUP, BD_PROTO,
EFAULT); EFAULT);
/* Return the actual size. */ /* Return the actual size. */
if (*sizep >= m2.REP_STATUS) if (*sizep >= sizes[1])
*sizep = m2.REP_STATUS; *sizep = sizes[1];
} }
} }

View File

@ -21,6 +21,7 @@
#define SECTOR_SIZE 512 #define SECTOR_SIZE 512
enum { enum {
ST_NIL, /* Zero checksums */
ST_XOR, /* XOR-based checksums */ ST_XOR, /* XOR-based checksums */
ST_CRC, /* CRC32-based checksums */ ST_CRC, /* CRC32-based checksums */
ST_MD5 /* MD5-based checksums */ ST_MD5 /* MD5-based checksums */
@ -53,8 +54,8 @@ enum {
#define DRIVER_MAIN 0 #define DRIVER_MAIN 0
#define DRIVER_BACKUP 1 #define DRIVER_BACKUP 1
/* Requests for more than this many bytes need to go through malloc(). */ /* Requests for more than this many bytes will be allocated dynamically. */
#define BUF_SIZE (128 * 1024) #define BUF_SIZE (256 * 1024)
#define SBUF_SIZE (BUF_SIZE * 2) #define SBUF_SIZE (BUF_SIZE * 2)
#define LABEL_SIZE 32 #define LABEL_SIZE 32
@ -72,6 +73,7 @@ extern int NR_SUM_SEC;
extern int NR_RETRIES; extern int NR_RETRIES;
extern int NR_RESTARTS; extern int NR_RESTARTS;
extern int DRIVER_TIMEOUT; extern int DRIVER_TIMEOUT;
extern int CHUNK_SIZE;
extern char MAIN_LABEL[LABEL_SIZE]; extern char MAIN_LABEL[LABEL_SIZE];
extern char BACKUP_LABEL[LABEL_SIZE]; extern char BACKUP_LABEL[LABEL_SIZE];

View File

@ -22,13 +22,15 @@ int BAD_SUM_ERROR = 1; /* bad checksums are considered a driver error */
int USE_SUM_LAYOUT = 0; /* use checksumming layout on disk */ int USE_SUM_LAYOUT = 0; /* use checksumming layout on disk */
int NR_SUM_SEC = 8; /* number of checksums per checksum sector */ int NR_SUM_SEC = 8; /* number of checksums per checksum sector */
int SUM_TYPE = 0; /* use XOR, CRC or MD5 */ int SUM_TYPE = ST_CRC; /* use NIL, XOR, CRC, or MD5 */
int SUM_SIZE = 0; /* size of the stored checksum */ int SUM_SIZE = 0; /* size of the stored checksum */
int NR_RETRIES = 3; /* number of times the request will be retried (N) */ int NR_RETRIES = 3; /* number of times the request will be retried (N) */
int NR_RESTARTS = 3; /* number of times a driver will be restarted (M) */ int NR_RESTARTS = 3; /* number of times a driver will be restarted (M) */
int DRIVER_TIMEOUT = 5; /* timeout in seconds to declare a driver dead (T) */ int DRIVER_TIMEOUT = 5; /* timeout in seconds to declare a driver dead (T) */
int CHUNK_SIZE = 0; /* driver requests will be vectorized at this size */
char MAIN_LABEL[LABEL_SIZE] = ""; /* main disk driver label */ char MAIN_LABEL[LABEL_SIZE] = ""; /* main disk driver label */
char BACKUP_LABEL[LABEL_SIZE] = ""; /* backup disk driver label */ char BACKUP_LABEL[LABEL_SIZE] = ""; /* backup disk driver label */
int MAIN_MINOR = -1; /* main partition minor nr */ int MAIN_MINOR = -1; /* main partition minor nr */
@ -46,6 +48,7 @@ struct optset optset_table[] = {
{ "nosum", OPT_BOOL, &USE_CHECKSUM, 0 }, { "nosum", OPT_BOOL, &USE_CHECKSUM, 0 },
{ "mirror", OPT_BOOL, &USE_MIRROR, 1 }, { "mirror", OPT_BOOL, &USE_MIRROR, 1 },
{ "nomirror", OPT_BOOL, &USE_MIRROR, 0 }, { "nomirror", OPT_BOOL, &USE_MIRROR, 0 },
{ "nil", OPT_BOOL, &SUM_TYPE, ST_NIL },
{ "xor", OPT_BOOL, &SUM_TYPE, ST_XOR }, { "xor", OPT_BOOL, &SUM_TYPE, ST_XOR },
{ "crc", OPT_BOOL, &SUM_TYPE, ST_CRC }, { "crc", OPT_BOOL, &SUM_TYPE, ST_CRC },
{ "md5", OPT_BOOL, &SUM_TYPE, ST_MD5 }, { "md5", OPT_BOOL, &SUM_TYPE, ST_MD5 },
@ -57,6 +60,7 @@ struct optset optset_table[] = {
{ "M", OPT_INT, &NR_RESTARTS, 10 }, { "M", OPT_INT, &NR_RESTARTS, 10 },
{ "timeout", OPT_INT, &DRIVER_TIMEOUT, 10 }, { "timeout", OPT_INT, &DRIVER_TIMEOUT, 10 },
{ "T", OPT_INT, &DRIVER_TIMEOUT, 10 }, { "T", OPT_INT, &DRIVER_TIMEOUT, 10 },
{ "chunk", OPT_INT, &CHUNK_SIZE, 10 },
{ NULL } { NULL }
}; };
@ -298,6 +302,9 @@ static int parse_arguments(int argc, char *argv[])
/* Determine the checksum size for the chosen checksum type. */ /* Determine the checksum size for the chosen checksum type. */
switch (SUM_TYPE) { switch (SUM_TYPE) {
case ST_NIL:
SUM_SIZE = 4; /* for the sector number */
break;
case ST_XOR: case ST_XOR:
SUM_SIZE = 16; /* compatibility */ SUM_SIZE = 16; /* compatibility */
break; break;
@ -327,6 +334,7 @@ static int parse_arguments(int argc, char *argv[])
printf(" SUM_TYPE : "); printf(" SUM_TYPE : ");
switch (SUM_TYPE) { switch (SUM_TYPE) {
case ST_NIL: printf("nil"); break;
case ST_XOR: printf("xor"); break; case ST_XOR: printf("xor"); break;
case ST_CRC: printf("crc"); break; case ST_CRC: printf("crc"); break;
case ST_MD5: printf("md5"); break; case ST_MD5: printf("md5"); break;
@ -376,8 +384,16 @@ static void got_signal(void)
exit(0); exit(0);
} }
/* SEF functions and variables. */ /*===========================================================================*
FORWARD _PROTOTYPE( void sef_local_startup, (void) ); * sef_local_startup *
*===========================================================================*/
static void sef_local_startup(void)
{
/* Local SEF startup routine for the filter driver. It does nothing but
 * call sef_startup(): there is no live update support for now.
 */
/* Let SEF perform startup. */
sef_startup();
}
/*===========================================================================* /*===========================================================================*
* main * * main *
@ -396,7 +412,7 @@ int main(int argc, char *argv[])
return 1; return 1;
} }
if ((buf_array = alloc_contig(BUF_SIZE, 0, NULL)) == NULL) if ((buf_array = flt_malloc(BUF_SIZE, NULL, 0)) == NULL)
panic(__FILE__, "no memory available", NO_NUM); panic(__FILE__, "no memory available", NO_NUM);
sum_init(); sum_init();
@ -450,15 +466,3 @@ int main(int argc, char *argv[])
return 0; return 0;
} }
/*===========================================================================*
* sef_local_startup *
*===========================================================================*/
PRIVATE void sef_local_startup()
{
/* Local SEF startup routine for the filter driver. It does nothing but
 * call sef_startup(): there is no live update support for now.
 */
/* Let SEF perform startup. */
sef_startup();
}

View File

@ -23,9 +23,9 @@ void sum_init(void)
{ {
/* Initialize buffers. */ /* Initialize buffers. */
ext_array = alloc_contig(SBUF_SIZE, 0, NULL); ext_array = flt_malloc(SBUF_SIZE, NULL, 0);
rb0_array = alloc_contig(SBUF_SIZE, 0, NULL); rb0_array = flt_malloc(SBUF_SIZE, NULL, 0);
rb1_array = alloc_contig(SBUF_SIZE, 0, NULL); rb1_array = flt_malloc(SBUF_SIZE, NULL, 0);
if (ext_array == NULL || rb0_array == NULL || rb1_array == NULL) if (ext_array == NULL || rb0_array == NULL || rb1_array == NULL)
panic(__FILE__, "no memory available", NO_NUM); panic(__FILE__, "no memory available", NO_NUM);
@ -44,6 +44,14 @@ static void calc_sum(unsigned sector, char *data, char *sum)
struct MD5Context ctx; struct MD5Context ctx;
switch(SUM_TYPE) { switch(SUM_TYPE) {
case ST_NIL:
/* No checksum at all */
q = (unsigned long *) sum;
*q = sector;
break;
case ST_XOR: case ST_XOR:
/* Basic XOR checksum */ /* Basic XOR checksum */
p = (unsigned long *) data; p = (unsigned long *) data;

View File

@ -20,8 +20,9 @@ char *flt_malloc(size_t size, char *sbuf, size_t ssize)
if (size <= ssize) if (size <= ssize)
return sbuf; return sbuf;
p = alloc_contig(size, 0, NULL); p = mmap(NULL, size, PROT_READ | PROT_WRITE,
if (p == NULL) MAP_PREALLOC | MAP_CONTIG | MAP_ANON, -1, 0);
if (p == MAP_FAILED)
panic(__FILE__, "out of memory", size); panic(__FILE__, "out of memory", size);
return p; return p;